diff --git a/docker-compose.yml b/docker-compose.yml
index 8a551c0..2407e17 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -17,6 +17,20 @@ services:
     volumes:
       - ${RUNTIME_PATH}/data:/app/data
 
+  stock-lab:
+    build:
+      context: ./stock-lab
+      args:
+        APP_VERSION: ${APP_VERSION:-dev}
+    container_name: stock-lab
+    restart: unless-stopped
+    ports:
+      - "18500:8000"
+    environment:
+      - TZ=${TZ:-Asia/Seoul}
+    volumes:
+      - ${STOCK_DATA_PATH:-./data/stock}:/app/data
+
   travel-proxy:
     build: ./travel-proxy
     container_name: travel-proxy
diff --git a/nginx/default.conf b/nginx/default.conf
index 2cf0ed9..e3c5832 100644
--- a/nginx/default.conf
+++ b/nginx/default.conf
@@ -54,6 +54,17 @@ server {
     proxy_pass http://travel-proxy:8000/api/travel/;
   }
 
+  # stock API
+  location /api/stock/ {
+    proxy_http_version 1.1;
+    proxy_set_header Host $host;
+    proxy_set_header X-Real-IP $remote_addr;
+    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+    proxy_set_header X-Forwarded-Proto $scheme;
+    proxy_pass http://stock-lab:8000/api/stock/;
+  }
+
+
   # API proxy (the key point: no duplicated /api/ prefix)
   location /api/ {
     proxy_http_version 1.1;
diff --git a/scripts/deploy.sh b/scripts/deploy.sh
index 14299cd..7e7c666 100644
--- a/scripts/deploy.sh
+++ b/scripts/deploy.sh
@@ -44,7 +44,7 @@ rsync -a --delete \
 bash "$SRC/scripts/deploy-nas.sh"
 
 cd "$DST"
-docker-compose up -d --build backend travel-proxy frontend
+docker-compose up -d --build backend travel-proxy stock-lab frontend
 
 # [Permission Fix]
 # Change ownership of files created while the deployer ran as root back to the host user
diff --git a/stock-lab/Dockerfile b/stock-lab/Dockerfile
new file mode 100644
index 0000000..4345e2e
--- /dev/null
+++ b/stock-lab/Dockerfile
@@ -0,0 +1,13 @@
+FROM python:3.12-alpine
+
+# Surface the build arg passed from docker-compose.yml so /api/version can read it
+ARG APP_VERSION=dev
+ENV APP_VERSION=${APP_VERSION}
+
+WORKDIR /app
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/stock-lab/app/db.py b/stock-lab/app/db.py
new file mode 100644
index 0000000..b91aace
--- /dev/null
+++ b/stock-lab/app/db.py
@@ -0,0 +1,54 @@
+import sqlite3
+import os
+import hashlib
+from typing import List, Dict, Any
+
+DB_PATH = "/app/data/stock.db"
+
+def _conn() -> sqlite3.Connection:
+    os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
+    conn = sqlite3.connect(DB_PATH)
+    conn.row_factory = sqlite3.Row
+    return conn
+
+def init_db():
+    with _conn() as conn:
+        conn.execute("""
+            CREATE TABLE IF NOT EXISTS articles (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                hash TEXT UNIQUE NOT NULL,
+                title TEXT NOT NULL,
+                link TEXT,
+                summary TEXT,
+                press TEXT,
+                pub_date TEXT,
+                crawled_at TEXT
+            )
+        """)
+        conn.execute("CREATE INDEX IF NOT EXISTS idx_articles_crawled ON articles(crawled_at DESC)")
+
+def save_articles(articles: List[Dict[str, str]]) -> int:
+    count = 0
+    with _conn() as conn:
+        for a in articles:
+            # dedup hash over title + link (md5 is for dedup only, not security)
+            unique_str = f"{a['title']}|{a['link']}"
+            h = hashlib.md5(unique_str.encode()).hexdigest()
+
+            try:
+                conn.execute("""
+                    INSERT INTO articles (hash, title, link, summary, press, pub_date, crawled_at)
+                    VALUES (?, ?, ?, ?, ?, ?, ?)
+                """, (h, a['title'], a['link'], a['summary'], a['press'], a['date'], a['crawled_at']))
+                count += 1
+            except sqlite3.IntegrityError:
+                pass  # duplicate article; already stored
+    return count
+
+def get_latest_articles(limit: int = 20) -> List[Dict[str, Any]]:
+    with _conn() as conn:
+        rows = conn.execute(
+            "SELECT * FROM articles ORDER BY crawled_at DESC, id DESC LIMIT ?",
+            (limit,)
+        ).fetchall()
+        return [dict(r) for r in rows]
diff --git a/stock-lab/app/main.py b/stock-lab/app/main.py
new file mode 100644
index 0000000..5756537
--- /dev/null
+++ b/stock-lab/app/main.py
@@ -0,0 +1,47 @@
+import os
+from fastapi import FastAPI
+from apscheduler.schedulers.background import BackgroundScheduler
+
+from .db import init_db, save_articles, get_latest_articles
+from .scraper import fetch_market_news
+
+app = FastAPI()
+scheduler = BackgroundScheduler(timezone=os.getenv("TZ", "Asia/Seoul"))
+
+@app.on_event("startup")
+def on_startup():
+    init_db()
+
+    # scrape news every morning at 08:00
+    scheduler.add_job(run_scraping_job, "cron", hour="8", minute="0")
+
+    # also run once at startup (if there is no data yet)
+    if not get_latest_articles(1):
+        run_scraping_job()
+
+    scheduler.start()
+
+def run_scraping_job():
+    print("[StockLab] Starting news scraping...")
+    articles = fetch_market_news()
+    count = save_articles(articles)
+    print(f"[StockLab] Saved {count} new articles.")
+
+@app.get("/health")
+def health():
+    return {"ok": True}
+
+@app.get("/api/stock/news")
+def get_news(limit: int = 20):
+    """Return the latest stock news articles."""
+    return get_latest_articles(limit)
+
+@app.post("/api/admin/stock/scrap")
+def trigger_scrap():
+    """Manually trigger a scraping run."""
+    run_scraping_job()
+    return {"ok": True}
+
+@app.get("/api/version")
+def version():
+    return {"version": os.getenv("APP_VERSION", "dev")}
diff --git a/stock-lab/app/scraper.py b/stock-lab/app/scraper.py
new file mode 100644
index 0000000..9d1ec62
--- /dev/null
+++ b/stock-lab/app/scraper.py
@@ -0,0 +1,78 @@
+import requests
+from bs4 import BeautifulSoup
+from typing import Any, Dict, List
+import time
+
+# Naver Finance "Main News" page
+NAVER_FINANCE_NEWS_URL = "https://finance.naver.com/news/mainnews.naver"
+
+def fetch_market_news() -> List[Dict[str, str]]:
+    """
+    Scrape the Naver Finance "Main News" page.
+    Returns: [{"title": "...", "link": "...", "summary": "...", "date": "..."}, ...]
+    """
+    try:
+        headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
+        }
+        resp = requests.get(NAVER_FINANCE_NEWS_URL, headers=headers, timeout=10)
+        resp.raise_for_status()
+
+        soup = BeautifulSoup(resp.content, "html.parser", from_encoding="cp949")
+
+        # extract the main news list
+        # structure: div.mainNewsList > ul > li
+        articles = []
+        news_list = soup.select(".mainNewsList ul li")
+
+        for li in news_list:
+            # a thumbnail may or may not be present
+            dl = li.select_one("dl")
+            if not dl:
+                continue
+
+            # title (dd.articleSubject > a)
+            subject_tag = dl.select_one(".articleSubject a")
+            if not subject_tag:
+                continue
+
+            title = subject_tag.get_text(strip=True)
+            link = "https://finance.naver.com" + subject_tag["href"]
+
+            # summary (dd.articleSummary)
+            summary_tag = dl.select_one(".articleSummary")
+            summary = ""
+            press = ""
+            date = ""
+
+            if summary_tag:
+                # pull out press/date, then strip those tags from the summary text
+                for child in summary_tag.select(".press, .wdate"):
+                    if "press" in child.get("class", []):
+                        press = child.get_text(strip=True)
+                    if "wdate" in child.get("class", []):
+                        date = child.get_text(strip=True)
+                    child.extract()
+                summary = summary_tag.get_text(strip=True)
+
+            articles.append({
+                "title": title,
+                "link": link,
+                "summary": summary,
+                "press": press,
+                "date": date,
+                "crawled_at": time.strftime("%Y-%m-%d %H:%M:%S")
+            })
+
+        return articles
+
+    except Exception as e:
+        print(f"[StockLab] Scraping failed: {e}")
+        return []
+
+def fetch_major_indices() -> Dict[str, Any]:
+    """
+    KOSPI, KOSDAQ, USD/KRW and other key indices (Naver Finance home)
+    """
+    # ... (to be implemented; returns an empty dict for now)
+    return {}
diff --git a/stock-lab/requirements.txt b/stock-lab/requirements.txt
new file mode 100644
index 0000000..1e8c0ba
--- /dev/null
+++ b/stock-lab/requirements.txt
@@ -0,0 +1,5 @@
+fastapi
+uvicorn
+requests
+beautifulsoup4
+apscheduler
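
Smoke test — a minimal sketch (not part of the diff) for verifying the new service once the stack is up. It assumes port 18500 is published on localhost as in docker-compose.yml; note that nginx only forwards /api/stock/, so /health, /api/version, and the admin trigger are only reachable on the published port (or from inside the compose network).

import requests

BASE = "http://localhost:18500"  # assumed host/port for a direct check, bypassing nginx

# liveness and build version
assert requests.get(f"{BASE}/health", timeout=5).json() == {"ok": True}
print(requests.get(f"{BASE}/api/version", timeout=5).json())

# trigger a manual scrape, then read back the newest stored articles
requests.post(f"{BASE}/api/admin/stock/scrap", timeout=30).raise_for_status()
for a in requests.get(f"{BASE}/api/stock/news", params={"limit": 5}, timeout=5).json():
    print(a["pub_date"], a["press"], a["title"])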