feat: add stock-lab service for financial news scraping and analysis

2026-01-26 02:56:52 +09:00
parent 421e52b205
commit d8e4e0461c
8 changed files with 219 additions and 1 deletion


@@ -17,6 +17,20 @@ services:
volumes:
- ${RUNTIME_PATH}/data:/app/data
stock-lab:
build:
context: ./stock-lab
args:
APP_VERSION: ${APP_VERSION:-dev}
container_name: stock-lab
restart: unless-stopped
ports:
- "18500:8000"
environment:
- TZ=${TZ:-Asia/Seoul}
volumes:
- ${STOCK_DATA_PATH:-./data/stock}:/app/data
travel-proxy:
build: ./travel-proxy
container_name: travel-proxy
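
As a quick smoke test once the stack is up, the published port mapping above (host 18500 to container 8000) can be exercised directly; a minimal sketch in Python, assuming the Docker host is reachable as localhost:

import requests

# docker-compose publishes container port 8000 on host port 18500, so the
# /health endpoint defined later in stock-lab/app/main.py should answer here.
# "localhost" is an assumption; substitute the NAS host name if remote.
resp = requests.get("http://localhost:18500/health", timeout=5)
print(resp.status_code, resp.json())  # expected: 200 {'ok': True}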


@@ -54,6 +54,17 @@ server {
proxy_pass http://travel-proxy:8000/api/travel/;
}
# stock API
location /api/stock/ {
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_pass http://stock-lab:8000/api/stock/;
}
# API proxy (the key point here: avoid duplicating /api/)
location /api/ {
proxy_http_version 1.1;
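
With this location block, clients reach the service through the reverse proxy instead of the published port; a minimal sketch of reading the news endpoint, with example.com standing in for the real server name:

import requests

# nginx forwards /api/stock/... unchanged to http://stock-lab:8000/api/stock/...,
# which matches the route declared in stock-lab/app/main.py below.
resp = requests.get("https://example.com/api/stock/news", params={"limit": 5}, timeout=10)
resp.raise_for_status()
for article in resp.json():
    print(article["pub_date"], article["press"], article["title"])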


@@ -44,7 +44,7 @@ rsync -a --delete \
bash "$SRC/scripts/deploy-nas.sh" bash "$SRC/scripts/deploy-nas.sh"
cd "$DST" cd "$DST"
docker-compose up -d --build backend travel-proxy frontend docker-compose up -d --build backend travel-proxy stock-lab frontend
# [Permission Fix] # [Permission Fix]
# deployer가 root로 돌면서 생성한 파일들의 소유권을 호스트 사용자로 변경 # deployer가 root로 돌면서 생성한 파일들의 소유권을 호스트 사용자로 변경

stock-lab/Dockerfile

@@ -0,0 +1,9 @@
FROM python:3.12-alpine
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

stock-lab/app/db.py

@@ -0,0 +1,54 @@
import sqlite3
import os
import hashlib
from typing import List, Dict, Any
DB_PATH = "/app/data/stock.db"
def _conn() -> sqlite3.Connection:
os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
conn = sqlite3.connect(DB_PATH)
conn.row_factory = sqlite3.Row
return conn
def init_db():
with _conn() as conn:
conn.execute("""
CREATE TABLE IF NOT EXISTS articles (
id INTEGER PRIMARY KEY AUTOINCREMENT,
hash TEXT UNIQUE NOT NULL,
title TEXT NOT NULL,
link TEXT,
summary TEXT,
press TEXT,
pub_date TEXT,
crawled_at TEXT
)
""")
conn.execute("CREATE INDEX IF NOT EXISTS idx_articles_crawled ON articles(crawled_at DESC)")
def save_articles(articles: List[Dict[str, str]]) -> int:
count = 0
with _conn() as conn:
for a in articles:
# hash for duplicate detection (title + link)
unique_str = f"{a['title']}|{a['link']}"
h = hashlib.md5(unique_str.encode()).hexdigest()
try:
conn.execute("""
INSERT INTO articles (hash, title, link, summary, press, pub_date, crawled_at)
VALUES (?, ?, ?, ?, ?, ?, ?)
""", (h, a['title'], a['link'], a['summary'], a['press'], a['date'], a['crawled_at']))
count += 1
except sqlite3.IntegrityError:
pass  # already exists
return count
def get_latest_articles(limit: int = 20) -> List[Dict[str, Any]]:
with _conn() as conn:
rows = conn.execute(
"SELECT * FROM articles ORDER BY crawled_at DESC, id DESC LIMIT ?",
(limit,)
).fetchall()
return [dict(r) for r in rows]
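
A minimal sketch of the de-duplication behaviour of save_articles, using a made-up article; the second call hits the UNIQUE constraint on the title+link hash and inserts nothing. DB_PATH is hard-coded to /app/data/stock.db, so this assumes a writable /app/data, e.g. inside the container:

from app.db import init_db, save_articles, get_latest_articles

init_db()
article = {  # hypothetical data, for illustration only
    "title": "Sample headline",
    "link": "https://finance.naver.com/news/sample",
    "summary": "...",
    "press": "Example Press",
    "date": "2026-01-26 08:00",
    "crawled_at": "2026-01-26 08:00:00",
}
print(save_articles([article]))      # 1 -> new row inserted
print(save_articles([article]))      # 0 -> duplicate hash, IntegrityError swallowed
print(len(get_latest_articles(10)))  # 1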

stock-lab/app/main.py

@@ -0,0 +1,47 @@
import os
from fastapi import FastAPI
from apscheduler.schedulers.background import BackgroundScheduler
from .db import init_db, save_articles, get_latest_articles
from .scraper import fetch_market_news
app = FastAPI()
scheduler = BackgroundScheduler(timezone=os.getenv("TZ", "Asia/Seoul"))
@app.on_event("startup")
def on_startup():
init_db()
# scrape news every morning at 08:00
scheduler.add_job(run_scraping_job, "cron", hour="8", minute="0")
# also run once at startup (if there is no data yet)
if not get_latest_articles(1):
run_scraping_job()
scheduler.start()
def run_scraping_job():
print("[StockLab] Starting news scraping...")
articles = fetch_market_news()
count = save_articles(articles)
print(f"[StockLab] Saved {count} new articles.")
@app.get("/health")
def health():
return {"ok": True}
@app.get("/api/stock/news")
def get_news(limit: int = 20):
"""최신 주식 뉴스 조회"""
return get_latest_articles(limit)
@app.post("/api/admin/stock/scrap")
def trigger_scrap():
"""수동 스크랩 트리거"""
run_scraping_job()
return {"ok": True}
@app.get("/api/version")
def version():
return {"version": os.getenv("APP_VERSION", "dev")}

stock-lab/app/scraper.py

@@ -0,0 +1,78 @@
import requests
from bs4 import BeautifulSoup
from typing import Any, Dict, List
import time
# Naver Finance main news page
NAVER_FINANCE_NEWS_URL = "https://finance.naver.com/news/mainnews.naver"
def fetch_market_news() -> List[Dict[str, str]]:
"""
Scrape the Naver Finance "main news" page.
Returns: [{"title": "...", "link": "...", "summary": "...", "date": "..."}, ...]
"""
try:
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
}
resp = requests.get(NAVER_FINANCE_NEWS_URL, headers=headers, timeout=10)
resp.raise_for_status()
soup = BeautifulSoup(resp.content, "html.parser", from_encoding="cp949")
# extract the main news list
# structure: div.mainNewsList > ul > li
articles = []
news_list = soup.select(".mainNewsList ul li")
for li in news_list:
# a thumbnail may or may not be present
dl = li.select_one("dl")
if not dl:
continue
# title (dd.articleSubject > a)
subject_tag = dl.select_one(".articleSubject a")
if not subject_tag:
continue
title = subject_tag.get_text(strip=True)
link = "https://finance.naver.com" + subject_tag["href"]
# summary (dd.articleSummary)
summary_tag = dl.select_one(".articleSummary")
summary = ""
press = ""
date = ""
if summary_tag:
# capture press/date, then strip those tags from the summary
for child in summary_tag.select(".press, .wdate"):
if "press" in child.get("class", []):
press = child.get_text(strip=True)
if "wdate" in child.get("class", []):
date = child.get_text(strip=True)
child.extract()
summary = summary_tag.get_text(strip=True)
articles.append({
"title": title,
"link": link,
"summary": summary,
"press": press,
"date": date,
"crawled_at": time.strftime("%Y-%m-%d %H:%M:%S")
})
return articles
except Exception as e:
print(f"[StockLab] Scraping failed: {e}")
return []
def fetch_major_indices() -> Dict[str, Any]:
"""
Major indicators such as KOSPI, KOSDAQ and USD/KRW (Naver Finance home).
"""
# ... (to be implemented later; return an empty dict for now)
return {}
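
The scraper has no FastAPI or database dependency, so it can be tried standalone; a minimal sketch, assuming network access to finance.naver.com (the page markup may of course change and break the selectors):

from app.scraper import fetch_market_news

articles = fetch_market_news()
print(f"fetched {len(articles)} articles")
for a in articles[:5]:
    # each item carries title / link / summary / press / date / crawled_at
    print(a["date"], a["press"], a["title"])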


@@ -0,0 +1,5 @@
fastapi
uvicorn
requests
beautifulsoup4
apscheduler