feat: add stock-lab service for financial news scraping and analysis
@@ -17,6 +17,20 @@ services:
     volumes:
       - ${RUNTIME_PATH}/data:/app/data
 
+  stock-lab:
+    build:
+      context: ./stock-lab
+      args:
+        APP_VERSION: ${APP_VERSION:-dev}
+    container_name: stock-lab
+    restart: unless-stopped
+    ports:
+      - "18500:8000"
+    environment:
+      - TZ=${TZ:-Asia/Seoul}
+    volumes:
+      - ${STOCK_DATA_PATH:-./data/stock}:/app/data
+
   travel-proxy:
     build: ./travel-proxy
     container_name: travel-proxy
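A quick way to sanity-check the new port mapping once the stack is up (a sketch; assumes you are on the compose host and the service built cleanly):

import requests

# 18500 on the host maps to 8000 inside the stock-lab container (see ports above)
resp = requests.get("http://localhost:18500/health", timeout=5)
resp.raise_for_status()
print(resp.json())  # -> {'ok': True}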
@@ -54,6 +54,17 @@ server {
         proxy_pass http://travel-proxy:8000/api/travel/;
     }
 
+    # stock API
+    location /api/stock/ {
+        proxy_http_version 1.1;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+        proxy_pass http://stock-lab:8000/api/stock/;
+    }
+
+
     # API proxy (the point here: remove the duplicated /api/)
     location /api/ {
         proxy_http_version 1.1;
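nginx selects the longest matching prefix location, so requests under /api/stock/ land on stock-lab while everything else under /api/ still goes to the existing backend. A sketch of hitting the new route through the proxy (nas.example.com is a placeholder for the real host):

import requests

# /api/stock/news is served by stock-lab behind the new location block
news = requests.get("http://nas.example.com/api/stock/news", params={"limit": 5}, timeout=10)
for item in news.json():
    print(item["pub_date"], item["press"], item["title"])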
@@ -44,7 +44,7 @@ rsync -a --delete \
 bash "$SRC/scripts/deploy-nas.sh"
 
 cd "$DST"
-docker-compose up -d --build backend travel-proxy frontend
+docker-compose up -d --build backend travel-proxy stock-lab frontend
 
 # [Permission Fix]
 # Change ownership of files created while the deployer ran as root back to the host user
stock-lab/Dockerfile (new file, 9 lines)
@@ -0,0 +1,9 @@
+FROM python:3.12-alpine
+
+WORKDIR /app
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
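One caveat worth flagging: docker-compose passes APP_VERSION as a build arg, but this Dockerfile never declares it, so the arg is silently dropped and os.getenv("APP_VERSION", "dev") in app/main.py will always report "dev". If the intent is to bake the version into the image, adding ARG APP_VERSION=dev and ENV APP_VERSION=$APP_VERSION after the FROM line should carry it through (untested against this build, but that is the usual pattern).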
stock-lab/app/db.py (new file, 54 lines)
@@ -0,0 +1,54 @@
+import sqlite3
+import os
+import hashlib
+from typing import List, Dict, Any
+
+DB_PATH = "/app/data/stock.db"
+
+def _conn() -> sqlite3.Connection:
+    os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
+    conn = sqlite3.connect(DB_PATH)
+    conn.row_factory = sqlite3.Row
+    return conn
+
+def init_db():
+    with _conn() as conn:
+        conn.execute("""
+            CREATE TABLE IF NOT EXISTS articles (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                hash TEXT UNIQUE NOT NULL,
+                title TEXT NOT NULL,
+                link TEXT,
+                summary TEXT,
+                press TEXT,
+                pub_date TEXT,
+                crawled_at TEXT
+            )
+        """)
+        conn.execute("CREATE INDEX IF NOT EXISTS idx_articles_crawled ON articles(crawled_at DESC)")
+
+def save_articles(articles: List[Dict[str, str]]) -> int:
+    count = 0
+    with _conn() as conn:
+        for a in articles:
+            # hash for duplicate detection (title + link)
+            unique_str = f"{a['title']}|{a['link']}"
+            h = hashlib.md5(unique_str.encode()).hexdigest()
+
+            try:
+                conn.execute("""
+                    INSERT INTO articles (hash, title, link, summary, press, pub_date, crawled_at)
+                    VALUES (?, ?, ?, ?, ?, ?, ?)
+                """, (h, a['title'], a['link'], a['summary'], a['press'], a['date'], a['crawled_at']))
+                count += 1
+            except sqlite3.IntegrityError:
+                pass  # already stored
+    return count
+
+def get_latest_articles(limit: int = 20) -> List[Dict[str, Any]]:
+    with _conn() as conn:
+        rows = conn.execute(
+            "SELECT * FROM articles ORDER BY crawled_at DESC, id DESC LIMIT ?",
+            (limit,)
+        ).fetchall()
+        return [dict(r) for r in rows]
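A minimal usage sketch for the persistence layer, assuming DB_PATH is reachable (i.e. inside the container, or with the constant pointed at a local path first); the dict mirrors what scraper.py emits:

from app.db import init_db, save_articles, get_latest_articles

init_db()
saved = save_articles([{
    "title": "Sample headline",
    "link": "https://finance.naver.com/news/...",  # placeholder link
    "summary": "One-line summary",
    "press": "Some Press",
    "date": "2024-01-02 08:00",
    "crawled_at": "2024-01-02 08:00:00",
}])
print(saved)  # 1 on first insert; 0 on a rerun, since the UNIQUE
              # md5(title|link) hash makes duplicates hit IntegrityError
print(get_latest_articles(limit=1))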
stock-lab/app/main.py (new file, 47 lines)
@@ -0,0 +1,47 @@
+import os
+from fastapi import FastAPI
+from apscheduler.schedulers.background import BackgroundScheduler
+
+from .db import init_db, save_articles, get_latest_articles
+from .scraper import fetch_market_news
+
+app = FastAPI()
+scheduler = BackgroundScheduler(timezone=os.getenv("TZ", "Asia/Seoul"))
+
+@app.on_event("startup")
+def on_startup():
+    init_db()
+
+    # scrape the news every morning at 8
+    scheduler.add_job(run_scraping_job, "cron", hour="8", minute="0")
+
+    # also run once at startup (if there is no data yet)
+    if not get_latest_articles(1):
+        run_scraping_job()
+
+    scheduler.start()
+
+def run_scraping_job():
+    print("[StockLab] Starting news scraping...")
+    articles = fetch_market_news()
+    count = save_articles(articles)
+    print(f"[StockLab] Saved {count} new articles.")
+
+@app.get("/health")
+def health():
+    return {"ok": True}
+
+@app.get("/api/stock/news")
+def get_news(limit: int = 20):
+    """Return the latest stock news."""
+    return get_latest_articles(limit)
+
+@app.post("/api/admin/stock/scrap")
+def trigger_scrap():
+    """Manually trigger a scrape."""
+    run_scraping_job()
+    return {"ok": True}
+
+@app.get("/api/version")
+def version():
+    return {"version": os.getenv("APP_VERSION", "dev")}
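The routes can also be exercised in-process with FastAPI's TestClient; a sketch (note TestClient needs httpx, which is not in requirements.txt, and the startup hook performs a real scrape when the DB is empty):

from fastapi.testclient import TestClient
from app.main import app

# Entering the context manager fires the startup event, so init_db()
# runs and -- if the articles table is empty -- one real scrape too.
with TestClient(app) as client:
    print(client.get("/health").json())       # {'ok': True}
    print(client.get("/api/version").json())  # {'version': 'dev'} unless APP_VERSION is set
    print(len(client.get("/api/stock/news", params={"limit": 5}).json()))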
stock-lab/app/scraper.py (new file, 78 lines)
@@ -0,0 +1,78 @@
+import requests
+from bs4 import BeautifulSoup
+from typing import List, Dict, Any
+import time
+
+# Naver Finance main news
+NAVER_FINANCE_NEWS_URL = "https://finance.naver.com/news/mainnews.naver"
+
+def fetch_market_news() -> List[Dict[str, str]]:
+    """
+    Crawl the Naver Finance 'main news' page.
+    Returns: [{"title": "...", "link": "...", "summary": "...", "date": "..."}, ...]
+    """
+    try:
+        headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
+        }
+        resp = requests.get(NAVER_FINANCE_NEWS_URL, headers=headers, timeout=10)
+        resp.raise_for_status()
+
+        soup = BeautifulSoup(resp.content, "html.parser", from_encoding="cp949")
+
+        # extract the main news list
+        # structure: div.mainNewsList > ul > li
+        articles = []
+        news_list = soup.select(".mainNewsList ul li")
+
+        for li in news_list:
+            # a thumbnail may or may not be present
+            dl = li.select_one("dl")
+            if not dl:
+                continue
+
+            # title (dd.articleSubject > a)
+            subject_tag = dl.select_one(".articleSubject a")
+            if not subject_tag:
+                continue
+
+            title = subject_tag.get_text(strip=True)
+            link = "https://finance.naver.com" + subject_tag["href"]
+
+            # summary (dd.articleSummary)
+            summary_tag = dl.select_one(".articleSummary")
+            summary = ""
+            press = ""
+            date = ""
+
+            if summary_tag:
+                # pull out press/date, then strip those tags from the summary
+                for child in summary_tag.select(".press, .wdate"):
+                    if "press" in child.get("class", []):
+                        press = child.get_text(strip=True)
+                    if "wdate" in child.get("class", []):
+                        date = child.get_text(strip=True)
+                    child.extract()
+                summary = summary_tag.get_text(strip=True)
+
+            articles.append({
+                "title": title,
+                "link": link,
+                "summary": summary,
+                "press": press,
+                "date": date,
+                "crawled_at": time.strftime("%Y-%m-%d %H:%M:%S")
+            })
+
+        return articles
+
+    except Exception as e:
+        print(f"[StockLab] Scraping failed: {e}")
+        return []
+
+def fetch_major_indices() -> Dict[str, Any]:
+    """
+    Major indicators such as KOSPI, KOSDAQ, and USD/KRW (from the Naver Finance home page).
+    """
+    # ... (to be implemented later; returns an empty dict for now)
+    return {}
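Running the scraper standalone is a useful check when Naver changes its markup and the .mainNewsList selector stops matching; for example:

from app.scraper import fetch_market_news

articles = fetch_market_news()
print(f"fetched {len(articles)} articles")
for a in articles[:5]:
    print(f"{a['date']} [{a['press']}] {a['title']}")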
stock-lab/requirements.txt (new file, 5 lines)
@@ -0,0 +1,5 @@
+fastapi
+uvicorn
+requests
+beautifulsoup4
+apscheduler