feat: add stock-lab service for financial news scraping and analysis
This commit is contained in:
9
stock-lab/Dockerfile
Normal file
9
stock-lab/Dockerfile
Normal file
@@ -0,0 +1,9 @@
|
||||
# Small Python base image for the stock-lab scraping service.
FROM python:3.12-alpine

WORKDIR /app
# Copy and install dependencies before the app code so Docker layer
# caching skips the pip install when only source files change.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# Serve the FastAPI app (app/main.py) on all interfaces, port 8000.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
54
stock-lab/app/db.py
Normal file
54
stock-lab/app/db.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import sqlite3
|
||||
import os
|
||||
import hashlib
|
||||
from typing import List, Dict, Any
|
||||
|
||||
# SQLite file path inside the container; /app/data is presumably a mounted
# volume so the DB survives restarts -- TODO confirm against compose config.
DB_PATH = "/app/data/stock.db"
|
||||
|
||||
def _conn() -> sqlite3.Connection:
    """Open a connection to the articles DB, creating its directory on first use."""
    db_dir = os.path.dirname(DB_PATH)
    os.makedirs(db_dir, exist_ok=True)
    connection = sqlite3.connect(DB_PATH)
    # Rows come back as sqlite3.Row so callers can turn them into dicts.
    connection.row_factory = sqlite3.Row
    return connection
|
||||
|
||||
def init_db() -> None:
    """Create the articles table and its index if they do not exist.

    Safe to call on every startup: all DDL uses IF NOT EXISTS.
    """
    conn = _conn()
    try:
        # sqlite3's `with conn:` only manages the transaction (commit /
        # rollback) -- it does NOT close the connection. Close explicitly
        # so repeated calls don't leak file handles.
        with conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS articles (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    hash TEXT UNIQUE NOT NULL,
                    title TEXT NOT NULL,
                    link TEXT,
                    summary TEXT,
                    press TEXT,
                    pub_date TEXT,
                    crawled_at TEXT
                )
            """)
            conn.execute("CREATE INDEX IF NOT EXISTS idx_articles_crawled ON articles(crawled_at DESC)")
    finally:
        conn.close()
|
||||
|
||||
def save_articles(articles: List[Dict[str, str]]) -> int:
    """Insert scraped articles, skipping ones already stored.

    Args:
        articles: dicts with keys title (required), link, summary, press,
            date, crawled_at (optional keys default to empty strings).

    Returns:
        Number of newly inserted rows; duplicates are silently skipped.
    """
    count = 0
    conn = _conn()
    try:
        with conn:  # one transaction for the whole batch
            for a in articles:
                # Dedup fingerprint over title+link. md5 is fine here: it is
                # a cheap content hash, not a security boundary.
                unique_str = f"{a['title']}|{a.get('link', '')}"
                h = hashlib.md5(unique_str.encode()).hexdigest()

                try:
                    conn.execute("""
                        INSERT INTO articles (hash, title, link, summary, press, pub_date, crawled_at)
                        VALUES (?, ?, ?, ?, ?, ?, ?)
                    """, (h, a['title'], a.get('link', ''), a.get('summary', ''),
                          a.get('press', ''), a.get('date', ''), a.get('crawled_at', '')))
                    count += 1
                except sqlite3.IntegrityError:
                    pass  # row with this hash already exists
    finally:
        # `with conn:` commits/rolls back but never closes; close explicitly
        # so each scraping job doesn't leak a connection.
        conn.close()
    return count
|
||||
|
||||
def get_latest_articles(limit: int = 20) -> List[Dict[str, Any]]:
    """Return the most recently crawled articles as plain dicts.

    Args:
        limit: max number of rows. Clamped to >= 0 because a negative value
            reaches SQLite as `LIMIT -n`, which means "no limit" and would
            dump the entire table to the caller.
    """
    conn = _conn()
    try:
        rows = conn.execute(
            "SELECT * FROM articles ORDER BY crawled_at DESC, id DESC LIMIT ?",
            (max(0, limit),)
        ).fetchall()
        return [dict(r) for r in rows]
    finally:
        # Close explicitly -- the previous `with conn:` form committed but
        # leaked the connection on every API call.
        conn.close()
|
||||
47
stock-lab/app/main.py
Normal file
47
stock-lab/app/main.py
Normal file
@@ -0,0 +1,47 @@
|
||||
import os
|
||||
from fastapi import FastAPI
|
||||
from apscheduler.schedulers.background import BackgroundScheduler
|
||||
|
||||
from .db import init_db, save_articles, get_latest_articles
|
||||
from .scraper import fetch_market_news
|
||||
|
||||
# FastAPI application instance served by uvicorn (see the Dockerfile CMD).
app = FastAPI()
# Background job scheduler; timezone comes from the TZ env var (default Asia/Seoul).
scheduler = BackgroundScheduler(timezone=os.getenv("TZ", "Asia/Seoul"))
|
||||
|
||||
@app.on_event("startup")
def on_startup():
    """Prepare the DB, schedule the daily scrape, and backfill an empty DB."""
    init_db()

    # Scrape the news feed every morning at 08:00 (scheduler timezone).
    scheduler.add_job(run_scraping_job, "cron", hour="8", minute="0")

    # Fresh database with no rows yet: run one scrape right away so the
    # API has something to serve before the first cron tick.
    existing = get_latest_articles(1)
    if not existing:
        run_scraping_job()

    scheduler.start()
|
||||
|
||||
def run_scraping_job():
    """Fetch the current market news and persist any not-yet-seen articles."""
    print("[StockLab] Starting news scraping...")
    fetched = fetch_market_news()
    count = save_articles(fetched)
    print(f"[StockLab] Saved {count} new articles.")
|
||||
|
||||
@app.get("/health")
|
||||
def health():
|
||||
return {"ok": True}
|
||||
|
||||
@app.get("/api/stock/news")
|
||||
def get_news(limit: int = 20):
|
||||
"""최신 주식 뉴스 조회"""
|
||||
return get_latest_articles(limit)
|
||||
|
||||
@app.post("/api/admin/stock/scrap")
def trigger_scrap():
    """Manually trigger one scraping run (admin endpoint)."""
    run_scraping_job()
    return dict(ok=True)
|
||||
|
||||
@app.get("/api/version")
|
||||
def version():
|
||||
return {"version": os.getenv("APP_VERSION", "dev")}
|
||||
78
stock-lab/app/scraper.py
Normal file
78
stock-lab/app/scraper.py
Normal file
@@ -0,0 +1,78 @@
|
||||
import time
from typing import Any, Dict, List

import requests
from bs4 import BeautifulSoup
|
||||
|
||||
# Naver Finance "Major News" page (financial market headlines).
NAVER_FINANCE_NEWS_URL = "https://finance.naver.com/news/mainnews.naver"
|
||||
|
||||
def fetch_market_news() -> List[Dict[str, str]]:
    """Scrape the Naver Finance "Major News" page.

    Returns:
        List of article dicts with keys: title, link, summary, press,
        date (publication date text from the page), crawled_at (local
        scrape timestamp). Returns [] on any failure (best-effort job).
    """
    try:
        # Browser-like User-Agent; presumably the site serves different
        # (or no) markup to obvious bots -- TODO confirm.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
        }
        resp = requests.get(NAVER_FINANCE_NEWS_URL, headers=headers, timeout=10)
        resp.raise_for_status()

        # Decode the raw bytes as cp949 -- the page is assumed to use a
        # Korean legacy encoding (EUC-KR family) rather than UTF-8.
        soup = BeautifulSoup(resp.content, "html.parser", from_encoding="cp949")

        # Extract the main news list.
        # Page structure: div.mainNewsList > ul > li
        articles = []
        news_list = soup.select(".mainNewsList ul li")

        for li in news_list:
            # A thumbnail may or may not be present; the article fields live in <dl>.
            dl = li.select_one("dl")
            if not dl:
                continue

            # Title (dd.articleSubject > a)
            subject_tag = dl.select_one(".articleSubject a")
            if not subject_tag:
                continue

            title = subject_tag.get_text(strip=True)
            # hrefs on this page are site-relative, so prefix the host.
            link = "https://finance.naver.com" + subject_tag["href"]

            # Summary block (dd.articleSummary) also carries press name and date.
            summary_tag = dl.select_one(".articleSummary")
            summary = ""
            press = ""
            date = ""

            if summary_tag:
                # Pull press/date out of the summary node, then remove those
                # child tags so get_text() below yields only the summary text.
                for child in summary_tag.select(".press, .wdate"):
                    if "press" in child.get("class", []):
                        press = child.get_text(strip=True)
                    if "wdate" in child.get("class", []):
                        date = child.get_text(strip=True)
                    child.extract()
                summary = summary_tag.get_text(strip=True)

            articles.append({
                "title": title,
                "link": link,
                "summary": summary,
                "press": press,
                "date": date,
                "crawled_at": time.strftime("%Y-%m-%d %H:%M:%S")
            })

        return articles

    except Exception as e:
        # Best-effort: never crash the scheduler job; log and return an
        # empty batch instead.
        print(f"[StockLab] Scraping failed: {e}")
        return []
|
||||
|
||||
def fetch_major_indices() -> Dict[str, Any]:
    """Fetch major market indicators (KOSPI, KOSDAQ, USD/KRW) from the Naver Finance home page.

    Not implemented yet; returns an empty dict so callers can handle the
    result uniformly. NOTE: the `Any` in the annotation requires `Any` to
    be imported from `typing` at module level -- the original file omitted
    it, which raised NameError when the module was imported.
    """
    # TODO: implement scraping of the index values; empty placeholder for now.
    return {}
|
||||
5
stock-lab/requirements.txt
Normal file
5
stock-lab/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
fastapi
|
||||
uvicorn
|
||||
requests
|
||||
beautifulsoup4
|
||||
apscheduler
|
||||
Reference in New Issue
Block a user