feat: add stock-lab service for financial news scraping and analysis
This commit is contained in:
78
stock-lab/app/scraper.py
Normal file
78
stock-lab/app/scraper.py
Normal file
@@ -0,0 +1,78 @@
|
||||
import time
from typing import Any, Dict, List
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
|
||||
|
||||
# Naver Finance "main news" page
|
||||
NAVER_FINANCE_NEWS_URL = "https://finance.naver.com/news/mainnews.naver"
|
||||
|
||||
def fetch_market_news() -> List[Dict[str, str]]:
    """
    Scrape the "main news" list from Naver Finance.

    Returns:
        A list with one dict per news item, shaped like
        {"title": ..., "link": ..., "summary": ..., "press": ...,
        "date": ..., "crawled_at": "YYYY-MM-DD HH:MM:SS"}.
        "summary", "press" and "date" are empty strings when the item has no
        matching element. "crawled_at" is the local time at which the page
        was parsed and is identical for every item of one call.
        An empty list is returned when the request or the parsing fails;
        the error is printed rather than raised.
    """
    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
        }
        resp = requests.get(NAVER_FINANCE_NEWS_URL, headers=headers, timeout=10)
        resp.raise_for_status()

        # Parse the raw bytes and tell BeautifulSoup to decode them as cp949.
        soup = BeautifulSoup(resp.content, "html.parser", from_encoding="cp949")

        # One timestamp per scrape so all items of a batch agree.
        crawled_at = time.strftime("%Y-%m-%d %H:%M:%S")

        # Expected structure: div.mainNewsList > ul > li
        articles = []
        for li in soup.select(".mainNewsList ul li"):
            # The thumbnail is optional; the text content lives in the <dl>.
            dl = li.select_one("dl")
            if not dl:
                continue

            # Title anchor: dd.articleSubject > a
            subject_tag = dl.select_one(".articleSubject a")
            if not subject_tag:
                continue

            # Skip an anchor without href instead of raising, which would
            # otherwise throw away every article collected so far.
            href = subject_tag.get("href")
            if not href:
                continue

            title = subject_tag.get_text(strip=True)
            # urljoin resolves relative hrefs against the page URL and leaves
            # already-absolute hrefs untouched.
            link = urljoin(NAVER_FINANCE_NEWS_URL, href)

            summary = ""
            press = ""
            date = ""

            # Summary: dd.articleSummary, which also nests press and date.
            summary_tag = dl.select_one(".articleSummary")
            if summary_tag:
                # Pull press/date out of the summary node and detach them so
                # they do not leak into the summary text.
                for child in summary_tag.select(".press, .wdate"):
                    classes = child.get("class", [])
                    if "press" in classes:
                        press = child.get_text(strip=True)
                    if "wdate" in classes:
                        date = child.get_text(strip=True)
                    child.extract()
                summary = summary_tag.get_text(strip=True)

            articles.append({
                "title": title,
                "link": link,
                "summary": summary,
                "press": press,
                "date": date,
                "crawled_at": crawled_at,
            })

        return articles

    except Exception as e:
        # Deliberate best-effort boundary: callers get an empty list on any
        # network or parsing failure.
        print(f"[StockLab] Scraping failed: {e}")
        return []
|
||||
|
||||
def fetch_major_indices() -> Dict[str, Any]:
    """
    Return major market indicators such as KOSPI, KOSDAQ and USD/KRW.

    The intended data source is the Naver Finance home page. This is not
    implemented yet: the function is a placeholder that always returns an
    empty dict.
    """
    # TODO: implement the scraper; until then callers receive an empty dict.
    return {}
|
||||
Reference in New Issue
Block a user