import time
from typing import Any, Dict, List

import requests
from bs4 import BeautifulSoup

# Naver Finance "main news" listing page.
NAVER_FINANCE_NEWS_URL = "https://finance.naver.com/news/mainnews.naver"

# Browser-like User-Agent: Naver rejects the default python-requests UA.
_REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
}


def fetch_market_news() -> List[Dict[str, str]]:
    """Scrape the "main news" section of Naver Finance.

    Returns:
        A list of article dicts:
        [{"title": ..., "link": ..., "summary": ..., "press": ...,
          "date": ..., "crawled_at": ...}, ...]
        Best-effort: on any request/parsing failure the error is printed
        and an empty list is returned; this function never raises.
    """
    try:
        resp = requests.get(NAVER_FINANCE_NEWS_URL, headers=_REQUEST_HEADERS, timeout=10)
        resp.raise_for_status()
        # The page is served as CP949/EUC-KR; decode explicitly so the
        # Korean text is not garbled.
        soup = BeautifulSoup(resp.content, "html.parser", from_encoding="cp949")
        return _parse_main_news(soup)
    except Exception as e:  # top-level boundary: log and degrade to []
        print(f"[StockLab] Scraping failed: {e}")
        return []


def _parse_main_news(soup: BeautifulSoup) -> List[Dict[str, str]]:
    """Extract article entries from a parsed main-news page.

    Expected markup: div.mainNewsList > ul > li > dl, with the title link
    in dd.articleSubject > a and summary text (plus press name in .press
    and date in .wdate) in dd.articleSummary.
    """
    # One timestamp per crawl (hoisted out of the loop) so every article
    # from the same fetch carries an identical crawled_at value.
    crawled_at = time.strftime("%Y-%m-%d %H:%M:%S")
    articles: List[Dict[str, str]] = []

    for li in soup.select(".mainNewsList ul li"):
        # Items may or may not have a thumbnail; <dl> is the stable anchor.
        dl = li.select_one("dl")
        if not dl:
            continue

        # Title (dd.articleSubject > a)
        subject_tag = dl.select_one(".articleSubject a")
        if not subject_tag:
            continue
        title = subject_tag.get_text(strip=True)
        # hrefs are site-relative; prepend the host to get an absolute URL.
        link = "https://finance.naver.com" + subject_tag["href"]

        # Summary (dd.articleSummary) with embedded press name and date.
        summary = press = date = ""
        summary_tag = dl.select_one(".articleSummary")
        if summary_tag:
            # Capture press/date, then remove those nodes so get_text()
            # yields only the summary sentence itself.
            for child in summary_tag.select(".press, .wdate"):
                classes = child.get("class", [])
                if "press" in classes:
                    press = child.get_text(strip=True)
                if "wdate" in classes:
                    date = child.get_text(strip=True)
                child.extract()
            summary = summary_tag.get_text(strip=True)

        articles.append({
            "title": title,
            "link": link,
            "summary": summary,
            "press": press,
            "date": date,
            "crawled_at": crawled_at,
        })

    return articles


def fetch_major_indices() -> Dict[str, Any]:
    """Fetch major market indicators (KOSPI, KOSDAQ, USD/KRW, ...) from
    the Naver Finance home page.

    Not implemented yet; returns an empty dict.
    """
    # TODO: implement; placeholder until the home-page scraper is written.
    return {}