import requests
from bs4 import BeautifulSoup
from typing import List, Dict, Any
import time

# Naver Finance main news
NAVER_FINANCE_NEWS_URL = "https://finance.naver.com/news/mainnews.naver"
# Overseas market news uses the mobile API instead;
# NAVER_FINANCE_WORLD_NEWS_URL is no longer used.
# Overseas market main page (used for indices)
NAVER_FINANCE_WORLD_URL = "https://finance.naver.com/world/"


def fetch_market_news() -> List[Dict[str, str]]:
    """
    Scrape the Naver Finance "Main News" page.
    Returns: [{"title": "...", "link": "...", "summary": "...", "date": "..."}, ...]
    """
    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
        }
        resp = requests.get(NAVER_FINANCE_NEWS_URL, headers=headers, timeout=10)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.content, "html.parser", from_encoding="cp949")

        # Extract the main news list
        # Structure: div.mainNewsList > ul > li
        articles = []
        news_list = soup.select(".mainNewsList ul li")
        for li in news_list:
            # A thumbnail may or may not be present
            dl = li.select_one("dl")
            if not dl:
                continue

            # Title (dd.articleSubject > a)
            subject_tag = dl.select_one(".articleSubject a")
            if not subject_tag:
                continue
            title = subject_tag.get_text(strip=True)
            link = "https://finance.naver.com" + subject_tag["href"]

            # Summary (dd.articleSummary)
            summary_tag = dl.select_one(".articleSummary")
            summary = ""
            press = ""
            date = ""
            if summary_tag:
                # Pull out press/date, then drop those tags so only the summary text remains
                for child in summary_tag.select(".press, .wdate"):
                    if "press" in child.get("class", []):
                        press = child.get_text(strip=True)
                    if "wdate" in child.get("class", []):
                        date = child.get_text(strip=True)
                    child.extract()
                summary = summary_tag.get_text(strip=True)

            articles.append({
                "title": title,
                "link": link,
                "summary": summary,
                "press": press,
                "date": date,
                "crawled_at": time.strftime("%Y-%m-%d %H:%M:%S"),
                "category": "domestic"
            })
        return articles
    except Exception as e:
        print(f"[StockLab] Scraping failed: {e}")
        return []


def fetch_overseas_news() -> List[Dict[str, str]]:
    """
    Scrape Naver Finance overseas market news (via the mobile API).
    """
    api_url = "https://api.stock.naver.com/news/overseas/category/breaking?pageSize=20&page=1"
    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)"
        }
        resp = requests.get(api_url, headers=headers, timeout=10)
        resp.raise_for_status()
        data = resp.json()
        if isinstance(data, list):
            items = data
        else:
            items = data.get("result", [])

        articles = []
        for item in items:
            # Map API keys (subject/title/tit, summary/subContent/sub_tit, etc.)
            title = item.get("subject") or item.get("title") or item.get("tit") or ""
            summary = item.get("summary") or item.get("subContent") or item.get("sub_tit") or ""
            press = item.get("officeName") or item.get("office_name") or item.get("cp_name") or ""

            # Format the date (20260126123000 -> 2026-01-26 12:30:00)
            raw_dt = str(item.get("dt", ""))
            if len(raw_dt) == 14:
                date = f"{raw_dt[:4]}-{raw_dt[4:6]}-{raw_dt[6:8]} {raw_dt[8:10]}:{raw_dt[10:12]}:{raw_dt[12:]}"
            else:
                date = raw_dt

            # Build the article link
            aid = item.get("articleId")
            oid = item.get("officeId")
            link = f"https://m.stock.naver.com/worldstock/news/read/{oid}/{aid}"

            articles.append({
                "title": title,
                "link": link,
                "summary": summary,
                "press": press,
                "date": date,
                "crawled_at": time.strftime("%Y-%m-%d %H:%M:%S"),
                "category": "overseas"
            })
        return articles
    except Exception as e:
        print(f"[StockLab] Overseas news failed: {e}")
        return []
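

# A minimal convenience sketch (an addition, not part of the original module): both
# fetchers above return dicts with the same keys plus a "category" field, so the two
# feeds can be merged directly. The name fetch_all_news is hypothetical.
def fetch_all_news() -> List[Dict[str, str]]:
    """Combine domestic and overseas news into a single list (sketch)."""
    return fetch_market_news() + fetch_overseas_news()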
{"key": "KOSPI", "selector": ".kospi_area", "url": "https://finance.naver.com/"}, {"key": "KOSDAQ", "selector": ".kosdaq_area", "url": "https://finance.naver.com/"}, {"key": "KOSPI200", "selector": ".kospi200_area", "url": "https://finance.naver.com/"}, ] # 해외 지수 (네이버 금융 해외 메인) - 여기서는 별도 URL 호출 필요하거나, 메인에 있는지 확인 # 네이버 메인에는 해외지수가 안 나옴. https://finance.naver.com/world/ 에서 긁어야 함 # 그러나 한 번에 처리하기 위해 함수 내에서 추가 호출 indices = [] # --- 국내 --- resp_kr = requests.get("https://finance.naver.com/", headers=headers, timeout=5) soup_kr = BeautifulSoup(resp_kr.content, "html.parser", from_encoding="cp949") for t in targets: area = soup_kr.select_one(t["selector"]) if not area: continue # (기존 파싱 로직) num_tag = area.select_one(".num") value = num_tag.get_text(strip=True) if num_tag else "" change_val_tag = area.select_one(".num2") change_pct_tag = area.select_one(".num3") change_val = change_val_tag.get_text(strip=True) if change_val_tag else "" change_pct = change_pct_tag.get_text(strip=True) if change_pct_tag else "" direction = "" if area.select_one(".bu_p"): direction = "red" elif area.select_one(".bu_m"): direction = "blue" indices.append({ "name": t["key"], "value": value, "change_value": change_val, "change_percent": change_pct, "direction": direction, "type": "domestic" }) # --- 해외 (DJI, NAS, SPI) --- try: resp_world = requests.get(NAVER_FINANCE_WORLD_URL, headers=headers, timeout=5) soup_world = BeautifulSoup(resp_world.content, "html.parser", from_encoding="cp949") world_targets = [ {"key": "DJI", "selector": ".sise_major .data_list li:nth-child(1)"}, {"key": "NAS", "selector": ".sise_major .data_list li:nth-child(2)"}, {"key": "SPI", "selector": ".sise_major .data_list li:nth-child(3)"}, ] for wt in world_targets: li = soup_world.select_one(wt["selector"]) if not li: continue # 값: dd.point_status strong val_tag = li.select_one("dd.point_status strong") value = val_tag.get_text(strip=True) if val_tag else "" # 등락: dd.point_status em direction = "" status_dd = li.select_one("dd.point_status") if status_dd: em = status_dd.select_one("em") if em: if "red" in em.get("class", []): direction = "red" elif "blue" in em.get("class", []): direction = "blue" indices.append({ "name": wt["key"], "value": value, "change_value": "", "change_percent": "", "direction": direction, "type": "overseas" }) except Exception as e: print(f"[StockLab] World indices failed: {e}") return {"indices": indices, "crawled_at": time.strftime("%Y-%m-%d %H:%M:%S")} except Exception as e: print(f"[StockLab] Indices scraping failed: {e}") return {"indices": [], "error": str(e)}