40 lines
1.2 KiB
Python
40 lines
1.2 KiB
Python
"""네이버 finance 종목 뉴스 스크래핑."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from typing import Any, Dict, List
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
NAVER_NEWS_URL = "https://finance.naver.com/item/news_news.naver"
|
|
NAVER_HEADERS = {
|
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
|
|
"Referer": "https://finance.naver.com/",
|
|
}
|
|
|
|
|
|
async def fetch_news(client, ticker: str, n: int = 5) -> List[Dict[str, Any]]:
    """Scrape top N news headlines for a ticker. Returns [] on any failure.

    Args:
        client: async HTTP client exposing ``await client.get(url, params=...)``
            whose response has ``.status_code`` and ``.text`` (e.g. httpx.AsyncClient).
        ticker: stock code, sent as the ``code`` query parameter.
        n: maximum number of headlines to return.

    Returns:
        Up to ``n`` dicts with ``"title"`` and ``"date"`` keys; [] on any failure
        (HTTP error, non-200 status, or parse error).
    """
    # NOTE(review): NAVER_HEADERS is defined at module level but not passed here —
    # presumably the client is constructed with those headers; verify, since Naver
    # tends to block requests without a browser User-Agent/Referer.
    try:
        r = await client.get(NAVER_NEWS_URL, params={"code": ticker, "page": 1})
    except Exception as e:
        log.warning("ai_news scrape http error for %s: %s", ticker, e)
        return []
    if r.status_code != 200:
        # Log instead of failing silently, so blocking/redirect breakage is
        # visible in logs rather than just producing empty results.
        log.warning("ai_news scrape got HTTP %s for %s", r.status_code, ticker)
        return []
    out: List[Dict[str, Any]] = []
    try:
        # Parsing must also be guarded: BeautifulSoup raises FeatureNotFound if
        # lxml is not installed, and the documented contract is "[] on any failure".
        soup = BeautifulSoup(r.text, "lxml")
        for row in soup.select("table.type5 tbody tr")[:n]:
            title_el = row.select_one("td.title a")
            date_el = row.select_one("td.date")
            if not title_el or not date_el:
                # Spacer / non-article rows lack the title or date cell.
                continue
            out.append({
                "title": title_el.get_text(strip=True),
                "date": date_el.get_text(strip=True),
            })
    except Exception as e:
        log.warning("ai_news scrape parse error for %s: %s", ticker, e)
        return []
    return out
|