diff --git a/stock-lab/app/screener/ai_news/__init__.py b/stock-lab/app/screener/ai_news/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/stock-lab/app/screener/ai_news/scraper.py b/stock-lab/app/screener/ai_news/scraper.py
new file mode 100644
--- /dev/null
+++ b/stock-lab/app/screener/ai_news/scraper.py
@@ -0,0 +1,49 @@
+"""Scrape per-ticker stock news headlines from Naver Finance."""
+
+from __future__ import annotations
+
+import logging
+from typing import Any, Dict, List
+
+from bs4 import BeautifulSoup
+
+log = logging.getLogger(__name__)
+
+NAVER_NEWS_URL = "https://finance.naver.com/item/news_news.naver"
+NAVER_HEADERS = {
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
+    "Referer": "https://finance.naver.com/",
+}
+
+
+async def fetch_news(client, ticker: str, n: int = 5) -> List[Dict[str, Any]]:
+    """Scrape up to *n* news headlines for *ticker*.
+
+    Returns a list of ``{"title": ..., "date": ...}`` dicts; returns ``[]``
+    on any failure (transport error, non-200 status, or unexpected markup).
+    """
+    try:
+        r = await client.get(NAVER_NEWS_URL, params={"code": ticker, "page": 1})
+    except Exception as e:
+        log.warning("ai_news scrape http error for %s: %s", ticker, e)
+        return []
+    if r.status_code != 200:
+        # Log, consistent with the transport-error path above.
+        log.warning("ai_news scrape got HTTP %s for %s", r.status_code, ticker)
+        return []
+    soup = BeautifulSoup(r.text, "lxml")
+    out: List[Dict[str, Any]] = []
+    # Scan every row and stop once n headlines are collected. Slicing the
+    # row list to [:n] up front would cap *candidate* rows, not results:
+    # the table mixes headline rows with spacer/"related news" rows that
+    # lack td.title/td.date, which would shrink the output below n.
+    for row in soup.select("table.type5 tbody tr"):
+        if len(out) >= n:
+            break
+        title_el = row.select_one("td.title a")
+        date_el = row.select_one("td.date")
+        if not title_el or not date_el:
+            continue
+        out.append({
+            "title": title_el.get_text(strip=True),
+            "date": date_el.get_text(strip=True),
+        })
+    return out
diff --git a/stock-lab/tests/test_ai_news_scraper.py b/stock-lab/tests/test_ai_news_scraper.py
new file mode 100644
--- /dev/null
+++ b/stock-lab/tests/test_ai_news_scraper.py
@@ -0,0 +1,66 @@
+import pytest
+from unittest.mock import AsyncMock, MagicMock
+from app.screener.ai_news import scraper
+
+
+SAMPLE_HTML = """
+<table class="type5">
+  <tbody>
+    <tr>
+      <td class="title"><a href="#">삼성전자, HBM 양산 가시화</a></td>
+      <td class="date">2026.05.13 07:30</td>
+    </tr>
+    <tr>
+      <td class="title"><a href="#">삼성, 4분기 어닝 쇼크 우려</a></td>
+      <td class="date">2026.05.13 06:00</td>
+    </tr>
+    <tr>
+      <td class="title"><a href="#">메모리 시장 회복세</a></td>
+      <td class="date">2026.05.12 18:00</td>
+    </tr>
+  </tbody>
+</table>
+"""
+
+EMPTY_HTML = "<table class='type5'><tbody></tbody></table>"
+
+
+def _mk_client(status_code=200, text=SAMPLE_HTML):
+    # The response object itself is never awaited by fetch_news, so it
+    # should be a plain MagicMock; only client.get must be awaitable.
+    client = MagicMock()
+    resp = MagicMock()
+    resp.status_code = status_code
+    resp.text = text
+    client.get = AsyncMock(return_value=resp)
+    return client
+
+
+@pytest.mark.asyncio
+async def test_fetch_news_success_returns_n_items():
+    client = _mk_client()
+    out = await scraper.fetch_news(client, "005930", n=2)
+    assert len(out) == 2
+    assert out[0]["title"] == "삼성전자, HBM 양산 가시화"
+    assert out[0]["date"] == "2026.05.13 07:30"
+
+
+@pytest.mark.asyncio
+async def test_fetch_news_404_returns_empty():
+    client = _mk_client(status_code=404, text="")
+    out = await scraper.fetch_news(client, "999999", n=5)
+    assert out == []
+
+
+@pytest.mark.asyncio
+async def test_fetch_news_empty_table_returns_empty():
+    client = _mk_client(text=EMPTY_HTML)
+    out = await scraper.fetch_news(client, "005930", n=5)
+    assert out == []
+
+
+@pytest.mark.asyncio
+async def test_fetch_news_n_caps_results():
+    client = _mk_client()
+    out = await scraper.fetch_news(client, "005930", n=2)
+    assert len(out) == 2  # sample has 3 headline rows; n=2 truncates