feat(screener): ai_news scraper (naver finance ticker news)
This commit is contained in:
0
stock-lab/app/screener/ai_news/__init__.py
Normal file
0
stock-lab/app/screener/ai_news/__init__.py
Normal file
39
stock-lab/app/screener/ai_news/scraper.py
Normal file
39
stock-lab/app/screener/ai_news/scraper.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
"""네이버 finance 종목 뉴스 스크래핑."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
# Module-level logger, named after the package for standard filtering.
log = logging.getLogger(__name__)

# Naver Finance per-ticker news listing endpoint (returns HTML, not JSON).
NAVER_NEWS_URL = "https://finance.naver.com/item/news_news.naver"

# Browser-like User-Agent plus a finance.naver.com Referer.
# NOTE(review): presumably needed so Naver serves the real page to the
# scraper rather than a bot-block response — confirm against live site.
NAVER_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    "Referer": "https://finance.naver.com/",
}
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_news(client, ticker: str, n: int = 5) -> List[Dict[str, Any]]:
    """Scrape top N news headlines for a ticker. Returns [] on any failure.

    Args:
        client: Async HTTP client exposing ``await client.get(url, ...)``
            (e.g. an ``httpx.AsyncClient``).
        ticker: Stock code as used by Naver Finance (e.g. ``"005930"``).
        n: Maximum number of headlines to return.

    Returns:
        Up to ``n`` dicts with ``"title"`` and ``"date"`` keys, in page
        order; an empty list on HTTP errors, non-200 responses, or when
        the page layout does not match the expected selectors.
    """
    try:
        # Fix: NAVER_HEADERS was defined at module level but never used,
        # so requests went out without the browser-like UA/Referer.
        r = await client.get(
            NAVER_NEWS_URL,
            params={"code": ticker, "page": 1},
            headers=NAVER_HEADERS,
        )
    except Exception as e:
        # Best-effort scraper: a network hiccup must never propagate.
        log.warning("ai_news scrape http error for %s: %s", ticker, e)
        return []
    if r.status_code != 200:
        return []

    soup = BeautifulSoup(r.text, "lxml")
    out: List[Dict[str, Any]] = []
    for row in soup.select("table.type5 tbody tr"):
        # Cap on *valid* items collected, not raw rows. The original
        # sliced rows with [:n] before filtering, so spacer/ad rows
        # without title/date cells ate into the budget and fewer than
        # n headlines could be returned even when more were available.
        if len(out) >= n:
            break
        title_el = row.select_one("td.title a")
        date_el = row.select_one("td.date")
        if not title_el or not date_el:
            continue
        out.append({
            "title": title_el.get_text(strip=True),
            "date": date_el.get_text(strip=True),
        })
    return out
|
||||||
55
stock-lab/tests/test_ai_news_scraper.py
Normal file
55
stock-lab/tests/test_ai_news_scraper.py
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
import pytest
|
||||||
|
from unittest.mock import AsyncMock
|
||||||
|
from app.screener.ai_news import scraper
|
||||||
|
|
||||||
|
|
||||||
|
# Minimal replica of Naver Finance's news-table markup: three rows inside
# table.type5, each with a td.title > a headline and a td.date cell —
# exactly the selectors fetch_news relies on.
SAMPLE_HTML = """
<html><body>
<table class="type5"><tbody>
<tr><td class="title"><a href="/news1">삼성전자, HBM 양산 가시화</a></td><td class="date">2026.05.13 07:30</td></tr>
<tr><td class="title"><a href="/news2">삼성, 4분기 어닝 쇼크 우려</a></td><td class="date">2026.05.13 06:00</td></tr>
<tr><td class="title"><a href="/news3">메모리 시장 회복세</a></td><td class="date">2026.05.12 18:00</td></tr>
</tbody></table>
</body></html>
"""

# Same table structure with zero rows, to exercise the empty-result path.
EMPTY_HTML = "<html><body><table class='type5'><tbody></tbody></table></body></html>"
|
||||||
|
|
||||||
|
|
||||||
|
def _mk_client(status_code=200, text=SAMPLE_HTML):
    """Build an AsyncMock HTTP client whose awaited ``get`` yields a canned response."""
    canned_response = AsyncMock()
    canned_response.status_code = status_code
    canned_response.text = text

    fake_client = AsyncMock()
    fake_client.get.return_value = canned_response
    return fake_client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_fetch_news_success_returns_n_items():
    """Happy path: title/date of the first n rows are parsed verbatim."""
    mock_client = _mk_client()
    items = await scraper.fetch_news(mock_client, "005930", n=2)
    assert len(items) == 2
    first = items[0]
    assert first["title"] == "삼성전자, HBM 양산 가시화"
    assert first["date"] == "2026.05.13 07:30"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_fetch_news_404_returns_empty():
    """A non-200 response must yield an empty list, not raise."""
    not_found_client = _mk_client(status_code=404, text="")
    assert await scraper.fetch_news(not_found_client, "999999", n=5) == []
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_fetch_news_empty_table_returns_empty():
    """A well-formed page with zero news rows yields an empty list."""
    bare_client = _mk_client(text=EMPTY_HTML)
    assert await scraper.fetch_news(bare_client, "005930", n=5) == []
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_fetch_news_n_caps_results():
    """n truncates results: the sample has 3 rows but only 2 come back."""
    capped_client = _mk_client()
    result = await scraper.fetch_news(capped_client, "005930", n=2)
    assert len(result) == 2
|
||||||
Reference in New Issue
Block a user