feat(insta-lab): news_collector with NAVER news.json + dedupe
This commit is contained in:
89
insta-lab/tests/test_news_collector.py
Normal file
89
insta-lab/tests/test_news_collector.py
Normal file
@@ -0,0 +1,89 @@
|
||||
from unittest.mock import patch, MagicMock
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
import pytest
|
||||
|
||||
from app import db as db_module
|
||||
from app import news_collector
|
||||
|
||||
|
||||
@pytest.fixture
def tmp_db(monkeypatch):
    """Yield the path of a temporary SQLite database for one test.

    A new ``.db`` temp file is created, ``db_module.DB_PATH`` is pointed at
    it via ``monkeypatch`` and ``db_module.init_db()`` is called. After the
    test, the database file and its ``-wal`` / ``-shm`` companions are
    removed; files that do not exist are skipped.
    """
    import gc

    handle, db_file = tempfile.mkstemp(suffix=".db")
    # Only the path is needed; release the OS-level descriptor immediately.
    os.close(handle)
    monkeypatch.setattr(db_module, "DB_PATH", db_file)
    db_module.init_db()

    yield db_file

    # Force a collection before deleting so lingering SQLite handles are
    # released first (needed on Windows).
    gc.collect()
    for candidate in (db_file, db_file + "-wal", db_file + "-shm"):
        try:
            os.remove(candidate)
        except FileNotFoundError:
            continue
|
||||
|
||||
|
||||
# Canned JSON payload returned by the mocked ``requests.get(...).json()`` in
# the tests of this module. The first item carries ``<b>`` markup in its
# title and description; the second item is plain text.
SAMPLE_RESPONSE = {
    "items": [
        {
            "title": "<b>금리</b> 인상 단행",
            "originallink": "https://news.example.com/1",
            "link": "https://n.news.naver.com/article/1",
            "description": "한국은행이 <b>기준금리</b>를 25bp 올렸다.",
            "pubDate": "Fri, 15 May 2026 08:00:00 +0900",
        },
        {
            "title": "환율 급등",
            "originallink": "https://news.example.com/2",
            "link": "https://n.news.naver.com/article/2",
            "description": "원달러 환율이 1400원을 돌파했다.",
            "pubDate": "Fri, 15 May 2026 09:00:00 +0900",
        },
    ],
}
|
||||
|
||||
|
||||
def test_strip_html_and_decode_entities():
    """Check that ``_clean`` drops tags, decodes entities and trims whitespace."""
    # The input has to contain real HTML entities (&quot;, &amp;); with
    # literal quotes and a literal ampersand the decoding step named in this
    # test would never be exercised.
    out = news_collector._clean(' <b>&quot;테스트&quot;</b> &amp; 아이템 ')
    assert out == '"테스트" & 아이템'
|
||||
|
||||
|
||||
def test_search_news_parses_items(tmp_db):
    """``search_news`` yields one entry per payload item with markup removed."""
    # Stand-in for the HTTP response: ``raise_for_status()`` is a no-op and
    # ``json()`` hands back the canned payload.
    response = MagicMock(
        **{
            "json.return_value": SAMPLE_RESPONSE,
            "raise_for_status.return_value": None,
        }
    )

    with patch.object(news_collector.requests, "get", return_value=response):
        items = news_collector.search_news("금리", display=10)

    assert len(items) == 2
    first = items[0]
    # The <b> tags from the payload title must be gone.
    assert first["title"] == "금리 인상 단행"
    assert first["summary"].startswith("한국은행")
|
||||
|
||||
|
||||
def test_collect_for_category_inserts(tmp_db):
    """Collecting for a category stores both links from the mocked payload."""
    response = MagicMock(
        **{
            "json.return_value": SAMPLE_RESPONSE,
            "raise_for_status.return_value": None,
        }
    )

    with patch.object(news_collector.requests, "get", return_value=response):
        news_collector.collect_for_category("economy", seed_keywords=["금리"], per_keyword=10)

    stored = db_module.list_news_articles(category="economy", days=7)
    stored_links = {row["link"] for row in stored}
    expected_links = {
        "https://n.news.naver.com/article/1",
        "https://n.news.naver.com/article/2",
    }
    assert stored_links == expected_links
|
||||
|
||||
|
||||
def test_collect_dedupes_existing(tmp_db):
    """A link already stored must not be inserted a second time.

    One article with the first payload link is seeded before collection, so
    after ``collect_for_category`` runs the table should hold exactly one
    row per payload link.
    """
    db_module.add_news_article({
        "category": "economy", "title": "기존",
        "link": "https://n.news.naver.com/article/1", "summary": ""
    })
    fake_resp = MagicMock()
    fake_resp.json.return_value = SAMPLE_RESPONSE
    fake_resp.raise_for_status.return_value = None

    with patch.object(news_collector.requests, "get", return_value=fake_resp):
        news_collector.collect_for_category("economy", seed_keywords=["금리"])

    rows = db_module.list_news_articles(category="economy", days=7)
    # 1 pre-existing + 1 newly added (the other link); UNIQUE link blocks duplicate insert
    assert len(rows) == 2
    # A bare count would also pass if the duplicate were stored twice and the
    # new article dropped, so pin the exact links as well.
    assert sorted(r["link"] for r in rows) == [
        "https://n.news.naver.com/article/1",
        "https://n.news.naver.com/article/2",
    ]
|
||||
Reference in New Issue
Block a user