feat(insta-lab): keyword_extractor with frequency + Claude refinement
This commit is contained in:
65
insta-lab/tests/test_keyword_extractor.py
Normal file
65
insta-lab/tests/test_keyword_extractor.py
Normal file
@@ -0,0 +1,65 @@
|
||||
import os
|
||||
import tempfile
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from app import db as db_module
|
||||
from app import keyword_extractor
|
||||
|
||||
|
||||
@pytest.fixture
def tmp_db(monkeypatch):
    """Yield the path of a freshly initialised throwaway database file.

    A temporary ``.db`` file is created, ``db_module.DB_PATH`` is pointed at
    it through ``monkeypatch``, and ``db_module.init_db()`` is called before
    the path is handed to the test. On teardown the file and its ``-wal`` /
    ``-shm`` sidecars are removed on a best-effort basis.

    Setup and the ``yield`` sit inside ``try/finally`` so the temporary file
    is also removed when ``init_db()`` itself fails.
    """
    import gc

    fd, path = tempfile.mkstemp(suffix=".db")
    # mkstemp hands back an open descriptor; only the path is needed here.
    os.close(fd)
    try:
        monkeypatch.setattr(db_module, "DB_PATH", path)
        db_module.init_db()
        yield path
    finally:
        # Windows-safe cleanup: close handles + remove sidecars.
        # gc.collect() is meant to release connection objects that are no
        # longer referenced, so the files are not still open when removed.
        gc.collect()
        for ext in ("", "-wal", "-shm"):
            try:
                os.remove(path + ext)
            except OSError:
                # Best-effort: a sidecar may not exist, or the file may
                # still be locked; neither should fail the test run.
                pass
|
||||
|
||||
|
||||
def test_count_nouns_extracts_korean_nouns():
    """Check the per-noun tallies ``_count_nouns`` reports for a short text."""
    sample = "기준금리 인상으로 환율 급등. 기준금리 추가 인상 가능성"
    tally = keyword_extractor._count_nouns(sample)

    # "기준금리" occurs twice in the sample, "환율" once.
    expected = {"기준금리": 2, "환율": 1}
    for noun, times in expected.items():
        assert tally[noun] == times
|
||||
|
||||
|
||||
def test_top_candidates_filters_stopwords():
    """Check that ``_top_candidates`` drops stopwords but keeps real keywords."""
    frequencies = {"기준금리": 5, "있다": 7, "환율": 3, "그리고": 4}
    ranked = keyword_extractor._top_candidates(frequencies, n=10)

    # Each ranked entry is a (keyword, value) pair; only the keyword matters.
    selected = [word for word, _value in ranked]

    # The stopwords must be excluded even though their counts are high.
    for stopword in ("있다", "그리고"):
        assert stopword not in selected
    assert "기준금리" in selected
|
||||
|
||||
|
||||
def test_extract_for_category_persists(tmp_db):
    """Check that ``extract_for_category`` returns and stores refined keywords.

    Three "economy" articles are seeded, the LLM refinement step is replaced
    with a canned result, and the test verifies both the returned list and
    the rows reported by ``list_trending_keywords``.
    """
    # Seed articles.
    seed_rows = [
        {
            "category": "economy",
            "title": f"기준금리 인상 {i}",
            "link": f"https://example.com/{i}",
            "summary": "환율도 영향",
        }
        for i in range(3)
    ]
    for row in seed_rows:
        db_module.add_news_article(row)

    # Canned LLM refinement so the test does not call the real refinement step.
    canned = [
        {"keyword": "기준금리", "score": 0.92, "reason": "핵심 금융 이슈"},
        {"keyword": "환율", "score": 0.71, "reason": "시장 영향"},
    ]
    llm_stub = patch.object(
        keyword_extractor, "_refine_with_llm", return_value=canned
    )
    with llm_stub:
        result = keyword_extractor.extract_for_category("economy", limit=2)

    assert len(result) == 2
    assert result[0]["keyword"] == "기준금리"

    stored = db_module.list_trending_keywords(category="economy")
    stored_keywords = {entry["keyword"] for entry in stored}
    assert stored_keywords == {"기준금리", "환율"}
|
||||
Reference in New Issue
Block a user