Files
web-page-backend/insta-lab/tests/test_trend_collector.py
gahusb cfbb72051f fix(insta-lab): Google Trends — RSS endpoint도 404 폐기, dailytrends JSON API로 교체
Google이 /trends/trendingsearches/daily/rss?geo=KR도 404로 폐기 (직전
fix에서 RSS로 교체했으나 NAS에서 실제 호출 시 404 확인). 대안으로 비공식
/trends/api/dailytrends?hl=ko&tz=-540&geo=KR&ns=15 JSON API로 교체.
응답 앞 `)]}'` XSSI 보호 prefix는 정규식으로 자르고 JSON 파싱.
중복 키워드 제거 + 등장 순서 보존.
2026-05-17 09:30:40 +09:00

158 lines
5.8 KiB
Python

import os
import gc
import tempfile
from unittest.mock import patch, MagicMock
import pytest
from app import db as db_module
from app import trend_collector
@pytest.fixture
def tmp_db(monkeypatch):
    """Provide a throwaway database file for a single test.

    Creates a temporary ``.db`` file, points ``db_module.DB_PATH`` at it,
    runs ``db_module.init_db()``, and yields the file path. The file and its
    ``-wal`` / ``-shm`` siblings are removed afterwards on a best-effort basis.

    Yields:
        str: Path of the temporary database file.
    """
    fd, path = tempfile.mkstemp(suffix=".db")
    # mkstemp hands back an open descriptor; only the path is needed here.
    os.close(fd)
    try:
        monkeypatch.setattr(db_module, "DB_PATH", path)
        db_module.init_db()
        yield path
    finally:
        # try/finally guarantees cleanup even when init_db() raises before
        # the yield (pytest skips post-yield code in that case).
        # NOTE(review): gc.collect() presumably releases lingering DB handles
        # so the files can be deleted — confirm against app.db.
        gc.collect()
        for ext in ("", "-wal", "-shm"):
            try:
                os.remove(path + ext)
            except OSError:
                # Best-effort: a sibling file may not exist or may be locked.
                pass
# Canned Naver news-search payload used as the mocked ``.json()`` result in
# the Naver tests. "기준금리" appears in two titles (once wrapped in <b> tags).
NAVER_RESPONSE = {
    "items": [
        {
            "title": "<b>기준금리</b> 인상",
            "link": "https://n.news.naver.com/a/1",
            "description": "한국은행 발표",
        },
        {
            "title": "환율 급등",
            "link": "https://n.news.naver.com/a/2",
            "description": "달러 강세",
        },
        {
            "title": "기준금리 추가 인상",
            "link": "https://n.news.naver.com/a/3",
            "description": "추가 발표",
        },
    ],
}
def test_fetch_naver_popular_extracts_top_terms(tmp_db, monkeypatch):
    """fetch_naver_popular should surface '기준금리' from the canned payload.

    Each returned trend must carry the requested category, the
    ``naver_popular`` source tag, and a score within [0.0, 1.0].
    """
    # Stub HTTP response: .json() returns the canned payload and
    # raise_for_status() is a no-op.
    stub_response = MagicMock(
        **{
            "json.return_value": NAVER_RESPONSE,
            "raise_for_status.return_value": None,
        }
    )
    with patch.object(trend_collector.requests, "get", return_value=stub_response):
        results = trend_collector.fetch_naver_popular("economy", per_seed=10, top_n=5)

    assert "기준금리" in [item["keyword"] for item in results]
    for item in results:
        assert item["category"] == "economy"
        assert item["source"] == "naver_popular"
        assert 0.0 <= item["score"] <= 1.0
def test_collect_naver_writes_to_db(tmp_db, monkeypatch):
    """collect_naver_popular_for should leave naver_popular rows in the DB."""
    stub_response = MagicMock()
    stub_response.raise_for_status.return_value = None
    stub_response.json.return_value = NAVER_RESPONSE

    with patch.object(trend_collector.requests, "get", return_value=stub_response):
        result = trend_collector.collect_naver_popular_for(["economy"])
    assert result > 0

    # Read back through the DB layer to confirm the rows were stored.
    stored = db_module.list_trends(source="naver_popular")
    assert len(stored) > 0
    for row in stored:
        assert row["source"] == "naver_popular"
def test_classify_keyword_with_cache(monkeypatch):
    """classify_keyword should call the LLM classifier once per repeated keyword."""
    llm_calls = []

    def fake_claude(keyword: str) -> str:
        # Record every invocation so the test can count LLM round-trips.
        llm_calls.append(keyword)
        return "economy"

    # Start from an empty cache so the first lookup must reach the stub.
    trend_collector._category_cache.clear()
    monkeypatch.setattr(trend_collector, "_llm_classify_one", fake_claude)

    first = trend_collector.classify_keyword("기준금리")
    second = trend_collector.classify_keyword("기준금리")

    assert first == second == "economy"
    # The second lookup must not have reached the stub again.
    assert len(llm_calls) == 1
def test_fetch_google_trends_parses_json_and_classifies(tmp_db, monkeypatch):
    """fetch_google_trends should parse the JSON body, dedupe, and classify.

    The mocked response carries the XSSI prefix in front of the JSON body and
    contains one repeated query. The result must list each keyword once, in
    first-appearance order, with the category supplied by the stubbed
    ``classify_keyword`` and the ``google_trends`` source tag.
    """
    import json as _json

    payload = {
        "default": {
            "trendingSearchesDays": [
                {
                    "date": "20260517",
                    "trendingSearches": [
                        {"title": {"query": "기준금리"}},
                        {"title": {"query": "BTS 컴백"}},
                        {"title": {"query": "스트레스 관리"}},
                        # Repeated query in the same payload — exercises deduplication.
                        {"title": {"query": "기준금리"}},
                    ],
                }
            ]
        }
    }
    fake_resp = MagicMock()
    # Shape of the real response: the `)]}',\n` XSSI prefix precedes the JSON.
    fake_resp.text = ")]}',\n" + _json.dumps(payload, ensure_ascii=False)
    fake_resp.raise_for_status.return_value = None
    monkeypatch.setattr(trend_collector.requests, "get", lambda *a, **kw: fake_resp)

    categories = {
        "기준금리": "economy",
        "BTS 컴백": "celebrity",
        "스트레스 관리": "psychology",
    }
    monkeypatch.setattr(
        trend_collector,
        "classify_keyword",
        lambda kw: categories.get(kw, "uncategorized"),
    )

    trends = trend_collector.fetch_google_trends()

    # Assert on the raw list: building a dict first would collapse duplicate
    # keywords itself and hide a missing dedupe in fetch_google_trends.
    keywords = [t["keyword"] for t in trends]
    assert keywords == ["기준금리", "BTS 컴백", "스트레스 관리"]

    by_kw = {t["keyword"]: t for t in trends}
    assert by_kw["기준금리"]["category"] == "economy"
    assert by_kw["BTS 컴백"]["category"] == "celebrity"
    assert by_kw["스트레스 관리"]["category"] == "psychology"
    assert all(t["source"] == "google_trends" for t in trends)
def test_collect_all_invokes_both_sources(tmp_db, monkeypatch):
    """collect_all should report each collector's result keyed by source name."""

    def stub_naver(cats):
        return 5

    def stub_google():
        return 3

    # Replace both collectors so only collect_all's aggregation is exercised.
    monkeypatch.setattr(trend_collector, "collect_naver_popular_for", stub_naver)
    monkeypatch.setattr(trend_collector, "collect_google_trends", stub_google)

    summary = trend_collector.collect_all(["economy"])
    assert summary == {"naver_popular": 5, "google_trends": 3}
def test_fetch_google_trends_graceful_on_api_failure(monkeypatch):
    """fetch_google_trends should return an empty list when the HTTP call fails."""
    # raise_for_status() blows up, simulating an error response from Google.
    failing_response = MagicMock(
        **{"raise_for_status.side_effect": RuntimeError("Google returned 404")}
    )

    def fake_get(*a, **kw):
        return failing_response

    monkeypatch.setattr(trend_collector.requests, "get", fake_get)

    assert trend_collector.fetch_google_trends() == []
def test_seeds_for_filters_placeholder(tmp_db, monkeypatch):
    """_seeds_for should drop placeholder seeds and keep the valid ones.

    The stored ``category_seeds`` template mixes invalid entries with one
    real keyword; only the real keyword is expected back.
    """
    # Uses the module-level db_module import; no local re-import is needed.
    db_module.upsert_prompt_template(
        "category_seeds",
        '{"economy": ["...", "", "a", "real_keyword"]}',
        "test",
    )
    out = trend_collector._seeds_for("economy")
    # '...', the empty string, and 'a' (shorter than 2 characters) are
    # filtered out, leaving only 'real_keyword'.
    assert out == ["real_keyword"]
def test_seeds_for_falls_back_when_all_invalid(tmp_db, monkeypatch):
    """_seeds_for should fall back to DEFAULT_CATEGORY_SEEDS when every seed is invalid."""
    from app.config import DEFAULT_CATEGORY_SEEDS

    # Uses the module-level db_module import; no local re-import is needed.
    db_module.upsert_prompt_template(
        "category_seeds",
        '{"economy": ["...", "TBD", ""]}',
        "test",
    )
    out = trend_collector._seeds_for("economy")
    # With nothing usable in the template, the built-in defaults for the
    # category must be returned.
    assert out == list(DEFAULT_CATEGORY_SEEDS["economy"])