fix(insta-lab): trend_collector — Google Trends RSS + seed placeholder filter

(1) pytrends 4.x가 Google API 변경으로 trending_searches(pn='south_korea')
가 404 반환 → daily trending searches RSS endpoint를 requests로 직접 호출
하도록 교체. pytrends 의존성 제거.

(2) category_seeds 프롬프트 템플릿에 placeholder ('...', 'TBD' 등) 또는
2자 미만 값이 들어가면 NAVER가 400 Bad Request 반환 → _seeds_for에
_is_valid_seed 가드 추가, 모두 invalid면 DEFAULT_CATEGORY_SEEDS 폴백.

테스트 8/8 PASS (기존 6 + placeholder/fallback 2 신규).
This commit is contained in:
2026-05-17 09:21:38 +09:00
parent ad6c744f2c
commit bf5897fc85
3 changed files with 98 additions and 36 deletions

View File

@@ -77,16 +77,20 @@ def test_classify_keyword_with_cache(monkeypatch):
assert calls["n"] == 1
def test_fetch_google_trends_parses_and_classifies(tmp_db, monkeypatch):
class FakePyTrends:
def __init__(self, *_a, **_kw):
pass
def trending_searches(self, pn="south_korea"):
import pandas as pd
return pd.DataFrame({"0": ["기준금리", "BTS 컴백", "스트레스 관리"]})
monkeypatch.setattr(trend_collector, "TrendReq", FakePyTrends)
def test_fetch_google_trends_parses_rss_and_classifies(tmp_db, monkeypatch):
fake_rss = """<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Daily Search Trends</title>
<item><title>기준금리</title></item>
<item><title>BTS 컴백</title></item>
<item><title>스트레스 관리</title></item>
</channel>
</rss>"""
fake_resp = MagicMock()
fake_resp.text = fake_rss
fake_resp.raise_for_status.return_value = None
monkeypatch.setattr(trend_collector.requests, "get", lambda *a, **kw: fake_resp)
monkeypatch.setattr(trend_collector, "classify_keyword",
lambda kw: {"기준금리": "economy", "BTS 컴백": "celebrity",
"스트레스 관리": "psychology"}.get(kw, "uncategorized"))
@@ -108,14 +112,36 @@ def test_collect_all_invokes_both_sources(tmp_db, monkeypatch):
assert out == {"naver_popular": 5, "google_trends": 3}
def test_fetch_google_trends_graceful_on_pytrends_failure(monkeypatch):
class FakePyTrends:
def __init__(self, *_a, **_kw):
pass
def trending_searches(self, pn="south_korea"):
raise RuntimeError("rate limited")
monkeypatch.setattr(trend_collector, "TrendReq", FakePyTrends)
def test_fetch_google_trends_graceful_on_rss_failure(monkeypatch):
fake_resp = MagicMock()
fake_resp.raise_for_status.side_effect = RuntimeError("Google returned 404")
monkeypatch.setattr(trend_collector.requests, "get", lambda *a, **kw: fake_resp)
out = trend_collector.fetch_google_trends()
assert out == []
def test_seeds_for_filters_placeholder(tmp_db, monkeypatch):
"""category_seeds 템플릿에 placeholder '...'가 들어가도 DEFAULT 폴백."""
from app import db as db_module
db_module.upsert_prompt_template(
"category_seeds",
'{"economy": ["...", "", "a", "real_keyword"]}',
"test",
)
out = trend_collector._seeds_for("economy")
# '...', '…', 'a'(2자 미만)는 필터링되고 'real_keyword'만 남음
assert out == ["real_keyword"]
def test_seeds_for_falls_back_when_all_invalid(tmp_db, monkeypatch):
"""모든 시드가 invalid면 DEFAULT_CATEGORY_SEEDS 폴백."""
from app import db as db_module
db_module.upsert_prompt_template(
"category_seeds",
'{"economy": ["...", "TBD", ""]}',
"test",
)
out = trend_collector._seeds_for("economy")
# DEFAULT_CATEGORY_SEEDS["economy"] 가 반환되어야 함
from app.config import DEFAULT_CATEGORY_SEEDS
assert out == list(DEFAULT_CATEGORY_SEEDS["economy"])