diff --git a/agent-office/app/agents/insta.py b/agent-office/app/agents/insta.py index 74c4f2b..15e74d8 100644 --- a/agent-office/app/agents/insta.py +++ b/agent-office/app/agents/insta.py @@ -18,6 +18,26 @@ from ..telegram import messaging logger = logging.getLogger(__name__) +# 텔레그램 후보 푸시 시 "확실한 것만" 보내기 위한 최소 신뢰도 (키워드 score 0~1) +KEYWORD_MIN_SCORE = 0.7 + + +def _dedup_and_filter_keywords( + keywords: List[Dict[str, Any]], min_score: float = KEYWORD_MIN_SCORE, +) -> List[Dict[str, Any]]: + """score >= min_score 인 키워드만 남기고, 동일 keyword 중복 제거(최고 score 유지). + 결과는 score 내림차순. 텔레그램 후보 푸시 전 정리용.""" + best: Dict[str, Dict[str, Any]] = {} + for k in keywords: + if float(k.get("score", 0)) < min_score: + continue + name = str(k.get("keyword", "")).strip() + if not name: + continue + if name not in best or k["score"] > best[name]["score"]: + best[name] = k + return sorted(best.values(), key=lambda k: -k["score"]) + async def _send_media_group(media: List[Dict[str, Any]], caption: str = "") -> Dict[str, Any]: """텔레그램 sendMediaGroup. media는 InputMediaPhoto dicts. @@ -89,14 +109,18 @@ class InstaAgent(BaseAgent): raise TimeoutError(f"{step} timeout {timeout_sec}s") async def _push_keyword_candidates(self, keywords: List[Dict[str, Any]]) -> None: - by_cat: Dict[str, List[Dict[str, Any]]] = {} - for k in keywords: - by_cat.setdefault(k["category"], []).append(k) - if not by_cat: - await messaging.send_raw("📰 [인스타 큐레이터] 오늘은 추천할 키워드가 없습니다.") + # 중복 제거 + 신뢰도(score) 임계값 이상만 — "확실한 것만" 정리해서 전송 + filtered = _dedup_and_filter_keywords(keywords) + if not filtered: + await messaging.send_raw( + f"📰 [인스타 큐레이터] 오늘은 확실한 추천 키워드가 없습니다 (신뢰도 {KEYWORD_MIN_SCORE:.1f}+ 기준)." 
"""Unit tests for the keyword filter helper in ``app.agents.insta``."""
import os
import sys
import tempfile

# Reserve a unique ".db" path, then delete the placeholder file immediately so
# AGENT_OFFICE_DB_PATH names a path that does not exist yet.
# NOTE(review): presumably the app creates its own database there — confirm.
_fd, _TMP = tempfile.mkstemp(suffix=".db")
os.close(_fd)
os.unlink(_TMP)
os.environ["AGENT_OFFICE_DB_PATH"] = _TMP

# Put the directory above this tests folder at the front of the import path so
# the ``app`` package resolves.
_TESTS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.dirname(_TESTS_DIR))

from app.agents.insta import _dedup_and_filter_keywords, KEYWORD_MIN_SCORE


def test_filters_below_threshold():
    """Keywords scoring under the 0.7 threshold are excluded."""
    candidates = [
        {"id": 1, "keyword": "금리인하", "category": "경제", "score": 0.9},
        {"id": 2, "keyword": "환율", "category": "경제", "score": 0.6},  # cut
        {"id": 3, "keyword": "반도체", "category": "경제", "score": 0.71},
    ]
    survivors = _dedup_and_filter_keywords(candidates, min_score=0.7)
    assert {item["keyword"] for item in survivors} == {"금리인하", "반도체"}


def test_dedup_keeps_highest_score():
    """For a repeated keyword only the highest-scoring entry is kept."""
    candidates = [
        {"id": 1, "keyword": "AI", "category": "경제", "score": 0.75},
        # Same keyword, higher score: this one must win.
        {"id": 2, "keyword": "AI", "category": "기술", "score": 0.92},
    ]
    survivors = _dedup_and_filter_keywords(candidates, min_score=0.7)
    assert len(survivors) == 1
    winner = survivors[0]
    assert winner["id"] == 2
    assert winner["score"] == 0.92


def test_sorted_by_score_desc():
    """Results come back ordered from highest to lowest score."""
    scores = {"a": 0.72, "b": 0.95, "c": 0.80}
    candidates = [
        {"id": idx, "keyword": name, "category": "c", "score": score}
        for idx, (name, score) in enumerate(scores.items(), start=1)
    ]
    survivors = _dedup_and_filter_keywords(candidates, min_score=0.7)
    ordered_names = [item["keyword"] for item in survivors]
    assert ordered_names == ["b", "c", "a"]


def test_empty_when_all_below_threshold():
    """An empty list is returned when nothing reaches the threshold."""
    candidates = [{"id": 1, "keyword": "x", "category": "c", "score": 0.4}]
    survivors = _dedup_and_filter_keywords(candidates, min_score=0.7)
    assert survivors == []


def test_default_threshold_is_0_7():
    """The module-level default threshold is pinned at 0.7."""
    assert KEYWORD_MIN_SCORE == 0.7