diff --git a/insta-lab/app/selection.py b/insta-lab/app/selection.py
new file mode 100644
index 0000000..4edca33
--- /dev/null
+++ b/insta-lab/app/selection.py
@@ -0,0 +1,140 @@
+"""Autonomous publish-worthiness selection: pure scoring functions.
+
+No external IO is performed here, so the module is directly unit-testable.
+
+Signals: dedup (gate), freshness, account_fit, claude (optional).
+final = weighted sum, normalized over the signals that are present.
+eligible = passes dedup and final >= threshold.
+"""
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from typing import Any, Dict, List, Optional
+
+DEFAULT_WEIGHTS = {"freshness": 0.3, "account_fit": 0.3, "claude": 0.4}
+FRESH_WINDOW_HOURS = 168.0  # 7 days; freshness reaches 0 at this age
+
+
+def _parse_iso(s: str) -> datetime:
+    """Parse an ISO-8601 timestamp into an aware UTC datetime.
+
+    A trailing "Z" is accepted. Timestamps without an offset are taken to be
+    UTC rather than local time, so results do not depend on the host timezone.
+    """
+    dt = datetime.fromisoformat(s.replace("Z", "+00:00"))
+    if dt.tzinfo is None:
+        return dt.replace(tzinfo=timezone.utc)
+    return dt.astimezone(timezone.utc)
+
+
+def _norm(kw: str) -> str:
+    """Normalize a keyword for comparison (trimmed, lower-cased; None -> "")."""
+    return (kw or "").strip().lower()
+
+
+def _clamp01(x: float) -> float:
+    """Clamp *x* into the closed interval [0.0, 1.0]."""
+    return max(0.0, min(1.0, x))
+
+
+def _is_duplicate(keyword: str, category: str, issued: List[Dict[str, Any]]) -> bool:
+    """Return True if *keyword* overlaps an issued keyword of the same category.
+
+    Two keywords overlap when, after normalization, they are equal or one
+    contains the other. Blank keywords never count as duplicates.
+    """
+    n = _norm(keyword)
+    if not n:
+        return False
+    for it in issued:
+        if it.get("category") != category:
+            continue
+        m = _norm(it.get("keyword", ""))
+        # Equality is already covered by the containment checks.
+        if m and (n in m or m in n):
+            return True
+    return False
+
+
+def _freshness(suggested_at: str, now: datetime) -> float:
+    """Return a 0..1 freshness score that decays linearly with age.
+
+    1.0 means suggested at (or after) *now*; 0.0 means FRESH_WINDOW_HOURS old
+    or older. A missing or unparseable timestamp scores 0.0.
+    """
+    try:
+        hours = (now - _parse_iso(suggested_at)).total_seconds() / 3600.0
+    except (AttributeError, TypeError, ValueError, OverflowError):
+        return 0.0
+    return _clamp01(1.0 - hours / FRESH_WINDOW_HOURS)
+
+
+def score_candidates(
+    candidates: List[Dict[str, Any]],
+    issued_topics: List[Dict[str, Any]],
+    prefs: Dict[str, float],
+    claude_scores: Optional[Dict[int, float]] = None,
+    weights: Optional[Dict[str, float]] = None,
+    threshold: float = 0.6,
+    now_iso: Optional[str] = None,
+) -> List[Dict[str, Any]]:
+    """Score candidates and rank them for publication.
+
+    Args:
+        candidates: Dicts with "id" (required) and optional "keyword",
+            "category", "score" and "suggested_at" (ISO-8601).
+        issued_topics: Already issued topics ("keyword", "category") used by
+            the dedup gate.
+        prefs: Per-category preference weights; unlisted categories use 1.0.
+        claude_scores: Optional map of candidate id to a 0..1 score. Missing
+            ids and None values leave the claude signal out.
+        weights: Signal weights; DEFAULT_WEIGHTS when None or empty. Signals
+            absent from the mapping get weight 0.
+        threshold: Minimum final score for a candidate to be eligible.
+        now_iso: Reference time (ISO-8601); defaults to the current UTC time.
+
+    Returns:
+        One result dict per candidate, eligible ones first, then by
+        descending final score.
+
+    Raises:
+        KeyError: If a candidate has no "id".
+    """
+    w = weights or DEFAULT_WEIGHTS
+    prefs = prefs or {}
+    now = _parse_iso(now_iso) if now_iso else datetime.now(timezone.utc)
+    max_w = max(prefs.values()) if prefs else 1.0
+    if max_w <= 0:
+        # All preferences are zero or negative: skip the normalization
+        # instead of dividing by zero (or flipping signs).
+        max_w = 1.0
+    out: List[Dict[str, Any]] = []
+    for c in candidates:
+        cid = c["id"]
+        cat = c.get("category", "")
+        dup = _is_duplicate(c.get("keyword", ""), cat, issued_topics)
+        freshness = _freshness(c.get("suggested_at", ""), now)
+        weight = prefs.get(cat, 1.0)
+        # "score" may be absent or None; both count as 0.
+        account_fit = _clamp01((weight / max_w) * float(c.get("score") or 0.0))
+        claude = None
+        if claude_scores is not None and claude_scores.get(cid) is not None:
+            claude = _clamp01(float(claude_scores[cid]))
+        parts = [("freshness", freshness), ("account_fit", account_fit)]
+        if claude is not None:
+            parts.append(("claude", claude))
+        # Normalize over the signals actually present so a missing claude
+        # score does not drag the final score down.
+        total_w = sum(w.get(name, 0.0) for name, _ in parts)
+        weighted = sum(w.get(name, 0.0) * val for name, val in parts)
+        final = weighted / total_w if total_w else 0.0
+        eligible = (not dup) and (final >= threshold)
+        out.append({
+            "id": cid, "keyword": c.get("keyword"), "category": cat,
+            "final_score": round(final, 4), "eligible": eligible,
+            "breakdown": {"dedup_excluded": dup, "freshness": round(freshness, 4),
+                          "account_fit": round(account_fit, 4), "claude": claude},
+        })
+    # Eligible candidates first, then highest final score.
+    out.sort(key=lambda x: (not x["eligible"], -x["final_score"]))
+    return out
diff --git a/insta-lab/tests/test_selection.py b/insta-lab/tests/test_selection.py
new file mode 100644
index 0000000..818e80f
--- /dev/null
+++ b/insta-lab/tests/test_selection.py
@@ -0,0 +1,65 @@
+from app.selection import score_candidates
+
+NOW = "2026-06-11T00:00:00Z"
+
+
+def _cand(kid, kw, cat, score, suggested_at):
+    return {"id": kid, "keyword": kw, "category": cat, "score": score, "suggested_at": suggested_at}
+
+
+def test_dedup_excludes_recent_issued():
+    cands = [_cand(1, "금리", "economy", 0.9, "2026-06-11T00:00:00Z")]
+    issued = [{"keyword": "금리", "category": "economy"}]
+    out = score_candidates(cands, issued, prefs={}, claude_scores=None, threshold=0.0, now_iso=NOW)
+    assert out[0]["eligible"] is False
+
+
+def test_freshness_recent_higher():
+    fresh = _cand(1, "A", "economy", 0.5, "2026-06-11T00:00:00Z")
+    stale = _cand(2, "B", "economy", 0.5, "2026-06-04T00:00:00Z")
+    out = {c["id"]: c for c in score_candidates([fresh, stale], [], {}, None, threshold=0.0, now_iso=NOW)}
+    assert out[1]["breakdown"]["freshness"] > out[2]["breakdown"]["freshness"]
+
+
+def test_account_fit_uses_weight():
+    cands = [_cand(1, "A", "economy", 0.8, NOW), _cand(2, "B", "psychology", 0.8, NOW)]
+    prefs = {"economy": 2.0, "psychology": 1.0}
+    out = {c["id"]: c for c in score_candidates(cands, [], prefs, None, threshold=0.0, now_iso=NOW)}
+    assert out[1]["breakdown"]["account_fit"] > out[2]["breakdown"]["account_fit"]
+
+
+def test_threshold_gate():
+    cands = [_cand(1, "A", "economy", 0.1, "2026-06-01T00:00:00Z")]
+    out = score_candidates(cands, [], {}, None, threshold=0.6, now_iso=NOW)
+    assert out[0]["eligible"] is False
+
+
+def test_claude_missing_renormalizes():
+    cands = [_cand(1, "A", "economy", 1.0, NOW)]
+    out = score_candidates(cands, [], {"economy": 1.0}, None, threshold=0.0, now_iso=NOW)
+    assert out[0]["breakdown"]["claude"] is None
+    assert 0.0 <= out[0]["final_score"] <= 1.0
+
+
+def test_claude_included_when_provided():
+    cands = [_cand(1, "A", "economy", 0.5, NOW)]
+    out = score_candidates(cands, [], {"economy": 1.0}, {1: 1.0}, threshold=0.0, now_iso=NOW)
+    assert out[0]["breakdown"]["claude"] == 1.0
+
+
+def test_zero_prefs_do_not_divide_by_zero():
+    cands = [_cand(1, "A", "economy", 0.8, NOW)]
+    out = score_candidates(cands, [], {"economy": 0.0}, None, threshold=0.0, now_iso=NOW)
+    assert out[0]["breakdown"]["account_fit"] == 0.0
+
+
+def test_naive_timestamp_is_treated_as_utc():
+    cands = [_cand(1, "A", "economy", 0.5, "2026-06-07T12:00:00")]
+    out = score_candidates(cands, [], {}, None, threshold=0.0, now_iso=NOW)
+    assert out[0]["breakdown"]["freshness"] == 0.5
+
+
+def test_partial_weights_are_accepted():
+    cands = [_cand(1, "A", "economy", 1.0, NOW)]
+    out = score_candidates(cands, [], {}, None, weights={"freshness": 1.0}, threshold=0.0, now_iso=NOW)
+    assert out[0]["final_score"] == 1.0