feat(insta-lab): selection.py 순수 선별 점수(4신호)
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
81
insta-lab/app/selection.py
Normal file
81
insta-lab/app/selection.py
Normal file
@@ -0,0 +1,81 @@
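"""Autonomous selection of publish-worthy candidates: pure scoring functions.

No external I/O happens here, which makes this module a unit-test target.

Signals: dedup (gate), freshness, account_fit, claude (optional).
final = weighted sum, normalized over the signals that are present.
eligible = passed dedup and final >= threshold.
"""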
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
# Relative weight of each soft signal in the final score. Only the signals
# actually present for a candidate take part, and their weights are
# re-normalized at scoring time.
DEFAULT_WEIGHTS = {
    "freshness": 0.3,
    "account_fit": 0.3,
    "claude": 0.4,
}

# Age, in hours, at which a suggestion's freshness has decayed to 0 (7 days).
FRESH_WINDOW_HOURS = 7 * 24.0
|
||||
|
||||
|
||||
def _parse_iso(s: str) -> datetime:
|
||||
return datetime.fromisoformat(s.replace("Z", "+00:00")).astimezone(timezone.utc)
|
||||
|
||||
|
||||
def _norm(kw: str) -> str:
|
||||
return (kw or "").strip().lower()
|
||||
|
||||
|
||||
def _is_duplicate(keyword: str, category: str, issued: List[Dict[str, Any]]) -> bool:
    """Tell whether ``keyword`` overlaps an already-issued topic in ``category``.

    Keywords are compared after normalization with ``_norm``. Two keywords
    overlap when one contains the other as a substring, which includes exact
    equality. Issued entries from another category, or whose keyword
    normalizes to an empty string, are ignored.

    Args:
        keyword: Candidate keyword to check.
        category: Category of the candidate; only issued entries with an
            equal ``"category"`` value are considered.
        issued: Issued-topic dicts, read through their ``"category"`` and
            ``"keyword"`` keys.

    Returns:
        True if an overlapping issued keyword exists. False otherwise, and
        always False when ``keyword`` normalizes to an empty string.
    """
    needle = _norm(keyword)
    if not needle:
        return False

    same_category = (entry for entry in issued if entry.get("category") == category)
    for entry in same_category:
        seen = _norm(entry.get("keyword", ""))
        # Equality needs no separate test: equal strings contain each other.
        if seen and (needle in seen or seen in needle):
            return True
    return False
|
||||
|
||||
|
||||
def _freshness(suggested_at: str, now: datetime) -> float:
    """Score how recent a suggestion is, on a 0.0 to 1.0 scale.

    The score falls linearly from 1.0 at age zero to 0.0 at
    ``FRESH_WINDOW_HOURS`` and is clamped to that range, so timestamps later
    than ``now`` score 1.0.

    Args:
        suggested_at: ISO-8601 timestamp of the suggestion.
        now: Reference time the age is measured against.

    Returns:
        The clamped freshness score. 0.0 is returned when the timestamp
        cannot be parsed or the age cannot be computed.
    """
    try:
        age = now - _parse_iso(suggested_at)
        age_hours = age.total_seconds() / 3600.0
    except Exception:
        # Best-effort: an unusable timestamp counts as not fresh at all
        # rather than aborting the whole scoring run.
        return 0.0

    remaining = 1.0 - age_hours / FRESH_WINDOW_HOURS
    return max(0.0, min(1.0, remaining))
|
||||
|
||||
|
||||
def score_candidates(
    candidates: List[Dict[str, Any]],
    issued_topics: List[Dict[str, Any]],
    prefs: Dict[str, float],
    claude_scores: Optional[Dict[int, float]] = None,
    weights: Optional[Dict[str, float]] = None,
    threshold: float = 0.6,
    now_iso: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """Score candidates and rank them by publish-worthiness.

    Each candidate gets up to three soft signals in the range 0.0 to 1.0:
    ``freshness``, ``account_fit`` and, when a score is supplied for its id,
    ``claude``. The final score is the weighted mean of the signals that are
    present. Duplicate detection is a hard gate: a duplicate is never
    eligible, whatever its score.

    Args:
        candidates: Candidate dicts. ``"id"`` is required; ``"keyword"``,
            ``"category"``, ``"suggested_at"`` and ``"score"`` are optional.
            A missing or ``None`` score counts as 0.0.
        issued_topics: Already-issued topics used for duplicate detection.
        prefs: Category to preference weight. Categories not listed default
            to 1.0. Weights are normalized by the largest value; a
            non-positive maximum disables normalization instead of dividing
            by zero. ``None`` is treated as empty.
        claude_scores: Optional candidate id to score, clamped to 0.0-1.0.
            A missing id or a ``None`` value means the signal is absent.
        weights: Optional per-signal weights. Signal names not listed fall
            back to ``DEFAULT_WEIGHTS``.
        threshold: Minimum final score for a non-duplicate to be eligible.
        now_iso: ISO-8601 reference time; defaults to the current UTC time.

    Returns:
        One result dict per candidate with ``"id"``, ``"keyword"``,
        ``"category"``, ``"final_score"`` (rounded to 4 places),
        ``"eligible"`` and ``"breakdown"``. Eligible results come first,
        then results are ordered by descending ``final_score``.

    Raises:
        KeyError: if a candidate has no ``"id"`` key.
    """
    prefs = prefs or {}
    # Merge partial overrides with the defaults so that a custom dict which
    # omits a signal name cannot raise KeyError further down.
    w = {**DEFAULT_WEIGHTS, **weights} if weights else DEFAULT_WEIGHTS
    now = _parse_iso(now_iso) if now_iso else datetime.now(timezone.utc)

    max_w = max(prefs.values()) if prefs else 1.0
    if max_w <= 0:
        # All preferences are zero or negative: skip normalization rather
        # than dividing by zero.
        max_w = 1.0

    out: List[Dict[str, Any]] = []
    for c in candidates:
        cid = c["id"]
        cat = c.get("category", "")
        dup = _is_duplicate(c.get("keyword", ""), cat, issued_topics)
        freshness = _freshness(c.get("suggested_at", ""), now)

        pref_ratio = prefs.get(cat, 1.0) / max_w
        raw_score = float(c.get("score") or 0.0)
        account_fit = max(0.0, min(1.0, pref_ratio * raw_score))

        claude = None
        if claude_scores is not None and cid in claude_scores:
            raw_claude = claude_scores[cid]
            if raw_claude is not None:
                claude = max(0.0, min(1.0, float(raw_claude)))

        parts = [("freshness", freshness), ("account_fit", account_fit)]
        if claude is not None:
            parts.append(("claude", claude))

        # Normalize by the weights of the signals actually present, so a
        # missing claude score does not drag the final score down.
        total_w = sum(w[name] for name, _ in parts)
        final = sum(w[name] * val for name, val in parts) / total_w if total_w else 0.0

        # Eligibility is decided on the unrounded score; only the reported
        # value is rounded.
        eligible = (not dup) and (final >= threshold)
        out.append({
            "id": cid, "keyword": c.get("keyword"), "category": cat,
            "final_score": round(final, 4), "eligible": eligible,
            "breakdown": {"dedup_excluded": dup, "freshness": round(freshness, 4),
                          "account_fit": round(account_fit, 4), "claude": claude},
        })

    # False sorts before True, so `not eligible` puts eligible results first.
    out.sort(key=lambda r: (not r["eligible"], -r["final_score"]))
    return out
|
||||
Reference in New Issue
Block a user