feat(insta): dedup_window_days config end-to-end wiring (spec 6.4)

- insta-lab ranked_keywords: add dedup_window_days Query param (default 14, ge=1, le=90); pass to db.list_recent_issued_topics
- service_proxy.insta_ranked: add dedup_window_days param (default 14); include in GET params
- InstaAgent.on_schedule: read dedup_window_days from custom_config (default 14); pass to insta_ranked call
- test_ranked_respects_dedup_window: verifies window param gates eligible flag correctly

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-11 02:55:46 +09:00
parent 149e7c40fe
commit e1b1944f43
4 changed files with 41 additions and 5 deletions

View File

@@ -74,6 +74,7 @@ class InstaAgent(BaseAgent):
autonomous = bool(custom.get("autonomous_issue", False)) autonomous = bool(custom.get("autonomous_issue", False))
threshold = float(custom.get("select_threshold", 0.6)) threshold = float(custom.get("select_threshold", 0.6))
max_per_day = int(custom.get("max_per_day", 2)) max_per_day = int(custom.get("max_per_day", 2))
dedup_window_days = int(custom.get("dedup_window_days", 14))
task_id = create_task(self.agent_id, "insta_daily", {"auto_select": auto_select}, task_id = create_task(self.agent_id, "insta_daily", {"auto_select": auto_select},
requires_approval=False) requires_approval=False)
@@ -86,7 +87,7 @@ class InstaAgent(BaseAgent):
add_log(self.agent_id, f"insta preferences unavailable: {_pref_err}", "warning", task_id) add_log(self.agent_id, f"insta preferences unavailable: {_pref_err}", "warning", task_id)
await self._run_collect_and_extract() await self._run_collect_and_extract()
if autonomous: if autonomous:
ranked = await service_proxy.insta_ranked(threshold=threshold, limit=20) ranked = await service_proxy.insta_ranked(threshold=threshold, limit=20, dedup_window_days=dedup_window_days)
eligible = [r for r in ranked if r.get("eligible")][:max_per_day] eligible = [r for r in ranked if r.get("eligible")][:max_per_day]
if not eligible: if not eligible:
await messaging.send_raw("📰 [인스타 큐레이터] 오늘은 발행할 가치 있는 주제가 없습니다.") await messaging.send_raw("📰 [인스타 큐레이터] 오늘은 발행할 가치 있는 주제가 없습니다.")

View File

@@ -228,11 +228,11 @@ async def insta_put_preferences(weights: Dict[str, float]) -> Dict[str, Any]:
return resp.json() return resp.json()
async def insta_ranked(threshold: float = 0.6, limit: int = 20) -> list: async def insta_ranked(threshold: float = 0.6, limit: int = 20, dedup_window_days: int = 14) -> list:
async with httpx.AsyncClient(timeout=120) as client: async with httpx.AsyncClient(timeout=120) as client:
r = await client.get( r = await client.get(
f"{INSTA_LAB_URL}/api/insta/keywords/ranked", f"{INSTA_LAB_URL}/api/insta/keywords/ranked",
params={"threshold": threshold, "limit": limit}, params={"threshold": threshold, "limit": limit, "dedup_window_days": dedup_window_days},
) )
r.raise_for_status() r.raise_for_status()
return r.json()["items"] return r.json()["items"]

View File

@@ -154,11 +154,15 @@ def list_keywords(
@app.get("/api/insta/keywords/ranked") @app.get("/api/insta/keywords/ranked")
def ranked_keywords(limit: int = Query(20, ge=1, le=100), threshold: float = Query(0.6, ge=0.0, le=1.0)): def ranked_keywords(
limit: int = Query(20, ge=1, le=100),
threshold: float = Query(0.6, ge=0.0, le=1.0),
dedup_window_days: int = Query(14, ge=1, le=90),
):
candidates = db.list_trending_keywords(used=False) candidates = db.list_trending_keywords(used=False)
if not candidates: if not candidates:
return {"items": []} return {"items": []}
issued = db.list_recent_issued_topics(window_days=14) issued = db.list_recent_issued_topics(window_days=dedup_window_days)
prefs = {p["category"]: p["weight"] for p in db.get_preferences()} prefs = {p["category"]: p["weight"] for p in db.get_preferences()}
claude_scores = selection_judge.judge_candidates(candidates) claude_scores = selection_judge.judge_candidates(candidates)
now_iso = datetime.now(timezone.utc).isoformat() now_iso = datetime.now(timezone.utc).isoformat()

View File

@@ -45,3 +45,34 @@ def test_decision_invalid_400(client):
def test_decision_unknown_slate_404(client): def test_decision_unknown_slate_404(client):
r = client.post("/api/insta/slates/99999/decision", json={"decision": "approved"}) r = client.post("/api/insta/slates/99999/decision", json={"decision": "approved"})
assert r.status_code == 404 assert r.status_code == 404
def test_ranked_respects_dedup_window(client):
"""dedup_window_days param이 list_recent_issued_topics window에 반영되는지 검증.
'금리' 키워드를 방금 approved(published) 상태로 기록한 뒤:
- dedup_window_days=30 → 방금 발행 = window 안 → eligible False
- dedup_window_days=1 → DB datetime이 정각 경계 직전이라도 여전히 1일 안이므로 eligible False
(확인: 반드시 eligible=False)
추가로 두 번째 키워드(word2)는 아직 발행 이력 없으므로 window 무관하게 eligible True.
"""
# 방금 발행된 키워드 등록 + 슬레이트 approved 처리
db.add_trending_keyword({"keyword": "금리", "category": "economy", "score": 0.9})
sid = db.add_card_slate({"keyword": "금리", "category": "economy"})
db.set_slate_decision(sid, "approved") # published_at = now
# 발행 이력 없는 키워드 추가
db.add_trending_keyword({"keyword": "환율", "category": "economy", "score": 0.8})
# window=30 → '금리'는 최근 발행이라 dedup 대상 → eligible False
r = client.get("/api/insta/keywords/ranked?threshold=0.0&limit=10&dedup_window_days=30")
assert r.status_code == 200
items = r.json()["items"]
keumni = next((i for i in items if i["keyword"] == "금리"), None)
assert keumni is not None, "'금리' 항목이 ranked 응답에 없음"
assert keumni["eligible"] is False, "dedup_window_days=30 내 발행 → eligible은 False여야 함"
# 발행 이력 없는 '환율'은 어떤 window에서도 eligible True
hwanul = next((i for i in items if i["keyword"] == "환율"), None)
assert hwanul is not None, "'환율' 항목이 ranked 응답에 없음"
assert hwanul["eligible"] is True, "발행 이력 없는 키워드는 eligible True여야 함"