Files

121 lines
5.0 KiB
Python

"""AI 최종 검토 — 4축(메타/정책/시청/트렌드) 가중 평균."""
import os
import json
import logging
import httpx
# Module-level logger for the review step.
logger = logging.getLogger("music-lab.review")
# Model name used when the CLAUDE_SONNET_MODEL environment variable is not set.
CLAUDE_SONNET_MODEL_DEFAULT = "claude-sonnet-4-6"
# Timeout handed to the httpx client that calls the Claude API
# (presumably seconds, per the _S suffix — confirm against httpx docs).
TIMEOUT_S = 60
# Banned terms, substring-matched against the lowercased title + description
# by the heuristic fallback.
POLICY_BANNED = {"f-word", "n-word"} # Demo placeholder — move to a separate file in production
def _get_api_key() -> str:
    """Return the Anthropic API key from the environment.

    Reads ``ANTHROPIC_API_KEY``; yields an empty string when the variable
    is not set.
    """
    return os.environ.get("ANTHROPIC_API_KEY", "")
def _get_model() -> str:
    """Return the Claude model name to use for the review call.

    The ``CLAUDE_SONNET_MODEL`` environment variable takes precedence;
    ``CLAUDE_SONNET_MODEL_DEFAULT`` is returned when it is not set.
    """
    fallback_model = CLAUDE_SONNET_MODEL_DEFAULT
    return os.environ.get("CLAUDE_SONNET_MODEL", fallback_model)
async def run_4_axis(*, pipeline: dict, track: dict, video_meta: dict,
                     metadata: dict, thumbnail_url: str, trend_top: list[str],
                     weights: dict, threshold: int) -> dict:
    """Score a track/video/metadata bundle on four axes and return a verdict.

    When an API key is configured the scores come from ``_call_claude``;
    otherwise, or when the LLM call or its response is unusable, the local
    ``_heuristic`` scorer is used instead.

    Args:
        pipeline: Forwarded to ``_call_claude``.
        track: Track information (forwarded to the scorer in use).
        video_meta: Video information (forwarded to the scorer in use).
        metadata: Upload metadata (forwarded to the scorer in use).
        thumbnail_url: Thumbnail URL, forwarded to ``_call_claude``.
        trend_top: Trend keywords (forwarded to the scorer in use).
        weights: Per-axis weights, forwarded to the verdict computation.
        threshold: Minimum weighted total for a "pass" verdict.

    Returns:
        The dict produced by ``_weighted_verdict`` (directly, or via
        ``_heuristic`` on the fallback path).
    """
    api_key = _get_api_key()
    if not api_key:
        return _heuristic(metadata, video_meta, track, trend_top, weights, threshold,
                          fallback_reason="no api key")
    try:
        scores = await _call_claude(pipeline, track, video_meta, metadata,
                                    thumbnail_url, trend_top,
                                    api_key=api_key, model=_get_model())
        return _weighted_verdict(scores, weights, threshold, used_fallback=False)
    except (httpx.HTTPError, httpx.TimeoutException, KeyError, IndexError,
            TypeError, ValueError, json.JSONDecodeError) as e:
        # IndexError / TypeError cover malformed LLM replies (empty content
        # list, a score that is a string or a non-dict axis entry); those
        # should degrade to the heuristic just like a missing key does.
        logger.warning("검토 LLM 실패 — 휴리스틱: %s", e)
        return _heuristic(metadata, video_meta, track, trend_top, weights, threshold,
                          fallback_reason=str(e))
def _weighted_verdict(scores: dict, weights: dict, threshold: int,
                      used_fallback: bool) -> dict:
    """Combine the four axis scores into a weighted total and a verdict.

    Args:
        scores: Mapping holding ``metadata_quality``, ``policy_compliance``,
            ``viewer_experience`` and ``trend_alignment`` entries, each with
            a numeric ``"score"``. Any other keys are passed through.
        weights: Percent weights under ``meta``, ``policy``, ``viewer`` and
            ``trend``.
        threshold: Minimum weighted total for a "pass" verdict.
        used_fallback: Echoed into the result so callers can tell which
            scorer produced ``scores``.

    Returns:
        ``scores`` extended with ``weighted_total`` (rounded to 2 decimals),
        ``verdict`` ("pass" or "fail") and ``used_fallback``.

    Raises:
        KeyError: If a required weight or score entry is missing.
    """
    axis_pairs = (
        ("meta", "metadata_quality"),
        ("policy", "policy_compliance"),
        ("viewer", "viewer_experience"),
        ("trend", "trend_alignment"),
    )
    # Accumulate weight * score first and divide by 100 once. Dividing each
    # weight separately lets float error pile up (29 / 100 * 100 evaluates to
    # 28.999999999999996), which made totals sitting exactly on the threshold
    # fail even though the reported, rounded total met it.
    weighted_sum = sum(
        weights[weight_key] * scores[score_key]["score"]
        for weight_key, score_key in axis_pairs
    )
    total = weighted_sum / 100
    return {
        **scores,
        "weighted_total": round(total, 2),
        "verdict": "pass" if total >= threshold else "fail",
        "used_fallback": used_fallback,
    }
async def _call_claude(pipeline, track, video_meta, metadata, thumbnail_url, trend_top,
                       *, api_key: str, model: str):
    """Ask Claude to score the bundle on four axes and return the parsed JSON.

    Builds a single user prompt from the inputs, POSTs it to the Anthropic
    Messages endpoint, and parses the outermost ``{...}`` span of the reply
    text as JSON.

    Args:
        pipeline: Accepted for signature compatibility; not used in the prompt.
        track: Track information, JSON-serialised into the prompt.
        video_meta: Video information, JSON-serialised into the prompt.
        metadata: Upload metadata, JSON-serialised into the prompt.
        thumbnail_url: Thumbnail URL, interpolated into the prompt.
        trend_top: Trend keywords, interpolated into the prompt.
        api_key: Value sent in the ``x-api-key`` header.
        model: Model name sent in the request body.

    Returns:
        The object decoded from the JSON found in the reply text.

    Raises:
        httpx.HTTPError: On transport failure or a non-success status.
        ValueError: When the reply has no text block or no JSON object, or
            the JSON does not parse (``json.JSONDecodeError``).
    """
    user = (
        "트랙·영상·메타데이터를 4축으로 평가하고 JSON만 응답:\n"
        f"트랙: {json.dumps(track, ensure_ascii=False)}\n"
        # ensure_ascii=False for consistency with the track/metadata lines,
        # so non-ASCII text is not expanded into \uXXXX escapes.
        f"영상: {json.dumps(video_meta, ensure_ascii=False)}\n"
        f"메타: {json.dumps(metadata, ensure_ascii=False)}\n"
        f"썸네일: {thumbnail_url}\n"
        f"트렌드: {trend_top}\n"
        '출력: {"metadata_quality":{"score":0-100,"notes":""},'
        '"policy_compliance":{"score":0-100,"issues":[]},'
        '"viewer_experience":{"score":0-100,"notes":""},'
        '"trend_alignment":{"score":0-100,"matched_keywords":[]},'
        '"summary":""}'
    )
    async with httpx.AsyncClient(timeout=TIMEOUT_S) as client:
        resp = await client.post(
            "https://api.anthropic.com/v1/messages",
            headers={
                "x-api-key": api_key,
                "anthropic-version": "2023-06-01",
                "content-type": "application/json",
            },
            json={"model": model, "max_tokens": 1024,
                  "messages": [{"role": "user", "content": user}]},
        )
        resp.raise_for_status()
        payload = resp.json()

    # Take the first block that actually carries text instead of blindly
    # indexing content[0]: an empty list used to raise IndexError, which the
    # caller does not treat as a recoverable LLM failure.
    blocks = payload.get("content") if isinstance(payload, dict) else None
    if not isinstance(blocks, list):
        blocks = []
    text = next(
        (
            block["text"]
            for block in blocks
            if isinstance(block, dict) and isinstance(block.get("text"), str)
        ),
        None,
    )
    if text is None:
        raise ValueError("Claude 응답 JSON 없음")

    # The model may wrap the JSON in extra prose; keep only the outermost
    # brace-delimited span.
    start = text.find("{")
    end = text.rfind("}") + 1
    if start < 0 or end <= start:
        raise ValueError("Claude 응답 JSON 없음")
    return json.loads(text[start:end])
def _heuristic(metadata, video_meta, track, trend_top, weights, threshold, fallback_reason):
    """Score the bundle with local rules when the LLM review is unavailable.

    Axes:
        * metadata: 100 when title length is 5-60, description length is
          50-1000 and there are 5-15 tags; otherwise 50.
        * policy: 30 when any ``POLICY_BANNED`` term occurs in the lowercased
          title or description; otherwise 100. Matched terms are reported
          under ``issues``.
        * viewer: 90 when the video length is within 5 of the track's
          ``duration_sec`` (or that field is absent); otherwise 60.
        * trend: 100 when at least one tag appears in ``trend_top``;
          otherwise 40.

    Args:
        metadata: Mapping read for ``title``, ``description`` and ``tags``.
        video_meta: Mapping read for ``length_sec``.
        track: Mapping read for ``duration_sec``.
        trend_top: Trend keywords compared against the tags.
        weights: Forwarded to ``_weighted_verdict``.
        threshold: Forwarded to ``_weighted_verdict``.
        fallback_reason: Text embedded in the result's ``summary``.

    Returns:
        The result of ``_weighted_verdict`` with ``used_fallback=True``.
    """
    # `or` defaults also cover keys that are present but None, so the
    # fallback path cannot itself crash on sparse metadata.
    title = metadata.get("title") or ""
    description = metadata.get("description") or ""
    tags = metadata.get("tags") or []

    # Metadata: length and tag-count ranges.
    meta_ok = (
        5 <= len(title) <= 60
        and 50 <= len(description) <= 1000
        and 5 <= len(tags) <= 15
    )
    meta_score = 100 if meta_ok else 50

    # Policy: banned-term substring match. The newline separator keeps a
    # term from matching across the title/description boundary.
    text_blob = f"{title}\n{description}".lower()
    banned_hits = sorted(word for word in POLICY_BANNED if word in text_blob)
    policy_score = 30 if banned_hits else 100

    # Viewer experience: video length should be close to the track length.
    video_len = video_meta.get("length_sec") or 0
    expected = track.get("duration_sec")
    if expected is None:
        expected = video_len
    viewer_score = 90 if abs(video_len - expected) <= 5 else 60

    # Trend: tags that also appear in the trend list, kept in tag order
    # (deduplicated) so the output is deterministic across runs.
    trend_set = set(trend_top or [])
    matched = [tag for tag in dict.fromkeys(tags) if tag in trend_set]
    trend_score = 100 if matched else 40

    scores = {
        "metadata_quality": {"score": meta_score, "notes": "휴리스틱"},
        "policy_compliance": {"score": policy_score, "issues": banned_hits},
        "viewer_experience": {"score": viewer_score, "notes": "휴리스틱"},
        "trend_alignment": {"score": trend_score, "matched_keywords": matched},
        "summary": f"휴리스틱 fallback: {fallback_reason}",
    }
    return _weighted_verdict(scores, weights, threshold, used_fallback=True)