# web-page-backend/music-lab/app/pipeline/review.py

"""AI 최종 검토 — 4축(메타/정책/시청/트렌드) 가중 평균."""
import os
import json
import logging

import httpx

# Module-level logger for the review step.
logger = logging.getLogger("music-lab.review")

# Model id used when the CLAUDE_SONNET_MODEL environment variable is not set.
CLAUDE_SONNET_MODEL_DEFAULT = "claude-sonnet-4-6"
# Passed as the `timeout` of the httpx.AsyncClient that calls the LLM.
TIMEOUT_S = 60

# Terms whose presence in the title/description lowers the heuristic policy score.
POLICY_BANNED = {"f-word", "n-word"}  # demo placeholder — move to a separate file for production


def _get_api_key() -> str:
    """Return the ANTHROPIC_API_KEY environment variable, or "" when it is unset."""
    api_key = os.environ.get("ANTHROPIC_API_KEY")
    return "" if api_key is None else api_key


def _get_model() -> str:
    """Return the model id from CLAUDE_SONNET_MODEL, else the module default."""
    configured = os.environ.get("CLAUDE_SONNET_MODEL")
    if configured is None:
        return CLAUDE_SONNET_MODEL_DEFAULT
    return configured


async def run_4_axis(*, pipeline: dict, track: dict, video_meta: dict,
                      metadata: dict, thumbnail_url: str, trend_top: list[str],
                      weights: dict, threshold: int) -> dict:
    """Score the track/video/metadata on four axes and return a verdict.

    The LLM is asked for the four axis scores; when no API key is configured
    or the LLM call / its response cannot be used, the local heuristic is
    used instead.

    Args:
        pipeline: Forwarded to the LLM call.
        track: Track info; serialized into the prompt, and read by the
            heuristic for ``duration_sec``.
        video_meta: Video info; serialized into the prompt, and read by the
            heuristic for ``length_sec``.
        metadata: Upload metadata (``title``, ``description``, ``tags``).
        thumbnail_url: Thumbnail URL included in the prompt.
        trend_top: Trend keywords compared against the tags.
        weights: Per-axis weights under the keys ``meta``, ``policy``,
            ``viewer`` and ``trend``; each is divided by 100 before use.
        threshold: Minimum weighted total for a ``"pass"`` verdict.

    Returns:
        The axis scores plus ``weighted_total``, ``verdict`` and
        ``used_fallback``.
    """
    api_key = _get_api_key()
    if not api_key:
        return _heuristic(metadata, video_meta, track, trend_top, weights, threshold,
                          fallback_reason="no api key")
    try:
        scores = await _call_claude(pipeline, track, video_meta, metadata,
                                    thumbnail_url, trend_top,
                                    api_key=api_key, model=_get_model())
        return _weighted_verdict(scores, weights, threshold, used_fallback=False)
    except (httpx.HTTPError, httpx.TimeoutException, KeyError, IndexError,
            TypeError, ValueError, json.JSONDecodeError) as e:
        # IndexError: the API returned an empty `content` list.
        # TypeError: the LLM JSON had a non-numeric score or a non-dict axis.
        # Both are malformed-response cases and should degrade to the
        # heuristic like the others. A caller-side problem such as bad
        # `weights` still surfaces, because the heuristic path re-raises it.
        logger.warning("검토 LLM 실패 — 휴리스틱: %s", e)
        return _heuristic(metadata, video_meta, track, trend_top, weights, threshold,
                          fallback_reason=str(e))


def _weighted_verdict(scores: dict, weights: dict, threshold: int,
                       used_fallback: bool) -> dict:
    """Combine the four axis scores into a weighted total and a verdict.

    Each weight is divided by 100 and multiplied by its axis score. The
    result is a copy of ``scores`` with ``weighted_total`` (rounded to two
    decimals), ``verdict`` (``"pass"`` when the unrounded total reaches
    ``threshold``, otherwise ``"fail"``) and ``used_fallback`` set.
    """
    def contribution(weight_key, axis_key):
        return weights[weight_key] / 100 * scores[axis_key]["score"]

    # Added left to right in a fixed order so float rounding stays stable.
    weighted_sum = (
        contribution("meta", "metadata_quality")
        + contribution("policy", "policy_compliance")
        + contribution("viewer", "viewer_experience")
        + contribution("trend", "trend_alignment")
    )

    result = dict(scores)
    result["weighted_total"] = round(weighted_sum, 2)
    if weighted_sum >= threshold:
        result["verdict"] = "pass"
    else:
        result["verdict"] = "fail"
    result["used_fallback"] = used_fallback
    return result


async def _call_claude(pipeline, track, video_meta, metadata, thumbnail_url, trend_top,
                        *, api_key: str, model: str):
    """Ask the Anthropic Messages API for the four axis scores.

    Builds a single user prompt from the track, video, metadata, thumbnail
    URL and trend keywords, posts it, and parses the JSON object found in
    the first content block of the reply.

    Args:
        pipeline: Accepted for the caller's convenience; currently unused.
        track, video_meta, metadata: Serialized as JSON into the prompt.
        thumbnail_url: Inserted into the prompt as-is.
        trend_top: Inserted into the prompt via its ``str()`` form.
        api_key: Sent as the ``x-api-key`` header.
        model: Model id placed in the request body.

    Returns:
        The parsed JSON object from the model's reply.

    Raises:
        httpx.HTTPError: Transport failure or a non-success status.
        KeyError: The response body lacks ``content`` or the first block
            lacks ``text``.
        ValueError: The content list is empty, the reply contains no
            ``{...}`` span, or that span is not valid JSON.
    """
    user = (
        "트랙·영상·메타데이터를 4축으로 평가하고 JSON만 응답:\n"
        f"트랙: {json.dumps(track, ensure_ascii=False)}\n"
        # ensure_ascii=False keeps non-ASCII text readable, matching the
        # track and metadata lines around it.
        f"영상: {json.dumps(video_meta, ensure_ascii=False)}\n"
        f"메타: {json.dumps(metadata, ensure_ascii=False)}\n"
        f"썸네일: {thumbnail_url}\n"
        f"트렌드: {trend_top}\n"
        '출력: {"metadata_quality":{"score":0-100,"notes":""},'
        '"policy_compliance":{"score":0-100,"issues":[]},'
        '"viewer_experience":{"score":0-100,"notes":""},'
        '"trend_alignment":{"score":0-100,"matched_keywords":[]},'
        '"summary":""}'
    )
    async with httpx.AsyncClient(timeout=TIMEOUT_S) as client:
        resp = await client.post(
            "https://api.anthropic.com/v1/messages",
            headers={
                "x-api-key": api_key,
                "anthropic-version": "2023-06-01",
                "content-type": "application/json",
            },
            json={"model": model, "max_tokens": 1024,
                  "messages": [{"role": "user", "content": user}]},
        )
        resp.raise_for_status()
        blocks = resp.json()["content"]
        if not blocks:
            # An empty content list would otherwise raise IndexError, which
            # is not one of the errors this function is documented to raise.
            raise ValueError("Claude 응답 content 없음")
        text = blocks[0]["text"]
        # The model may wrap the JSON in prose or code fences, so take the
        # span from the first "{" to the last "}".
        start = text.find("{")
        end = text.rfind("}") + 1
        if start < 0 or end <= start:
            raise ValueError("Claude 응답 JSON 없음")
        return json.loads(text[start:end])


def _heuristic(metadata, video_meta, track, trend_top, weights, threshold, fallback_reason):
    """Score the four axes with local rules when the LLM cannot be used.

    Args:
        metadata: Upload metadata; ``title``, ``description`` and ``tags``
            are read. Missing or ``None`` values count as empty.
        video_meta: Video info; ``length_sec`` is read (default 0).
        track: Track info; ``duration_sec`` is read, defaulting to the
            video length.
        trend_top: Trend keywords compared against the tags; ``None``
            counts as empty.
        weights, threshold: Passed through to ``_weighted_verdict``.
        fallback_reason: Included in the ``summary`` text.

    Returns:
        The verdict dict from ``_weighted_verdict`` with
        ``used_fallback=True``.
    """
    # This is the last-resort path, so tolerate explicit None values rather
    # than crash the whole review.
    title = metadata.get("title") or ""
    description = metadata.get("description") or ""
    tags = metadata.get("tags") or []

    # Metadata: title/description length and tag count all within range.
    meta_ok = (
        5 <= len(title) <= 60
        and 50 <= len(description) <= 1000
        and 5 <= len(tags) <= 15
    )
    meta_score = 100 if meta_ok else 50

    # Policy: banned-term match. The newline separator stops a term from
    # matching across the title/description boundary.
    text_blob = f"{title}\n{description}".lower()
    # Sorted so the reported issues are stable across runs.
    violations = sorted(w for w in POLICY_BANNED if w in text_blob)
    policy_score = 30 if violations else 100

    # Viewer: video length within ±5 seconds of the track duration.
    video_len = video_meta.get("length_sec", 0)
    expected = track.get("duration_sec", video_len)
    delta = abs(video_len - expected)
    viewer_score = 90 if delta <= 5 else 60

    # Trend: any tag that exactly matches a trend keyword.
    overlap = set(tags) & set(trend_top or ())
    trend_score = 100 if overlap else 40

    scores = {
        "metadata_quality": {"score": meta_score, "notes": "휴리스틱"},
        # Report which terms matched so a low score is explainable.
        "policy_compliance": {"score": policy_score, "issues": violations},
        "viewer_experience": {"score": viewer_score, "notes": "휴리스틱"},
        # Set order is arbitrary; sort for a deterministic result.
        "trend_alignment": {"score": trend_score,
                            "matched_keywords": sorted(overlap, key=str)},
        "summary": f"휴리스틱 fallback: {fallback_reason}",
    }
    return _weighted_verdict(scores, weights, threshold, used_fallback=True)