feat(music-lab): pipeline 4축 AI 검토 + 휴리스틱 폴백

2026-05-07 17:01:17 +09:00
parent 1c705b0ef3
commit ad1c721ba8
2 changed files with 204 additions and 0 deletions
--- a/music-lab/app/pipeline/review.py
+++ b/music-lab/app/pipeline/review.py
@@ -0,0 +1,120 @@
+"""Final AI review — weighted average over four axes (metadata / policy / viewer / trend)."""
+import os
+import json
+import logging
+
+import httpx
+
+# Module-level logger for the review stage.
+logger = logging.getLogger("music-lab.review")
+
+# Model name used when the CLAUDE_SONNET_MODEL environment variable is not set.
+CLAUDE_SONNET_MODEL_DEFAULT = "claude-sonnet-4-6"
+# Passed as the httpx client timeout for the review request.
+TIMEOUT_S = 60
+
+# Banned terms matched against title + description by the heuristic policy axis.
+POLICY_BANNED = {"f-word", "n-word"}  # demo placeholder — move to a separate file for production
+
+
+def _get_api_key() -> str:
+    """Return the ANTHROPIC_API_KEY environment variable, or "" when it is unset."""
+    return os.environ.get("ANTHROPIC_API_KEY", "")
+
+
+def _get_model() -> str:
+    """Return the model name from CLAUDE_SONNET_MODEL, or the module default when unset."""
+    return os.environ.get("CLAUDE_SONNET_MODEL", CLAUDE_SONNET_MODEL_DEFAULT)
+
+
+async def run_4_axis(*, pipeline: dict, track: dict, video_meta: dict,
+                      metadata: dict, thumbnail_url: str, trend_top: list[str],
+                      weights: dict, threshold: int) -> dict:
+    """Review an upload on four axes and return a weighted pass/fail verdict.
+
+    The Claude API is used when ANTHROPIC_API_KEY is set. When the key is
+    missing, the request fails, or the response cannot be turned into a
+    verdict, the local heuristic is used instead.
+
+    Args:
+        pipeline: Pipeline record, forwarded to the LLM call.
+        track: Track record; the heuristic reads ``duration_sec`` from it.
+        video_meta: Video info; the heuristic reads ``length_sec`` from it.
+        metadata: Upload metadata (``title``, ``description``, ``tags``).
+        thumbnail_url: Thumbnail location included in the LLM prompt.
+        trend_top: Trend keywords compared against the tags.
+        weights: Percent weights keyed ``meta``, ``policy``, ``viewer``, ``trend``.
+        threshold: Minimum weighted total for a "pass" verdict.
+
+    Returns:
+        The per-axis score dict extended with ``weighted_total``,
+        ``verdict`` ("pass"/"fail") and ``used_fallback``.
+    """
+    api_key = _get_api_key()
+    if not api_key:
+        return _heuristic(metadata, video_meta, track, trend_top, weights, threshold,
+                          fallback_reason="no api key")
+    try:
+        scores = await _call_claude(pipeline, track, video_meta, metadata,
+                                    thumbnail_url, trend_top,
+                                    api_key=api_key, model=_get_model())
+        return _weighted_verdict(scores, weights, threshold, used_fallback=False)
+    # IndexError / TypeError cover a 200 response whose shape is not what we
+    # expect (empty content list, non-numeric or non-dict axis entries); those
+    # must degrade to the heuristic as well instead of crashing the pipeline.
+    except (httpx.HTTPError, httpx.TimeoutException, KeyError, IndexError,
+            TypeError, ValueError, json.JSONDecodeError) as e:
+        logger.warning("검토 LLM 실패 — 휴리스틱: %s", e)
+        return _heuristic(metadata, video_meta, track, trend_top, weights, threshold,
+                          fallback_reason=str(e))
+
+
+def _weighted_verdict(scores: dict, weights: dict, threshold: int,
+                       used_fallback: bool) -> dict:
+    """Combine the four axis scores into a weighted total and a verdict.
+
+    Each weight is a percentage (divided by 100 before multiplying). The
+    result is a copy of ``scores`` with ``weighted_total`` (rounded to two
+    decimals), ``verdict`` ("pass" when the unrounded total reaches
+    ``threshold``, else "fail") and ``used_fallback`` added.
+    """
+    axis_pairs = (
+        ("meta", "metadata_quality"),
+        ("policy", "policy_compliance"),
+        ("viewer", "viewer_experience"),
+        ("trend", "trend_alignment"),
+    )
+    meta_part, policy_part, viewer_part, trend_part = (
+        weights[weight_key] / 100 * scores[score_key]["score"]
+        for weight_key, score_key in axis_pairs
+    )
+    # Plain left-to-right addition keeps the float result stable.
+    total = meta_part + policy_part + viewer_part + trend_part
+
+    if total >= threshold:
+        verdict = "pass"
+    else:
+        verdict = "fail"
+
+    result = {**scores}
+    result["weighted_total"] = round(total, 2)
+    result["verdict"] = verdict
+    result["used_fallback"] = used_fallback
+    return result
+
+
+async def _call_claude(pipeline, track, video_meta, metadata, thumbnail_url, trend_top,
+                        *, api_key: str, model: str):
+    """Ask Claude for the four-axis scores and return the parsed JSON object.
+
+    Builds a single user message from the track, video, metadata, thumbnail
+    and trend inputs (``pipeline`` is accepted but not included in the
+    prompt), posts it to the Anthropic Messages endpoint and parses the
+    outermost ``{...}`` span of the first text block in the reply.
+
+    Raises:
+        httpx.HTTPError: On transport errors or a non-2xx status.
+        ValueError: When the reply has no text block or no JSON object
+            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
+    """
+    user = (
+        "트랙·영상·메타데이터를 4축으로 평가하고 JSON만 응답:\n"
+        f"트랙: {json.dumps(track, ensure_ascii=False)}\n"
+        # ensure_ascii=False here too, so non-ASCII values reach the model
+        # unescaped like the track and metadata fields.
+        f"영상: {json.dumps(video_meta, ensure_ascii=False)}\n"
+        f"메타: {json.dumps(metadata, ensure_ascii=False)}\n"
+        f"썸네일: {thumbnail_url}\n"
+        f"트렌드: {trend_top}\n"
+        '출력: {"metadata_quality":{"score":0-100,"notes":""},'
+        '"policy_compliance":{"score":0-100,"issues":[]},'
+        '"viewer_experience":{"score":0-100,"notes":""},'
+        '"trend_alignment":{"score":0-100,"matched_keywords":[]},'
+        '"summary":""}'
+    )
+    async with httpx.AsyncClient(timeout=TIMEOUT_S) as client:
+        resp = await client.post(
+            "https://api.anthropic.com/v1/messages",
+            headers={
+                "x-api-key": api_key,
+                "anthropic-version": "2023-06-01",
+                "content-type": "application/json",
+            },
+            json={"model": model, "max_tokens": 1024,
+                  "messages": [{"role": "user", "content": user}]},
+        )
+        resp.raise_for_status()
+        payload = resp.json()
+
+    # Do not assume content[0] is a text block: the list may be empty or may
+    # start with a different block type. Take the first block carrying text.
+    blocks = payload.get("content") if isinstance(payload, dict) else None
+    if not isinstance(blocks, list):
+        blocks = []
+    text = next(
+        (block["text"] for block in blocks
+         if isinstance(block, dict) and isinstance(block.get("text"), str)),
+        None,
+    )
+    if text is None:
+        raise ValueError("Claude 응답 텍스트 없음")
+
+    # The model may wrap the JSON in prose; keep only the outermost braces.
+    start = text.find("{")
+    end = text.rfind("}") + 1
+    if start < 0 or end <= start:
+        raise ValueError("Claude 응답 JSON 없음")
+    return json.loads(text[start:end])
+
+
+def _heuristic(metadata, video_meta, track, trend_top, weights, threshold, fallback_reason):
+    """Score the four axes with local rules when the LLM review is unavailable.
+
+    Missing or ``None`` fields are treated as empty/zero so the fallback path
+    itself cannot fail on incomplete records. Banned terms that were found are
+    reported under ``policy_compliance["issues"]``.
+
+    Returns:
+        The verdict dict from ``_weighted_verdict`` with ``used_fallback=True``;
+        ``summary`` carries ``fallback_reason``.
+    """
+    title = metadata.get("title") or ""
+    description = metadata.get("description") or ""
+    tags = metadata.get("tags") or []
+
+    # Metadata: title/description length and tag count all inside their ranges.
+    meta_ok = (
+        5 <= len(title) <= 60
+        and 50 <= len(description) <= 1000
+        and 5 <= len(tags) <= 15
+    )
+    meta_score = 100 if meta_ok else 50
+
+    # Policy: case-insensitive banned-term match on title + description.
+    text_blob = (title + description).lower()
+    banned_hits = sorted(word for word in POLICY_BANNED if word in text_blob)
+    policy_score = 30 if banned_hits else 100
+
+    # Viewer: video length should be within ±5 seconds of the track length.
+    # With no track duration the video length is used, so the delta is 0.
+    video_len = video_meta.get("length_sec") or 0
+    track_len = track.get("duration_sec")
+    expected = video_len if track_len is None else track_len
+    delta = abs(video_len - expected)
+    viewer_score = 90 if delta <= 5 else 60
+
+    # Trend: any tag that also appears in the trend list.
+    overlap = set(tags) & set(trend_top or [])
+    trend_score = 100 if overlap else 40
+
+    scores = {
+        "metadata_quality": {"score": meta_score, "notes": "휴리스틱"},
+        "policy_compliance": {"score": policy_score, "issues": banned_hits},
+        "viewer_experience": {"score": viewer_score, "notes": "휴리스틱"},
+        "trend_alignment": {"score": trend_score, "matched_keywords": list(overlap)},
+        "summary": f"휴리스틱 fallback: {fallback_reason}",
+    }
+    return _weighted_verdict(scores, weights, threshold, used_fallback=True)
--- a/music-lab/tests/test_review.py
+++ b/music-lab/tests/test_review.py
@@ -0,0 +1,84 @@
+import pytest
+import respx
+from httpx import Response
+from app.pipeline import review
+
+
+@pytest.mark.asyncio
+@respx.mock
+async def test_review_returns_pass_when_above_threshold(monkeypatch):
+    """LLM scores above the threshold give "pass" and the expected weighted total."""
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "k")
+    llm_text = (
+        '{"metadata_quality":{"score":80,"notes":"x"},'
+        '"policy_compliance":{"score":90,"issues":[]},'
+        '"viewer_experience":{"score":75,"notes":"y"},'
+        '"trend_alignment":{"score":70,"matched_keywords":["lofi"]},'
+        '"summary":"good"}'
+    )
+    llm_reply = Response(200, json={"content": [{"type": "text", "text": llm_text}]})
+    respx.post("https://api.anthropic.com/v1/messages").mock(return_value=llm_reply)
+
+    call_args = {
+        "pipeline": {"id": 1},
+        "track": {"title": "x", "genre": "lo-fi", "bpm": 85},
+        "video_meta": {"length_sec": 120, "resolution": "1920x1080"},
+        "metadata": {"title": "Y", "description": "Z", "tags": ["lofi"], "category_id": 10},
+        "thumbnail_url": "/m/x.jpg",
+        "trend_top": ["lofi"],
+        "weights": {"meta": 25, "policy": 30, "viewer": 25, "trend": 20},
+        "threshold": 60,
+    }
+    result = await review.run_4_axis(**call_args)
+
+    assert result["verdict"] == "pass"
+    expected_total = 0.25 * 80 + 0.30 * 90 + 0.25 * 75 + 0.20 * 70
+    assert result["weighted_total"] == pytest.approx(expected_total, abs=0.01)
+
+
+@pytest.mark.asyncio
+@respx.mock
+async def test_review_fail_below_threshold(monkeypatch):
+    """LLM scores below the threshold give a "fail" verdict."""
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "k")
+    llm_text = (
+        '{"metadata_quality":{"score":40,"notes":"x"},'
+        '"policy_compliance":{"score":50,"issues":[]},'
+        '"viewer_experience":{"score":30,"notes":"y"},'
+        '"trend_alignment":{"score":20,"matched_keywords":[]},'
+        '"summary":"bad"}'
+    )
+    llm_reply = Response(200, json={"content": [{"type": "text", "text": llm_text}]})
+    respx.post("https://api.anthropic.com/v1/messages").mock(return_value=llm_reply)
+
+    call_args = {
+        "pipeline": {"id": 2},
+        "track": {"title": "x", "genre": "lo-fi", "bpm": 85},
+        "video_meta": {"length_sec": 120, "resolution": "1920x1080"},
+        "metadata": {"title": "Y", "description": "Z", "tags": [], "category_id": 10},
+        "thumbnail_url": "/m/x.jpg",
+        "trend_top": [],
+        "weights": {"meta": 25, "policy": 30, "viewer": 25, "trend": 20},
+        "threshold": 60,
+    }
+    result = await review.run_4_axis(**call_args)
+
+    assert result["verdict"] == "fail"
+
+
+@pytest.mark.asyncio
+@respx.mock
+async def test_review_heuristic_fallback_on_llm_error(monkeypatch):
+    """A 5xx from the LLM endpoint falls back to the heuristic scores."""
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "k")
+    respx.post("https://api.anthropic.com/v1/messages").mock(return_value=Response(500))
+    result = await review.run_4_axis(
+        pipeline={"id": 3}, track={"title": "x", "genre": "lo-fi", "bpm": 85, "duration_sec": 120},
+        video_meta={"length_sec": 120, "resolution": "1920x1080"},
+        metadata={"title": "Y" * 30, "description": "Z" * 200, "tags": ["a", "b", "c", "d", "e"], "category_id": 10},
+        thumbnail_url="/m/x.jpg", trend_top=["lofi"],
+        weights={"meta": 25, "policy": 30, "viewer": 25, "trend": 20},
+        threshold=60,
+    )
+    assert result["used_fallback"] is True
+    # Heuristic axes: meta 100, policy 100, viewer 90 (lengths match),
+    # trend 40 (no tag overlaps the trend list) -> 25 + 30 + 22.5 + 8.
+    assert result["weighted_total"] == pytest.approx(85.5, abs=0.01)
+    assert result["verdict"] == "pass"
+
+
+@pytest.mark.asyncio
+async def test_review_heuristic_when_no_api_key(monkeypatch):
+    """Without an API key the heuristic is used and good inputs pass."""
+    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+
+    upload_metadata = {
+        "title": "Test Title",
+        "description": "Description here, more text " * 5,
+        "tags": ["lofi", "study", "chill", "ambient", "instrumental"],
+        "category_id": 10,
+    }
+    call_args = {
+        "pipeline": {"id": 4},
+        "track": {"title": "x", "genre": "lo-fi", "bpm": 85, "duration_sec": 120},
+        "video_meta": {"length_sec": 120, "resolution": "1920x1080"},
+        "metadata": upload_metadata,
+        "thumbnail_url": "/m/x.jpg",
+        "trend_top": ["lofi"],
+        "weights": {"meta": 25, "policy": 30, "viewer": 25, "trend": 20},
+        "threshold": 60,
+    }
+    result = await review.run_4_axis(**call_args)
+
+    assert result["used_fallback"] is True
+    # Heuristic: good metadata + matching video length + tag/trend overlap -> expect pass.
+    assert result["verdict"] == "pass"