diff --git a/music-lab/app/pipeline/review.py b/music-lab/app/pipeline/review.py
new file mode 100644
index 0000000..e94edd8
--- /dev/null
+++ b/music-lab/app/pipeline/review.py
@@ -0,0 +1,168 @@
+"""AI final review — weighted average over four axes (metadata/policy/viewer/trend)."""
+import os
+import json
+import logging
+
+import httpx
+
+logger = logging.getLogger("music-lab.review")
+
+CLAUDE_SONNET_MODEL_DEFAULT = "claude-sonnet-4-6"
+TIMEOUT_S = 60
+
+POLICY_BANNED = {"f-word", "n-word"}  # demo placeholder — move to a separate file for production
+
+
+def _get_api_key() -> str:
+    """Return ANTHROPIC_API_KEY from the environment, or "" when unset."""
+    return os.getenv("ANTHROPIC_API_KEY", "")
+
+
+def _get_model() -> str:
+    """Return CLAUDE_SONNET_MODEL from the environment, or the built-in default."""
+    return os.getenv("CLAUDE_SONNET_MODEL", CLAUDE_SONNET_MODEL_DEFAULT)
+
+
+async def run_4_axis(*, pipeline: dict, track: dict, video_meta: dict,
+                     metadata: dict, thumbnail_url: str, trend_top: list[str],
+                     weights: dict, threshold: int) -> dict:
+    """Score a track/video/metadata bundle on four axes and return a verdict.
+
+    Uses the Claude API when ANTHROPIC_API_KEY is set; otherwise, or when the
+    call or the handling of its reply fails, falls back to ``_heuristic``.
+
+    Returns:
+        The score payload (four per-axis dicts and, normally, "summary") plus
+        "weighted_total", "verdict" ("pass"/"fail") and "used_fallback".
+    """
+    api_key = _get_api_key()
+    if not api_key:
+        return _heuristic(metadata, video_meta, track, trend_top, weights, threshold,
+                          fallback_reason="no api key")
+    try:
+        scores = await _call_claude(pipeline, track, video_meta, metadata,
+                                    thumbnail_url, trend_top,
+                                    api_key=api_key, model=_get_model())
+        return _weighted_verdict(scores, weights, threshold, used_fallback=False)
+    except (httpx.HTTPError, httpx.TimeoutException, KeyError, IndexError,
+            TypeError, ValueError, json.JSONDecodeError) as e:
+        # IndexError/TypeError cover a structurally unexpected reply (empty
+        # "content" list, non-dict JSON, non-numeric score); they must degrade
+        # to the heuristic like every other LLM failure instead of propagating.
+        logger.warning("검토 LLM 실패 — 휴리스틱: %s", e)
+        return _heuristic(metadata, video_meta, track, trend_top, weights, threshold,
+                          fallback_reason=str(e))
+
+
+def _weighted_verdict(scores: dict, weights: dict, threshold: int,
+                      used_fallback: bool) -> dict:
+    """Combine the four axis scores into a weighted total and a pass/fail verdict.
+
+    ``weights`` holds percentages under "meta", "policy", "viewer" and "trend";
+    each is divided by 100 before multiplying the matching axis score.
+    """
+    total = (
+        weights["meta"] / 100 * scores["metadata_quality"]["score"]
+        + weights["policy"] / 100 * scores["policy_compliance"]["score"]
+        + weights["viewer"] / 100 * scores["viewer_experience"]["score"]
+        + weights["trend"] / 100 * scores["trend_alignment"]["score"]
+    )
+    return {
+        **scores,
+        "weighted_total": round(total, 2),
+        "verdict": "pass" if total >= threshold else "fail",
+        "used_fallback": used_fallback,
+    }
+
+
+async def _call_claude(pipeline, track, video_meta, metadata, thumbnail_url, trend_top,
+                       *, api_key: str, model: str):
+    """Ask Claude for the four axis scores and return the parsed JSON object.
+
+    ``pipeline`` is accepted for signature parity with the caller but is not
+    included in the prompt.
+
+    Raises:
+        httpx.HTTPError: transport failure or non-2xx status.
+        ValueError: the reply has no content or contains no JSON object
+            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
+        KeyError: the reply lacks the expected "content"/"text" keys.
+    """
+    # ensure_ascii=False on every dump so non-ASCII values reach the model
+    # verbatim instead of as \uXXXX escapes.
+    user = (
+        "트랙·영상·메타데이터를 4축으로 평가하고 JSON만 응답:\n"
+        f"트랙: {json.dumps(track, ensure_ascii=False)}\n"
+        f"영상: {json.dumps(video_meta, ensure_ascii=False)}\n"
+        f"메타: {json.dumps(metadata, ensure_ascii=False)}\n"
+        f"썸네일: {thumbnail_url}\n"
+        f"트렌드: {trend_top}\n"
+        '출력: {"metadata_quality":{"score":0-100,"notes":""},'
+        '"policy_compliance":{"score":0-100,"issues":[]},'
+        '"viewer_experience":{"score":0-100,"notes":""},'
+        '"trend_alignment":{"score":0-100,"matched_keywords":[]},'
+        '"summary":""}'
+    )
+    async with httpx.AsyncClient(timeout=TIMEOUT_S) as client:
+        resp = await client.post(
+            "https://api.anthropic.com/v1/messages",
+            headers={
+                "x-api-key": api_key,
+                "anthropic-version": "2023-06-01",
+                "content-type": "application/json",
+            },
+            json={"model": model, "max_tokens": 1024,
+                  "messages": [{"role": "user", "content": user}]},
+        )
+        resp.raise_for_status()
+        content = resp.json()["content"]
+        if not content:
+            # An empty content list would otherwise surface as a bare IndexError.
+            raise ValueError("Claude 응답 JSON 없음")
+        text = content[0]["text"]
+        start = text.find("{")
+        end = text.rfind("}") + 1
+        if start < 0 or end <= start:
+            raise ValueError("Claude 응답 JSON 없음")
+        return json.loads(text[start:end])
+
+
+def _heuristic(metadata, video_meta, track, trend_top, weights, threshold, fallback_reason):
+    """Score the four axes with fixed local rules when the LLM is unavailable.
+
+    Missing or None metadata fields are treated as empty.
+    """
+    title = metadata.get("title") or ""
+    description = metadata.get("description") or ""
+    tags = metadata.get("tags") or []
+
+    # Metadata: title/description length and tag count within fixed ranges.
+    meta_ok = 5 <= len(title) <= 60 and 50 <= len(description) <= 1000 and 5 <= len(tags) <= 15
+    meta_score = 100 if meta_ok else 50
+
+    # Policy: banned-word substring match on title + description; the hits
+    # are reported in "issues" so a score of 30 is explainable.
+    text_blob = (title + description).lower()
+    banned_hits = sorted(w for w in POLICY_BANNED if w in text_blob)
+    policy_score = 30 if banned_hits else 100
+
+    # Viewer: video length within ±5 s of the track duration; a missing
+    # duration falls back to the video length itself (delta 0).
+    length = video_meta.get("length_sec") or 0
+    expected = track.get("duration_sec")
+    if expected is None:
+        expected = length
+    viewer_score = 90 if abs(length - expected) <= 5 else 60
+
+    # Trend: any exact tag match against the trend list.
+    overlap = set(tags) & set(trend_top)
+    trend_score = 100 if overlap else 40
+
+    scores = {
+        "metadata_quality": {"score": meta_score, "notes": "휴리스틱"},
+        "policy_compliance": {"score": policy_score, "issues": banned_hits},
+        "viewer_experience": {"score": viewer_score, "notes": "휴리스틱"},
+        "trend_alignment": {"score": trend_score, "matched_keywords": list(overlap)},
+        "summary": f"휴리스틱 fallback: {fallback_reason}",
+    }
+    return _weighted_verdict(scores, weights, threshold, used_fallback=True)
diff --git a/music-lab/tests/test_review.py b/music-lab/tests/test_review.py
new file mode 100644
index 0000000..2985b19
--- /dev/null
+++ b/music-lab/tests/test_review.py
@@ -0,0 +1,116 @@
+import pytest
+import respx
+from httpx import Response
+from app.pipeline import review
+
+
+@pytest.mark.asyncio
+@respx.mock
+async def test_review_returns_pass_when_above_threshold(monkeypatch):
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "k")
+    body = {"content": [{"type": "text", "text":
+        '{"metadata_quality":{"score":80,"notes":"x"},'
+        '"policy_compliance":{"score":90,"issues":[]},'
+        '"viewer_experience":{"score":75,"notes":"y"},'
+        '"trend_alignment":{"score":70,"matched_keywords":["lofi"]},'
+        '"summary":"good"}'}]}
+    respx.post("https://api.anthropic.com/v1/messages").mock(return_value=Response(200, json=body))
+    result = await review.run_4_axis(
+        pipeline={"id": 1}, track={"title": "x", "genre": "lo-fi", "bpm": 85},
+        video_meta={"length_sec": 120, "resolution": "1920x1080"},
+        metadata={"title": "Y", "description": "Z", "tags": ["lofi"], "category_id": 10},
+        thumbnail_url="/m/x.jpg", trend_top=["lofi"],
+        weights={"meta": 25, "policy": 30, "viewer": 25, "trend": 20},
+        threshold=60,
+    )
+    assert result["verdict"] == "pass"
+    expected_total = 0.25 * 80 + 0.30 * 90 + 0.25 * 75 + 0.20 * 70
+    assert result["weighted_total"] == pytest.approx(expected_total, abs=0.01)
+
+
+@pytest.mark.asyncio
+@respx.mock
+async def test_review_fail_below_threshold(monkeypatch):
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "k")
+    body = {"content": [{"type": "text", "text":
+        '{"metadata_quality":{"score":40,"notes":"x"},'
+        '"policy_compliance":{"score":50,"issues":[]},'
+        '"viewer_experience":{"score":30,"notes":"y"},'
+        '"trend_alignment":{"score":20,"matched_keywords":[]},'
+        '"summary":"bad"}'}]}
+    respx.post("https://api.anthropic.com/v1/messages").mock(return_value=Response(200, json=body))
+    result = await review.run_4_axis(
+        pipeline={"id": 2}, track={"title": "x", "genre": "lo-fi", "bpm": 85},
+        video_meta={"length_sec": 120, "resolution": "1920x1080"},
+        metadata={"title": "Y", "description": "Z", "tags": [], "category_id": 10},
+        thumbnail_url="/m/x.jpg", trend_top=[],
+        weights={"meta": 25, "policy": 30, "viewer": 25, "trend": 20},
+        threshold=60,
+    )
+    assert result["verdict"] == "fail"
+
+
+@pytest.mark.asyncio
+@respx.mock
+async def test_review_heuristic_fallback_on_llm_error(monkeypatch):
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "k")
+    respx.post("https://api.anthropic.com/v1/messages").mock(return_value=Response(500))
+    result = await review.run_4_axis(
+        pipeline={"id": 3}, track={"title": "x", "genre": "lo-fi", "bpm": 85, "duration_sec": 120},
+        video_meta={"length_sec": 120, "resolution": "1920x1080"},
+        metadata={"title": "Y" * 30, "description": "Z" * 200, "tags": ["a", "b", "c", "d", "e"], "category_id": 10},
+        thumbnail_url="/m/x.jpg", trend_top=["lofi"],
+        weights={"meta": 25, "policy": 30, "viewer": 25, "trend": 20},
+        threshold=60,
+    )
+    assert result["used_fallback"] is True
+    assert "weighted_total" in result
+
+
+@pytest.mark.asyncio
+async def test_review_heuristic_when_no_api_key(monkeypatch):
+    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+    result = await review.run_4_axis(
+        pipeline={"id": 4}, track={"title": "x", "genre": "lo-fi", "bpm": 85, "duration_sec": 120},
+        video_meta={"length_sec": 120, "resolution": "1920x1080"},
+        metadata={"title": "Test Title", "description": "Description here, more text " * 5,
+                  "tags": ["lofi", "study", "chill", "ambient", "instrumental"], "category_id": 10},
+        thumbnail_url="/m/x.jpg", trend_top=["lofi"],
+        weights={"meta": 25, "policy": 30, "viewer": 25, "trend": 20},
+        threshold=60,
+    )
+    assert result["used_fallback"] is True
+    # Heuristic: good metadata + matching video length + tag/trend overlap → expect pass
+    assert result["verdict"] == "pass"
+
+
+@pytest.mark.asyncio
+@respx.mock
+async def test_review_heuristic_fallback_on_empty_content(monkeypatch):
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "k")
+    respx.post("https://api.anthropic.com/v1/messages").mock(return_value=Response(200, json={"content": []}))
+    result = await review.run_4_axis(
+        pipeline={"id": 5}, track={"title": "x", "genre": "lo-fi", "bpm": 85, "duration_sec": 120},
+        video_meta={"length_sec": 120, "resolution": "1920x1080"},
+        metadata={"title": "Y" * 30, "description": "Z" * 200, "tags": ["a", "b", "c", "d", "e"], "category_id": 10},
+        thumbnail_url="/m/x.jpg", trend_top=["lofi"],
+        weights={"meta": 25, "policy": 30, "viewer": 25, "trend": 20},
+        threshold=60,
+    )
+    assert result["used_fallback"] is True
+    assert "weighted_total" in result
+
+
+@pytest.mark.asyncio
+async def test_review_heuristic_reports_banned_words(monkeypatch):
+    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+    result = await review.run_4_axis(
+        pipeline={"id": 6}, track={"title": "x", "genre": "lo-fi", "bpm": 85, "duration_sec": 120},
+        video_meta={"length_sec": 120, "resolution": "1920x1080"},
+        metadata={"title": "An F-Word song", "description": None, "tags": None, "category_id": 10},
+        thumbnail_url="/m/x.jpg", trend_top=["lofi"],
+        weights={"meta": 25, "policy": 30, "viewer": 25, "trend": 20},
+        threshold=60,
+    )
+    assert result["policy_compliance"]["score"] == 30
+    assert result["policy_compliance"]["issues"] == ["f-word"]