feat(music-lab): pipeline 4축 AI 검토 + 휴리스틱 폴백

This commit is contained in:
2026-05-07 17:01:17 +09:00
parent 1c705b0ef3
commit ad1c721ba8
2 changed files with 204 additions and 0 deletions

View File

@@ -0,0 +1,120 @@
"""AI 최종 검토 — 4축(메타/정책/시청/트렌드) 가중 평균."""
import os
import json
import logging
import httpx
# Module-level logger for the review step.
logger = logging.getLogger("music-lab.review")
# Model name used by _get_model() when CLAUDE_SONNET_MODEL is not set in the environment.
CLAUDE_SONNET_MODEL_DEFAULT = "claude-sonnet-4-6"
# Timeout handed to httpx.AsyncClient for the LLM request.
TIMEOUT_S = 60
# Substrings that the heuristic policy check looks for in the lowercased title + description.
POLICY_BANNED = {"f-word", "n-word"} # Demo placeholder; move to a separate file for production use.
def _get_api_key() -> str:
return os.getenv("ANTHROPIC_API_KEY", "")
def _get_model() -> str:
    """Return the model name to request.

    Reads CLAUDE_SONNET_MODEL from the environment and falls back to
    CLAUDE_SONNET_MODEL_DEFAULT when the variable is not set.
    """
    fallback = CLAUDE_SONNET_MODEL_DEFAULT
    return os.environ.get("CLAUDE_SONNET_MODEL", fallback)
async def run_4_axis(*, pipeline: dict, track: dict, video_meta: dict,
                     metadata: dict, thumbnail_url: str, trend_top: list[str],
                     weights: dict, threshold: int) -> dict:
    """Run the four-axis review and return the weighted verdict.

    When no API key is configured, or when the LLM call or its response
    cannot be used, the review falls back to the local heuristic so the
    caller always receives a verdict.

    Args:
        pipeline: Pipeline record; forwarded to the LLM call.
        track: Track information (the heuristic reads "duration_sec").
        video_meta: Video information (the heuristic reads "length_sec").
        metadata: Upload metadata ("title", "description", "tags").
        thumbnail_url: Thumbnail location included in the LLM prompt.
        trend_top: Trend keywords compared against the tags.
        weights: Percent weights keyed "meta", "policy", "viewer", "trend".
        threshold: Minimum weighted total for a "pass" verdict.

    Returns:
        The per-axis scores plus "weighted_total", "verdict" and
        "used_fallback".
    """
    api_key = _get_api_key()
    if not api_key:
        return _heuristic(metadata, video_meta, track, trend_top, weights, threshold,
                          fallback_reason="no api key")
    try:
        scores = await _call_claude(pipeline, track, video_meta, metadata,
                                    thumbnail_url, trend_top,
                                    api_key=api_key, model=_get_model())
        return _weighted_verdict(scores, weights, threshold, used_fallback=False)
    except (httpx.HTTPError, httpx.TimeoutException, KeyError, IndexError,
            TypeError, ValueError, json.JSONDecodeError) as e:
        # IndexError covers an empty "content" list in the API response;
        # TypeError covers a malformed score object (e.g. a missing or
        # non-numeric "score"). Both are LLM-output problems, so they take the
        # same heuristic fallback as transport and parse errors.
        logger.warning("검토 LLM 실패 — 휴리스틱: %s", e)
        return _heuristic(metadata, video_meta, track, trend_top, weights, threshold,
                          fallback_reason=str(e))
def _weighted_verdict(scores: dict, weights: dict, threshold: int,
used_fallback: bool) -> dict:
total = (
weights["meta"] / 100 * scores["metadata_quality"]["score"] +
weights["policy"] / 100 * scores["policy_compliance"]["score"] +
weights["viewer"] / 100 * scores["viewer_experience"]["score"] +
weights["trend"] / 100 * scores["trend_alignment"]["score"]
)
return {
**scores,
"weighted_total": round(total, 2),
"verdict": "pass" if total >= threshold else "fail",
"used_fallback": used_fallback,
}
async def _call_claude(pipeline, track, video_meta, metadata, thumbnail_url, trend_top,
                       *, api_key: str, model: str):
    """Ask the Anthropic Messages API for four-axis scores and return the parsed JSON.

    Args:
        pipeline: Accepted for signature parity with the caller; not used in
            the prompt.
        track, video_meta, metadata, thumbnail_url, trend_top: Values embedded
            in the user prompt.
        api_key: Sent as the "x-api-key" header.
        model: Model name placed in the request body.

    Returns:
        The JSON object decoded from the first text block of the response.

    Raises:
        httpx.HTTPError: On transport failure, or when raise_for_status()
            rejects the response status.
        ValueError: When the response carries no text block, the text holds no
            JSON object, or the JSON fails to decode (json.JSONDecodeError is
            a ValueError subclass).
    """
    user = (
        "트랙·영상·메타데이터를 4축으로 평가하고 JSON만 응답:\n"
        f"트랙: {json.dumps(track, ensure_ascii=False)}\n"
        f"영상: {json.dumps(video_meta)}\n"
        f"메타: {json.dumps(metadata, ensure_ascii=False)}\n"
        f"썸네일: {thumbnail_url}\n"
        f"트렌드: {trend_top}\n"
        '출력: {"metadata_quality":{"score":0-100,"notes":""},'
        '"policy_compliance":{"score":0-100,"issues":[]},'
        '"viewer_experience":{"score":0-100,"notes":""},'
        '"trend_alignment":{"score":0-100,"matched_keywords":[]},'
        '"summary":""}'
    )
    async with httpx.AsyncClient(timeout=TIMEOUT_S) as client:
        resp = await client.post(
            "https://api.anthropic.com/v1/messages",
            headers={
                "x-api-key": api_key,
                "anthropic-version": "2023-06-01",
                "content-type": "application/json",
            },
            json={"model": model, "max_tokens": 1024,
                  "messages": [{"role": "user", "content": user}]},
        )
        resp.raise_for_status()
        payload = resp.json()

    # Take the first block that actually carries text rather than indexing
    # content[0]: an empty list or a leading non-text block must surface as a
    # ValueError (which the caller treats as "fall back"), not an IndexError.
    blocks = payload.get("content") if isinstance(payload, dict) else None
    if not isinstance(blocks, list):
        blocks = []
    text = next(
        (b["text"] for b in blocks
         if isinstance(b, dict) and isinstance(b.get("text"), str)),
        None,
    )
    if text is None:
        raise ValueError("Claude 응답 텍스트 없음")

    # The model may wrap the JSON in prose; keep the outermost {...} span.
    start = text.find("{")
    end = text.rfind("}") + 1
    if start < 0 or end <= start:
        raise ValueError("Claude 응답 JSON 없음")
    return json.loads(text[start:end])
def _heuristic(metadata, video_meta, track, trend_top, weights, threshold, fallback_reason):
    """Score the upload with local rules when the LLM review is unavailable.

    Axis rules:
        * metadata: 100 when the title is 5-60 chars, the description is
          50-1000 chars and there are 5-15 tags; otherwise 50.
        * policy: 30 when any POLICY_BANNED entry occurs in the lowercased
          title + description (matches are listed in "issues"); otherwise 100.
        * viewer: 90 when the video length is within 5 seconds of the track
          duration; otherwise 60. A missing track duration counts as a match.
        * trend: 100 when at least one tag is also in ``trend_top``;
          otherwise 40.

    Missing or ``None`` fields are treated as empty/zero so that this
    last-resort path does not raise on incomplete input.

    Returns:
        The weighted verdict dict with ``used_fallback=True``; "summary"
        records ``fallback_reason``.
    """
    title = metadata.get("title") or ""
    description = metadata.get("description") or ""
    tags = metadata.get("tags") or []

    # Metadata: length and tag-count bounds.
    meta_ok = (5 <= len(title) <= 60
               and 50 <= len(description) <= 1000
               and 5 <= len(tags) <= 15)
    meta_score = 100 if meta_ok else 50

    # Policy: banned-word substring match; report which words matched.
    text_blob = (title + description).lower()
    banned_hits = sorted(w for w in POLICY_BANNED if w in text_blob)
    policy_score = 30 if banned_hits else 100

    # Viewer: video length should be close to the track length (within 5 s).
    video_len = video_meta.get("length_sec") or 0
    expected = track.get("duration_sec")
    if expected is None:
        expected = video_len
    delta = abs(video_len - expected)
    viewer_score = 90 if delta <= 5 else 60

    # Trend: do any tags overlap the trend keywords? Sorted for stable output.
    overlap = sorted(set(tags) & set(trend_top or []))
    trend_score = 100 if overlap else 40

    scores = {
        "metadata_quality": {"score": meta_score, "notes": "휴리스틱"},
        "policy_compliance": {"score": policy_score, "issues": banned_hits},
        "viewer_experience": {"score": viewer_score, "notes": "휴리스틱"},
        "trend_alignment": {"score": trend_score, "matched_keywords": overlap},
        "summary": f"휴리스틱 fallback: {fallback_reason}",
    }
    return _weighted_verdict(scores, weights, threshold, used_fallback=True)

View File

@@ -0,0 +1,84 @@
import pytest
import respx
from httpx import Response
from app.pipeline import review
@pytest.mark.asyncio
@respx.mock
async def test_review_returns_pass_when_above_threshold(monkeypatch):
    """LLM scores above the threshold give "pass" and the expected weighted total."""
    monkeypatch.setenv("ANTHROPIC_API_KEY", "k")
    llm_text = (
        '{"metadata_quality":{"score":80,"notes":"x"},'
        '"policy_compliance":{"score":90,"issues":[]},'
        '"viewer_experience":{"score":75,"notes":"y"},'
        '"trend_alignment":{"score":70,"matched_keywords":["lofi"]},'
        '"summary":"good"}'
    )
    llm_payload = {"content": [{"type": "text", "text": llm_text}]}
    respx.post("https://api.anthropic.com/v1/messages").mock(
        return_value=Response(200, json=llm_payload)
    )

    review_args = dict(
        pipeline={"id": 1},
        track={"title": "x", "genre": "lo-fi", "bpm": 85},
        video_meta={"length_sec": 120, "resolution": "1920x1080"},
        metadata={"title": "Y", "description": "Z", "tags": ["lofi"], "category_id": 10},
        thumbnail_url="/m/x.jpg",
        trend_top=["lofi"],
        weights={"meta": 25, "policy": 30, "viewer": 25, "trend": 20},
        threshold=60,
    )
    outcome = await review.run_4_axis(**review_args)

    assert outcome["verdict"] == "pass"
    # Weight fraction times the mocked score, per axis.
    expected_total = 0.25 * 80 + 0.30 * 90 + 0.25 * 75 + 0.20 * 70
    assert outcome["weighted_total"] == pytest.approx(expected_total, abs=0.01)
@pytest.mark.asyncio
@respx.mock
async def test_review_fail_below_threshold(monkeypatch):
    """LLM scores whose weighted total is under the threshold give "fail"."""
    monkeypatch.setenv("ANTHROPIC_API_KEY", "k")
    llm_text = (
        '{"metadata_quality":{"score":40,"notes":"x"},'
        '"policy_compliance":{"score":50,"issues":[]},'
        '"viewer_experience":{"score":30,"notes":"y"},'
        '"trend_alignment":{"score":20,"matched_keywords":[]},'
        '"summary":"bad"}'
    )
    llm_payload = {"content": [{"type": "text", "text": llm_text}]}
    respx.post("https://api.anthropic.com/v1/messages").mock(
        return_value=Response(200, json=llm_payload)
    )

    review_args = dict(
        pipeline={"id": 2},
        track={"title": "x", "genre": "lo-fi", "bpm": 85},
        video_meta={"length_sec": 120, "resolution": "1920x1080"},
        metadata={"title": "Y", "description": "Z", "tags": [], "category_id": 10},
        thumbnail_url="/m/x.jpg",
        trend_top=[],
        weights={"meta": 25, "policy": 30, "viewer": 25, "trend": 20},
        threshold=60,
    )
    outcome = await review.run_4_axis(**review_args)

    assert outcome["verdict"] == "fail"
@pytest.mark.asyncio
@respx.mock
async def test_review_heuristic_fallback_on_llm_error(monkeypatch):
    """An HTTP 500 from the LLM endpoint switches the review to the heuristic."""
    monkeypatch.setenv("ANTHROPIC_API_KEY", "k")
    respx.post("https://api.anthropic.com/v1/messages").mock(
        return_value=Response(500)
    )

    review_args = dict(
        pipeline={"id": 3},
        track={"title": "x", "genre": "lo-fi", "bpm": 85, "duration_sec": 120},
        video_meta={"length_sec": 120, "resolution": "1920x1080"},
        metadata={
            "title": "Y" * 30,
            "description": "Z" * 200,
            "tags": ["a", "b", "c", "d", "e"],
            "category_id": 10,
        },
        thumbnail_url="/m/x.jpg",
        trend_top=["lofi"],
        weights={"meta": 25, "policy": 30, "viewer": 25, "trend": 20},
        threshold=60,
    )
    outcome = await review.run_4_axis(**review_args)

    assert outcome["used_fallback"] is True
    assert "weighted_total" in outcome
@pytest.mark.asyncio
async def test_review_heuristic_when_no_api_key(monkeypatch):
    """Without an API key the review uses the heuristic and can still pass."""
    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)

    upload_metadata = {
        "title": "Test Title",
        "description": "Description here, more text " * 5,
        "tags": ["lofi", "study", "chill", "ambient", "instrumental"],
        "category_id": 10,
    }
    review_args = dict(
        pipeline={"id": 4},
        track={"title": "x", "genre": "lo-fi", "bpm": 85, "duration_sec": 120},
        video_meta={"length_sec": 120, "resolution": "1920x1080"},
        metadata=upload_metadata,
        thumbnail_url="/m/x.jpg",
        trend_top=["lofi"],
        weights={"meta": 25, "policy": 30, "viewer": 25, "trend": 20},
        threshold=60,
    )
    outcome = await review.run_4_axis(**review_args)

    assert outcome["used_fallback"] is True
    # Heuristic: good metadata + matching video length + tag/trend overlap -> expect pass.
    assert outcome["verdict"] == "pass"