feat(music-lab): pipeline 메타데이터 LLM 생성 + 폴백

This commit is contained in:
2026-05-07 16:58:03 +09:00
parent 68dec2e53d
commit 1c705b0ef3
2 changed files with 177 additions and 0 deletions

View File

@@ -0,0 +1,95 @@
"""메타데이터 생성 — Claude Haiku + 템플릿 폴백."""
import os
import json
import logging
import httpx
# Module logger; records are emitted under the "music-lab.metadata" name.
logger = logging.getLogger("music-lab.metadata")
# Model id used when the CLAUDE_HAIKU_MODEL environment variable is not set.
CLAUDE_HAIKU_MODEL_DEFAULT = "claude-haiku-4-5-20251001"
# Timeout passed to httpx.AsyncClient for the Anthropic request (seconds).
TIMEOUT_S = 30
def _get_api_key() -> str:
    """Return the Anthropic API key from the environment, or "" if unset.

    ``ANTHROPIC_API_KEY`` is read on every call, so changes to the
    environment (for example in tests) are picked up immediately.
    Surrounding whitespace is stripped: keys loaded from secret files often
    carry a trailing newline, which is not a legal HTTP header value, and a
    whitespace-only value is treated the same as an unset key.
    """
    return os.getenv("ANTHROPIC_API_KEY", "").strip()
def _get_model() -> str:
    """Return the Claude model id to use for metadata generation.

    ``CLAUDE_HAIKU_MODEL`` overrides the built-in default. A blank or
    whitespace-only value (for example an empty substitution in an env file)
    is treated as unset, so a request is never sent with an empty model name.
    """
    return os.getenv("CLAUDE_HAIKU_MODEL", "").strip() or CLAUDE_HAIKU_MODEL_DEFAULT
async def generate(*, track: dict, template: dict, trend_keywords: list[str],
                   feedback: str = "") -> dict:
    """Generate YouTube metadata for a track: LLM first, template as fallback.

    Args:
        track: Track attributes; serialized into the prompt and used for
            template substitution in the fallback.
        template: Metadata template (title/description patterns, tags,
            category id).
        trend_keywords: Keywords listed in the prompt.
        feedback: Optional user feedback, appended to the prompt when
            non-empty.

    Returns:
        The metadata dict plus two bookkeeping keys: ``used_fallback``
        (``False`` for an LLM result, ``True`` for the template fallback) and
        ``error`` (``None`` on success, otherwise a non-empty reason string).
        The fallback always carries ``title``, ``description``, ``tags`` and
        ``category_id``; an LLM result carries whatever keys the model
        returned.
    """
    api_key = _get_api_key()
    if not api_key:
        return {
            **_fallback_template(track, template),
            "used_fallback": True,
            "error": "no api key",
        }
    try:
        result = await _call_claude(track, template, trend_keywords, feedback,
                                    api_key=api_key, model=_get_model())
        # Unpacked inside the try so a non-mapping result (TypeError) is routed
        # to the fallback as well.
        return {**result, "used_fallback": False, "error": None}
    except (httpx.HTTPError, httpx.TimeoutException,
            LookupError, TypeError, ValueError) as e:
        # LookupError covers KeyError and IndexError (e.g. an empty "content"
        # list); ValueError covers json.JSONDecodeError.
        # str() of an exception can be empty (commonly seen with timeouts), so
        # fall back to the class name to keep "error" truthy and the log useful.
        reason = str(e) or type(e).__name__
        logger.warning("메타데이터 LLM 실패 — 폴백: %s", reason)
        return {
            **_fallback_template(track, template),
            "used_fallback": True,
            "error": reason,
        }
def _fallback_template(track: dict, template: dict) -> dict:
    """Build metadata by substituting track fields into the template.

    This is the last-resort path used when the LLM is unavailable, so it is
    written not to raise on imperfect input:

    * the placeholders ``{title}``, ``{genre}``, ``{bpm}``, ``{key}`` and
      ``{scale}`` are filled from ``track``; a missing or ``None`` value
      renders as an empty string;
    * any other placeholder renders as an empty string instead of raising
      ``KeyError``;
    * a pattern that is missing, not a string, or cannot be formatted
      (e.g. unbalanced braces) is replaced by the plain track title.

    Returns:
        A dict with ``title`` (at most 100 characters), ``description`` (at
        most 5000 characters), ``tags`` (at most 15 entries) and
        ``category_id`` (default 10).
    """
    from collections import defaultdict  # local import keeps the block self-contained

    # defaultdict(str) makes unknown placeholders resolve to "".
    fmt_vars = defaultdict(str)
    for name in ("title", "genre", "bpm", "key", "scale"):
        value = track.get(name)
        fmt_vars[name] = "" if value is None else value

    def render(field: str) -> str:
        pattern = template.get(field)
        if not isinstance(pattern, str):
            pattern = "{title}"
        try:
            return pattern.format_map(fmt_vars)
        except (ValueError, LookupError, AttributeError, TypeError):
            # Malformed pattern: degrade to the bare title rather than fail.
            return str(fmt_vars["title"])

    return {
        "title": render("title")[:100],
        "description": render("description")[:5000],
        "tags": (template.get("tags") or [])[:15],
        "category_id": template.get("category_id", 10),
    }
async def _call_claude(track: dict, template: dict, trend_keywords: list[str],
                       feedback: str, *, api_key: str, model: str) -> dict:
    """Ask Claude for YouTube metadata and return the parsed JSON object.

    Builds a single user message from the track, the template, the trend
    keywords and (when non-empty) the user feedback, posts it to the
    Anthropic Messages API, and decodes the first JSON object found in the
    reply text.

    Raises:
        httpx.HTTPError: transport failure, timeout, or an error status
            reported by ``raise_for_status``.
        ValueError: the response body has an unexpected shape, or the reply
            text contains no decodable JSON object (``json.JSONDecodeError``
            is a ``ValueError`` subclass).
    """
    user_prompt = (
        "다음 트랙의 YouTube 메타데이터를 생성하세요. JSON으로만 응답.\n\n"
        f"트랙: {json.dumps(track, ensure_ascii=False)}\n"
        f"템플릿: {json.dumps(template, ensure_ascii=False)}\n"
        f"트렌드 키워드: {', '.join(trend_keywords)}\n"
    )
    if feedback:
        user_prompt += f"\n사용자 피드백: {feedback}\n"
    user_prompt += (
        '\n출력 JSON: {"title": "60자 이내", "description": "1000자 이내, 3-5문단",'
        ' "tags": ["15개 이내"], "category_id": 10}'
    )

    async with httpx.AsyncClient(timeout=TIMEOUT_S) as client:
        resp = await client.post(
            "https://api.anthropic.com/v1/messages",
            headers={
                "x-api-key": api_key,
                "anthropic-version": "2023-06-01",
                "content-type": "application/json",
            },
            json={
                "model": model,
                # NOTE(review): 1024 tokens may be tight for a 1000-character
                # description plus tags — confirm against real responses.
                "max_tokens": 1024,
                "messages": [{"role": "user", "content": user_prompt}],
            },
        )
    resp.raise_for_status()

    # Normalise "wrong shape" failures to ValueError so the caller only has to
    # handle one exception type for a bad response body.
    try:
        text = resp.json()["content"][0]["text"]
    except (LookupError, TypeError) as exc:
        raise ValueError("unexpected Claude response shape") from exc
    if not isinstance(text, str):
        raise ValueError("unexpected Claude response shape")

    # Decode the first JSON object in the reply. raw_decode stops at the end
    # of that object, so text after it (even text containing braces) is ignored.
    start = text.find("{")
    if start < 0:
        raise ValueError("Claude 응답에 JSON 블록 없음")
    parsed, _ = json.JSONDecoder().raw_decode(text, start)
    return parsed

View File

@@ -0,0 +1,82 @@
import pytest
import respx
from httpx import Response
from app.pipeline import metadata
@pytest.mark.asyncio
@respx.mock
async def test_metadata_calls_claude_and_parses_json(monkeypatch):
    """With an API key set, the JSON in Claude's reply becomes the result."""
    monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")

    # The mocked reply carries one JSON object as its text block.
    reply_text = (
        '{"title":"[Lo-fi] Drive | 85BPM",'
        '"description":"chill","tags":["lofi","85bpm"],'
        '"category_id":10}'
    )
    claude_route = respx.post("https://api.anthropic.com/v1/messages")
    claude_route.mock(
        return_value=Response(200, json={"content": [{"type": "text", "text": reply_text}]})
    )

    track = {
        "title": "Drive",
        "genre": "lo-fi",
        "bpm": 85,
        "key": "C",
        "scale": "minor",
        "moods": ["chill"],
        "instruments": ["piano"],
    }
    template = {
        "title": "[{genre}] {title} | {bpm}BPM",
        "description": "{title}\n",
        "tags": [],
        "category_id": 10,
    }

    result = await metadata.generate(
        track=track,
        template=template,
        trend_keywords=["lofi", "study"],
        feedback="",
    )

    assert result["title"].startswith("[Lo-fi]")
    assert "lofi" in result["tags"]
    assert result["used_fallback"] is False
@pytest.mark.asyncio
async def test_metadata_fallback_when_no_api_key(monkeypatch):
    """Without an API key, the template is rendered instead of calling the LLM."""
    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)

    track = {
        "title": "Drive",
        "genre": "lo-fi",
        "bpm": 85,
        "key": "C",
        "scale": "minor",
        "moods": [],
        "instruments": [],
    }
    template = {
        "title": "[{genre}] {title} | {bpm}BPM",
        "description": "{title}",
        "tags": ["lofi"],
        "category_id": 10,
    }

    result = await metadata.generate(track=track, template=template, trend_keywords=[])

    # The fallback title is the template with its variables substituted as-is.
    assert result["title"] == "[lo-fi] Drive | 85BPM"
    assert result["used_fallback"] is True
@pytest.mark.asyncio
@respx.mock
async def test_metadata_includes_feedback_in_prompt(monkeypatch):
    """User feedback passed to generate() shows up in the outgoing request body."""
    import json

    monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")
    captured = {}

    def record_and_reply(request):
        # Keep the decoded request body so it can be inspected after the call.
        captured["body"] = json.loads(request.content)
        reply = {
            "content": [
                {
                    "type": "text",
                    "text": '{"title":"x","description":"y","tags":[],"category_id":10}',
                }
            ]
        }
        return Response(200, json=reply)

    respx.post("https://api.anthropic.com/v1/messages").mock(side_effect=record_and_reply)

    track = {
        "title": "X",
        "genre": "lo-fi",
        "bpm": 85,
        "key": "C",
        "scale": "minor",
        "moods": [],
        "instruments": [],
    }
    template = {"title": "{title}", "description": "{title}", "tags": [], "category_id": 10}

    await metadata.generate(
        track=track,
        template=template,
        trend_keywords=[],
        feedback="제목을 짧게",
    )

    assert "제목을 짧게" in str(captured["body"])
@pytest.mark.asyncio
@respx.mock
async def test_metadata_falls_back_on_api_error(monkeypatch):
    """A 500 from the API yields the template fallback rather than an exception."""
    monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")
    claude_route = respx.post("https://api.anthropic.com/v1/messages")
    claude_route.mock(return_value=Response(500))

    track = {
        "title": "Drive",
        "genre": "lo-fi",
        "bpm": 85,
        "key": "C",
        "scale": "minor",
        "moods": [],
        "instruments": [],
    }
    template = {
        "title": "[{genre}] {title}",
        "description": "x",
        "tags": ["lofi"],
        "category_id": 10,
    }

    result = await metadata.generate(track=track, template=template, trend_keywords=[])

    assert result["used_fallback"] is True
    assert "Drive" in result["title"]