From 1c705b0ef3131337cd71c400cacc30a96c3e8097 Mon Sep 17 00:00:00 2001
From: gahusb
Date: Thu, 7 May 2026 16:58:03 +0900
Subject: [PATCH] =?UTF-8?q?feat(music-lab):=20pipeline=20=EB=A9=94?=
 =?UTF-8?q?=ED=83=80=EB=8D=B0=EC=9D=B4=ED=84=B0=20LLM=20=EC=83=9D=EC=84=B1?=
 =?UTF-8?q?=20+=20=ED=8F=B4=EB=B0=B1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 music-lab/app/pipeline/metadata.py          | 185 ++++++++++++++++++++
 music-lab/tests/test_metadata_generation.py | 130 ++++++++++++++
 2 files changed, 315 insertions(+)
 create mode 100644 music-lab/app/pipeline/metadata.py
 create mode 100644 music-lab/tests/test_metadata_generation.py

diff --git a/music-lab/app/pipeline/metadata.py b/music-lab/app/pipeline/metadata.py
new file mode 100644
index 0000000..c5b12ba
--- /dev/null
+++ b/music-lab/app/pipeline/metadata.py
@@ -0,0 +1,185 @@
+"""Metadata generation: Claude Haiku with a template-substitution fallback."""
+import os
+import json
+import logging
+
+import httpx
+
+logger = logging.getLogger("music-lab.metadata")
+
+CLAUDE_HAIKU_MODEL_DEFAULT = "claude-haiku-4-5-20251001"
+TIMEOUT_S = 30
+# Caps applied to every returned result, LLM-generated or template-rendered.
+MAX_TITLE_LEN = 100
+MAX_DESCRIPTION_LEN = 5000
+MAX_TAGS = 15
+DEFAULT_CATEGORY_ID = 10
+
+
+def _get_api_key() -> str:
+    """Return ANTHROPIC_API_KEY, or "" when it is unset."""
+    return os.getenv("ANTHROPIC_API_KEY", "")
+
+
+def _get_model() -> str:
+    """Return the model id; CLAUDE_HAIKU_MODEL overrides the default."""
+    return os.getenv("CLAUDE_HAIKU_MODEL", CLAUDE_HAIKU_MODEL_DEFAULT)
+
+
+async def generate(*, track: dict, template: dict, trend_keywords: list[str],
+                   feedback: str = "") -> dict:
+    """Generate metadata with the LLM, falling back to the template.
+
+    The template fallback is used when no API key is configured, when the
+    request fails, or when the response cannot be parsed and validated.
+
+    Returns: {"title", "description", "tags", "category_id", "used_fallback", "error"}
+    """
+    api_key = _get_api_key()
+    if not api_key:
+        return {**_fallback_template(track, template), "used_fallback": True, "error": "no api key"}
+
+    # httpx.HTTPError is the base of timeouts and raise_for_status() errors, and
+    # ValueError of json.JSONDecodeError; the rest cover unexpected payload shapes.
+    try:
+        raw = await _call_claude(track, template, trend_keywords, feedback,
+                                 api_key=api_key, model=_get_model())
+        result = _normalize_llm_result(raw, template)
+    except (httpx.HTTPError, KeyError, IndexError, TypeError, ValueError) as e:
+        logger.warning("메타데이터 LLM 실패 — 폴백: %s", e)
+        return {**_fallback_template(track, template), "used_fallback": True, "error": str(e)}
+    return {**result, "used_fallback": False, "error": None}
+
+
+def _normalize_llm_result(raw: dict, template: dict) -> dict:
+    """Coerce the parsed LLM JSON into the documented result fields.
+
+    Raises ValueError when there is no usable title, so that generate() falls
+    back to the template instead of returning a partial result.
+    """
+    title = raw.get("title")
+    if not isinstance(title, str) or not title.strip():
+        raise ValueError("LLM metadata has no usable title")
+    description = raw.get("description")
+    if not isinstance(description, str):
+        description = ""
+    tags = raw.get("tags")
+    if not isinstance(tags, list):
+        tags = []
+    category_id = raw.get("category_id")
+    # bool is an int subclass; true/false is not a valid category id.
+    if isinstance(category_id, bool) or not isinstance(category_id, int):
+        category_id = template.get("category_id", DEFAULT_CATEGORY_ID)
+    return {
+        "title": title[:MAX_TITLE_LEN],
+        "description": description[:MAX_DESCRIPTION_LEN],
+        "tags": [tag for tag in tags if isinstance(tag, str)][:MAX_TAGS],
+        "category_id": category_id,
+    }
+
+
+class _KeepUnknown(dict):
+    """Format mapping that renders an unknown placeholder back as itself."""
+
+    def __missing__(self, key: str) -> str:
+        return "{" + key + "}"
+
+
+def _render(pattern: object, fmt_vars: dict) -> str:
+    """Substitute fmt_vars into pattern without raising.
+
+    Unknown placeholders are kept verbatim, a non-string pattern is treated as
+    "{title}", and a pattern that cannot be formatted at all (stray braces,
+    positional fields) is returned unchanged.
+    """
+    if not isinstance(pattern, str):
+        pattern = "{title}"
+    try:
+        return pattern.format_map(_KeepUnknown(fmt_vars))
+    except (ValueError, LookupError, AttributeError, TypeError):
+        return pattern
+
+
+def _fallback_template(track: dict, template: dict) -> dict:
+    """Build metadata by substituting track fields into the template strings.
+
+    This is the last-resort path of generate(), so a bad template must not
+    make it raise.
+    """
+    fmt_vars = {
+        name: "" if track.get(name) is None else track[name]
+        for name in ("title", "genre", "bpm", "key", "scale")
+    }
+    title = _render(template.get("title", "{title}"), fmt_vars)
+    description = _render(template.get("description", "{title}"), fmt_vars)
+    tags = template.get("tags")
+    return {
+        "title": title[:MAX_TITLE_LEN],
+        "description": description[:MAX_DESCRIPTION_LEN],
+        "tags": list(tags[:MAX_TAGS]) if isinstance(tags, (list, tuple)) else [],
+        "category_id": template.get("category_id", DEFAULT_CATEGORY_ID),
+    }
+
+
+def _extract_json_object(text: str) -> dict:
+    """Parse the first JSON object embedded in text.
+
+    raw_decode() stops at the end of the value it parsed, so prose or stray
+    braces after the JSON do not break parsing.
+    """
+    decoder = json.JSONDecoder()
+    start = text.find("{")
+    while start >= 0:
+        try:
+            obj, _ = decoder.raw_decode(text[start:])
+        except json.JSONDecodeError:
+            start = text.find("{", start + 1)
+        else:
+            return obj
+    raise ValueError("Claude 응답에 JSON 블록 없음")
+
+
+async def _call_claude(track: dict, template: dict, trend_keywords: list[str],
+                       feedback: str, *, api_key: str, model: str) -> dict:
+    """POST the prompt to the Anthropic Messages API and return the parsed JSON reply.
+
+    Raises httpx.HTTPError on transport or status errors and ValueError when
+    the reply contains no parseable JSON object.
+    """
+    user_prompt = (
+        "다음 트랙의 YouTube 메타데이터를 생성하세요. JSON으로만 응답.\n\n"
+        f"트랙: {json.dumps(track, ensure_ascii=False)}\n"
+        f"템플릿: {json.dumps(template, ensure_ascii=False)}\n"
+        f"트렌드 키워드: {', '.join(trend_keywords)}\n"
+    )
+    if feedback:
+        user_prompt += f"\n사용자 피드백: {feedback}\n"
+    user_prompt += (
+        '\n출력 JSON: {"title": "60자 이내", "description": "1000자 이내, 3-5문단",'
+        ' "tags": ["15개 이내"], "category_id": 10}'
+    )
+
+    async with httpx.AsyncClient(timeout=TIMEOUT_S) as client:
+        resp = await client.post(
+            "https://api.anthropic.com/v1/messages",
+            headers={
+                "x-api-key": api_key,
+                "anthropic-version": "2023-06-01",
+                "content-type": "application/json",
+            },
+            json={
+                "model": model,
+                "max_tokens": 1024,
+                "messages": [{"role": "user", "content": user_prompt}],
+            },
+        )
+        resp.raise_for_status()
+        blocks = resp.json()["content"]
+    # Use the first block carrying text instead of assuming content[0] is one.
+    text = next(
+        (b["text"] for b in blocks if isinstance(b, dict) and isinstance(b.get("text"), str)),
+        None,
+    )
+    if text is None:
+        raise ValueError("Claude 응답에 JSON 블록 없음")
+    return _extract_json_object(text)
diff --git a/music-lab/tests/test_metadata_generation.py b/music-lab/tests/test_metadata_generation.py
new file mode 100644
index 0000000..b890896
--- /dev/null
+++ b/music-lab/tests/test_metadata_generation.py
@@ -0,0 +1,130 @@
+import json
+
+import pytest
+import respx
+from httpx import Response
+from app.pipeline import metadata
+
+
+@pytest.mark.asyncio
+@respx.mock
+async def test_metadata_calls_claude_and_parses_json(monkeypatch):
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")
+    payload = {
+        "content": [{"type": "text", "text": '{"title":"[Lo-fi] Drive | 85BPM",'
+                     '"description":"chill","tags":["lofi","85bpm"],'
+                     '"category_id":10}'}]
+    }
+    respx.post("https://api.anthropic.com/v1/messages").mock(
+        return_value=Response(200, json=payload)
+    )
+    result = await metadata.generate(
+        track={"title": "Drive", "genre": "lo-fi", "bpm": 85, "key": "C", "scale": "minor",
+               "moods": ["chill"], "instruments": ["piano"]},
+        template={"title": "[{genre}] {title} | {bpm}BPM",
+                  "description": "{title}\n", "tags": [], "category_id": 10},
+        trend_keywords=["lofi", "study"],
+        feedback="",
+    )
+    assert result["title"].startswith("[Lo-fi]")
+    assert "lofi" in result["tags"]
+    assert result["used_fallback"] is False
+
+
+@pytest.mark.asyncio
+async def test_metadata_fallback_when_no_api_key(monkeypatch):
+    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+    result = await metadata.generate(
+        track={"title": "Drive", "genre": "lo-fi", "bpm": 85, "key": "C", "scale": "minor",
+               "moods": [], "instruments": []},
+        template={"title": "[{genre}] {title} | {bpm}BPM",
+                  "description": "{title}", "tags": ["lofi"], "category_id": 10},
+        trend_keywords=[],
+    )
+    # The fallback substitutes the template variables as-is.
+    assert result["title"] == "[lo-fi] Drive | 85BPM"
+    assert result["used_fallback"] is True
+
+
+@pytest.mark.asyncio
+@respx.mock
+async def test_metadata_includes_feedback_in_prompt(monkeypatch):
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")
+    captured = {}
+
+    def hook(req):
+        captured["body"] = json.loads(req.content)
+        return Response(200, json={"content": [{"type": "text",
+            "text": '{"title":"x","description":"y","tags":[],"category_id":10}'}]})
+
+    respx.post("https://api.anthropic.com/v1/messages").mock(side_effect=hook)
+    await metadata.generate(
+        track={"title": "X", "genre": "lo-fi", "bpm": 85, "key": "C", "scale": "minor",
+               "moods": [], "instruments": []},
+        template={"title": "{title}", "description": "{title}", "tags": [], "category_id": 10},
+        trend_keywords=[],
+        feedback="제목을 짧게",
+    )
+    assert "제목을 짧게" in str(captured["body"])
+
+
+@pytest.mark.asyncio
+@respx.mock
+async def test_metadata_falls_back_on_api_error(monkeypatch):
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")
+    respx.post("https://api.anthropic.com/v1/messages").mock(
+        return_value=Response(500)
+    )
+    result = await metadata.generate(
+        track={"title": "Drive", "genre": "lo-fi", "bpm": 85, "key": "C", "scale": "minor",
+               "moods": [], "instruments": []},
+        template={"title": "[{genre}] {title}", "description": "x", "tags": ["lofi"], "category_id": 10},
+        trend_keywords=[],
+    )
+    assert result["used_fallback"] is True
+    assert "Drive" in result["title"]
+
+
+@pytest.mark.asyncio
+async def test_metadata_fallback_survives_bad_template(monkeypatch):
+    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+    result = await metadata.generate(
+        track={"title": "Drive", "genre": "lo-fi", "bpm": 85},
+        template={"title": "{title} {mood}", "description": "100% {", "tags": None},
+        trend_keywords=[],
+    )
+    # Unknown placeholders and malformed patterns are kept verbatim, not raised.
+    assert result["title"] == "Drive {mood}"
+    assert result["description"] == "100% {"
+    assert result["tags"] == []
+    assert result["used_fallback"] is True
+
+
+@pytest.mark.asyncio
+@respx.mock
+async def test_metadata_parses_json_followed_by_braces(monkeypatch):
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")
+    text = ('Here it is: {"title":"t","description":"d","tags":["a"],"category_id":10}'
+            ' (note: {optional})')
+    respx.post("https://api.anthropic.com/v1/messages").mock(
+        return_value=Response(200, json={"content": [{"type": "text", "text": text}]})
+    )
+    result = await metadata.generate(
+        track={"title": "Drive"}, template={"title": "{title}"}, trend_keywords=[],
+    )
+    assert result["used_fallback"] is False
+    assert result["title"] == "t"
+
+
+@pytest.mark.asyncio
+@respx.mock
+async def test_metadata_falls_back_on_unusable_response(monkeypatch):
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")
+    respx.post("https://api.anthropic.com/v1/messages").mock(
+        return_value=Response(200, json={"content": []})
+    )
+    result = await metadata.generate(
+        track={"title": "Drive"}, template={"title": "{title}"}, trend_keywords=[],
+    )
+    assert result["used_fallback"] is True
+    assert result["title"] == "Drive"