diff --git a/music-lab/app/pipeline/metadata.py b/music-lab/app/pipeline/metadata.py
index c5b12ba..f84fe27 100644
--- a/music-lab/app/pipeline/metadata.py
+++ b/music-lab/app/pipeline/metadata.py
@@ -19,26 +19,46 @@ def _get_model() -> str:
     return os.getenv("CLAUDE_HAIKU_MODEL", CLAUDE_HAIKU_MODEL_DEFAULT)
 
 
+def _format_chapters(tracks: list[dict]) -> str:
+    """Render tracks as YouTube chapter lines, one 'mm:ss title' per line.
+
+    Offsets >= 1 hour use 'hh:mm:ss'; a missing, None or negative offset is 0.
+    """
+    if not tracks:
+        return ""
+    lines = []
+    for t in tracks:
+        offset = max(0, int(t.get("start_offset_sec") or 0))  # None-safe; int(None) raises
+        m, s = divmod(offset, 60)
+        h, m = divmod(m, 60)
+        if h > 0:
+            ts = f"{h:02d}:{m:02d}:{s:02d}"
+        else:
+            ts = f"{m:02d}:{s:02d}"
+        lines.append(f"{ts} {t.get('title') or ''}")  # never print the text "None"
+    return "\n".join(lines)
+
+
 async def generate(*, track: dict, template: dict, trend_keywords: list[str],
-                   feedback: str = "") -> dict:
+                   feedback: str = "", tracks: list[dict] | None = None) -> dict:
     """메타데이터 생성. 성공 시 LLM, 실패/미설정 시 템플릿 치환 폴백.
 
     반환: {"title", "description", "tags", "category_id", "used_fallback", "error"}
     """
     api_key = _get_api_key()
     if not api_key:
-        return {**_fallback_template(track, template), "used_fallback": True, "error": "no api key"}
+        return {**_fallback_template(track, template, tracks), "used_fallback": True, "error": "no api key"}
     try:
-        result = await _call_claude(track, template, trend_keywords, feedback,
+        result = await _call_claude(track, template, trend_keywords, feedback, tracks,
                                     api_key=api_key, model=_get_model())
         return {**result, "used_fallback": False, "error": None}
     except (httpx.HTTPError, httpx.TimeoutException, KeyError, ValueError,
             json.JSONDecodeError) as e:
         logger.warning("메타데이터 LLM 실패 — 폴백: %s", e)
-        return {**_fallback_template(track, template), "used_fallback": True, "error": str(e)}
+        return {**_fallback_template(track, template, tracks), "used_fallback": True, "error": str(e)}
 
 
-def _fallback_template(track: dict, template: dict) -> dict:
+def _fallback_template(track: dict, template: dict, tracks: list[dict] | None = None) -> dict:
     fmt_vars = {
         "title": track.get("title", ""),
         "genre": track.get("genre", ""),
@@ -48,6 +68,8 @@ def _fallback_template(track: dict, template: dict) -> dict:
     }
     title = template.get("title", "{title}").format(**fmt_vars)
     description = template.get("description", "{title}").format(**fmt_vars)
+    if tracks and len(tracks) > 1:
+        description = description + "\n\n" + _format_chapters(tracks)
     return {
         "title": title[:100],
         "description": description[:5000],
@@ -57,17 +79,24 @@ def _fallback_template(track: dict, template: dict) -> dict:
 
 
 async def _call_claude(track: dict, template: dict, trend_keywords: list[str],
-                       feedback: str, *, api_key: str, model: str) -> dict:
+                       feedback: str, tracks: list[dict] | None,
+                       *, api_key: str, model: str) -> dict:
     user_prompt = (
         "다음 트랙의 YouTube 메타데이터를 생성하세요. JSON으로만 응답.\n\n"
         f"트랙: {json.dumps(track, ensure_ascii=False)}\n"
         f"템플릿: {json.dumps(template, ensure_ascii=False)}\n"
         f"트렌드 키워드: {', '.join(trend_keywords)}\n"
     )
+    if tracks and len(tracks) > 1:
+        chapters = _format_chapters(tracks)
+        user_prompt += (
+            f"\n이 영상은 {len(tracks)}개 트랙의 mix입니다. "
+            f"description에 다음 챕터 리스트를 그대로 포함하세요 (YouTube 자동 챕터 인식용):\n{chapters}\n"
+        )
     if feedback:
         user_prompt += f"\n사용자 피드백: {feedback}\n"
     user_prompt += (
-        '\n출력 JSON: {"title": "60자 이내", "description": "1000자 이내, 3-5문단",'
+        '\n출력 JSON: {"title": "60자 이내", "description": "1000자 이내",'
         ' "tags": ["15개 이내"], "category_id": 10}'
     )
 
@@ -81,7 +110,7 @@ async def _call_claude(track: dict, template: dict, trend_keywords: list[str],
         },
         json={
             "model": model,
-            "max_tokens": 1024,
+            "max_tokens": 2048,  # mix replies embed the chapter list, so allow more
             "messages": [{"role": "user", "content": user_prompt}],
         },
     )
diff --git a/music-lab/tests/test_metadata_generation.py b/music-lab/tests/test_metadata_generation.py
index b890896..ee26cfc 100644
--- a/music-lab/tests/test_metadata_generation.py
+++ b/music-lab/tests/test_metadata_generation.py
@@ -80,3 +80,75 @@ async def test_metadata_falls_back_on_api_error(monkeypatch):
     )
     assert result["used_fallback"] is True
     assert "Drive" in result["title"]
+
+
+@pytest.mark.asyncio
+@respx.mock
+async def test_metadata_with_tracks_includes_chapter_format(monkeypatch):
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "k")
+    captured = {}
+
+    def hook(req):
+        import json as _json
+        captured["body"] = _json.loads(req.content)
+        return Response(200, json={"content": [{"type": "text", "text":
+            '{"title":"Lo-Fi Mix 3 Tracks","description":"Track 1: [00:00] T1\\nTrack 2: [03:00] T2",'
+            '"tags":["lofi","mix"],"category_id":10}'}]})
+
+    respx.post("https://api.anthropic.com/v1/messages").mock(side_effect=hook)
+    result = await metadata.generate(
+        track={"title": "Mix", "genre": "mix", "duration_sec": 600,
+               "moods": []},
+        template={"title": "{title}", "description": "{title}",
+                  "tags": [], "category_id": 10},
+        trend_keywords=[],
+        tracks=[
+            {"id": 1, "title": "T1", "start_offset_sec": 0, "duration_sec": 180},
+            {"id": 2, "title": "T2", "start_offset_sec": 180, "duration_sec": 200},
+            {"id": 3, "title": "T3", "start_offset_sec": 380, "duration_sec": 220},
+        ],
+    )
+    body_str = str(captured["body"])
+    assert "T1" in body_str and "T2" in body_str and "T3" in body_str
+    assert "00:00" in body_str
+    assert result["used_fallback"] is False
+
+
+@pytest.mark.asyncio
+async def test_metadata_fallback_with_tracks(monkeypatch):
+    """Track chapters are included in the fallback when no API key is set."""
+    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+    result = await metadata.generate(
+        track={"title": "Mix", "genre": "mix", "duration_sec": 600, "moods": []},
+        template={"title": "{title}", "description": "{title}",
+                  "tags": [], "category_id": 10},
+        trend_keywords=[],
+        tracks=[
+            {"id": 1, "title": "T1", "start_offset_sec": 0, "duration_sec": 180},
+            {"id": 2, "title": "T2", "start_offset_sec": 180, "duration_sec": 200},
+        ],
+    )
+    assert result["used_fallback"] is True
+    assert "00:00" in result["description"]
+    assert "T1" in result["description"]
+    assert "T2" in result["description"]
+
+
+def test_format_chapters_under_hour():
+    from app.pipeline.metadata import _format_chapters
+    out = _format_chapters([
+        {"start_offset_sec": 0, "title": "T1"},
+        {"start_offset_sec": 180, "title": "T2"},
+    ])
+    assert "00:00 T1" in out
+    assert "03:00 T2" in out
+
+
+def test_format_chapters_over_hour():
+    from app.pipeline.metadata import _format_chapters
+    out = _format_chapters([
+        {"start_offset_sec": 0, "title": "T1"},
+        {"start_offset_sec": 3700, "title": "T2"},
+    ])
+    assert "00:00 T1" in out
+    assert "01:01:40 T2" in out