"""Collectors for music-trend signals (YouTube, Google Trends, Billboard).

Every ``fetch_*`` coroutine is best-effort: on failure it logs a warning and
returns an empty list instead of raising.  All collectors emit rows with the
same keys: ``source``, ``country``, ``genre``, ``keyword``, ``score``,
``rank`` and ``metadata``.
"""

import asyncio
import html
import logging
import os
import re
from typing import Any, Dict, List, Optional

import httpx

logger = logging.getLogger(__name__)

YOUTUBE_DATA_API_KEY = os.getenv("YOUTUBE_DATA_API_KEY", "")
MUSIC_LAB_URL = os.getenv("MUSIC_LAB_URL", "http://music-lab:8000")

TARGET_COUNTRIES = ["BR", "ID", "MX", "US", "KR"]
TREND_KEYWORDS = ["lofi music", "phonk", "ambient music", "chill beats", "study music"]
YOUTUBE_MUSIC_CAT = "10"

# videos.list only accepts maxResults in the range 1..50.
YOUTUBE_MAX_PAGE_SIZE = 50
# Number of chart entries kept by fetch_billboard_top20 (also the score divisor).
BILLBOARD_TOP_N = 20

GENRE_TAGS = {
    "lo-fi": ["lofi", "lo-fi", "lo fi", "chill", "study"],
    "phonk": ["phonk", "drift", "memphis"],
    "ambient": ["ambient", "relaxing", "meditation"],
    "pop": ["pop", "kpop", "k-pop"],
    "funk": ["funk", "baile funk"],
    "latin": ["latin", "reggaeton", "sertanejo"],
}

# Captures the text content of any element whose class attribute starts with
# "c-title".  NOTE(review): this depends on Billboard's current markup and may
# also match non-chart headings on the page -- verify against a live response.
_BILLBOARD_TITLE_RE = re.compile(
    r'class="c-title[^"]*"[^>]*>\s*([^<\n]{3,80})\s*<'
)


def _tags_to_genre(tags: list) -> str:
    """Return the first genre in ``GENRE_TAGS`` whose keyword occurs in *tags*.

    All tags are lower-cased and joined with spaces, then each genre's
    keywords are tested as plain substrings of that string (so ``"pop"`` also
    matches ``"popular"``).  Genres are tried in ``GENRE_TAGS`` insertion
    order.  Non-string entries in *tags* are ignored.

    Returns:
        The matching genre key, or ``"general"`` when nothing matches.
    """
    joined = " ".join(t.lower() for t in tags if isinstance(t, str))
    for genre, kws in GENRE_TAGS.items():
        if any(kw in joined for kw in kws):
            return genre
    return "general"


def _to_int(value: Any, default: int = 0) -> int:
    """Convert *value* to ``int``, returning *default* if it is not numeric."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return default


def _youtube_item_to_row(
    item: Any, index: int, country: str, page_size: int
) -> Optional[Dict[str, Any]]:
    """Convert one ``videos.list`` item into a trend row.

    Returns ``None`` for an item that is not a dict or has no ``id``, so one
    malformed entry cannot abort the whole batch.
    """
    if not isinstance(item, dict):
        return None
    video_id = item.get("id")
    if not video_id:
        return None

    # "or {}" / "or ''" also cover keys that are present but null.
    snippet = item.get("snippet") or {}
    stats = item.get("statistics") or {}
    return {
        "source": "youtube",
        "country": country,
        "genre": _tags_to_genre(snippet.get("tags") or []),
        "keyword": (snippet.get("title") or "")[:100],
        # Linear decay by chart position: first item 1.0, approaching 0.
        "score": round(1.0 - index / page_size, 3),
        "rank": index + 1,
        "metadata": {
            "video_id": video_id,
            "view_count": _to_int(stats.get("viewCount", 0)),
            "channel": snippet.get("channelTitle", ""),
        },
    }


async def fetch_youtube_trending(country: str, max_results: int = 50) -> List[Dict[str, Any]]:
    """YouTube Data API v3 -- trending music videos per country (categoryId=10).

    Args:
        country: Region code sent as ``regionCode`` and copied into each row.
        max_results: Requested number of videos.  Values above 50 are capped
            at 50 (the API maximum); values <= 0 yield an empty list without
            issuing a request.

    Returns:
        One row per returned video, ranked by position in the API response.
        Empty when no API key is configured, the response is not HTTP 200, or
        the request/JSON decoding fails.
    """
    if not YOUTUBE_DATA_API_KEY or max_results <= 0:
        return []
    page_size = min(max_results, YOUTUBE_MAX_PAGE_SIZE)

    async with httpx.AsyncClient(timeout=10.0) as client:
        try:
            resp = await client.get(
                "https://www.googleapis.com/youtube/v3/videos",
                params={
                    "part": "snippet,statistics",
                    "chart": "mostPopular",
                    "regionCode": country,
                    "videoCategoryId": YOUTUBE_MUSIC_CAT,
                    "maxResults": page_size,
                    "key": YOUTUBE_DATA_API_KEY,
                },
            )
            if resp.status_code != 200:
                logger.warning(
                    "YouTube trending request for %s returned HTTP %s",
                    country, resp.status_code,
                )
                return []
            items = resp.json().get("items") or []
        except Exception as exc:
            # Log only the exception type: the request URL carries the API key
            # and must not end up in log output.
            logger.warning(
                "YouTube trending request for %s failed: %s",
                country, type(exc).__name__,
            )
            return []

    rows = []
    for index, item in enumerate(items):
        row = _youtube_item_to_row(item, index, country, page_size)
        if row is not None:
            rows.append(row)
    return rows


async def fetch_google_trends(keywords: List[str], countries: List[str]) -> List[Dict[str, Any]]:
    """pytrends -- Google search interest per keyword (sync call run in a thread pool).

    Only the first five keywords are queried, one request per keyword, with a
    one-second pause between requests.

    NOTE(review): ``build_payload`` is called without ``geo``, so a single
    score per keyword is computed and then copied to every entry of
    *countries*; the ``country`` field does not reflect a per-country query.
    Passing ``geo`` would multiply the request count -- confirm the intended
    trade-off before changing it.

    Args:
        keywords: Search terms; only ``keywords[:5]`` are used.
        countries: Country codes; each successful keyword yields one row per
            country.

    Returns:
        Rows with ``score`` = mean interest over the last 7 days divided by
        100, ``rank`` = ``None`` and an empty ``genre``.  Empty when pytrends
        is not installed; keywords whose lookup fails contribute no rows.
    """
    try:
        from pytrends.request import TrendReq
    except ImportError:
        logger.warning("pytrends is not installed; skipping Google Trends")
        return []

    def _sync_fetch(kw: str) -> List[Dict[str, Any]]:
        """Blocking lookup for one keyword; returns [] on any failure."""
        try:
            pt = TrendReq(hl="en-US", tz=0, timeout=(5, 15))
            pt.build_payload([kw], timeframe="now 7-d")
            df = pt.interest_over_time()
            if df.empty or kw not in df.columns:
                return []
            score = round(float(df[kw].mean()) / 100.0, 3)
            return [
                {"source": "google_trends", "country": c, "genre": "",
                 "keyword": kw, "score": score, "rank": None, "metadata": {}}
                for c in countries
            ]
        except Exception as exc:
            logger.warning("Google Trends lookup for %r failed: %s", kw, exc)
            return []

    loop = asyncio.get_running_loop()
    results: List[Dict[str, Any]] = []
    for position, kw in enumerate(keywords[:5]):
        if position:
            # Space out consecutive requests; no pause is needed before the
            # first one or after the last one.
            await asyncio.sleep(1.0)
        results.extend(await loop.run_in_executor(None, _sync_fetch, kw))
    return results


def _parse_billboard_titles(page: str, limit: int = BILLBOARD_TOP_N) -> List[str]:
    """Extract up to *limit* non-empty titles from Billboard chart HTML.

    HTML entities are decoded and surrounding whitespace is stripped.  Blank
    captures are dropped *before* truncating, so the returned list is dense
    and list position can be used directly as chart rank.
    """
    cleaned = (
        html.unescape(raw).strip() for raw in _BILLBOARD_TITLE_RE.findall(page)
    )
    return [title for title in cleaned if title][:limit]


async def fetch_billboard_top20() -> List[Dict[str, Any]]:
    """Scrape the Billboard Hot 100 page -- top 20 entries.

    Returns:
        Up to 20 rows tagged ``country="US"`` / ``genre="pop"``, ranked
        consecutively from 1 with a linearly decaying score.  Empty when the
        page is not HTTP 200, no titles are matched, or the request fails.
    """
    async with httpx.AsyncClient(
        timeout=10.0,
        headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"},
        follow_redirects=True,
    ) as client:
        try:
            resp = await client.get("https://www.billboard.com/charts/hot-100/")
            if resp.status_code != 200:
                logger.warning("Billboard request returned HTTP %s", resp.status_code)
                return []
            titles = _parse_billboard_titles(resp.text)
        except Exception as exc:
            logger.warning("Billboard request failed: %s", type(exc).__name__)
            return []

    if not titles:
        logger.warning("Billboard page matched no titles; markup may have changed")
    return [
        {"source": "billboard", "country": "US", "genre": "pop",
         "keyword": title, "score": round(1.0 - i / BILLBOARD_TOP_N, 3),
         "rank": i + 1, "metadata": {}}
        for i, title in enumerate(titles)
    ]


async def push_to_music_lab(trends: List[Dict[str, Any]], report_date: str) -> bool:
    """Push collected trends to music-lab's ``/api/music/market/ingest`` endpoint.

    Args:
        trends: Trend rows, sent as the ``trends`` field of the JSON body.
        report_date: Sent unchanged as the ``report_date`` field.

    Returns:
        ``True`` only when the endpoint answers HTTP 200; ``False`` for any
        other status or when the request fails.
    """
    async with httpx.AsyncClient(timeout=15.0) as client:
        try:
            resp = await client.post(
                f"{MUSIC_LAB_URL}/api/music/market/ingest",
                json={"trends": trends, "report_date": report_date},
            )
        except Exception as exc:
            logger.warning("music-lab ingest request failed: %s", type(exc).__name__)
            return False

    if resp.status_code != 200:
        logger.warning("music-lab ingest returned HTTP %s", resp.status_code)
        return False
    return True