"""Collect music-trend signals and push them to the music-lab service.

Sources:
  * YouTube Data API v3 -- most-popular music videos per country.
  * Google Trends through the optional ``pytrends`` package.
  * The Billboard Hot 100 web page (regex scrape).

Every fetcher is best-effort: on failure it logs a warning and returns an
empty list instead of raising.

Third-party requirements: ``httpx`` (imported at module level) and
``pytrends`` (imported lazily; Google Trends is skipped when it is missing).
NOTE(review): the accompanying requirements.txt change adds
``google-api-python-client>=2.100.0`` and ``pytrends>=4.9.2``; this module
does not import google-api-python-client.
"""
import asyncio
import html
import logging
import os
import re
from typing import Any, Dict, List

import httpx

logger = logging.getLogger(__name__)

YOUTUBE_DATA_API_KEY = os.getenv("YOUTUBE_DATA_API_KEY", "")
MUSIC_LAB_URL = os.getenv("MUSIC_LAB_URL", "http://music-lab:8000")
TARGET_COUNTRIES = ["BR", "ID", "MX", "US", "KR"]
TREND_KEYWORDS = ["lofi music", "phonk", "ambient music", "chill beats", "study music"]
YOUTUBE_MUSIC_CAT = "10"

# Genre -> keywords looked for (as substrings) in a video's tags.  Order
# matters: the first genre with a matching keyword wins.
GENRE_TAGS = {
    "lo-fi": ["lofi", "lo-fi", "lo fi", "chill", "study"],
    "phonk": ["phonk", "drift", "memphis"],
    "ambient": ["ambient", "relaxing", "meditation"],
    "pop": ["pop", "kpop", "k-pop"],
    "funk": ["funk", "baile funk"],
    "latin": ["latin", "reggaeton", "sertanejo"],
}

# videos.list rejects maxResults outside 1..50.
_YOUTUBE_MAX_RESULTS = 50
# At most this many keywords are sent to Google Trends per call.
_MAX_TREND_KEYWORDS = 5
# Pause between consecutive Google Trends requests, in seconds.
_TRENDS_PAUSE_SECONDS = 1.0
_BILLBOARD_TOP_N = 20
# Captures the text content of elements whose class starts with "c-title".
_BILLBOARD_TITLE_RE = re.compile(
    r'class="c-title[^"]*"[^>]*>\s*([^<\n]{3,80})\s*<'
)


def _tags_to_genre(tags: list) -> str:
    """Return the first genre in ``GENRE_TAGS`` whose keyword occurs in *tags*.

    All tags are lower-cased and joined with spaces, then each keyword is
    tested as a plain substring of that string (so "pop" also matches
    "kpop").  ``None`` or an empty list yields ``"general"``.
    """
    joined = " ".join(str(t).lower() for t in tags or ())
    for genre, keywords in GENRE_TAGS.items():
        if any(kw in joined for kw in keywords):
            return genre
    return "general"


async def fetch_youtube_trending(country: str, max_results: int = 50) -> List[Dict[str, Any]]:
    """Fetch a country's most-popular music videos from the YouTube Data API v3.

    Args:
        country: Region code sent as ``regionCode`` (e.g. ``"US"``).
        max_results: Number of videos requested; capped at 50, the API
            maximum.  Non-positive values return an empty list.

    Returns:
        One row per video with ``source``, ``country``, ``genre``,
        ``keyword`` (title truncated to 100 chars), ``score`` (linear decay
        from 1.0 by chart position), ``rank`` (1-based chart position) and
        ``metadata``.  Empty when no API key is configured, the request
        fails, or the response status is not 200.
    """
    if not YOUTUBE_DATA_API_KEY or max_results <= 0:
        return []
    page_size = min(max_results, _YOUTUBE_MAX_RESULTS)

    async with httpx.AsyncClient(timeout=10.0) as client:
        try:
            resp = await client.get(
                "https://www.googleapis.com/youtube/v3/videos",
                params={
                    "part": "snippet,statistics",
                    "chart": "mostPopular",
                    "regionCode": country,
                    "videoCategoryId": YOUTUBE_MUSIC_CAT,
                    "maxResults": page_size,
                    "key": YOUTUBE_DATA_API_KEY,
                },
            )
            if resp.status_code != 200:
                logger.warning(
                    "YouTube trending request for %s returned HTTP %s",
                    country, resp.status_code,
                )
                return []
            items = resp.json().get("items", [])
        except Exception as exc:
            # Log only the exception type: the request URL carries the API
            # key as a query parameter and must not end up in the logs.
            logger.warning(
                "YouTube trending request for %s failed: %s",
                country, type(exc).__name__,
            )
            return []

    results: List[Dict[str, Any]] = []
    for i, item in enumerate(items or []):
        if not isinstance(item, dict):
            continue
        video_id = item.get("id")
        if not video_id:
            # A row without a video id is useless downstream; skip it but
            # keep the remaining rows' ranks tied to their chart position.
            continue
        snippet = item.get("snippet") or {}
        stats = item.get("statistics") or {}
        try:
            view_count = int(stats.get("viewCount", 0))
        except (TypeError, ValueError):
            view_count = 0
        results.append({
            "source": "youtube",
            "country": country,
            "genre": _tags_to_genre(snippet.get("tags") or []),
            "keyword": (snippet.get("title") or "")[:100],
            "score": round(1.0 - i / page_size, 3),
            "rank": i + 1,
            "metadata": {
                "video_id": video_id,
                "view_count": view_count,
                "channel": snippet.get("channelTitle", ""),
            },
        })
    return results


async def fetch_google_trends(keywords: List[str], countries: List[str]) -> List[Dict[str, Any]]:
    """Fetch Google Trends interest per keyword through ``pytrends``.

    ``pytrends`` is synchronous, so each lookup runs in the event loop's
    default executor.  Only the first five keywords are queried, with a
    one-second pause between requests.

    NOTE: the payload is built without a ``geo`` filter, so the same
    per-keyword score is replicated for every entry of *countries*.

    Args:
        keywords: Search terms to look up.
        countries: Country codes to emit a row for.

    Returns:
        One row per (keyword, country) with ``score`` equal to the mean
        interest over the ``"now 7-d"`` timeframe divided by 100.  Empty
        when ``pytrends`` is not installed; keywords whose lookup fails are
        skipped.
    """
    try:
        from pytrends.request import TrendReq
    except ImportError:
        logger.info("pytrends is not installed; skipping Google Trends")
        return []

    def _sync_fetch(kw: str) -> List[Dict[str, Any]]:
        """Blocking lookup of one keyword; returns [] on any failure."""
        try:
            pt = TrendReq(hl="en-US", tz=0)
            pt.build_payload([kw], timeframe="now 7-d")
            df = pt.interest_over_time()
            if df.empty or kw not in df.columns:
                return []
            score = round(float(df[kw].mean()) / 100.0, 3)
            return [
                {"source": "google_trends", "country": c, "genre": "",
                 "keyword": kw, "score": score, "rank": None, "metadata": {}}
                for c in countries
            ]
        except Exception:
            logger.warning("Google Trends lookup for %r failed", kw, exc_info=True)
            return []

    # get_running_loop() is the supported way to reach the loop from inside
    # a coroutine; get_event_loop() is deprecated for this use.
    loop = asyncio.get_running_loop()
    results: List[Dict[str, Any]] = []
    for idx, kw in enumerate(keywords[:_MAX_TREND_KEYWORDS]):
        if idx:
            # Pause between requests only -- no pointless wait after the last.
            await asyncio.sleep(_TRENDS_PAUSE_SECONDS)
        results.extend(await loop.run_in_executor(None, _sync_fetch, kw))
    return results


async def fetch_billboard_top20() -> List[Dict[str, Any]]:
    """Scrape the Billboard Hot 100 page and return up to the top 20 titles.

    Returns:
        Rows with ``source="billboard"``, ``country="US"``, ``genre="pop"``,
        the title as ``keyword``, a linearly decaying ``score`` and a
        1-based ``rank``.  Empty when the request fails or the response
        status is not 200.
    """
    async with httpx.AsyncClient(
        timeout=10.0,
        headers={"User-Agent": "Mozilla/5.0 (compatible; music-research-bot/1.0)"},
        follow_redirects=True,
    ) as client:
        try:
            resp = await client.get("https://www.billboard.com/charts/hot-100/")
            if resp.status_code != 200:
                logger.warning("Billboard request returned HTTP %s", resp.status_code)
                return []
            page = resp.text
        except Exception:
            logger.warning("Billboard request failed", exc_info=True)
            return []

    # Decode HTML entities (e.g. "&amp;") and drop blank captures BEFORE
    # truncating, so ranks stay contiguous and all 20 slots can be filled.
    cleaned = (html.unescape(raw).strip() for raw in _BILLBOARD_TITLE_RE.findall(page))
    titles = [t for t in cleaned if t][:_BILLBOARD_TOP_N]
    return [
        {"source": "billboard", "country": "US", "genre": "pop",
         "keyword": title, "score": round(1.0 - i / _BILLBOARD_TOP_N, 3),
         "rank": i + 1, "metadata": {}}
        for i, title in enumerate(titles)
    ]


async def push_to_music_lab(trends: List[Dict[str, Any]], report_date: str) -> bool:
    """POST collected trends to music-lab's ``/api/music/market/ingest``.

    Args:
        trends: Trend rows as produced by the fetchers in this module.
        report_date: Sent unchanged as ``report_date`` in the JSON body.

    Returns:
        ``True`` only when the service answers HTTP 200; ``False`` on any
        other status or when the request itself fails.
    """
    # Tolerate a trailing slash in MUSIC_LAB_URL to avoid a "//api" path.
    url = f"{MUSIC_LAB_URL.rstrip('/')}/api/music/market/ingest"
    async with httpx.AsyncClient(timeout=15.0) as client:
        try:
            resp = await client.post(
                url,
                json={"trends": trends, "report_date": report_date},
            )
        except Exception:
            logger.warning("Pushing trends to music-lab failed", exc_info=True)
            return False
    if resp.status_code != 200:
        logger.warning("music-lab ingest returned HTTP %s", resp.status_code)
        return False
    return True