"""네이버 검색 API 연동 — 블로그 + 쇼핑 검색.""" import asyncio import logging import re import requests from typing import Any, Dict, List, Optional logger = logging.getLogger(__name__) from .config import NAVER_CLIENT_ID, NAVER_CLIENT_SECRET BLOG_URL = "https://openapi.naver.com/v1/search/blog.json" SHOP_URL = "https://openapi.naver.com/v1/search/shop.json" _HEADERS = { "X-Naver-Client-Id": NAVER_CLIENT_ID, "X-Naver-Client-Secret": NAVER_CLIENT_SECRET, } _TAG_RE = re.compile(r"<[^>]+>") def _strip_html(text: str) -> str: return _TAG_RE.sub("", text).strip() def search_blog(keyword: str, display: int = 10, sort: str = "sim") -> Dict[str, Any]: """네이버 블로그 검색. Args: keyword: 검색 키워드 display: 결과 수 (1-100) sort: sim(정확도) | date(날짜) Returns: {"total": int, "items": [...]} """ resp = requests.get( BLOG_URL, headers=_HEADERS, params={"query": keyword, "display": display, "sort": sort}, timeout=10, ) resp.raise_for_status() data = resp.json() items = [ { "title": _strip_html(item.get("title", "")), "description": _strip_html(item.get("description", "")), "link": item.get("link", ""), "bloggername": item.get("bloggername", ""), "postdate": item.get("postdate", ""), } for item in data.get("items", []) ] return {"total": data.get("total", 0), "items": items} def search_shopping(keyword: str, display: int = 20, sort: str = "sim") -> Dict[str, Any]: """네이버 쇼핑 검색. Args: keyword: 검색 키워드 display: 결과 수 (1-100) sort: sim(정확도) | date(날짜) | asc(가격↑) | dsc(가격↓) Returns: {"total": int, "items": [...], "price_stats": {...}} """ resp = requests.get( SHOP_URL, headers=_HEADERS, params={"query": keyword, "display": display, "sort": sort}, timeout=10, ) resp.raise_for_status() data = resp.json() items = [] prices = [] for item in data.get("items", []): lprice = _safe_int(item.get("lprice")) hprice = _safe_int(item.get("hprice")) parsed = { "title": _strip_html(item.get("title", "")), "link": item.get("link", ""), "image": item.get("image", ""), "lprice": lprice, "hprice": hprice, "mallName": item.get("mallName", ""), "productId": item.get("productId", ""), "productType": item.get("productType", ""), "category1": item.get("category1", ""), "category2": item.get("category2", ""), "category3": item.get("category3", ""), "brand": item.get("brand", ""), "maker": item.get("maker", ""), } items.append(parsed) if lprice and lprice > 0: prices.append(lprice) price_stats = None if prices: price_stats = { "min": min(prices), "max": max(prices), "avg": int(sum(prices) / len(prices)), "count": len(prices), } return { "total": data.get("total", 0), "items": items, "price_stats": price_stats, } def _safe_int(val) -> Optional[int]: if val is None: return None try: return int(val) except (ValueError, TypeError): return None def analyze_keyword(keyword: str) -> Dict[str, Any]: """키워드 경쟁도/기회 분석. 블로그 총 결과수, 쇼핑 총 결과수, 가격 통계를 기반으로 competition_score(경쟁도)와 opportunity_score(기회점수) 산출. Returns: { "keyword", "blog_total", "shop_total", "competition", "opportunity", "avg_price", "min_price", "max_price", "top_products": [...], "top_blogs": [...] } """ blog = search_blog(keyword, display=10, sort="sim") shop = search_shopping(keyword, display=20, sort="sim") blog_total = blog["total"] shop_total = shop["total"] # 경쟁도: 블로그 결과 수 기반 (로그 스케일 0-100) import math if blog_total > 0: competition = min(100, int(math.log10(blog_total + 1) * 15)) else: competition = 0 # 기회 점수: 쇼핑 수요가 높고 블로그 경쟁이 낮을수록 높음 if shop_total > 0 and blog_total > 0: ratio = shop_total / blog_total opportunity = min(100, int(ratio * 20)) elif shop_total > 0: opportunity = 90 # 경쟁 없이 수요만 있으면 높은 기회 else: opportunity = 10 # 쇼핑 수요 없음 price_stats = shop.get("price_stats") or {} return { "keyword": keyword, "blog_total": blog_total, "shop_total": shop_total, "competition": competition, "opportunity": opportunity, "avg_price": price_stats.get("avg"), "min_price": price_stats.get("min"), "max_price": price_stats.get("max"), "top_products": shop["items"][:5], "top_blogs": blog["items"][:5], } def _run_enrich(top_blogs: list) -> list: """동기 컨텍스트에서 비동기 enrich_top_blogs 실행.""" from .web_crawler import enrich_top_blogs try: loop = asyncio.get_event_loop() if loop.is_running(): import concurrent.futures with concurrent.futures.ThreadPoolExecutor() as pool: return pool.submit( asyncio.run, enrich_top_blogs(top_blogs) ).result(timeout=60) else: return asyncio.run(enrich_top_blogs(top_blogs)) except Exception as e: logger.warning("블로그 크롤링 실패, 기존 데이터 사용: %s", e) return top_blogs def analyze_keyword_with_crawling(keyword: str) -> Dict[str, Any]: """analyze_keyword + 상위 블로그 본문 크롤링.""" result = analyze_keyword(keyword) result["top_blogs"] = _run_enrich(result["top_blogs"]) return result