# web-page-backend/blog-lab/app/naver_search.py

"""네이버 검색 API 연동 — 블로그 + 쇼핑 검색."""

import asyncio
import logging
import re
import requests
from typing import Any, Dict, List, Optional

logger = logging.getLogger(__name__)

from .config import NAVER_CLIENT_ID, NAVER_CLIENT_SECRET

# Naver Open API search endpoints (both return JSON).
BLOG_URL = "https://openapi.naver.com/v1/search/blog.json"
SHOP_URL = "https://openapi.naver.com/v1/search/shop.json"

# Credential headers sent with every search request; the values are read
# from the package's config module at import time.
_HEADERS = {
    "X-Naver-Client-Id": NAVER_CLIENT_ID,
    "X-Naver-Client-Secret": NAVER_CLIENT_SECRET,
}

_TAG_RE = re.compile(r"<[^>]+>")


def _strip_html(text: str) -> str:
    return _TAG_RE.sub("", text).strip()


def search_blog(keyword: str, display: int = 10, sort: str = "sim") -> Dict[str, Any]:
    """Search Naver blogs for a keyword.

    Args:
        keyword: Search query.
        display: Number of results to request (1-100).
        sort: ``"sim"`` (relevance) or ``"date"`` (date).

    Returns:
        ``{"total": int, "items": [...]}``. Each item has the keys
        ``title``, ``description``, ``link``, ``bloggername`` and
        ``postdate``; title and description have HTML tags removed.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    query = {"query": keyword, "display": display, "sort": sort}
    response = requests.get(BLOG_URL, headers=_HEADERS, params=query, timeout=10)
    response.raise_for_status()
    payload = response.json()

    posts = []
    for raw in payload.get("items", []):
        posts.append(
            {
                "title": _strip_html(raw.get("title", "")),
                "description": _strip_html(raw.get("description", "")),
                "link": raw.get("link", ""),
                "bloggername": raw.get("bloggername", ""),
                "postdate": raw.get("postdate", ""),
            }
        )

    return {"total": payload.get("total", 0), "items": posts}


def search_shopping(keyword: str, display: int = 20, sort: str = "sim") -> Dict[str, Any]:
    """Search Naver Shopping for a keyword.

    Args:
        keyword: Search query.
        display: Number of results to request (1-100).
        sort: ``"sim"`` (relevance), ``"date"`` (date), ``"asc"`` (price
            ascending) or ``"dsc"`` (price descending).

    Returns:
        ``{"total": int, "items": [...], "price_stats": {...}}``.
        ``price_stats`` holds ``min``/``max``/``avg``/``count`` computed over
        the positive ``lprice`` values, or is ``None`` when no item has one.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    query = {"query": keyword, "display": display, "sort": sort}
    response = requests.get(SHOP_URL, headers=_HEADERS, params=query, timeout=10)
    response.raise_for_status()
    payload = response.json()

    # Fields copied verbatim from the API item, defaulting to "".
    passthrough_keys = (
        "mallName",
        "productId",
        "productType",
        "category1",
        "category2",
        "category3",
        "brand",
        "maker",
    )

    products = []
    valid_prices = []
    for raw in payload.get("items", []):
        low = _safe_int(raw.get("lprice"))
        high = _safe_int(raw.get("hprice"))
        product = {
            "title": _strip_html(raw.get("title", "")),
            "link": raw.get("link", ""),
            "image": raw.get("image", ""),
            "lprice": low,
            "hprice": high,
        }
        for key in passthrough_keys:
            product[key] = raw.get(key, "")
        products.append(product)

        # Only positive, parseable prices take part in the statistics.
        if low is not None and low > 0:
            valid_prices.append(low)

    if valid_prices:
        stats = {
            "min": min(valid_prices),
            "max": max(valid_prices),
            "avg": int(sum(valid_prices) / len(valid_prices)),
            "count": len(valid_prices),
        }
    else:
        stats = None

    return {
        "total": payload.get("total", 0),
        "items": products,
        "price_stats": stats,
    }


def _safe_int(val) -> Optional[int]:
    if val is None:
        return None
    try:
        return int(val)
    except (ValueError, TypeError):
        return None


def analyze_keyword(keyword: str) -> Dict[str, Any]:
    """Score a keyword's competition and opportunity.

    Runs one blog search and one shopping search, then derives a
    competition score from the blog result count and an opportunity score
    from the ratio of shopping results to blog results.

    Returns:
        A dict with the keys ``keyword``, ``blog_total``, ``shop_total``,
        ``competition``, ``opportunity``, ``avg_price``, ``min_price``,
        ``max_price``, ``top_products`` (first 5 shopping items) and
        ``top_blogs`` (first 5 blog items). The price fields are ``None``
        when the shopping search produced no price statistics.
    """
    import math

    blog_result = search_blog(keyword, display=10, sort="sim")
    shop_result = search_shopping(keyword, display=20, sort="sim")

    blog_total = blog_result["total"]
    shop_total = shop_result["total"]

    # Competition: log-scaled blog result count, capped at 100.
    competition = (
        min(100, int(math.log10(blog_total + 1) * 15)) if blog_total > 0 else 0
    )

    # Opportunity: higher when shopping demand is high relative to blog
    # competition.
    if shop_total > 0:
        if blog_total > 0:
            opportunity = min(100, int(shop_total / blog_total * 20))
        else:
            # Demand with no competing blog posts: high opportunity.
            opportunity = 90
    else:
        # No shopping demand.
        opportunity = 10

    stats = shop_result.get("price_stats") or {}

    return {
        "keyword": keyword,
        "blog_total": blog_total,
        "shop_total": shop_total,
        "competition": competition,
        "opportunity": opportunity,
        "avg_price": stats.get("avg"),
        "min_price": stats.get("min"),
        "max_price": stats.get("max"),
        "top_products": shop_result["items"][:5],
        "top_blogs": blog_result["items"][:5],
    }


def _run_enrich(top_blogs: list) -> list:
    """Run the async ``enrich_top_blogs`` crawler from synchronous code.

    When no event loop is running in the current thread the coroutine is
    executed directly with ``asyncio.run``. When a loop is already running
    (``asyncio.run`` would raise there), the coroutine is executed on its
    own loop in a helper thread and this call waits up to 60 seconds for it.

    Args:
        top_blogs: Blog items to enrich.

    Returns:
        The crawler's result, or *top_blogs* unchanged if crawling fails or
        times out (the failure is logged as a warning).
    """
    from .web_crawler import enrich_top_blogs

    timeout_seconds = 60

    def _crawl() -> list:
        # Create the coroutine in the thread that runs it, so it can never
        # be left un-awaited if scheduling fails.
        return asyncio.run(enrich_top_blogs(top_blogs))

    try:
        try:
            # get_running_loop() is the supported way to detect a running
            # loop; get_event_loop() raises in threads without a loop, which
            # previously made worker-thread callers skip crawling entirely.
            asyncio.get_running_loop()
        except RuntimeError:
            return _crawl()

        import concurrent.futures

        pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
        try:
            return pool.submit(_crawl).result(timeout=timeout_seconds)
        finally:
            # Do not wait here: a `with` block would join the worker and
            # defeat the timeout. A timed-out crawl finishes in background.
            pool.shutdown(wait=False)
    except Exception as e:
        logger.warning("블로그 크롤링 실패, 기존 데이터 사용: %s", e)
        return top_blogs


def analyze_keyword_with_crawling(keyword: str) -> Dict[str, Any]:
    """Run ``analyze_keyword`` and enrich its top blogs via crawling.

    Returns:
        The ``analyze_keyword`` result with ``top_blogs`` replaced by the
        output of ``_run_enrich``.
    """
    analysis = analyze_keyword(keyword)
    enriched_blogs = _run_enrich(analysis["top_blogs"])
    analysis["top_blogs"] = enriched_blogs
    return analysis