Files
web-page-backend/blog-lab/app/naver_search.py

204 lines
6.0 KiB
Python

"""네이버 검색 API 연동 — 블로그 + 쇼핑 검색."""
import asyncio
import logging
import re
import requests
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
from .config import NAVER_CLIENT_ID, NAVER_CLIENT_SECRET
BLOG_URL = "https://openapi.naver.com/v1/search/blog.json"
SHOP_URL = "https://openapi.naver.com/v1/search/shop.json"
_HEADERS = {
"X-Naver-Client-Id": NAVER_CLIENT_ID,
"X-Naver-Client-Secret": NAVER_CLIENT_SECRET,
}
_TAG_RE = re.compile(r"<[^>]+>")
def _strip_html(text: str) -> str:
return _TAG_RE.sub("", text).strip()
def search_blog(keyword: str, display: int = 10, sort: str = "sim") -> Dict[str, Any]:
"""네이버 블로그 검색.
Args:
keyword: 검색 키워드
display: 결과 수 (1-100)
sort: sim(정확도) | date(날짜)
Returns:
{"total": int, "items": [...]}
"""
resp = requests.get(
BLOG_URL,
headers=_HEADERS,
params={"query": keyword, "display": display, "sort": sort},
timeout=10,
)
resp.raise_for_status()
data = resp.json()
items = [
{
"title": _strip_html(item.get("title", "")),
"description": _strip_html(item.get("description", "")),
"link": item.get("link", ""),
"bloggername": item.get("bloggername", ""),
"postdate": item.get("postdate", ""),
}
for item in data.get("items", [])
]
return {"total": data.get("total", 0), "items": items}
def search_shopping(keyword: str, display: int = 20, sort: str = "sim") -> Dict[str, Any]:
"""네이버 쇼핑 검색.
Args:
keyword: 검색 키워드
display: 결과 수 (1-100)
sort: sim(정확도) | date(날짜) | asc(가격↑) | dsc(가격↓)
Returns:
{"total": int, "items": [...], "price_stats": {...}}
"""
resp = requests.get(
SHOP_URL,
headers=_HEADERS,
params={"query": keyword, "display": display, "sort": sort},
timeout=10,
)
resp.raise_for_status()
data = resp.json()
items = []
prices = []
for item in data.get("items", []):
lprice = _safe_int(item.get("lprice"))
hprice = _safe_int(item.get("hprice"))
parsed = {
"title": _strip_html(item.get("title", "")),
"link": item.get("link", ""),
"image": item.get("image", ""),
"lprice": lprice,
"hprice": hprice,
"mallName": item.get("mallName", ""),
"productId": item.get("productId", ""),
"productType": item.get("productType", ""),
"category1": item.get("category1", ""),
"category2": item.get("category2", ""),
"category3": item.get("category3", ""),
"brand": item.get("brand", ""),
"maker": item.get("maker", ""),
}
items.append(parsed)
if lprice and lprice > 0:
prices.append(lprice)
price_stats = None
if prices:
price_stats = {
"min": min(prices),
"max": max(prices),
"avg": int(sum(prices) / len(prices)),
"count": len(prices),
}
return {
"total": data.get("total", 0),
"items": items,
"price_stats": price_stats,
}
def _safe_int(val) -> Optional[int]:
if val is None:
return None
try:
return int(val)
except (ValueError, TypeError):
return None
def analyze_keyword(keyword: str) -> Dict[str, Any]:
"""키워드 경쟁도/기회 분석.
블로그 총 결과수, 쇼핑 총 결과수, 가격 통계를 기반으로
competition_score(경쟁도)와 opportunity_score(기회점수) 산출.
Returns:
{
"keyword", "blog_total", "shop_total",
"competition", "opportunity",
"avg_price", "min_price", "max_price",
"top_products": [...], "top_blogs": [...]
}
"""
blog = search_blog(keyword, display=10, sort="sim")
shop = search_shopping(keyword, display=20, sort="sim")
blog_total = blog["total"]
shop_total = shop["total"]
# 경쟁도: 블로그 결과 수 기반 (로그 스케일 0-100)
import math
if blog_total > 0:
competition = min(100, int(math.log10(blog_total + 1) * 15))
else:
competition = 0
# 기회 점수: 쇼핑 수요가 높고 블로그 경쟁이 낮을수록 높음
if shop_total > 0 and blog_total > 0:
ratio = shop_total / blog_total
opportunity = min(100, int(ratio * 20))
elif shop_total > 0:
opportunity = 90 # 경쟁 없이 수요만 있으면 높은 기회
else:
opportunity = 10 # 쇼핑 수요 없음
price_stats = shop.get("price_stats") or {}
return {
"keyword": keyword,
"blog_total": blog_total,
"shop_total": shop_total,
"competition": competition,
"opportunity": opportunity,
"avg_price": price_stats.get("avg"),
"min_price": price_stats.get("min"),
"max_price": price_stats.get("max"),
"top_products": shop["items"][:5],
"top_blogs": blog["items"][:5],
}
def _run_enrich(top_blogs: list) -> list:
"""동기 컨텍스트에서 비동기 enrich_top_blogs 실행."""
from .web_crawler import enrich_top_blogs
try:
loop = asyncio.get_event_loop()
if loop.is_running():
import concurrent.futures
with concurrent.futures.ThreadPoolExecutor() as pool:
return pool.submit(
asyncio.run, enrich_top_blogs(top_blogs)
).result(timeout=60)
else:
return asyncio.run(enrich_top_blogs(top_blogs))
except Exception as e:
logger.warning("블로그 크롤링 실패, 기존 데이터 사용: %s", e)
return top_blogs
def analyze_keyword_with_crawling(keyword: str) -> Dict[str, Any]:
"""analyze_keyword + 상위 블로그 본문 크롤링."""
result = analyze_keyword(keyword)
result["top_blogs"] = _run_enrich(result["top_blogs"])
return result