Files
web-page-backend/stock/app/screener/ai_news/articles_source.py
gahusb ace0339d33 refactor: rename stock-lab → stock (graduation)
- git mv stock-lab/ → stock/
- docker-compose.yml: 서비스 키 + container_name + build.context +
  frontend.depends_on + agent-office STOCK_LAB_URL → STOCK_URL
- agent-office/app: config.py, service_proxy.py, agents/stock.py, tests/
  STOCK_LAB_URL → STOCK_URL
- nginx/default.conf: proxy_pass http://stock-lab → http://stock (3 lines)
- CLAUDE.md / README.md / STATUS.md / scripts/ 문구 갱신
- stock/ 내부 자기 참조 갱신

lab 네이밍 정책 (feedback_lab_naming.md) graduation.
API URL / Python import / DB 파일명 변경 없음.
2026-05-15 01:45:44 +09:00

71 lines
2.2 KiB
Python

"""기존 articles 테이블에서 종목별 뉴스 매핑."""
from __future__ import annotations
import datetime as dt
import logging
import sqlite3
from typing import Any, Dict, List, Tuple
log = logging.getLogger(__name__)
def _load_ticker_names(
    conn: sqlite3.Connection,
    tickers: List[str],
    chunk_size: int = 500,
) -> Dict[str, str]:
    """Return ``{ticker: name}`` from ``krx_master`` for the given tickers.

    The lookup is issued in chunks of ``chunk_size`` so that a large ticker
    list never exceeds SQLite's per-statement host-parameter limit (999 by
    default on SQLite builds older than 3.32.0).

    Rows whose name is empty or shorter than two characters are skipped:
    such names are too likely to produce false-positive substring matches.
    """
    names: Dict[str, str] = {}
    for start in range(0, len(tickers), chunk_size):
        chunk = tickers[start:start + chunk_size]
        placeholders = ",".join("?" * len(chunk))
        rows = conn.execute(
            f"SELECT ticker, name FROM krx_master WHERE ticker IN ({placeholders})",
            chunk,
        ).fetchall()
        for row in rows:
            if row[1] and len(row[1]) >= 2:
                names[row[0]] = row[1]
    return names


def gather_articles_for_tickers(
    conn: sqlite3.Connection,
    tickers: List[str],
    asof: dt.date,
    *,
    window_days: int = 1,
    max_per_ticker: int = 5,
) -> Tuple[Dict[str, List[Dict[str, Any]]], Dict[str, int]]:
    """Map recent rows of the ``articles`` table to tickers by company name.

    An article is attached to a ticker when the ticker's ``krx_master.name``
    occurs as a substring of the article's ``title + " " + summary``.

    Args:
        conn: Open SQLite connection exposing ``krx_master(ticker, name)``
            and ``articles(title, summary, press, pub_date, crawled_at)``.
        tickers: Tickers to collect news for. Every ticker gets a key in the
            result, even when nothing matches.
        asof: Reference date; only articles whose ``crawled_at`` is greater
            than or equal to ``asof - window_days`` (ISO date string
            comparison) are considered.
        window_days: Size of the look-back window in days.
        max_per_ticker: Maximum number of articles kept per ticker. Articles
            are visited newest ``crawled_at`` first, so the newest ones win.

    Returns:
        A ``(articles_by_ticker, stats)`` tuple::

            (
                {ticker: [{"title": str, "summary": str,
                           "press": str, "pub_date": str}, ...]},
                {"total_articles": int,   # articles inside the window
                 "matched_pairs": int,    # (ticker, article) pairs stored
                 "hit_tickers": int},     # tickers with >= 1 article
            )
    """
    out: Dict[str, List[Dict[str, Any]]] = {t: [] for t in tickers}
    stats = {"total_articles": 0, "matched_pairs": 0, "hit_tickers": 0}
    if not tickers:
        return out, stats

    cutoff = (asof - dt.timedelta(days=window_days)).isoformat()

    # list(out) is the input ticker list with duplicates removed, so repeated
    # tickers do not consume extra bound parameters.
    name_map = _load_ticker_names(conn, list(out))

    articles = conn.execute(
        "SELECT title, summary, press, pub_date, crawled_at "
        "FROM articles WHERE crawled_at >= ? ORDER BY crawled_at DESC",
        (cutoff,),
    ).fetchall()
    stats["total_articles"] = len(articles)

    # Tickers that can still accept articles. A ticker is dropped as soon as
    # it reaches max_per_ticker so later articles skip its substring check.
    open_names: Dict[str, str] = dict(name_map) if max_per_ticker > 0 else {}

    for row in articles:
        if not open_names:
            # Every ticker is full; remaining articles cannot change the result.
            break
        title = (row[0] or "").strip()
        summary = (row[1] or "").strip()
        haystack = title + " " + summary

        filled: List[str] = []
        for ticker, name in open_names.items():
            if name not in haystack:
                continue
            bucket = out[ticker]
            bucket.append({
                "title": title,
                "summary": summary,
                "press": row[2] or "",
                "pub_date": row[3] or "",
            })
            stats["matched_pairs"] += 1
            if len(bucket) >= max_per_ticker:
                filled.append(ticker)
        # Removal is deferred so the dict is not mutated while iterating it.
        for ticker in filled:
            del open_names[ticker]

    stats["hit_tickers"] = sum(1 for arts in out.values() if arts)
    return out, stats