refactor: rename stock-lab → stock (graduation)

- git mv stock-lab/ → stock/
- docker-compose.yml: 서비스 키 + container_name + build.context +
  frontend.depends_on + agent-office STOCK_LAB_URL → STOCK_URL
- agent-office/app: config.py, service_proxy.py, agents/stock.py, tests/
  STOCK_LAB_URL → STOCK_URL
- nginx/default.conf: proxy_pass http://stock-labhttp://stock (3 lines)
- CLAUDE.md / README.md / STATUS.md / scripts/ 문구 갱신
- stock/ 내부 자기 참조 갱신

lab 네이밍 정책 (feedback_lab_naming.md) graduation.
API URL / Python import / DB 파일명 변경 없음.
This commit is contained in:
2026-05-15 01:45:22 +09:00
parent 8812bd870a
commit ace0339d33
74 changed files with 67 additions and 67 deletions

View File

View File

@@ -0,0 +1,103 @@
"""Claude Haiku 기반 종목 뉴스 호재/악재 분석."""
from __future__ import annotations
import json
import logging
import os
from typing import Any, Dict, List
log = logging.getLogger(__name__)
DEFAULT_MODEL = os.getenv("AI_NEWS_MODEL", "claude-haiku-4-5-20251001")
PROMPT_TEMPLATE = """다음은 종목 {name}({ticker})에 대한 최근 뉴스 {n}개의 헤드라인입니다.
{news_block}
이 뉴스들이 종목에 호재인지 악재인지 평가하세요.
score: -10(매우 강한 악재) ~ +10(매우 강한 호재) 사이의 실수. 0은 중립.
reason: 30자 이내 한 줄 근거.
JSON으로만 응답하세요. 다른 텍스트 금지:
{{"score": <float>, "reason": "<string>"}}"""
def _clamp(x: float, lo: float = -10.0, hi: float = 10.0) -> float:
return max(lo, min(hi, x))
def _format_news_block(news: List[Dict[str, Any]]) -> str:
"""news dict 리스트 → prompt 에 들어가는 텍스트 블록.
summary 가 있으면 title 다음 줄에 indent 해서 포함 (최대 200자).
pub_date 가 있으면 title 앞에 표시.
"""
lines: List[str] = []
for n in news:
date = (n.get("pub_date") or "").strip()
title = (n.get("title") or "").strip()
summary = (n.get("summary") or "").strip()
prefix = f"[{date}] " if date else ""
if summary:
lines.append(f"- {prefix}{title}\n {summary[:200]}")
else:
lines.append(f"- {prefix}{title}")
return "\n".join(lines)
async def score_sentiment(
llm,
ticker: str,
news: List[Dict[str, Any]],
*,
name: str | None = None,
model: str = DEFAULT_MODEL,
) -> Dict[str, Any]:
"""Returns {ticker, score_raw, reason, news_count, tokens_input, tokens_output, model}."""
news_block = _format_news_block(news)
prompt = PROMPT_TEMPLATE.format(
name=name or ticker, ticker=ticker,
n=len(news), news_block=news_block,
)
resp = await llm.messages.create(
model=model,
max_tokens=200,
temperature=0,
system="너는 한국 주식 뉴스 감성 분석가다. JSON 객체 하나만 반환한다.",
messages=[
{"role": "user", "content": prompt},
# Assistant prefill — 첫 토큰을 강제로 '{' 로 시작해 JSON 응답을 보장
{"role": "assistant", "content": "{"},
],
)
raw = resp.content[0].text if resp.content else ""
# prefill '{' 이 응답에 포함되지 않으므로 다시 붙임
text = "{" + raw if not raw.lstrip().startswith("{") else raw
in_tokens = int(getattr(resp.usage, "input_tokens", 0) or 0)
out_tokens = int(getattr(resp.usage, "output_tokens", 0) or 0)
try:
data = json.loads(text)
score = _clamp(float(data["score"]))
reason = str(data["reason"])[:200]
return {
"ticker": ticker,
"score_raw": score,
"reason": reason,
"news_count": len(news),
"tokens_input": in_tokens,
"tokens_output": out_tokens,
"model": model,
}
except (json.JSONDecodeError, KeyError, TypeError, ValueError) as e:
log.warning("ai_news parse fail for %s: %s (raw=%r)", ticker, e, text[:100])
return {
"ticker": ticker,
"score_raw": 0.0,
"reason": f"parse fail: {e!s}"[:200],
"news_count": len(news),
"tokens_input": in_tokens,
"tokens_output": out_tokens,
"model": model,
}

View File

@@ -0,0 +1,70 @@
"""기존 articles 테이블에서 종목별 뉴스 매핑."""
from __future__ import annotations
import datetime as dt
import logging
import sqlite3
from typing import Any, Dict, List, Tuple
log = logging.getLogger(__name__)
def gather_articles_for_tickers(
conn: sqlite3.Connection,
tickers: List[str],
asof: dt.date,
*,
window_days: int = 1,
max_per_ticker: int = 5,
) -> Tuple[Dict[str, List[Dict[str, Any]]], Dict[str, int]]:
"""articles 에서 ticker.name substring 매칭으로 종목별 뉴스 dict 반환.
Returns:
(
{ticker: [{"title": str, "summary": str, "press": str, "pub_date": str}, ...]},
{"total_articles": int, "matched_pairs": int, "hit_tickers": int},
)
"""
out: Dict[str, List[Dict[str, Any]]] = {t: [] for t in tickers}
stats = {"total_articles": 0, "matched_pairs": 0, "hit_tickers": 0}
if not tickers:
return out, stats
cutoff = (asof - dt.timedelta(days=window_days)).isoformat()
placeholders = ",".join("?" * len(tickers))
name_rows = conn.execute(
f"SELECT ticker, name FROM krx_master WHERE ticker IN ({placeholders})",
tickers,
).fetchall()
# 2글자 미만 회사명은 false positive 위험으로 제외
name_map = {r[0]: r[1] for r in name_rows if r[1] and len(r[1]) >= 2}
articles = conn.execute(
"SELECT title, summary, press, pub_date, crawled_at "
"FROM articles WHERE crawled_at >= ? ORDER BY crawled_at DESC",
(cutoff,),
).fetchall()
stats["total_articles"] = len(articles)
for a in articles:
title = (a[0] or "").strip()
summary = (a[1] or "").strip()
haystack = title + " " + summary
for ticker, name in name_map.items():
if name not in haystack:
continue
if len(out[ticker]) >= max_per_ticker:
continue
out[ticker].append({
"title": title,
"summary": summary,
"press": a[2] or "",
"pub_date": a[3] or "",
})
stats["matched_pairs"] += 1
stats["hit_tickers"] = sum(1 for arts in out.values() if arts)
return out, stats

View File

@@ -0,0 +1,141 @@
"""ai_news refresh pipeline — 시총 상위 N종목 병렬 처리."""
from __future__ import annotations
import asyncio
import datetime as dt
import logging
import os
import sqlite3
import time
from typing import Any, Dict, List
from . import scraper as _scraper # legacy, kept for backward import
from . import analyzer as _analyzer
from . import articles_source # 신규
log = logging.getLogger(__name__)
DEFAULT_TOP_N = 100
DEFAULT_CONCURRENCY = 10
DEFAULT_NEWS_PER_TICKER = 5
def _top_market_cap_tickers(conn: sqlite3.Connection, n: int) -> List[str]:
rows = conn.execute(
"SELECT ticker FROM krx_master "
"WHERE market_cap IS NOT NULL AND is_preferred=0 AND is_spac=0 "
"ORDER BY market_cap DESC LIMIT ?",
(n,),
).fetchall()
return [r[0] for r in rows]
def _make_llm():
"""Anthropic AsyncClient — env에 ANTHROPIC_API_KEY 필수."""
from anthropic import AsyncAnthropic
return AsyncAnthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
async def _process_one(
ticker: str, name: str, articles: List[Dict[str, Any]],
sem: asyncio.Semaphore, llm, model: str,
) -> Dict[str, Any]:
async with sem:
return await _analyzer.score_sentiment(
llm, ticker, articles, name=name, model=model,
)
def _upsert_news_sentiment(
conn: sqlite3.Connection, asof: dt.date,
rows: List[Dict[str, Any]], *, source: str = "articles",
) -> None:
iso = asof.isoformat()
data = [
(
r["ticker"], iso, r["score_raw"], r["reason"], r["news_count"],
r["tokens_input"], r["tokens_output"], r["model"], source,
)
for r in rows
]
conn.executemany(
"""INSERT INTO news_sentiment
(ticker, date, score_raw, reason, news_count,
tokens_input, tokens_output, model, source)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(ticker, date) DO UPDATE SET
score_raw=excluded.score_raw,
reason=excluded.reason,
news_count=excluded.news_count,
tokens_input=excluded.tokens_input,
tokens_output=excluded.tokens_output,
model=excluded.model,
source=excluded.source
""",
data,
)
conn.commit()
async def refresh_daily(
conn: sqlite3.Connection,
asof: dt.date,
*,
top_n: int = DEFAULT_TOP_N,
concurrency: int = DEFAULT_CONCURRENCY,
max_news_per_ticker: int = DEFAULT_NEWS_PER_TICKER,
window_days: int = 1,
model: str = _analyzer.DEFAULT_MODEL,
) -> Dict[str, Any]:
started = time.time()
tickers = _top_market_cap_tickers(conn, n=top_n)
name_map = {
r[0]: r[1] for r in conn.execute(
f"SELECT ticker, name FROM krx_master WHERE ticker IN "
f"({','.join('?' * len(tickers))})", tickers,
).fetchall()
} if tickers else {}
articles_by_ticker, mapping_stats = articles_source.gather_articles_for_tickers(
conn, tickers, asof,
window_days=window_days,
max_per_ticker=max_news_per_ticker,
)
sem = asyncio.Semaphore(concurrency)
async with _make_llm() as llm:
tasks = []
for t in tickers:
arts = articles_by_ticker.get(t, [])
if not arts:
continue # 매핑 0 — score 미생성
tasks.append(_process_one(t, name_map.get(t, t), arts, sem, llm, model))
raw_results = await asyncio.gather(*tasks, return_exceptions=True)
successes: List[Dict[str, Any]] = []
failures: List[str] = []
for r in raw_results:
if isinstance(r, BaseException):
failures.append(repr(r))
elif isinstance(r, dict):
successes.append(r)
if successes:
_upsert_news_sentiment(conn, asof, successes, source="articles")
top_pos = sorted(successes, key=lambda r: -r["score_raw"])[:5]
top_neg = sorted(successes, key=lambda r: r["score_raw"])[:5]
return {
"asof": asof.isoformat(),
"updated": len(successes),
"failures": failures,
"duration_sec": round(time.time() - started, 2),
"tokens_input": sum(r["tokens_input"] for r in successes),
"tokens_output": sum(r["tokens_output"] for r in successes),
"top_pos": top_pos,
"top_neg": top_neg,
"model": model,
"mapping": mapping_stats,
}

View File

@@ -0,0 +1,46 @@
"""[DEPRECATED] 네이버 finance 종목 뉴스 스크래핑.
본 모듈은 ai_news Phase 1 (2026-05-14) 에서 더 이상 파이프라인에서 사용되지 않음.
데이터 소스는 stock 의 articles 테이블 (ai_news/articles_source.py) 로 전환됨.
삭제 시점: Phase 2 (DART 도입) 결정 후. IC 검증 4주 누적 후 노드 활성화
여부에 따라 본 모듈을 (a) 완전 삭제 또는 (b) ensemble fallback 으로 재활용.
"""
from __future__ import annotations
import logging
from typing import Any, Dict, List
from bs4 import BeautifulSoup
log = logging.getLogger(__name__)
NAVER_NEWS_URL = "https://finance.naver.com/item/news_news.naver"
NAVER_HEADERS = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
"Referer": "https://finance.naver.com/",
}
async def fetch_news(client, ticker: str, n: int = 5) -> List[Dict[str, Any]]:
"""Scrape top N news headlines for a ticker. Returns [] on any failure."""
try:
r = await client.get(NAVER_NEWS_URL, params={"code": ticker, "page": 1})
except Exception as e:
log.warning("ai_news scrape http error for %s: %s", ticker, e)
return []
if r.status_code != 200:
return []
soup = BeautifulSoup(r.text, "lxml")
out: List[Dict[str, Any]] = []
for row in soup.select("table.type5 tbody tr")[:n]:
title_el = row.select_one("td.title a")
date_el = row.select_one("td.date")
if not title_el or not date_el:
continue
out.append({
"title": title_el.get_text(strip=True),
"date": date_el.get_text(strip=True),
})
return out

View File

@@ -0,0 +1,73 @@
"""ai_news Top 5/5 텔레그램 메시지 빌더 (MarkdownV2)."""
from __future__ import annotations
from typing import Any, Dict, List
_MD_SPECIAL = r"_*[]()~`>#+-=|{}.!\\"
def _escape(text: str) -> str:
return "".join("\\" + c if c in _MD_SPECIAL else c for c in str(text))
def _cost_won(tokens_input: int, tokens_output: int) -> int:
"""Claude Haiku 가격 환산 (대략): in $1/M × ₩1300, out $5/M × ₩1300."""
return int(tokens_input * 0.0013 + tokens_output * 0.0065)
def _row_line(idx: int, r: Dict[str, Any]) -> str:
score = r["score_raw"]
# score 문자열 자체를 _escape 통과 — '+', '-', '.' 모두 MarkdownV2 reserved
score_str = _escape(f"{score:+.1f}")
name = r.get("name") or ""
ticker = r["ticker"]
label = (
f"{_escape(name)} \\({_escape(ticker)}\\)"
if name else _escape(ticker)
)
return f"{idx}\\. {label} \\({score_str}\\) — {_escape(r['reason'])}"
def build_message(
*,
asof: str,
top_pos: List[Dict[str, Any]],
top_neg: List[Dict[str, Any]],
tokens_input: int,
tokens_output: int,
mapping: Dict[str, int] | None = None,
) -> str:
lines: List[str] = [
f"🌅 *AI 뉴스 분석* \\({_escape(asof)} 08:00\\)",
"",
"📈 *호재 Top 5*",
]
if top_pos:
for i, r in enumerate(top_pos, 1):
lines.append(_row_line(i, r))
else:
lines.append(_escape("- (없음)"))
lines += ["", "📉 *악재 Top 5*"]
if top_neg:
for i, r in enumerate(top_neg, 1):
lines.append(_row_line(i, r))
else:
lines.append(_escape("- (없음)"))
cost = _cost_won(tokens_input, tokens_output)
mapping_part = ""
if mapping:
mapping_part = (
f"매핑 {mapping['hit_tickers']}/100 ticker "
f"\\({mapping['matched_pairs']}쌍 / articles {mapping['total_articles']}\\) · "
)
lines += [
"",
f"_분석: 시총 상위 100종목 · {mapping_part}"
f"토큰 {tokens_input:,} in / {tokens_output:,} out · "
f"약 ₩{cost:,}_",
]
return "\n".join(lines)

View File

@@ -0,0 +1,125 @@
"""AI news sentiment validation — Spearman IC vs forward returns.
핵심 metric: 일자별 score_raw 와 다음 N일 forward return 의 Spearman 상관.
4주+ 누적 후 IC mean > 0.05 면 weight 활성화 가치 있음.
"""
from __future__ import annotations
import datetime as dt
import sqlite3
from typing import Any, Dict, List, Optional
import pandas as pd
def _spearman(a: pd.Series, b: pd.Series) -> Optional[float]:
"""Spearman rank correlation. None if insufficient/degenerate data."""
if len(a) < 5 or len(b) < 5:
return None
if a.std(ddof=0) == 0 or b.std(ddof=0) == 0:
return None
return float(a.rank().corr(b.rank()))
def compute_ic(
conn: sqlite3.Connection,
*,
days: int = 30,
horizon: int = 1,
min_news_count: int = 1,
asof_today: Optional[dt.date] = None,
) -> Dict[str, Any]:
"""Compute daily Spearman IC of ai_news.score_raw vs forward return.
Returns:
{
"horizon_days": int,
"min_news_count": int,
"window_days": int,
"ic_count": int, # 유효 일수
"ic_mean": float | None,
"ic_std": float | None,
"ic_per_day": [{"date": "YYYY-MM-DD", "ic": float, "n": int}, ...],
"verdict": "skip" | "weak" | "strong",
}
verdict:
- skip: ic_count < 10
- weak: ic_mean in [-0.05, 0.05]
- strong: |ic_mean| > 0.05
"""
asof_today = asof_today or dt.date.today()
cutoff = (asof_today - dt.timedelta(days=days)).isoformat()
sentiment = pd.read_sql_query(
"SELECT ticker, date, score_raw, news_count "
"FROM news_sentiment WHERE date >= ? AND news_count >= ? ORDER BY date",
conn, params=(cutoff, min_news_count),
)
if sentiment.empty:
return _empty_result(days, horizon, min_news_count)
# forward return 조회: 각 (ticker, date) 에 대해 close[date+horizon] / close[date] - 1
prices = pd.read_sql_query(
"SELECT ticker, date, close FROM krx_daily_prices "
"WHERE date >= ? ORDER BY ticker, date",
conn, params=(cutoff,),
)
if prices.empty:
return _empty_result(days, horizon, min_news_count)
prices = prices.sort_values(["ticker", "date"])
prices["fwd_close"] = prices.groupby("ticker", group_keys=False)["close"].shift(-horizon)
prices["fwd_ret"] = prices["fwd_close"] / prices["close"] - 1.0
merged = sentiment.merge(
prices[["ticker", "date", "fwd_ret"]], on=["ticker", "date"], how="inner"
)
merged = merged.dropna(subset=["fwd_ret"])
if merged.empty:
return _empty_result(days, horizon, min_news_count)
ic_rows: List[Dict[str, Any]] = []
for date, grp in merged.groupby("date"):
ic = _spearman(grp["score_raw"], grp["fwd_ret"])
if ic is not None:
ic_rows.append({"date": date, "ic": ic, "n": int(len(grp))})
if not ic_rows:
return _empty_result(days, horizon, min_news_count)
ic_series = pd.Series([r["ic"] for r in ic_rows], dtype=float)
ic_mean = float(ic_series.mean())
ic_std = float(ic_series.std(ddof=0)) if len(ic_series) > 1 else 0.0
if len(ic_rows) < 10:
verdict = "skip"
elif abs(ic_mean) > 0.05:
verdict = "strong"
else:
verdict = "weak"
return {
"horizon_days": horizon,
"min_news_count": min_news_count,
"window_days": days,
"ic_count": len(ic_rows),
"ic_mean": round(ic_mean, 4),
"ic_std": round(ic_std, 4),
"ic_per_day": ic_rows,
"verdict": verdict,
}
def _empty_result(days: int, horizon: int, min_news_count: int) -> Dict[str, Any]:
return {
"horizon_days": horizon,
"min_news_count": min_news_count,
"window_days": days,
"ic_count": 0,
"ic_mean": None,
"ic_std": None,
"ic_per_day": [],
"verdict": "skip",
}