- git mv stock-lab/ → stock/ - docker-compose.yml: 서비스 키 + container_name + build.context + frontend.depends_on + agent-office STOCK_LAB_URL → STOCK_URL - agent-office/app: config.py, service_proxy.py, agents/stock.py, tests/ STOCK_LAB_URL → STOCK_URL - nginx/default.conf: proxy_pass http://stock-lab → http://stock (3 lines) - CLAUDE.md / README.md / STATUS.md / scripts/ 문구 갱신 - stock/ 내부 자기 참조 갱신 lab 네이밍 정책 (feedback_lab_naming.md) graduation. API URL / Python import / DB 파일명 변경 없음.
126 lines
3.8 KiB
Python
126 lines
3.8 KiB
Python
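"""AI news sentiment validation — Spearman IC vs forward returns.

Key metric: the Spearman correlation between each day's score_raw and the
forward return over the following N days.
Once four or more weeks of data have accumulated, an IC mean above 0.05 means
the signal is worth enabling as a weight.
"""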
|
|
|
|
from __future__ import annotations
|
|
|
|
import datetime as dt
|
|
import sqlite3
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
import pandas as pd
|
|
|
|
|
|
def _spearman(a: pd.Series, b: pd.Series) -> Optional[float]:
    """Return the Spearman rank correlation of ``a`` and ``b``.

    The two series are paired by index label, and any pair with a missing
    value on either side is discarded *before* ranking. Ranks are therefore
    computed over exactly the observations that enter the correlation, and
    the minimum-sample check counts only usable pairs.

    Args:
        a: First sample.
        b: Second sample, paired with ``a`` by index label.

    Returns:
        The correlation coefficient, or ``None`` when either input has fewer
        than five entries, fewer than five complete pairs remain, either side
        is constant, or the coefficient is not a number.
    """
    if len(a) < 5 or len(b) < 5:
        return None

    # Series.corr pairs values by index label; do the same pairing up front
    # so the checks below look at the observations that are actually used.
    left, right = a.align(b, join="inner")
    complete = left.notna() & right.notna()
    left, right = left[complete], right[complete]

    if len(left) < 5:
        return None
    # A constant side has no rank variation, so the correlation is undefined.
    if left.std(ddof=0) == 0 or right.std(ddof=0) == 0:
        return None

    rho = float(left.rank().corr(right.rank()))
    return None if pd.isna(rho) else rho
|
|
|
|
|
|
def compute_ic(
    conn: sqlite3.Connection,
    *,
    days: int = 30,
    horizon: int = 1,
    min_news_count: int = 1,
    asof_today: Optional[dt.date] = None,
) -> Dict[str, Any]:
    """Compute the daily Spearman IC of news sentiment vs. forward return.

    For every date in the look-back window, ``news_sentiment.score_raw`` is
    rank-correlated across tickers with the forward return derived from
    ``krx_daily_prices.close``. The per-day coefficients are then summarised.

    Args:
        conn: SQLite connection used to read the ``news_sentiment`` and
            ``krx_daily_prices`` tables.
        days: Length of the look-back window in calendar days, counted back
            from ``asof_today``.
        horizon: Forward-return horizon, counted in price rows per ticker
            (``close`` is shifted by ``-horizon`` rows), not calendar days.
        min_news_count: Sentiment rows with a smaller ``news_count`` are
            excluded by the query.
        asof_today: Reference date for the window; defaults to today's date.

    Returns:
        {
          "horizon_days": int,
          "min_news_count": int,
          "window_days": int,
          "ic_count": int,            # number of days with a valid IC
          "ic_mean": float | None,
          "ic_std": float | None,
          "ic_per_day": [{"date": "YYYY-MM-DD", "ic": float, "n": int}, ...],
          "verdict": "skip" | "weak" | "strong",
        }

    verdict:
      - skip:   ic_count < 10
      - weak:   |ic_mean| <= 0.05
      - strong: |ic_mean| > 0.05
    """
    asof_today = asof_today or dt.date.today()
    # Assumes the date columns hold ISO "YYYY-MM-DD" text so that string
    # comparison is chronological — TODO confirm against the schema.
    cutoff = (asof_today - dt.timedelta(days=days)).isoformat()
    # NOTE(review): both queries only bound the window from below; rows dated
    # after asof_today are not excluded. Confirm this is intended when
    # asof_today lies in the past.

    sentiment = pd.read_sql_query(
        "SELECT ticker, date, score_raw, news_count "
        "FROM news_sentiment WHERE date >= ? AND news_count >= ? ORDER BY date",
        conn, params=(cutoff, min_news_count),
    )
    if sentiment.empty:
        return _empty_result(days, horizon, min_news_count)

    # Forward return per (ticker, date): close[date + horizon] / close[date] - 1
    prices = pd.read_sql_query(
        "SELECT ticker, date, close FROM krx_daily_prices "
        "WHERE date >= ? ORDER BY ticker, date",
        conn, params=(cutoff,),
    )
    if prices.empty:
        return _empty_result(days, horizon, min_news_count)

    prices = prices.sort_values(["ticker", "date"])
    # A non-positive close cannot anchor a return: as a denominator it yields
    # +/-inf, and as a forward price it fabricates a -100% move. Treat such
    # prices as missing so the affected rows are dropped below.
    prices["close"] = prices["close"].where(prices["close"] > 0)
    prices["fwd_close"] = prices.groupby("ticker", group_keys=False)["close"].shift(-horizon)
    prices["fwd_ret"] = prices["fwd_close"] / prices["close"] - 1.0

    merged = sentiment.merge(
        prices[["ticker", "date", "fwd_ret"]], on=["ticker", "date"], how="inner"
    )
    # Rows without a forward price (window tail, missing closes) carry NaN.
    merged = merged.dropna(subset=["fwd_ret"])
    if merged.empty:
        return _empty_result(days, horizon, min_news_count)

    ic_rows: List[Dict[str, Any]] = []
    for date, grp in merged.groupby("date"):
        ic = _spearman(grp["score_raw"], grp["fwd_ret"])
        # Skip days with no usable coefficient; a NaN must not be counted as
        # a valid observation in ic_count.
        if ic is None or pd.isna(ic):
            continue
        ic_rows.append({"date": date, "ic": ic, "n": len(grp)})

    if not ic_rows:
        return _empty_result(days, horizon, min_news_count)

    ic_series = pd.Series([r["ic"] for r in ic_rows], dtype=float)
    ic_mean = float(ic_series.mean())
    ic_std = float(ic_series.std(ddof=0)) if len(ic_series) > 1 else 0.0

    # The verdict uses the unrounded mean; rounding is applied only to the
    # reported values.
    if len(ic_rows) < 10:
        verdict = "skip"
    elif abs(ic_mean) > 0.05:
        verdict = "strong"
    else:
        verdict = "weak"

    return {
        "horizon_days": horizon,
        "min_news_count": min_news_count,
        "window_days": days,
        "ic_count": len(ic_rows),
        "ic_mean": round(ic_mean, 4),
        "ic_std": round(ic_std, 4),
        "ic_per_day": ic_rows,
        "verdict": verdict,
    }
|
|
|
|
|
|
def _empty_result(days: int, horizon: int, min_news_count: int) -> Dict[str, Any]:
    """Build the placeholder payload for the case where no IC is available.

    The request parameters are echoed back, the statistics are left unset,
    and the verdict is fixed to ``"skip"``.

    Args:
        days: Look-back window length, reported as ``window_days``.
        horizon: Forward-return horizon, reported as ``horizon_days``.
        min_news_count: News-count threshold, reported unchanged.

    Returns:
        A dict with zero ``ic_count``, ``None`` for ``ic_mean`` and
        ``ic_std``, and an empty ``ic_per_day`` list.
    """
    return dict(
        horizon_days=horizon,
        min_news_count=min_news_count,
        window_days=days,
        ic_count=0,
        ic_mean=None,
        ic_std=None,
        ic_per_day=[],
        verdict="skip",
    )
|