refactor: rename stock-lab → stock (graduation)
- git mv stock-lab/ → stock/ - docker-compose.yml: 서비스 키 + container_name + build.context + frontend.depends_on + agent-office STOCK_LAB_URL → STOCK_URL - agent-office/app: config.py, service_proxy.py, agents/stock.py, tests/ STOCK_LAB_URL → STOCK_URL - nginx/default.conf: proxy_pass http://stock-lab → http://stock (3 lines) - CLAUDE.md / README.md / STATUS.md / scripts/ 문구 갱신 - stock/ 내부 자기 참조 갱신 lab 네이밍 정책 (feedback_lab_naming.md) graduation. API URL / Python import / DB 파일명 변경 없음.
This commit is contained in:
0
stock/app/screener/ai_news/__init__.py
Normal file
0
stock/app/screener/ai_news/__init__.py
Normal file
103
stock/app/screener/ai_news/analyzer.py
Normal file
103
stock/app/screener/ai_news/analyzer.py
Normal file
@@ -0,0 +1,103 @@
|
||||
"""Claude Haiku 기반 종목 뉴스 호재/악재 분석."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict, List
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# Model id sent with each request.  It can be overridden through the
# AI_NEWS_MODEL environment variable, which is read once at import time.
DEFAULT_MODEL = os.getenv("AI_NEWS_MODEL", "claude-haiku-4-5-20251001")
|
||||
|
||||
PROMPT_TEMPLATE = """다음은 종목 {name}({ticker})에 대한 최근 뉴스 {n}개의 헤드라인입니다.
|
||||
|
||||
{news_block}
|
||||
|
||||
이 뉴스들이 종목에 호재인지 악재인지 평가하세요.
|
||||
score: -10(매우 강한 악재) ~ +10(매우 강한 호재) 사이의 실수. 0은 중립.
|
||||
reason: 30자 이내 한 줄 근거.
|
||||
|
||||
JSON으로만 응답하세요. 다른 텍스트 금지:
|
||||
{{"score": <float>, "reason": "<string>"}}"""
|
||||
|
||||
|
||||
def _clamp(x: float, lo: float = -10.0, hi: float = 10.0) -> float:
    """Clamp ``x`` into the closed interval [``lo``, ``hi``].

    Args:
        x: value to clamp.
        lo: lower bound (default -10.0).
        hi: upper bound (default 10.0).

    Returns:
        ``x`` limited to the range; infinities collapse to the nearest bound.

    Raises:
        ValueError: if ``x`` is NaN.  Every comparison with NaN is false, so
            the plain ``max(lo, min(hi, x))`` would silently turn NaN into
            ``hi`` (the strongest positive score) instead of flagging it.
    """
    if x != x:  # NaN is the only float that is not equal to itself
        raise ValueError("cannot clamp NaN")
    return max(lo, min(hi, x))
|
||||
|
||||
|
||||
def _format_news_block(news: List[Dict[str, Any]]) -> str:
    """Render news dicts as the bullet list embedded in the prompt.

    Each item becomes ``- [pub_date] title``; the date prefix is omitted when
    ``pub_date`` is missing or blank.  A non-empty ``summary`` is appended on
    the following line, truncated to 200 characters.
    """

    def _field(item: Dict[str, Any], key: str) -> str:
        # Missing keys and None values both collapse to an empty string.
        return (item.get(key) or "").strip()

    rendered: List[str] = []
    for item in news:
        stamp = _field(item, "pub_date")
        headline = _field(item, "title")
        body = _field(item, "summary")
        entry = "- " + (f"[{stamp}] " if stamp else "") + headline
        if body:
            entry += f"\n {body[:200]}"
        rendered.append(entry)
    return "\n".join(rendered)
|
||||
|
||||
|
||||
async def score_sentiment(
    llm,
    ticker: str,
    news: List[Dict[str, Any]],
    *,
    name: str | None = None,
    model: str = DEFAULT_MODEL,
) -> Dict[str, Any]:
    """Ask the LLM to rate a ticker's recent news as good or bad.

    Args:
        llm: client object exposing an awaitable ``messages.create(...)``.
        ticker: ticker code; also used as the display name when ``name`` is
            not given.
        news: news dicts rendered into the prompt by ``_format_news_block``.
        name: optional company name shown in the prompt.
        model: model id passed through to the client.

    Returns:
        ``{ticker, score_raw, reason, news_count, tokens_input,
        tokens_output, model}``.  When the reply cannot be parsed the score is
        the neutral ``0.0`` and ``reason`` starts with ``"parse fail: "``.
        Exceptions raised by the client call itself are not caught here.
    """
    prompt = PROMPT_TEMPLATE.format(
        name=name or ticker,
        ticker=ticker,
        n=len(news),
        news_block=_format_news_block(news),
    )
    resp = await llm.messages.create(
        model=model,
        max_tokens=200,
        temperature=0,
        system="너는 한국 주식 뉴스 감성 분석가다. JSON 객체 하나만 반환한다.",
        messages=[
            {"role": "user", "content": prompt},
            # Assistant prefill: force the reply to start with '{' so that the
            # model continues a JSON object.
            {"role": "assistant", "content": "{"},
        ],
    )
    # A first block without usable text is treated as an empty reply and ends
    # up as a parse failure instead of an AttributeError.
    raw = (getattr(resp.content[0], "text", "") or "") if resp.content else ""
    # The prefilled '{' is not echoed back in the reply, so re-attach it
    # unless the model repeated it.
    text = raw.lstrip()
    if not text.startswith("{"):
        text = "{" + text
    in_tokens = int(getattr(resp.usage, "input_tokens", 0) or 0)
    out_tokens = int(getattr(resp.usage, "output_tokens", 0) or 0)

    try:
        # raw_decode parses the first JSON value and ignores anything after
        # it, so trailing commentary no longer discards a valid score.
        data, _end = json.JSONDecoder().raw_decode(text)
        score = float(data["score"])
        if score != score:  # NaN would otherwise be clamped to the upper bound
            raise ValueError("score is NaN")
        score = _clamp(score)
        reason = str(data["reason"])[:200]
    except (json.JSONDecodeError, KeyError, TypeError, ValueError) as e:
        log.warning("ai_news parse fail for %s: %s (raw=%r)", ticker, e, text[:100])
        score = 0.0
        reason = f"parse fail: {e!s}"[:200]

    return {
        "ticker": ticker,
        "score_raw": score,
        "reason": reason,
        "news_count": len(news),
        "tokens_input": in_tokens,
        "tokens_output": out_tokens,
        "model": model,
    }
|
||||
70
stock/app/screener/ai_news/articles_source.py
Normal file
70
stock/app/screener/ai_news/articles_source.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""기존 articles 테이블에서 종목별 뉴스 매핑."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
import logging
|
||||
import sqlite3
|
||||
from typing import Any, Dict, List, Tuple
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def gather_articles_for_tickers(
    conn: sqlite3.Connection,
    tickers: List[str],
    asof: dt.date,
    *,
    window_days: int = 1,
    max_per_ticker: int = 5,
) -> Tuple[Dict[str, List[Dict[str, Any]]], Dict[str, int]]:
    """Map rows of the ``articles`` table to tickers by company-name substring.

    Company names come from ``krx_master``.  Articles whose ``crawled_at`` is
    on or after ``asof - window_days`` are scanned newest first, and each
    ticker keeps at most ``max_per_ticker`` matches.

    Returns:
        (
            {ticker: [{"title": str, "summary": str, "press": str, "pub_date": str}, ...]},
            {"total_articles": int, "matched_pairs": int, "hit_tickers": int},
        )
    """
    matched: Dict[str, List[Dict[str, Any]]] = {code: [] for code in tickers}
    stats = {"total_articles": 0, "matched_pairs": 0, "hit_tickers": 0}
    if not tickers:
        return matched, stats

    since = (asof - dt.timedelta(days=window_days)).isoformat()

    marks = ",".join("?" * len(tickers))
    master_rows = conn.execute(
        f"SELECT ticker, name FROM krx_master WHERE ticker IN ({marks})",
        tickers,
    ).fetchall()
    # Names shorter than two characters are skipped: they would match almost
    # any text and produce false positives.
    company_names: Dict[str, str] = {}
    for row in master_rows:
        if row[1] and len(row[1]) >= 2:
            company_names[row[0]] = row[1]

    recent = conn.execute(
        "SELECT title, summary, press, pub_date, crawled_at "
        "FROM articles WHERE crawled_at >= ? ORDER BY crawled_at DESC",
        (since,),
    ).fetchall()
    stats["total_articles"] = len(recent)

    for article in recent:
        headline = (article[0] or "").strip()
        blurb = (article[1] or "").strip()
        searchable = headline + " " + blurb
        for code, company in company_names.items():
            if company not in searchable:
                continue
            bucket = matched[code]
            if len(bucket) < max_per_ticker:
                bucket.append(
                    {
                        "title": headline,
                        "summary": blurb,
                        "press": article[2] or "",
                        "pub_date": article[3] or "",
                    }
                )
                stats["matched_pairs"] += 1

    stats["hit_tickers"] = len([code for code, found in matched.items() if found])
    return matched, stats
|
||||
141
stock/app/screener/ai_news/pipeline.py
Normal file
141
stock/app/screener/ai_news/pipeline.py
Normal file
@@ -0,0 +1,141 @@
|
||||
"""ai_news refresh pipeline — 시총 상위 N종목 병렬 처리."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import datetime as dt
|
||||
import logging
|
||||
import os
|
||||
import sqlite3
|
||||
import time
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from . import scraper as _scraper # legacy, kept for backward import
|
||||
from . import analyzer as _analyzer
|
||||
from . import articles_source # 신규
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# Defaults for the keyword arguments of refresh_daily.
DEFAULT_TOP_N = 100  # number of top-market-cap tickers to analyze
DEFAULT_CONCURRENCY = 10  # size of the semaphore limiting concurrent LLM calls
DEFAULT_NEWS_PER_TICKER = 5  # cap on matched articles per ticker
|
||||
|
||||
|
||||
def _top_market_cap_tickers(conn: sqlite3.Connection, n: int) -> List[str]:
    """Return up to ``n`` tickers from ``krx_master``, largest market cap first.

    Rows with a NULL ``market_cap`` and rows flagged ``is_preferred`` or
    ``is_spac`` are excluded.
    """
    query = (
        "SELECT ticker FROM krx_master "
        "WHERE market_cap IS NOT NULL AND is_preferred=0 AND is_spac=0 "
        "ORDER BY market_cap DESC LIMIT ?"
    )
    cursor = conn.execute(query, (n,))
    return [record[0] for record in cursor.fetchall()]
|
||||
|
||||
|
||||
def _make_llm():
    """Build an ``AsyncAnthropic`` client from the ANTHROPIC_API_KEY env var.

    The SDK is imported lazily inside the function.  A missing environment
    variable raises ``KeyError``.
    """
    from anthropic import AsyncAnthropic

    api_key = os.environ["ANTHROPIC_API_KEY"]
    return AsyncAnthropic(api_key=api_key)
|
||||
|
||||
|
||||
async def _process_one(
    ticker: str,
    name: str,
    articles: List[Dict[str, Any]],
    sem: asyncio.Semaphore,
    llm,
    model: str,
) -> Dict[str, Any]:
    """Score one ticker's articles while holding a slot of ``sem``.

    The semaphore bounds how many ``score_sentiment`` calls run at once; the
    slot is released whether the call succeeds or raises.
    """
    await sem.acquire()
    try:
        result = await _analyzer.score_sentiment(
            llm, ticker, articles, name=name, model=model,
        )
        return result
    finally:
        sem.release()
|
||||
|
||||
|
||||
def _upsert_news_sentiment(
    conn: sqlite3.Connection,
    asof: dt.date,
    rows: List[Dict[str, Any]],
    *,
    source: str = "articles",
) -> None:
    """Insert or overwrite one ``news_sentiment`` row per result dict.

    Rows are keyed by ``(ticker, date)`` with ``date`` set to
    ``asof.isoformat()``; on conflict every other column is replaced.  The
    transaction is committed before returning.
    """
    day = asof.isoformat()
    payload_keys = (
        "score_raw", "reason", "news_count",
        "tokens_input", "tokens_output", "model",
    )
    # Build all parameter tuples up front so that a malformed row raises
    # before any statement is executed.
    params = [
        (row["ticker"], day, *[row[key] for key in payload_keys], source)
        for row in rows
    ]
    sql = """INSERT INTO news_sentiment
             (ticker, date, score_raw, reason, news_count,
              tokens_input, tokens_output, model, source)
           VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
           ON CONFLICT(ticker, date) DO UPDATE SET
             score_raw=excluded.score_raw,
             reason=excluded.reason,
             news_count=excluded.news_count,
             tokens_input=excluded.tokens_input,
             tokens_output=excluded.tokens_output,
             model=excluded.model,
             source=excluded.source
        """
    conn.executemany(sql, params)
    conn.commit()
|
||||
|
||||
|
||||
async def refresh_daily(
    conn: sqlite3.Connection,
    asof: dt.date,
    *,
    top_n: int = DEFAULT_TOP_N,
    concurrency: int = DEFAULT_CONCURRENCY,
    max_news_per_ticker: int = DEFAULT_NEWS_PER_TICKER,
    window_days: int = 1,
    model: str = _analyzer.DEFAULT_MODEL,
) -> Dict[str, Any]:
    """Score news sentiment for the top-market-cap tickers and persist it.

    Steps: pick the ``top_n`` tickers, map recent articles to them, score each
    ticker that has at least one article (at most ``concurrency`` LLM calls in
    flight), upsert the results into ``news_sentiment`` and return a summary.

    Args:
        conn: SQLite connection holding ``krx_master``, ``articles`` and
            ``news_sentiment``.
        asof: date the scores are stored under.
        top_n: number of tickers to consider.
        concurrency: maximum number of concurrent LLM requests.
        max_news_per_ticker: cap on articles passed to the LLM per ticker.
        window_days: how many days back from ``asof`` articles are collected.
        model: model id forwarded to the analyzer.

    Returns:
        Dict with ``asof``, ``updated``, ``failures`` (``repr`` of each raised
        exception), ``duration_sec``, ``tokens_input``, ``tokens_output``,
        ``top_pos`` (up to 5 rows with a strictly positive score, highest
        first), ``top_neg`` (up to 5 rows with a strictly negative score,
        lowest first), ``model`` and ``mapping`` (article-mapping stats).
    """
    started = time.time()
    tickers = _top_market_cap_tickers(conn, n=top_n)
    name_map = {
        r[0]: r[1] for r in conn.execute(
            f"SELECT ticker, name FROM krx_master WHERE ticker IN "
            f"({','.join('?' * len(tickers))})", tickers,
        ).fetchall()
    } if tickers else {}

    articles_by_ticker, mapping_stats = articles_source.gather_articles_for_tickers(
        conn, tickers, asof,
        window_days=window_days,
        max_per_ticker=max_news_per_ticker,
    )

    sem = asyncio.Semaphore(concurrency)
    async with _make_llm() as llm:
        tasks = []
        for t in tickers:
            arts = articles_by_ticker.get(t, [])
            if not arts:
                continue  # no mapped articles, so no score is produced
            tasks.append(_process_one(t, name_map.get(t, t), arts, sem, llm, model))
        # return_exceptions=True keeps one failing ticker from cancelling the rest.
        raw_results = await asyncio.gather(*tasks, return_exceptions=True)

    successes: List[Dict[str, Any]] = []
    failures: List[str] = []
    for r in raw_results:
        if isinstance(r, BaseException):
            failures.append(repr(r))
        elif isinstance(r, dict):
            successes.append(r)

    if successes:
        _upsert_news_sentiment(conn, asof, successes, source="articles")

    # Filter by sign before ranking.  A plain sorted slice would list negative
    # or neutral rows as "good news" (and vice versa) whenever fewer than five
    # rows have the matching sign.
    top_pos = sorted(
        (r for r in successes if r["score_raw"] > 0),
        key=lambda r: -r["score_raw"],
    )[:5]
    top_neg = sorted(
        (r for r in successes if r["score_raw"] < 0),
        key=lambda r: r["score_raw"],
    )[:5]

    return {
        "asof": asof.isoformat(),
        "updated": len(successes),
        "failures": failures,
        "duration_sec": round(time.time() - started, 2),
        "tokens_input": sum(r["tokens_input"] for r in successes),
        "tokens_output": sum(r["tokens_output"] for r in successes),
        "top_pos": top_pos,
        "top_neg": top_neg,
        "model": model,
        "mapping": mapping_stats,
    }
|
||||
46
stock/app/screener/ai_news/scraper.py
Normal file
46
stock/app/screener/ai_news/scraper.py
Normal file
@@ -0,0 +1,46 @@
|
||||
"""[DEPRECATED] 네이버 finance 종목 뉴스 스크래핑.
|
||||
|
||||
본 모듈은 ai_news Phase 1 (2026-05-14) 에서 더 이상 파이프라인에서 사용되지 않음.
|
||||
데이터 소스는 stock 의 articles 테이블 (ai_news/articles_source.py) 로 전환됨.
|
||||
|
||||
삭제 시점: Phase 2 (DART 도입) 결정 후. IC 검증 4주 누적 후 노드 활성화
|
||||
여부에 따라 본 모듈을 (a) 완전 삭제 또는 (b) ensemble fallback 으로 재활용.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# URL requested by fetch_news; the ticker code is sent as the "code" query parameter.
NAVER_NEWS_URL = "https://finance.naver.com/item/news_news.naver"
# NOTE(review): fetch_news in this module does not reference these headers;
# presumably the caller sets them on the HTTP client it passes in. Verify.
NAVER_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    "Referer": "https://finance.naver.com/",
}
|
||||
|
||||
|
||||
async def fetch_news(client, ticker: str, n: int = 5) -> List[Dict[str, Any]]:
    """Scrape up to ``n`` news headlines for ``ticker``.

    Returns ``[]`` when the request raises or the response status is not 200.
    Each item is ``{"title": str, "date": str}``; table rows missing either
    cell are skipped.
    """
    try:
        resp = await client.get(NAVER_NEWS_URL, params={"code": ticker, "page": 1})
    except Exception as exc:
        # Deliberate best-effort: any transport error means "no news".
        log.warning("ai_news scrape http error for %s: %s", ticker, exc)
        return []
    if resp.status_code != 200:
        return []

    page = BeautifulSoup(resp.text, "lxml")
    headlines: List[Dict[str, Any]] = []
    for tr in page.select("table.type5 tbody tr")[:n]:
        title_cell = tr.select_one("td.title a")
        date_cell = tr.select_one("td.date")
        if title_cell and date_cell:
            headlines.append(
                {
                    "title": title_cell.get_text(strip=True),
                    "date": date_cell.get_text(strip=True),
                }
            )
    return headlines
|
||||
73
stock/app/screener/ai_news/telegram.py
Normal file
73
stock/app/screener/ai_news/telegram.py
Normal file
@@ -0,0 +1,73 @@
|
||||
"""ai_news Top 5/5 텔레그램 메시지 빌더 (MarkdownV2)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
|
||||
# Characters that _escape prefixes with a backslash.  The raw literal ends in
# two backslash characters, so the backslash itself is escaped as well.
_MD_SPECIAL = r"_*[]()~`>#+-=|{}.!\\"
|
||||
|
||||
|
||||
def _escape(text: str) -> str:
    """Return ``str(text)`` with every character of ``_MD_SPECIAL`` backslash-escaped."""
    escapes = {ord(ch): "\\" + ch for ch in _MD_SPECIAL}
    return str(text).translate(escapes)
|
||||
|
||||
|
||||
def _cost_won(tokens_input: int, tokens_output: int) -> int:
    """Estimate the API cost in KRW, truncated to an integer.

    Uses flat per-token rates: 0.0013 won per input token and 0.0065 won per
    output token (roughly $1/M and $5/M at 1,300 won per dollar).
    """
    won_per_input_token = 0.0013
    won_per_output_token = 0.0065
    total = tokens_input * won_per_input_token + tokens_output * won_per_output_token
    return int(total)
|
||||
|
||||
|
||||
def _row_line(idx: int, r: Dict[str, Any]) -> str:
    """Format one ranked row as ``N. label (score) — reason`` in MarkdownV2.

    The label is ``name (ticker)`` when the row carries a non-empty ``name``,
    otherwise just the ticker.
    """
    # The signed score contains '+', '-' and '.', all of which are in
    # _MD_SPECIAL, so the formatted text goes through _escape too.
    score_md = _escape(f"{r['score_raw']:+.1f}")
    company = r.get("name") or ""
    code = r["ticker"]
    if company:
        label = f"{_escape(company)} \\({_escape(code)}\\)"
    else:
        label = _escape(code)
    reason_md = _escape(r["reason"])
    return f"{idx}\\. {label} \\({score_md}\\) — {reason_md}"
|
||||
|
||||
|
||||
def build_message(
    *,
    asof: str,
    top_pos: List[Dict[str, Any]],
    top_neg: List[Dict[str, Any]],
    tokens_input: int,
    tokens_output: int,
    mapping: Dict[str, int] | None = None,
    top_n: int = 100,
) -> str:
    """Build the Telegram (MarkdownV2) summary of the daily ai_news run.

    Args:
        asof: date string shown in the title.
        top_pos: rows for the good-news section, each with ``ticker``,
            ``score_raw``, ``reason`` and optionally ``name``.
        top_neg: rows for the bad-news section, same shape.
        tokens_input: total input tokens, used for the cost estimate.
        tokens_output: total output tokens, used for the cost estimate.
        mapping: optional article-mapping stats with ``hit_tickers``,
            ``matched_pairs`` and ``total_articles``.
        top_n: size of the analyzed ticker universe shown in the footer.
            Defaults to 100, the value that used to be hard-coded.

    Returns:
        The message text, lines joined with newlines.
    """

    def _section(header: str, rows: List[Dict[str, Any]]) -> List[str]:
        # An empty section still gets a placeholder line.
        body = [_row_line(i, r) for i, r in enumerate(rows, 1)]
        return [header, *(body or [_escape("- (없음)")])]

    universe = _escape(top_n)

    lines: List[str] = [f"🌅 *AI 뉴스 분석* \\({_escape(asof)} 08:00\\)", ""]
    lines += _section("📈 *호재 Top 5*", top_pos)
    lines.append("")
    lines += _section("📉 *악재 Top 5*", top_neg)

    cost = _cost_won(tokens_input, tokens_output)
    mapping_part = ""
    if mapping:
        mapping_part = (
            f"매핑 {mapping['hit_tickers']}/{universe} ticker "
            f"\\({mapping['matched_pairs']}쌍 / articles {mapping['total_articles']}건\\) · "
        )
    lines += [
        "",
        f"_분석: 시총 상위 {universe}종목 · {mapping_part}"
        f"토큰 {tokens_input:,} in / {tokens_output:,} out · "
        f"약 ₩{cost:,}_",
    ]
    return "\n".join(lines)
|
||||
125
stock/app/screener/ai_news/validation.py
Normal file
125
stock/app/screener/ai_news/validation.py
Normal file
@@ -0,0 +1,125 @@
|
||||
"""AI news sentiment validation — Spearman IC vs forward returns.
|
||||
|
||||
핵심 metric: 일자별 score_raw 와 다음 N일 forward return 의 Spearman 상관.
|
||||
4주+ 누적 후 IC mean > 0.05 면 weight 활성화 가치 있음.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
import sqlite3
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def _spearman(a: pd.Series, b: pd.Series) -> Optional[float]:
    """Return the Spearman rank correlation of ``a`` and ``b``.

    Returns ``None`` when either series has fewer than 5 values or has zero
    variance, since the correlation is then undefined or meaningless.
    """
    too_short = min(len(a), len(b)) < 5
    if too_short:
        return None
    for series in (a, b):
        if series.std(ddof=0) == 0:
            return None
    ranked_a = a.rank()
    ranked_b = b.rank()
    return float(ranked_a.corr(ranked_b))
|
||||
|
||||
|
||||
def compute_ic(
    conn: sqlite3.Connection,
    *,
    days: int = 30,
    horizon: int = 1,
    min_news_count: int = 1,
    asof_today: Optional[dt.date] = None,
) -> Dict[str, Any]:
    """Compute the daily Spearman IC of ``score_raw`` against forward returns.

    Sentiment rows from the last ``days`` days (with at least
    ``min_news_count`` articles) are joined to ``krx_daily_prices``.  The
    forward return is ``close`` shifted ``horizon`` rows ahead per ticker,
    divided by the current ``close``, minus 1.

    Returns:
        {
            "horizon_days": int,
            "min_news_count": int,
            "window_days": int,
            "ic_count": int,           # number of days with a valid IC
            "ic_mean": float | None,
            "ic_std": float | None,
            "ic_per_day": [{"date": "YYYY-MM-DD", "ic": float, "n": int}, ...],
            "verdict": "skip" | "weak" | "strong",
        }

    verdict:
        - skip:   ic_count < 10
        - weak:   ic_mean in [-0.05, 0.05]
        - strong: |ic_mean| > 0.05
    """
    today = asof_today or dt.date.today()
    since = (today - dt.timedelta(days=days)).isoformat()

    scores = pd.read_sql_query(
        "SELECT ticker, date, score_raw, news_count "
        "FROM news_sentiment WHERE date >= ? AND news_count >= ? ORDER BY date",
        conn,
        params=(since, min_news_count),
    )
    if scores.empty:
        return _empty_result(days, horizon, min_news_count)

    # Forward return per (ticker, date): close[date + horizon] / close[date] - 1
    px = pd.read_sql_query(
        "SELECT ticker, date, close FROM krx_daily_prices "
        "WHERE date >= ? ORDER BY ticker, date",
        conn,
        params=(since,),
    )
    if px.empty:
        return _empty_result(days, horizon, min_news_count)

    px = px.sort_values(["ticker", "date"])
    px["fwd_close"] = px.groupby("ticker", group_keys=False)["close"].shift(-horizon)
    px["fwd_ret"] = px["fwd_close"] / px["close"] - 1.0

    joined = scores.merge(
        px[["ticker", "date", "fwd_ret"]], on=["ticker", "date"], how="inner"
    ).dropna(subset=["fwd_ret"])
    if joined.empty:
        return _empty_result(days, horizon, min_news_count)

    daily: List[Dict[str, Any]] = []
    for day, group in joined.groupby("date"):
        ic = _spearman(group["score_raw"], group["fwd_ret"])
        if ic is None:
            continue  # too few names or no variance on that day
        daily.append({"date": day, "ic": ic, "n": int(len(group))})
    if not daily:
        return _empty_result(days, horizon, min_news_count)

    ics = pd.Series([row["ic"] for row in daily], dtype=float)
    mean_ic = float(ics.mean())
    std_ic = float(ics.std(ddof=0)) if len(ics) > 1 else 0.0

    if len(daily) < 10:
        verdict = "skip"
    else:
        verdict = "strong" if abs(mean_ic) > 0.05 else "weak"

    return {
        "horizon_days": horizon,
        "min_news_count": min_news_count,
        "window_days": days,
        "ic_count": len(daily),
        "ic_mean": round(mean_ic, 4),
        "ic_std": round(std_ic, 4),
        "ic_per_day": daily,
        "verdict": verdict,
    }
|
||||
|
||||
|
||||
def _empty_result(days: int, horizon: int, min_news_count: int) -> Dict[str, Any]:
    """Return the result dict for "no usable data", with verdict ``"skip"``."""
    result: Dict[str, Any] = dict(
        horizon_days=horizon,
        min_news_count=min_news_count,
        window_days=days,
    )
    result.update(
        ic_count=0,
        ic_mean=None,
        ic_std=None,
        ic_per_day=[],
        verdict="skip",
    )
    return result
|
||||
Reference in New Issue
Block a user