Files
web-page-backend/stock-lab/app/screener/schema.py
gahusb 943f676414 fix(ai_news): set weight=0 and add Spearman IC validation harness
검증 전 gradient 차단 + IC 측정 인프라.

- schema.py: DEFAULT_WEIGHTS["ai_news"] 0.8 → 0.0
  + 1회성 migration: 기존 운영 row 의 0.8 값 자동 reset
  (사용자가 명시 조정한 다른 값은 그대로 유지)
- ai_news/validation.py: compute_ic() — 일자별 score_raw × forward
  return Spearman 상관, ic_mean/ic_std/ic_per_day 반환, verdict 분류
  (skip/weak/strong)
- router.py: GET /api/stock/screener/ai-news/ic?days=30&horizon=1
- 단위 테스트 5개: empty DB, strong +IC, random ≈0 IC, min_news_count
  필터, horizon=5

배경: adversarial review 결과 — ai_news 가중치 0.8 이 검증 없이 출시됨.
4주+ 데이터 누적 후 IC > 0.05 확인 전까지 데이터 수집은 계속하되
가중합 영향만 차단. 운영 DB row 의 0.8 → 0.0 자동 reset 도 같은 의도.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 01:06:02 +09:00

179 lines
6.0 KiB
Python

"""Screener schema bootstrap. Called once at module import via db.py."""
import json
import sqlite3
from datetime import datetime, timezone
# Default weight per screener node, keyed by node name. Serialized into
# screener_settings.weights_json when the settings row is first seeded.
DEFAULT_WEIGHTS = {
    "foreign_buy": 1.0,
    "volume_surge": 1.0,
    "momentum": 1.0,
    "high52w": 1.2,
    "rs_rating": 1.2,
    "ma_alignment": 1.0,
    "vcp_lite": 0.8,
    # ai_news: influence is blocked until the signal is validated (to be
    # enabled once a 4-week IC > 0.05 has been confirmed). Data collection
    # continues; only its contribution to the weighted sum is zero.
    "ai_news": 0.0,
}

# Default tunables per screener node, keyed by the same node names as
# DEFAULT_WEIGHTS. Serialized into screener_settings.node_params_json.
DEFAULT_NODE_PARAMS = {
    "foreign_buy": {
        "window_days": 5,
    },
    "volume_surge": {
        "baseline_days": 20,
        "eval_days": 3,
    },
    "momentum": {
        "window_days": 20,
    },
    "high52w": {
        "window_days": 252,
    },
    "rs_rating": {
        "weights": {"3m": 2, "6m": 1, "9m": 1, "12m": 1},
    },
    "ma_alignment": {
        "ma_periods": [50, 150, 200],
    },
    "vcp_lite": {
        "short_window": 40,
        "long_window": 252,
    },
    "ai_news": {
        "min_news_count": 1,
    },
}

# Default universe-filter ("gate") settings. Serialized into
# screener_settings.gate_params_json.
DEFAULT_GATE_PARAMS = {
    "min_market_cap_won": 50_000_000_000,  # 5e10
    "min_avg_value_won": 500_000_000,  # 5e8
    "min_listed_days": 60,
    "skip_managed": True,
    "skip_preferred": True,
    "skip_spac": True,
    "skip_halted_days": 3,
}
# SQLite schema script for the screener, executed as a whole via
# Connection.executescript(). Every statement uses IF NOT EXISTS, so running
# the script again against an existing database creates nothing new.
#
# Tables defined here:
#   krx_master        - one row per ticker (name, market, market cap, flags).
#   krx_daily_prices  - OHLC / volume / value per (ticker, date).
#   krx_flow          - foreign / institution net figures per (ticker, date).
#   screener_settings - single settings row; CHECK (id = 1) forbids any other id.
#   screener_runs     - one row per screener run, with the settings JSON
#                       stored alongside it.
#   screener_results  - per-run ranked tickers; rows reference screener_runs
#                       with ON DELETE CASCADE.
#   news_sentiment    - per (ticker, date) sentiment score plus token usage.
#
# NOTE(review): SQLite enforces the FOREIGN KEY / ON DELETE CASCADE clause only
# when the foreign_keys pragma is enabled on the connection - confirm that the
# code opening the connection turns it on.
# NOTE(review): news_sentiment.created_at defaults to *local* time
# (datetime('now','localtime')) - confirm this is intended if other timestamp
# columns are written in UTC.
DDL = """
CREATE TABLE IF NOT EXISTS krx_master (
ticker TEXT PRIMARY KEY,
name TEXT NOT NULL,
market TEXT NOT NULL,
market_cap INTEGER,
is_managed INTEGER NOT NULL DEFAULT 0,
is_preferred INTEGER NOT NULL DEFAULT 0,
is_spac INTEGER NOT NULL DEFAULT 0,
listed_date TEXT,
updated_at TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS krx_daily_prices (
ticker TEXT NOT NULL,
date TEXT NOT NULL,
open INTEGER, high INTEGER, low INTEGER, close INTEGER,
volume INTEGER,
value INTEGER,
PRIMARY KEY (ticker, date)
);
CREATE INDEX IF NOT EXISTS idx_prices_date ON krx_daily_prices(date);
CREATE TABLE IF NOT EXISTS krx_flow (
ticker TEXT NOT NULL,
date TEXT NOT NULL,
foreign_net INTEGER,
institution_net INTEGER,
PRIMARY KEY (ticker, date)
);
CREATE INDEX IF NOT EXISTS idx_flow_date ON krx_flow(date);
CREATE TABLE IF NOT EXISTS screener_settings (
id INTEGER PRIMARY KEY CHECK (id = 1),
weights_json TEXT NOT NULL,
node_params_json TEXT NOT NULL,
gate_params_json TEXT NOT NULL,
top_n INTEGER NOT NULL DEFAULT 20,
rr_ratio REAL NOT NULL DEFAULT 2.0,
atr_window INTEGER NOT NULL DEFAULT 14,
atr_stop_mult REAL NOT NULL DEFAULT 2.0,
updated_at TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS screener_runs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
asof TEXT NOT NULL,
mode TEXT NOT NULL,
status TEXT NOT NULL,
error TEXT,
started_at TEXT NOT NULL,
finished_at TEXT,
weights_json TEXT NOT NULL,
node_params_json TEXT NOT NULL,
gate_params_json TEXT NOT NULL,
top_n INTEGER NOT NULL,
survivors_count INTEGER,
telegram_sent INTEGER NOT NULL DEFAULT 0
);
CREATE INDEX IF NOT EXISTS idx_runs_asof ON screener_runs(asof DESC);
CREATE TABLE IF NOT EXISTS screener_results (
run_id INTEGER NOT NULL,
rank INTEGER NOT NULL,
ticker TEXT NOT NULL,
name TEXT NOT NULL,
total_score REAL NOT NULL,
scores_json TEXT NOT NULL,
close INTEGER,
market_cap INTEGER,
entry_price INTEGER,
stop_price INTEGER,
target_price INTEGER,
atr14 REAL,
PRIMARY KEY (run_id, ticker),
FOREIGN KEY (run_id) REFERENCES screener_runs(id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_results_run_rank ON screener_results(run_id, rank);
CREATE TABLE IF NOT EXISTS news_sentiment (
ticker TEXT NOT NULL,
date TEXT NOT NULL,
score_raw REAL NOT NULL,
reason TEXT NOT NULL DEFAULT '',
news_count INTEGER NOT NULL DEFAULT 0,
tokens_input INTEGER NOT NULL DEFAULT 0,
tokens_output INTEGER NOT NULL DEFAULT 0,
model TEXT NOT NULL DEFAULT 'claude-haiku-4-5-20251001',
created_at TEXT NOT NULL DEFAULT (datetime('now','localtime')),
PRIMARY KEY (ticker, date)
);
CREATE INDEX IF NOT EXISTS idx_news_sentiment_date ON news_sentiment(date DESC);
"""
def ensure_screener_schema(conn: sqlite3.Connection) -> None:
    """Create the screener tables and seed or migrate the settings row.

    Executes the ``DDL`` script, then makes sure the single
    ``screener_settings`` row (``id = 1``) exists and carries ``ai_news``
    entries:

    * No row yet: insert one built from ``DEFAULT_WEIGHTS``,
      ``DEFAULT_NODE_PARAMS`` and ``DEFAULT_GATE_PARAMS``, with
      ``updated_at`` set to the current UTC time in ISO-8601 form.
    * Row present: add a missing ``ai_news`` weight or node-param entry from
      the defaults, and reset a stored ``ai_news`` weight of exactly ``0.8``
      to ``0.0``. Every other stored value is left as it is, and the row's
      ``updated_at`` is not modified by this migration.

    The connection is committed before returning, so the function can be
    called repeatedly without creating duplicate rows.

    Args:
        conn: Open SQLite connection to run the schema script against.

    Raises:
        sqlite3.Error: If any statement fails.
        json.JSONDecodeError: If the stored ``weights_json`` or
            ``node_params_json`` is not valid JSON.
    """
    conn.executescript(DDL)

    # One lookup decides both paths: seed a fresh database, or migrate the
    # row of an installation that is already running.
    row = conn.execute(
        "SELECT weights_json, node_params_json FROM screener_settings WHERE id=1"
    ).fetchone()

    if row is None:
        now = datetime.now(timezone.utc).isoformat()
        conn.execute(
            """
            INSERT INTO screener_settings (
                id, weights_json, node_params_json, gate_params_json,
                top_n, rr_ratio, atr_window, atr_stop_mult, updated_at
            ) VALUES (1, ?, ?, ?, 20, 2.0, 14, 2.0, ?)
            """,
            (
                json.dumps(DEFAULT_WEIGHTS),
                json.dumps(DEFAULT_NODE_PARAMS),
                json.dumps(DEFAULT_GATE_PARAMS),
                now,
            ),
        )
    else:
        weights = json.loads(row[0])
        node_params = json.loads(row[1])
        changed = False

        if "ai_news" not in weights:
            # Settings saved before the ai_news node existed: add its weight.
            weights["ai_news"] = DEFAULT_WEIGHTS["ai_news"]
            changed = True
        elif weights["ai_news"] == 0.8:
            # Former default 0.8 -> 0.0: block the node's influence until it
            # has been validated. Values other than 0.8 are treated as an
            # explicit user choice and kept.
            # NOTE(review): this check runs on every call, so a weight of
            # exactly 0.8 that a user sets later is reset again on the next
            # call - confirm that is acceptable, or gate this behind a
            # persisted migration marker.
            weights["ai_news"] = 0.0
            changed = True

        if "ai_news" not in node_params:
            # Same back-fill for the node's parameters.
            node_params["ai_news"] = DEFAULT_NODE_PARAMS["ai_news"]
            changed = True

        if changed:
            conn.execute(
                "UPDATE screener_settings SET weights_json=?, node_params_json=? WHERE id=1",
                (json.dumps(weights), json.dumps(node_params)),
            )

    conn.commit()