"""Tests for ai_news validation harness (Spearman IC)."""

import datetime as dt
import random
import sqlite3

import pytest

from app.screener.ai_news import validation
from app.screener.schema import ensure_screener_schema


@pytest.fixture
def conn():
    """Yield an in-memory SQLite connection with the screener schema applied.

    The connection is closed on teardown, and also when schema setup itself
    fails, so a broken schema helper cannot leak the handle.
    """
    c = sqlite3.connect(":memory:")
    try:
        c.row_factory = sqlite3.Row
        ensure_screener_schema(c)
        yield c
    finally:
        c.close()


def _iso_day(base_date, offset):
    """Return ``base_date`` shifted by ``offset`` calendar days as ISO text."""
    return (base_date + dt.timedelta(days=offset)).isoformat()


def _seed_sentiment(conn, date, ticker, score, news_count=3):
    """Insert one ``news_sentiment`` row; non-essential columns get fixed values."""
    conn.execute(
        "INSERT INTO news_sentiment (ticker, date, score_raw, reason, news_count, "
        "tokens_input, tokens_output, model) "
        "VALUES (?, ?, ?, 'r', ?, 100, 20, 'm')",
        (ticker, date, score, news_count),
    )


def _seed_price(conn, ticker, date, close):
    """Insert one ``krx_daily_prices`` row with the given closing price."""
    conn.execute(
        "INSERT INTO krx_daily_prices (ticker, date, close) VALUES (?, ?, ?)",
        (ticker, date, close),
    )


def test_empty_db_returns_skip(conn):
    """With no seeded rows, compute_ic reports zero samples and a skip verdict."""
    out = validation.compute_ic(
        conn, days=30, horizon=1, asof_today=dt.date(2026, 5, 14)
    )
    assert out["ic_count"] == 0
    assert out["verdict"] == "skip"
    assert out["ic_mean"] is None


def test_strong_positive_ic(conn):
    """5 tickers x 12 days, seeded so a higher score means a higher next-day return.

    Each ticker keeps a constant score and its close moves by ``score`` per
    day, which gives an exactly monotonic score/return relationship, so the
    IC is expected to be strongly positive.
    """
    base_date = dt.date(2026, 5, 1)
    # Seed 13 days of prices (day 0..day 12). Every ticker has its own
    # score-driven slope, so closes differ per ticker and per day.
    for i, ticker in enumerate(["A", "B", "C", "D", "E"]):
        score = i * 2.0 - 4.0  # fixed per ticker: -4, -2, 0, +2, +4
        # day 0 close = 100, day n close = 100 + score * n
        for day in range(13):
            d = _iso_day(base_date, day)
            _seed_price(conn, ticker, d, 100.0 + score * day)
            # The last price day gets no sentiment row: 12 sentiment days,
            # each followed by a next-day price.
            if day < 12:
                _seed_sentiment(conn, d, ticker, score)
    conn.commit()

    out = validation.compute_ic(
        conn, days=30, horizon=1, asof_today=dt.date(2026, 5, 14)
    )
    assert out["ic_count"] >= 10
    assert out["ic_mean"] > 0.5
    assert out["verdict"] == "strong"


def test_zero_ic_random_data(conn):
    """Scores unrelated to returns should give an IC close to zero."""
    # A private generator yields the same sequence as ``random.seed(42)``
    # without reseeding the process-wide RNG that other tests may rely on.
    rng = random.Random(42)
    base_date = dt.date(2026, 5, 1)
    for ticker in ["A", "B", "C", "D", "E", "F", "G"]:
        for day in range(13):
            d = _iso_day(base_date, day)
            _seed_price(conn, ticker, d, 100.0 + rng.uniform(-5, 5))
            if day < 12:
                _seed_sentiment(conn, d, ticker, rng.uniform(-10, 10))
    conn.commit()

    out = validation.compute_ic(
        conn, days=30, horizon=1, asof_today=dt.date(2026, 5, 14)
    )
    assert out["ic_count"] >= 10
    # Weak signal; the verdict may come out as "weak".
    assert abs(out["ic_mean"]) < 0.3
    # The verdict wobbles a little depending on the seed.
    # NOTE(review): accepting "strong" for |IC| < 0.3 looks permissive;
    # confirm against compute_ic's verdict thresholds.
    assert out["verdict"] in ("weak", "strong")


def test_min_news_count_filter(conn):
    """Rows whose news_count is below min_news_count are excluded."""
    _seed_sentiment(conn, "2026-05-13", "A", 5.0, news_count=0)
    _seed_sentiment(conn, "2026-05-13", "B", -5.0, news_count=3)
    _seed_price(conn, "A", "2026-05-13", 100.0)
    _seed_price(conn, "A", "2026-05-14", 105.0)
    _seed_price(conn, "B", "2026-05-13", 100.0)
    _seed_price(conn, "B", "2026-05-14", 95.0)
    conn.commit()

    out = validation.compute_ic(
        conn,
        days=30,
        horizon=1,
        min_news_count=1,
        asof_today=dt.date(2026, 5, 14),
    )
    # A is filtered out; with a single ticker left Spearman cannot be
    # computed (< 5), so the day is skipped.
    # NOTE(review): only two tickers are seeded, so if the minimum
    # cross-section really is 5 this would presumably pass even with the
    # filter disabled; consider seeding enough tickers to isolate the filter.
    assert out["ic_count"] == 0


def test_horizon_5_days(conn):
    """With horizon=5 the return uses close[date+5] / close[date] - 1."""
    base_date = dt.date(2026, 5, 1)
    for day in range(20):
        d = _iso_day(base_date, day)
        for i, ticker in enumerate(["A", "B", "C", "D", "E"]):
            _seed_sentiment(conn, d, ticker, i * 2.0 - 4.0)
    # Prices: A and B fall, C is flat, D and E rise (slope = i - 2), i.e.
    # the same ordering as the seeded scores.
    for day in range(25):
        d = _iso_day(base_date, day)
        for i, ticker in enumerate(["A", "B", "C", "D", "E"]):
            slope = i - 2  # -2 .. +2
            _seed_price(conn, ticker, d, 100.0 + slope * day)
    conn.commit()

    out = validation.compute_ic(
        conn, days=30, horizon=5, asof_today=dt.date(2026, 5, 25)
    )
    assert out["horizon_days"] == 5
    assert out["ic_count"] > 0