Files
web-page-backend/stock/tests/test_ai_news_validation.py
gahusb ace0339d33 refactor: rename stock-lab → stock (graduation)
- git mv stock-lab/ → stock/
- docker-compose.yml: 서비스 키 + container_name + build.context +
  frontend.depends_on + agent-office STOCK_LAB_URL → STOCK_URL
- agent-office/app: config.py, service_proxy.py, agents/stock.py, tests/
  STOCK_LAB_URL → STOCK_URL
- nginx/default.conf: proxy_pass http://stock-lab → http://stock (3 lines)
- CLAUDE.md / README.md / STATUS.md / scripts/ 문구 갱신
- stock/ 내부 자기 참조 갱신

lab 네이밍 정책 (feedback_lab_naming.md) graduation.
API URL / Python import / DB 파일명 변경 없음.
2026-05-15 01:45:44 +09:00

121 lines
4.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Tests for ai_news validation harness (Spearman IC)."""
import datetime as dt
import sqlite3
import pytest
from app.screener.ai_news import validation
from app.screener.schema import ensure_screener_schema
@pytest.fixture
def conn():
    """Yield an in-memory SQLite connection prepared for the screener tests.

    The connection uses ``sqlite3.Row`` as its row factory and has had
    ``ensure_screener_schema`` run on it before it is handed to the test.

    Yields:
        The open ``sqlite3.Connection``; it is closed during teardown.
    """
    c = sqlite3.connect(":memory:")
    # try/finally guarantees the connection is closed even when schema setup
    # raises before the yield (in which case no teardown code would run).
    try:
        c.row_factory = sqlite3.Row
        ensure_screener_schema(c)
        yield c
    finally:
        c.close()
def _seed_sentiment(conn, date, ticker, score, news_count=3):
    """Insert a single ``news_sentiment`` row for ``ticker`` on ``date``.

    ``score`` is stored as ``score_raw`` and ``news_count`` as given; the
    remaining columns get fixed placeholders (reason 'r', tokens_input 100,
    tokens_output 20, model 'm'). The insert is not committed here.
    """
    insert_sql = (
        "INSERT INTO news_sentiment (ticker, date, score_raw, reason, news_count, "
        "tokens_input, tokens_output, model) "
        "VALUES (?, ?, ?, 'r', ?, 100, 20, 'm')"
    )
    row_values = (ticker, date, score, news_count)
    conn.execute(insert_sql, row_values)
def _seed_price(conn, ticker, date, close):
    """Insert one ``krx_daily_prices`` row (ticker, date, close).

    Only these three columns are written, and the insert is not committed here.
    """
    insert_sql = "INSERT INTO krx_daily_prices (ticker, date, close) VALUES (?, ?, ?)"
    row_values = (ticker, date, close)
    conn.execute(insert_sql, row_values)
def test_empty_db_returns_skip(conn):
    """With nothing seeded, compute_ic reports no IC samples, no mean, and "skip"."""
    result = validation.compute_ic(
        conn,
        days=30,
        horizon=1,
        asof_today=dt.date(2026, 5, 14),
    )
    assert result["ic_count"] == 0
    assert result["verdict"] == "skip"
    assert result["ic_mean"] is None
def test_strong_positive_ic(conn):
    """Five tickers over 12 scored days, seeded so a higher score means a higher next-day return.

    Every ticker keeps one constant score and its close moves by exactly that
    score per day, which makes score and next-day return monotonically related
    on each day. The test expects a clearly positive mean IC and a "strong"
    verdict.
    """
    start = dt.date(2026, 5, 1)
    # Fixed score per ticker, evenly spaced from -4 to +4.
    score_by_ticker = {"A": -4.0, "B": -2.0, "C": 0.0, "D": 2.0, "E": 4.0}
    # 13 consecutive calendar dates (day 0..12): every date gets a close,
    # while the last one gets no sentiment row.
    iso_dates = [(start + dt.timedelta(days=k)).isoformat() for k in range(13)]
    last_offset = len(iso_dates) - 1

    for ticker, score in score_by_ticker.items():
        for offset, iso_date in enumerate(iso_dates):
            # Day 0 closes at 100; day n closes at 100 + score * n.
            _seed_price(conn, ticker, iso_date, 100.0 + score * offset)
            if offset < last_offset:
                _seed_sentiment(conn, iso_date, ticker, score)
    conn.commit()

    result = validation.compute_ic(
        conn, days=30, horizon=1, asof_today=dt.date(2026, 5, 14)
    )
    assert result["ic_count"] >= 10
    assert result["ic_mean"] > 0.5
    assert result["verdict"] == "strong"
def test_zero_ic_random_data(conn):
    """Scores and returns are seeded independently, so the mean IC should be near 0.

    Seven tickers get 13 days of closes drawn uniformly from 100 +/- 5 and
    12 days of sentiment scores drawn uniformly from [-10, 10].
    """
    import random

    # A private generator keeps the test deterministic without reseeding the
    # process-wide RNG, so other tests' random draws are unaffected.
    # Random(42) produces the same sequence as random.seed(42) did.
    rng = random.Random(42)
    base_date = dt.date(2026, 5, 1)
    for ticker in ["A", "B", "C", "D", "E", "F", "G"]:
        for day in range(13):
            d = (base_date + dt.timedelta(days=day)).isoformat()
            # Draw order (price first, then score) is kept so the seeded data
            # stays identical from run to run.
            _seed_price(conn, ticker, d, 100.0 + rng.uniform(-5, 5))
            if day < 12:
                _seed_sentiment(conn, d, ticker, rng.uniform(-10, 10))
    conn.commit()
    out = validation.compute_ic(conn, days=30, horizon=1, asof_today=dt.date(2026, 5, 14))
    assert out["ic_count"] >= 10
    assert abs(out["ic_mean"]) < 0.3  # weak signal at most
    # Either non-skip verdict is accepted; which one comes out depends on the
    # seeded data. NOTE(review): verdict thresholds are not visible here.
    assert out["verdict"] in ("weak", "strong")
def test_min_news_count_filter(conn):
    """Rows whose news_count is below min_news_count are meant to be excluded.

    Ticker A is seeded with news_count=0 and ticker B with news_count=3, then
    compute_ic is called with min_news_count=1 and must report no IC samples.
    """
    scored_day, next_day = "2026-05-13", "2026-05-14"

    for ticker, score, count in (("A", 5.0, 0), ("B", -5.0, 3)):
        _seed_sentiment(conn, scored_day, ticker, score, news_count=count)

    # Both tickers start at 100; A rises to 105 and B falls to 95 the next day.
    for ticker, next_close in (("A", 105.0), ("B", 95.0)):
        _seed_price(conn, ticker, scored_day, 100.0)
        _seed_price(conn, ticker, next_day, next_close)
    conn.commit()

    result = validation.compute_ic(
        conn,
        days=30,
        horizon=1,
        min_news_count=1,
        asof_today=dt.date(2026, 5, 14),
    )
    # With A filtered out only one ticker remains, which is too few for a
    # Spearman correlation (fewer than 5), so the day is skipped.
    # NOTE(review): if that 5-ticker minimum holds, two tickers would also be
    # skipped without the filter -- consider seeding more tickers so this
    # test fails when the filter is broken.
    assert result["ic_count"] == 0
def test_horizon_5_days(conn):
    """With horizon=5 the return is expected to be close[date+5] / close[date] - 1.

    The test only checks that the reported horizon is 5 and that at least one
    IC sample was produced.
    """
    start = dt.date(2026, 5, 1)
    tickers = ("A", "B", "C", "D", "E")
    # 25 consecutive calendar dates; sentiment covers the first 20 only, so
    # each scored date has a close five days later.
    iso_dates = [(start + dt.timedelta(days=k)).isoformat() for k in range(25)]

    # Constant per-ticker scores: A=-4, B=-2, C=0, D=+2, E=+4.
    for iso_date in iso_dates[:20]:
        for rank, ticker in enumerate(tickers):
            _seed_sentiment(conn, iso_date, ticker, rank * 2.0 - 4.0)

    # Linear prices with per-day slope rank - 2 (-2..+2):
    # A and B fall, C stays flat, D and E rise.
    for offset, iso_date in enumerate(iso_dates):
        for rank, ticker in enumerate(tickers):
            daily_slope = rank - 2
            _seed_price(conn, ticker, iso_date, 100.0 + daily_slope * offset)
    conn.commit()

    result = validation.compute_ic(
        conn, days=30, horizon=5, asof_today=dt.date(2026, 5, 25)
    )
    assert result["horizon_days"] == 5
    assert result["ic_count"] > 0