refactor(lotto): Phase 1 코드리뷰 반영 (로컬 RNG·write-once·가드·테스트 보강)
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,10 +1,10 @@
|
|||||||
"""로또 자가학습 백테스트 — 순수 연산 (FastAPI 의존성 0, Windows 이전 대비)."""
|
"""로또 자가학습 백테스트 — 순수 연산 (FastAPI 의존성 0, Windows 이전 대비)."""
|
||||||
|
import logging
|
||||||
import random
|
import random
|
||||||
from typing import Any, Dict, List, Optional, Tuple
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
from .analyzer import build_analysis_cache, build_number_weights, score_combination
|
from .analyzer import build_analysis_cache, build_number_weights, score_combination
|
||||||
from .utils import weighted_sample_6
|
from .utils import weighted_sample_6
|
||||||
from .weight_evolver import count_match
|
|
||||||
|
|
||||||
|
|
||||||
def grade_tickets(tickets: List[List[int]], winning6: List[int], bonus: int) -> Dict[str, Any]:
|
def grade_tickets(tickets: List[List[int]], winning6: List[int], bonus: int) -> Dict[str, Any]:
|
||||||
@@ -45,8 +45,7 @@ def prize_counts(hist: Dict[str, Any]) -> Dict[str, int]:
|
|||||||
def generate_pool(cache, number_weights, n: int = 20000,
|
def generate_pool(cache, number_weights, n: int = 20000,
|
||||||
seed: Optional[int] = None) -> List[List[int]]:
|
seed: Optional[int] = None) -> List[List[int]]:
|
||||||
"""가중 샘플링으로 distinct 후보 풀 생성."""
|
"""가중 샘플링으로 distinct 후보 풀 생성."""
|
||||||
if seed is not None:
|
rng = random.Random(seed)
|
||||||
random.seed(seed)
|
|
||||||
seen, pool = set(), []
|
seen, pool = set(), []
|
||||||
attempts, cap = 0, n * 4
|
attempts, cap = 0, n * 4
|
||||||
while len(pool) < n and attempts < cap:
|
while len(pool) < n and attempts < cap:
|
||||||
@@ -56,22 +55,29 @@ def generate_pool(cache, number_weights, n: int = 20000,
|
|||||||
continue
|
continue
|
||||||
seen.add(nums)
|
seen.add(nums)
|
||||||
pool.append(list(nums))
|
pool.append(list(nums))
|
||||||
|
if len(pool) < n:
|
||||||
|
logging.getLogger(__name__).warning(
|
||||||
|
"generate_pool: requested %d, got %d", n, len(pool)
|
||||||
|
)
|
||||||
return pool
|
return pool
|
||||||
|
|
||||||
|
|
||||||
def purchase_tickets(pool, cache, W: List[float], k: int) -> List[List[int]]:
    """Rank the pool by ``score_combination(·, cache, W)`` and return the top ``k`` tickets.

    Tickets are returned in descending ``score_total`` order; tickets with
    equal scores keep their relative pool order because the sort is stable.
    This function performs no de-duplication itself, so the result is only
    as distinct as the pool it is given.

    Args:
        pool: Candidate tickets to rank.
        cache: Analysis cache, forwarded unchanged to ``score_combination``.
        W: Weight vector, forwarded unchanged to ``score_combination``.
        k: Number of tickets to return; must satisfy ``0 <= k <= len(pool)``.

    Returns:
        The ``k`` highest-scoring tickets from ``pool``.

    Raises:
        ValueError: If ``k`` is negative or larger than the pool.
    """
    if k < 0:
        # Without this guard ranked[:k] would silently drop the last |k|
        # tickets and return everything else.
        raise ValueError(f"k={k} must be non-negative")
    if k > len(pool):
        raise ValueError(f"k={k} exceeds pool size {len(pool)}")
    ranked = sorted(
        pool,
        key=lambda t: score_combination(t, cache, W)["score_total"],
        reverse=True,
    )
    return ranked[:k]
|
||||||
|
|
||||||
|
|
||||||
def random_null_tickets(k: int, seed: Optional[int] = None) -> List[List[int]]:
|
def random_null_tickets(k: int, seed: Optional[int] = None) -> List[List[int]]:
|
||||||
"""무작위 distinct 티켓 k장 (null-model 대조군)."""
|
"""무작위 distinct 티켓 k장 (null-model 대조군)."""
|
||||||
if seed is not None:
|
rng = random.Random(seed)
|
||||||
random.seed(seed)
|
|
||||||
seen, out = set(), []
|
seen, out = set(), []
|
||||||
while len(out) < k:
|
guard = 0
|
||||||
nums = tuple(sorted(random.sample(range(1, 46), 6)))
|
while len(out) < k and guard < k * 200:
|
||||||
|
guard += 1
|
||||||
|
nums = tuple(sorted(rng.sample(range(1, 46), 6)))
|
||||||
if nums in seen:
|
if nums in seen:
|
||||||
continue
|
continue
|
||||||
seen.add(nums)
|
seen.add(nums)
|
||||||
@@ -82,18 +88,17 @@ def random_null_tickets(k: int, seed: Optional[int] = None) -> List[List[int]]:
|
|||||||
def coverage_tickets(k: int, seed: Optional[int] = None) -> List[List[int]]:
|
def coverage_tickets(k: int, seed: Optional[int] = None) -> List[List[int]]:
|
||||||
"""greedy 커버리지 — 아직 덜 쓰인 번호를 우선 배치해 번호를 넓게 분산.
|
"""greedy 커버리지 — 아직 덜 쓰인 번호를 우선 배치해 번호를 넓게 분산.
|
||||||
(휠링/보장설계는 향후. 현재는 distinct + 번호 사용 균등화)"""
|
(휠링/보장설계는 향후. 현재는 distinct + 번호 사용 균등화)"""
|
||||||
if seed is not None:
|
rng = random.Random(seed)
|
||||||
random.seed(seed)
|
|
||||||
usage = {n: 0 for n in range(1, 46)}
|
usage = {n: 0 for n in range(1, 46)}
|
||||||
seen, out = set(), []
|
seen, out = set(), []
|
||||||
guard = 0
|
guard = 0
|
||||||
while len(out) < k and guard < k * 50:
|
while len(out) < k and guard < k * 50:
|
||||||
guard += 1
|
guard += 1
|
||||||
ranked = sorted(range(1, 46), key=lambda n: (usage[n], random.random()))
|
ranked = sorted(range(1, 46), key=lambda n: (usage[n], rng.random()))
|
||||||
nums = tuple(sorted(ranked[:6]))
|
nums = tuple(sorted(ranked[:6]))
|
||||||
if nums in seen:
|
if nums in seen:
|
||||||
# 동점 흔들기: 약간 더 깊은 풀에서 샘플
|
# 동점 흔들기: top-6과 disjoint한 영역에서 샘플
|
||||||
nums = tuple(sorted(random.sample(ranked[:12], 6)))
|
nums = tuple(sorted(rng.sample(ranked[6:12], 6)))
|
||||||
if nums in seen:
|
if nums in seen:
|
||||||
continue
|
continue
|
||||||
seen.add(nums)
|
seen.add(nums)
|
||||||
|
|||||||
@@ -1501,10 +1501,10 @@ def save_backtest_run(draw_no, strategy, weight_label, weight_json, trial_id,
|
|||||||
weight_json=excluded.weight_json, trial_id=excluded.trial_id,
|
weight_json=excluded.weight_json, trial_id=excluded.trial_id,
|
||||||
n_tickets=excluded.n_tickets, m3=excluded.m3, m4=excluded.m4,
|
n_tickets=excluded.n_tickets, m3=excluded.m3, m4=excluded.m4,
|
||||||
m5=excluded.m5, m6=excluded.m6, bonus_hits=excluded.bonus_hits,
|
m5=excluded.m5, m6=excluded.m6, bonus_hits=excluded.bonus_hits,
|
||||||
best_match=excluded.best_match, avg_meta_score=excluded.avg_meta_score,
|
best_match=excluded.best_match, avg_meta_score=excluded.avg_meta_score
|
||||||
created_at=datetime('now')
|
|
||||||
""",
|
""",
|
||||||
(draw_no, strategy, weight_label,
|
(draw_no, strategy, weight_label,
|
||||||
|
# weight_json must be a dict/list (not a pre-serialized string) to avoid double-encoding
|
||||||
json.dumps(weight_json) if weight_json is not None else None,
|
json.dumps(weight_json) if weight_json is not None else None,
|
||||||
trial_id, n_tickets,
|
trial_id, n_tickets,
|
||||||
hist.get("m3",0), hist.get("m4",0), hist.get("m5",0), hist.get("m6",0),
|
hist.get("m3",0), hist.get("m4",0), hist.get("m5",0), hist.get("m6",0),
|
||||||
@@ -1536,8 +1536,7 @@ def save_winner_calibration(draw_no, winning, scores, percentile,
|
|||||||
score_frequency=excluded.score_frequency, score_fingerprint=excluded.score_fingerprint,
|
score_frequency=excluded.score_frequency, score_fingerprint=excluded.score_fingerprint,
|
||||||
score_gap=excluded.score_gap, score_cooccur=excluded.score_cooccur,
|
score_gap=excluded.score_gap, score_cooccur=excluded.score_cooccur,
|
||||||
score_diversity=excluded.score_diversity, percentile=excluded.percentile,
|
score_diversity=excluded.score_diversity, percentile=excluded.percentile,
|
||||||
my_pick_avg=excluded.my_pick_avg, cache_draws=excluded.cache_draws,
|
my_pick_avg=excluded.my_pick_avg, cache_draws=excluded.cache_draws
|
||||||
created_at=datetime('now')
|
|
||||||
""",
|
""",
|
||||||
(draw_no, json.dumps(winning), scores["score_total"], scores["score_frequency"],
|
(draw_no, json.dumps(winning), scores["score_total"], scores["score_frequency"],
|
||||||
scores["score_fingerprint"], scores["score_gap"], scores["score_cooccur"],
|
scores["score_fingerprint"], scores["score_gap"], scores["score_cooccur"],
|
||||||
@@ -1557,7 +1556,7 @@ def get_calibration_history(limit: int = 52) -> List[Dict[str, Any]]:
|
|||||||
(limit,)).fetchall()
|
(limit,)).fetchall()
|
||||||
return [dict(r) for r in rows]
|
return [dict(r) for r in rows]
|
||||||
|
|
||||||
def get_calibrated_draw_nos() -> set[int]:
    """Return the set of ``draw_no`` values present in ``winner_calibration``."""
    query = "SELECT draw_no FROM winner_calibration"
    with _conn() as conn:
        rows = conn.execute(query).fetchall()
        return {row["draw_no"] for row in rows}
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
from app import backtest as bt
|
from app import backtest as bt
|
||||||
from app.analyzer import build_analysis_cache, score_combination
|
from app.analyzer import build_analysis_cache, build_number_weights, score_combination
|
||||||
|
|
||||||
|
|
||||||
def _toy_draws(n=120):
|
def _toy_draws(n=120):
|
||||||
@@ -38,16 +38,17 @@ def test_grade_tickets_histogram_and_prizes():
|
|||||||
|
|
||||||
def test_purchase_tickets_distinct_and_count():
    """purchase_tickets yields k distinct tickets whose mean score is at least the pool mean."""
    k = 50
    W = [0.25, 0.30, 0.20, 0.15, 0.10]
    cache = build_analysis_cache(_toy_draws())
    nw = build_number_weights(cache)
    pool = bt.generate_pool(cache, nw, n=2000, seed=7)

    def total_score(ticket):
        return score_combination(ticket, cache, W)["score_total"]

    bought = bt.purchase_tickets(pool, cache, W, k=k)
    assert len(bought) == k
    assert len(set(map(tuple, bought))) == k  # distinct
    # Top-k under the W ranking: the mean score must not fall below the whole-pool mean.
    avg_bought = sum(map(total_score, bought)) / k
    avg_pool = sum(map(total_score, pool)) / len(pool)
    assert avg_bought >= avg_pool
|
||||||
|
|
||||||
|
|
||||||
def test_random_null_and_coverage_distinct():
|
def test_random_null_and_coverage_distinct():
|
||||||
@@ -57,3 +58,24 @@ def test_random_null_and_coverage_distinct():
|
|||||||
flat = {n for t in cov for n in t}
|
flat = {n for t in cov for n in t}
|
||||||
assert len(cov) == 9 and len({tuple(t) for t in cov}) == 9
|
assert len(cov) == 9 and len({tuple(t) for t in cov}) == 9
|
||||||
assert len(flat) >= 40 # 커버리지 전략은 번호를 넓게 퍼뜨림
|
assert len(flat) >= 40 # 커버리지 전략은 번호를 넓게 퍼뜨림
|
||||||
|
|
||||||
|
|
||||||
|
def test_generate_pool_partial_fill(monkeypatch):
    """generate_pool must return a short pool, not raise, when sampling stalls.

    ``weighted_sample_6`` is patched to always yield the same combination, so
    the pool cannot collect ``n`` distinct tickets and the call is expected to
    come back with fewer than ``n`` entries.
    """
    import random

    # Private RNG: seeding the module-level generator would leak
    # deterministic state into every test that runs afterwards.
    rng = random.Random(42)
    tiny_draws = [(i, sorted(rng.sample(range(1, 46), 6))) for i in range(1, 10)]
    cache = build_analysis_cache(tiny_draws)
    nw = build_number_weights(cache)

    # Every draw is the identical combination, so after the first ticket
    # only duplicates can be produced.
    monkeypatch.setattr(bt, "weighted_sample_6", lambda _w: [1, 2, 3, 4, 5, 6])

    n = 50
    pool = bt.generate_pool(cache, nw, n=n, seed=0)
    # Must return without raising, hold fewer than n tickets, and stay distinct.
    assert isinstance(pool, list)
    assert len(pool) < n
    assert len({tuple(t) for t in pool}) == len(pool)
|
||||||
|
|||||||
@@ -29,3 +29,50 @@ def test_backtest_runs_unique(monkeypatch):
|
|||||||
rows = db.get_backtest_runs(draw_no=100)
|
rows = db.get_backtest_runs(draw_no=100)
|
||||||
assert len(rows) == 1
|
assert len(rows) == 1
|
||||||
assert rows[0]["m3"] == 2 # 마지막 값으로 갱신
|
assert rows[0]["m3"] == 2 # 마지막 값으로 갱신
|
||||||
|
|
||||||
|
|
||||||
|
_SCORES = {
|
||||||
|
"score_total": 1.23,
|
||||||
|
"score_frequency": 0.30,
|
||||||
|
"score_fingerprint": 0.25,
|
||||||
|
"score_gap": 0.20,
|
||||||
|
"score_cooccur": 0.28,
|
||||||
|
"score_diversity": 0.20,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_winner_calibration_upsert(monkeypatch):
    """Saving the same draw twice upserts: one row remains and it carries the latest values."""
    db = _fresh_db(monkeypatch)
    winning = [3, 7, 15, 22, 33, 41]

    db.save_winner_calibration(
        draw_no=200, winning=winning, scores=_SCORES,
        percentile=75.0, my_pick_avg=0.9, cache_draws=100,
    )
    # Second save for the same draw_no, this time with changed values.
    updated_scores = {**_SCORES, "score_total": 2.00}
    db.save_winner_calibration(
        draw_no=200, winning=winning, scores=updated_scores,
        percentile=80.0, my_pick_avg=1.1, cache_draws=110,
    )

    row = db.get_winner_calibration(200)
    assert row is not None

    # Exactly one row may exist for this draw.
    count_sql = "SELECT COUNT(*) AS c FROM winner_calibration WHERE draw_no=200"
    with db._conn() as conn:
        cnt = conn.execute(count_sql).fetchone()["c"]
    assert cnt == 1
    assert row["percentile"] == 80.0
    assert row["score_total"] == 2.00
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_calibrated_draw_nos(monkeypatch):
    """Every saved draw_no must be contained in the set from get_calibrated_draw_nos."""
    db = _fresh_db(monkeypatch)
    saved_draws = (301, 302, 303)
    winning = [1, 2, 3, 4, 5, 6]
    for draw_no in saved_draws:
        db.save_winner_calibration(
            draw_no=draw_no, winning=winning, scores=_SCORES,
            percentile=50.0, my_pick_avg=0.5, cache_draws=50,
        )

    nos = db.get_calibrated_draw_nos()
    assert isinstance(nos, set)
    assert set(saved_draws) <= nos
|
||||||
|
|||||||
Reference in New Issue
Block a user