lotto lab 추천 알고리즘 및 시뮬레이션 강화

2026-02-23 22:32:14 +09:00
parent c96815c2e3
commit 71d9d7a571
4 changed files with 868 additions and 147 deletions
--- a/backend/app/generator.py
+++ b/backend/app/generator.py
@@ -1,100 +1,154 @@
+"""
+시뮬레이션 엔진 - lotto-lab 고도화
+
+[몬테카를로 시뮬레이션 흐름]
+1. 역대 당첨번호 기반 통계 캐시 구성 (build_analysis_cache)
+2. 통계 가중치로 N개 후보 조합 생성 (weighted sampling)
+3. 5가지 기법으로 각 후보 스코어링 (score_combination)
+4. 상위 top_k개 선별하여 DB 저장 (simulation_candidates, best_picks 교체)
+
+[시뮬레이션 파라미터]
+- n_candidates: 1회 시뮬레이션당 생성 후보 수 (기본 20,000)
+- top_k: 선별 및 저장할 상위 개수 (기본 100)
+- best_n: best_picks에 올릴 최상위 개수 (기본 20)
+"""
+
 import random
-import json
 from typing import Dict, Any, List, Optional

-from .db import _conn, save_recommendation_dedup, get_latest_draw, get_all_draw_numbers
-from .recommender import recommend_numbers
-from .utils import calc_metrics, calc_recent_overlap
+from .db import (
+    get_latest_draw,
+    get_all_draw_numbers,
+    save_simulation_run,
+    save_simulation_candidates_bulk,
+    replace_best_picks,
+)
+from .analyzer import build_analysis_cache, build_number_weights, score_combination

-# 순환 참조 방지를 위해 main.py의 calc_metrics 등을 utils.py가 아닌 여기서 재정의하거나 
-# main.py에서 generator를 import할 때 함수 내부에서 하도록 처리.
-# 여기서는 코드가 중복되더라도 안전하게 독립적으로 구현하거나, db/collector만 import.

-def _get_top_performing_params(limit: int = 20) -> List[Dict[str, Any]]:
+def _weighted_sample_6(weights: Dict[int, float]) -> List[int]:
     """
-    최근 1~5등에 당첨된 추천들의 파라미터 조회
+    Draw 6 distinct numbers from 1..45 by weighted sampling without replacement.
+    weights: per-number weight, indexed by every number 1..45 ({1: w1, ..., 45: w45}).
     """
-    sql = """
-    SELECT params 
-    FROM recommendations 
-    WHERE rank > 0 AND rank <= 5 
-    ORDER BY id DESC 
-    LIMIT ?
-    """
-    with _conn() as conn:
-        rows = conn.execute(sql, (limit,)).fetchall()
-    
-    return [json.loads(r["params"]) for r in rows]
+    pool = list(range(1, 46))
+    chosen: List[int] = []
+    for _ in range(6):
+        total = sum(weights[n] for n in pool)  # weight mass of the numbers still in the pool
+        r = random.random() * total  # random point on the cumulative-weight axis
+        acc = 0.0
+        for n in pool:
+            acc += weights[n]
+            if acc >= r:  # first number whose cumulative weight reaches r
+                chosen.append(n)
+                pool.remove(n)  # removed so the same number cannot be picked again
+                break
+    return chosen

-def _perturb_param(val: float, delta: float, min_val: float, max_val: float, is_int: bool = False) -> float:
-    change = random.uniform(-delta, delta)
-    new_val = val + change
-    new_val = max(min_val, min(new_val, max_val))
-    return int(round(new_val)) if is_int else round(new_val, 2)

-def generate_smart_recommendations(count: int = 10) -> int:
+def run_simulation(
+    n_candidates: int = 20000,
+    top_k: int = 100,
+    best_n: int = 20,
+) -> Dict[str, Any]:
     """
-    지능형 자동 생성: 과거 성적 우수 파라미터 기반으로 생성
+    Run one Monte Carlo simulation: sample candidates, score them, persist the best.
+
+    Args:
+        n_candidates: Target number of distinct candidate combinations (default 20,000).
+        top_k: Number of top-scoring candidates stored in the DB (default 100).
+        best_n: Number of top candidates passed to replace_best_picks (default 20).
+
+    Returns:
+        Dict with run_id, total_generated, top_k_selected, best_n_saved, avg_score, best_score, based_on_draw,
+        or {"error": <message>} when get_all_draw_numbers() returns no draws.
     """
     draws = get_all_draw_numbers()
     if not draws:
-        return 0
-        
+        return {"error": "당첨번호 데이터가 없습니다. 먼저 동기화를 실행하세요."}
+
     latest = get_latest_draw()
-    based_on = latest["drw_no"] if latest else None
-    
-    # 1. 성공 사례 조회 (Feedback)
-    top_params = _get_top_performing_params()
-    
-    generated_count = 0
-    
-    for _ in range(count):
-        # 전략 선택: 이력이 있으면 70% 확률로 모방(Exploitation), 30%는 랜덤(Exploration)
-        use_history = (len(top_params) > 0) and (random.random() < 0.7)
-        
-        if use_history:
-            # 과거 우수 파라미터 중 하나 선택하여 변형
-            base = random.choice(top_params)
-            
-            # 파라미터 변형 (유전 알고리즘과 유사)
-            p_window = _perturb_param(base.get("recent_window", 200), 50, 10, 500, True)
-            p_weight = _perturb_param(base.get("recent_weight", 2.0), 1.0, 0.1, 10.0, False)
-            p_avoid = _perturb_param(base.get("avoid_recent_k", 5), 2, 0, 20, True)
-            
-            # Constraints 로직은 복잡하니 일단 랜덤성 부여하거나 유지
-            # (여기서는 기본 파라미터 위주로 튜닝)
-            
-            params = {
-                "recent_window": p_window,
-                "recent_weight": p_weight,
-                "avoid_recent_k": p_avoid,
-                "strategy": "smart_feedback"
-            }
-        else:
-            # 완전 랜덤 탐색
-            params = {
-                "recent_window": random.randint(50, 400),
-                "recent_weight": round(random.uniform(0.5, 5.0), 2),
-                "avoid_recent_k": random.randint(0, 10),
-                "strategy": "random_exploration"
-            }
-            
-        # 생성 시도
-        try:
-            # recommend_numbers는 db.py/main.py 로직과 독립적이므로 여기서 사용 가능
-            # 단, recommend_numbers 함수가 어디 있는지 확인 (recommender.py)
-            res = recommend_numbers(
-                draws, 
-                recent_window=params["recent_window"],
-                recent_weight=params["recent_weight"],
-                avoid_recent_k=params["avoid_recent_k"]
-            )
-            
-            save_recommendation_dedup(based_on, res["numbers"], params)
-            generated_count += 1
-            
-        except Exception as e:
-            print(f"Gen Error: {e}")
+    based_on_draw = latest["drw_no"] if latest else None
+
+    # ── 1. Build the stats cache and number weights (reused for the whole run) ──
+    cache = build_analysis_cache(draws)
+    weights = build_number_weights(cache)
+
+    # ── 2. Generate and score candidates ──────────────────────────────────────
+    candidates: List[Dict[str, Any]] = []
+    seen_keys: set = set()
+    max_attempts = n_candidates * 3  # headroom for samples rejected as duplicates
+
+    attempts = 0
+    while len(candidates) < n_candidates and attempts < max_attempts:
+        attempts += 1
+        nums = _weighted_sample_6(weights)
+        key = tuple(sorted(nums))
+        if key in seen_keys:
             continue
-            
-    return generated_count
+        seen_keys.add(key)
+
+        scores = score_combination(nums, cache)
+        candidates.append({
+            "numbers": sorted(nums),
+            **scores,
+        })
+
+    # ── 3. Sort by score (descending) and keep the top_k ──────────────────────
+    candidates.sort(key=lambda x: -x["score_total"])
+    top_candidates = candidates[:top_k]
+
+    # Flag the stored candidates that are also among the top best_n
+    best_keys = {tuple(c["numbers"]) for c in top_candidates[:best_n]}
+    for c in top_candidates:
+        c["is_best"] = tuple(c["numbers"]) in best_keys
+
+    avg_score = (
+        sum(c["score_total"] for c in top_candidates) / len(top_candidates)
+        if top_candidates else 0.0
+    )
+    best_score = top_candidates[0]["score_total"] if top_candidates else 0.0
+
+    # ── 4. Persist to the DB ──────────────────────────────────────────────────
+    run_id = save_simulation_run(
+        strategy="monte_carlo",
+        total_generated=len(candidates),
+        top_k_selected=len(top_candidates),
+        avg_score=avg_score,
+        notes=f"based_on_draw={based_on_draw}, history={len(draws)}회",
+    )
+
+    # Only the top_k candidates are stored; the full candidate list stays in memory
+    save_simulation_candidates_bulk(run_id, top_candidates, based_on_draw)
+
+    # Hand the top best_n candidates to replace_best_picks
+    best_picks_data = [
+        {
+            "numbers": c["numbers"],
+            "score_total": c["score_total"],
+            "rank_in_run": i + 1,
+        }
+        for i, c in enumerate(top_candidates[:best_n])
+    ]
+    replace_best_picks(best_picks_data, run_id, based_on_draw)
+
+    return {
+        "run_id": run_id,
+        "total_generated": len(candidates),
+        "top_k_selected": len(top_candidates),
+        "best_n_saved": len(best_picks_data),
+        "avg_score": round(avg_score, 6),
+        "best_score": round(best_score, 6),
+        "based_on_draw": based_on_draw,
+    }
+
+
+def generate_smart_recommendations(count: int = 10) -> int:
+    """
+    Backward-compatibility wrapper so existing callers (e.g. /api/admin/auto_gen, per the original note) keep working.
+    Delegates to run_simulation with 5,000 candidates and top_k = best_n = count; returns best_n_saved, or 0 on error.
+    """
+    result = run_simulation(n_candidates=5000, top_k=count, best_n=count)
+    if "error" in result:
+        return 0
+    return result.get("best_n_saved", 0)