반복적인 IPC 오류 해결, 봇 오류 해결, 인증 오류 해결, 서버 자원 할당 오류 해결, 코드 리팩토링

2026-02-14 18:03:13 +09:00
parent 4fd0aa91bc
commit 9dbf6e6791
15 changed files with 1452 additions and 847 deletions
--- a/modules/analysis/deep_learning.py
+++ b/modules/analysis/deep_learning.py
@@ -1,40 +1,36 @@
+import os
+import time
 import torch
 import torch.nn as nn
 import numpy as np
 from sklearn.preprocessing import MinMaxScaler

+from modules.config import Config
+
+# cuDNN 벤치마크 활성화 (고정 입력 크기에 대해 최적 커널 자동 선택)
+torch.backends.cudnn.benchmark = True
+
+
 class Attention(nn.Module):
-    """Attention Mechanism for LSTM"""
    def __init__(self, hidden_size):
        super(Attention, self).__init__()
-        self.hidden_size = hidden_size
        self.attn = nn.Linear(hidden_size, 1)
-        
+
    def forward(self, lstm_output):
-        # lstm_output: [batch_size, seq_len, hidden_size]
-        # attn_weights: [batch_size, seq_len, 1]
        attn_weights = torch.softmax(self.attn(lstm_output), dim=1)
-        # context: [batch_size, hidden_size]
        context = torch.sum(attn_weights * lstm_output, dim=1)
        return context, attn_weights

+
 class AdvancedLSTM(nn.Module):
-    """
-    [RTX 5070 Ti Optimized] High-Capacity LSTM with Attention
-    - Hidden Size: 512 (Rich Feature Extraction)
-    - Layers: 4 (Deep Reasoning)
-    - Attention: Focus on critical time steps
-    """
    def __init__(self, input_size=1, hidden_size=512, num_layers=4, output_size=1, dropout=0.3):
        super(AdvancedLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
-        
-        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, 
-                           batch_first=True, dropout=dropout)
-        
+
+        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
+                            batch_first=True, dropout=dropout)
        self.attention = Attention(hidden_size)
-        
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 2),
            nn.ReLU(),
@@ -45,162 +41,371 @@ class AdvancedLSTM(nn.Module):
        )

    def forward(self, x):
-        # x: [batch, seq, feature]
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
-        
-        # LSTM Output
-        lstm_out, _ = self.lstm(x, (h0, c0)) # [batch, seq, hidden]
-        
-        # Attention Mechanism
-        context, _ = self.attention(lstm_out) # [batch, hidden]
-        
-        # Final Prediction
+        lstm_out, _ = self.lstm(x, (h0, c0))
+        context, _ = self.attention(lstm_out)
        out = self.fc(context)
        return out

+
+def _unload_ollama():
+    """Best-effort request asking Ollama to unload its model (keep_alive=0) before LSTM training."""
+    try:
+        import requests
+        url = f"{Config.OLLAMA_API_URL}/api/generate"
+        payload = {"model": Config.OLLAMA_MODEL, "keep_alive": 0}
+        # raise_for_status() keeps the success message from being printed on an HTTP error.
+        requests.post(url, json=payload, timeout=5).raise_for_status()
+        print("[AI] Ollama model unloaded (GPU memory freed)")
+        time.sleep(1)  # wait for the memory to be released
+    except Exception as e:
+        # Non-fatal by design (training still runs), but the failure is no longer hidden.
+        print(f"[AI] Ollama unload skipped: {e}")
+
+
+def _preload_ollama():
+    """Best-effort request asking Ollama to load its model again (keep_alive "10m") after training."""
+    try:
+        import requests
+        url = f"{Config.OLLAMA_API_URL}/api/generate"
+        # Empty prompt: the call is only meant to (re)load the model, not to generate text.
+        payload = {"model": Config.OLLAMA_MODEL, "prompt": "", "keep_alive": "10m"}
+        requests.post(url, json=payload, timeout=10).raise_for_status()
+    except Exception as e:
+        # Non-fatal by design: the caller continues either way,
+        # but the failure is reported instead of being silently ignored.
+        print(f"[AI] Ollama preload skipped: {e}")
+
+
+def _log_gpu_memory(tag=""):
+    """Print allocated and reserved memory of CUDA device 0 in GiB; does nothing without CUDA."""
+    if torch.cuda.is_available():
+        gib = 1024**3
+        used, held = torch.cuda.memory_allocated(0) / gib, torch.cuda.memory_reserved(0) / gib
+        print(f"[AI GPU {tag}] Allocated: {used:.2f}GB / Reserved: {held:.2f}GB")
+
+
 class PricePredictor:
    """
-    주가 예측을 위한 고성능 Deep Learning 모델 (RTX 5070 Ti Edition)
+    주가 예측 Deep Learning 모델 (GPU 최적화)
+    - 전체 학습 데이터를 GPU에 상주 (CPU↔GPU 전송 최소화)
+    - Ollama 모델 언로드/리로드로 GPU 메모리 확보
+    - Early Stopping + Mixed Precision (FP16)
+    - 종목별 모델 체크포인트
    """
    def __init__(self):
+        """Build the model, select CUDA (AMP when supported) or CPU, and set training hyperparameters."""
        self.scaler = MinMaxScaler(feature_range=(0, 1))
-        
-        # [Hardware Spec] RTX 5070 Ti (16GB VRAM) 맞춤 설정
+
        self.hidden_size = 512
        self.num_layers = 4
-        
-        self.model = AdvancedLSTM(input_size=1, hidden_size=self.hidden_size, 
-                                num_layers=self.num_layers, dropout=0.3)
+
+        self.model = AdvancedLSTM(input_size=1, hidden_size=self.hidden_size,
+                                  num_layers=self.num_layers, dropout=0.3)
        self.criterion = nn.MSELoss()
-        
+
        # CUDA 설정
        self.device = torch.device('cpu')
-        
+        self.use_amp = False
+
        if torch.cuda.is_available():
            try:
                gpu_name = torch.cuda.get_device_name(0)
                vram_gb = torch.cuda.get_device_properties(0).total_memory / 1024**3
-                
-                # GPU 할당
                self.device = torch.device('cuda')
                self.model.to(self.device)
-                
-                # Warm-up (컴파일 최적화 유도)
-                dummy = torch.zeros(1, 60, 1).to(self.device)
-                _ = self.model(dummy)
-                
-                print(f"🚀 [AI] Powered by {gpu_name} ({vram_gb:.1f}GB) - High Performance Mode On")
-                
+
+                # Mixed precision only on compute capability >= 7.0 (Volta or newer)
+                if torch.cuda.get_device_capability(0)[0] >= 7:
+                    self.use_amp = True
+
+                # Warm-up forward pass to trigger CUDA kernel compilation
+                dummy = torch.zeros(1, 60, 1, device=self.device)
+                with torch.no_grad():
+                    _ = self.model(dummy)
+                torch.cuda.synchronize()
+
+                print(f"[AI] GPU Mode: {gpu_name} ({vram_gb:.1f}GB)"
+                      f" | FP16={'ON' if self.use_amp else 'OFF'}"
+                      f" | cuDNN Benchmark=ON")
+                _log_gpu_memory("init")
            except Exception as e:
-                print(f"⚠️ [AI] GPU Init Failed: {e}")
+                print(f"[AI] GPU Init Failed ({e}), falling back to CPU")
                self.device = torch.device('cpu')
+                self.use_amp = False  # AMP may have been enabled before the failure; never keep it on CPU
+                self.model.to(self.device)
        else:
-            print("⚠️ [AI] Running on CPU (Low Performance)")
-        
-        # Optimizer 설정 (AdamW가 일반화 성능이 좀 더 좋음)
+            print("[AI] No CUDA GPU detected. Running on CPU.")
+
        self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=0.0005, weight_decay=1e-4)
-        
-        # 학습 파라미터 강화
+        self.scaler_amp = torch.amp.GradScaler('cuda') if self.use_amp else None
+
        self.batch_size = 64
-        self.epochs = 200 # 충분한 학습
-        self.seq_length = 60 # 60일(약 3개월) 패턴 분석
-        
+        self.max_epochs = 200
+        self.seq_length = 60
+        self.patience = 15
+
        self.training_status = {
            "is_training": False,
-            "loss": 0.0
+            "loss": 0.0,
+            "current_ticker": None
        }

    @staticmethod
    def verify_hardware():
-        """서버 시작 시 하드웨어 가속 여부 점검 및 로그 출력"""
        if torch.cuda.is_available():
            try:
                gpu_name = torch.cuda.get_device_name(0)
                vram_gb = torch.cuda.get_device_properties(0).total_memory / 1024**3
-                print(f"🚀 [AI Check] Hardware Detected: {gpu_name} ({vram_gb:.1f}GB VRAM)")
-                print(f"   ✅ High Performance Mode is READY.")
+                print(f"[AI Check] {gpu_name} ({vram_gb:.1f}GB VRAM) | cuDNN={torch.backends.cudnn.is_available()}")
                return True
            except Exception as e:
-                print(f"⚠️ [AI Check] GPU Error: {e}")
+                print(f"[AI Check] GPU Error: {e}")  # querying device 0 failed: report it and return False
                return False
-        else:
-            print("⚠️ [AI Check] No GPU Detected. Running in CPU Mode.")
-            return False
+        print("[AI Check] No GPU. CPU Mode.")  # reached only when torch.cuda.is_available() is False
+        return False

-    def train_and_predict(self, prices, forecast_days=1):
-        """
-        Online Learning & Prediction
-        """
-        # 데이터가 최소 시퀀스 길이 + 여유분보다 적으면 예측 불가
+    def _get_checkpoint_path(self, ticker):  # per-ticker checkpoint file "<ticker>_lstm.pt" under Config.MODEL_DIR
+        return os.path.join(Config.MODEL_DIR, f"{ticker}_lstm.pt")
+
+    def _load_checkpoint(self, ticker):  # restore model + optimizer state for ticker; True only on success
+        path = self._get_checkpoint_path(ticker)
+        if os.path.exists(path):
+            try:
+                checkpoint = torch.load(path, map_location=self.device, weights_only=True)
+                self.model.load_state_dict(checkpoint['model_state_dict'])
+                self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])  # NOTE(review): if this raises, the model weights above are already replaced although False is returned
+                print(f"[AI] Checkpoint loaded: {ticker}")
+                return True
+            except Exception as e:
+                print(f"[AI] Checkpoint load failed ({ticker}): {e}")  # failure is reported, then handled like a missing checkpoint
+        return False
+
+    def _save_checkpoint(self, ticker, epoch, loss):
+        """Save model/optimizer state for *ticker*, creating the checkpoint directory if missing."""
+        path = self._get_checkpoint_path(ticker)
+        try:
+            os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
+            state = {'model_state_dict': self.model.state_dict(),
+                     'optimizer_state_dict': self.optimizer.state_dict(),
+                     'epoch': epoch, 'loss': loss}
+            torch.save(state, path)
+        except Exception as e:
+            print(f"[AI] Checkpoint save failed ({ticker}): {e}")
+
+    def train_and_predict(self, prices, forecast_days=1, ticker=None):  # NOTE(review): forecast_days is never read below
+        """Train on *prices* and predict the next value; None if fewer than seq_length + 10 prices."""
        if len(prices) < (self.seq_length + 10):
-            return None 
-            
-        # 1. 데이터 전처리
+            return None
+
+        is_gpu = self.device.type == 'cuda'
+
+        # --- Unload the Ollama model (frees GPU memory) ---
+        if is_gpu:
+            _unload_ollama()
+            torch.cuda.empty_cache()
+            _log_gpu_memory("pre-train")
+
+        t_start = time.time()
+
+        # 1. Preprocessing (numpy work on the CPU)
        data = np.array(prices).reshape(-1, 1)
        scaled_data = self.scaler.fit_transform(data)
-        
-        x_train, y_train = [], []
+
+        x_seqs, y_seqs = [], []
        for i in range(len(scaled_data) - self.seq_length):
-            x_train.append(scaled_data[i:i+self.seq_length])
-            y_train.append(scaled_data[i+self.seq_length])
-            
-        x_train_t = torch.FloatTensor(np.array(x_train)).to(self.device)
-        y_train_t = torch.FloatTensor(np.array(y_train)).to(self.device)
-        
-        # 2. 학습
+            x_seqs.append(scaled_data[i:i + self.seq_length])
+            y_seqs.append(scaled_data[i + self.seq_length])
+
+        # 2. Build the tensors and move them to the device right away (no CPU<->GPU transfers afterwards)
+        x_all = torch.FloatTensor(np.array(x_seqs)).to(self.device)
+        y_all = torch.FloatTensor(np.array(y_seqs)).to(self.device)
+
+        # Validation split (80/20)
+        split_idx = int(len(x_all) * 0.8)
+        x_train = x_all[:split_idx]
+        y_train = y_all[:split_idx]
+        x_val = x_all[split_idx:]
+        y_val = y_all[split_idx:]
+
+        dataset_size = len(x_train)
+
+        # 3. Load checkpoint
+        has_checkpoint = False
+        if ticker:
+            has_checkpoint = self._load_checkpoint(ticker)
+        max_epochs = 50 if has_checkpoint else self.max_epochs  # capped at 50 epochs when a checkpoint was loaded
+
+        # 4. Training (whole dataset resident on the device, no DataLoader)
        self.model.train()
        self.training_status["is_training"] = True
-        
-        dataset_size = len(x_train_t)
+        if ticker:
+            self.training_status["current_ticker"] = ticker
+
+        best_val_loss = float('inf')
+        patience_counter = 0
        final_loss = 0.0
-        
-        for epoch in range(self.epochs):
-            perm = torch.randperm(dataset_size).to(self.device)
-            x_shuffled = x_train_t[perm]
-            y_shuffled = y_train_t[perm]
-            
+        actual_epochs = 0
+
+        for epoch in range(max_epochs):
+            # --- Training (shuffle + mini-batches on the device) ---
+            perm = torch.randperm(dataset_size, device=self.device)
+            x_shuffled = x_train[perm]
+            y_shuffled = y_train[perm]
+
            epoch_loss = 0.0
            steps = 0
-            
+
            for i in range(0, dataset_size, self.batch_size):
-                batch_x = x_shuffled[i:min(i+self.batch_size, dataset_size)]
-                batch_y = y_shuffled[i:min(i+self.batch_size, dataset_size)]
-                
-                self.optimizer.zero_grad()
-                outputs = self.model(batch_x)
-                loss = self.criterion(outputs, batch_y)
-                loss.backward()
-                self.optimizer.step()
-                
+                end = min(i + self.batch_size, dataset_size)
+                batch_x = x_shuffled[i:end]
+                batch_y = y_shuffled[i:end]
+
+                self.optimizer.zero_grad(set_to_none=True)
+
+                if self.use_amp:
+                    with torch.amp.autocast('cuda'):
+                        outputs = self.model(batch_x)
+                        loss = self.criterion(outputs, batch_y)
+                    self.scaler_amp.scale(loss).backward()
+                    self.scaler_amp.step(self.optimizer)
+                    self.scaler_amp.update()
+                else:
+                    outputs = self.model(batch_x)
+                    loss = self.criterion(outputs, batch_y)
+                    loss.backward()
+                    self.optimizer.step()
+
                epoch_loss += loss.item()
                steps += 1
-            
-            final_loss = epoch_loss / max(1, steps)
-            
+
+            train_loss = epoch_loss / max(1, steps)
+
+            # --- Validation (performed directly on the device) ---
+            self.model.eval()
+            with torch.no_grad():
+                if self.use_amp:
+                    with torch.amp.autocast('cuda'):
+                        val_out = self.model(x_val)
+                        val_loss = self.criterion(val_out, y_val).item()
+                else:
+                    val_out = self.model(x_val)
+                    val_loss = self.criterion(val_out, y_val).item()
+            self.model.train()
+
+            final_loss = train_loss  # reported loss and confidence use the last epoch's training loss
+            actual_epochs = epoch + 1
+
+            if val_loss < best_val_loss:
+                best_val_loss = val_loss
+                patience_counter = 0
+            else:
+                patience_counter += 1
+                if patience_counter >= self.patience:
+                    break  # early stop; NOTE(review): weights of the best-validation epoch are not restored
+        # NOTE(review): no try/finally here - an exception during training leaves is_training True and skips the Ollama reload
        self.training_status["is_training"] = False
        self.training_status["loss"] = final_loss
-            
-        # 3. 예측
+
+        if is_gpu:
+            torch.cuda.synchronize()  # wait for queued GPU work before reading the clock
+        elapsed = time.time() - t_start
+        print(f"[AI] {ticker or '?'}: {actual_epochs} epochs in {elapsed:.1f}s"
+              f" | loss={final_loss:.6f} val={best_val_loss:.6f}"
+              f" | device={self.device}")
+
+        # 5. Save checkpoint
+        if ticker:
+            self._save_checkpoint(ticker, actual_epochs, final_loss)
+
+        # 6. Prediction
        self.model.eval()
        with torch.no_grad():
-            last_seq = torch.FloatTensor(scaled_data[-self.seq_length:]).unsqueeze(0).to(self.device)
-            predicted_scaled = self.model(last_seq)
-            predicted_price = self.scaler.inverse_transform(predicted_scaled.cpu().numpy())[0][0]
-            
+            last_seq = torch.FloatTensor(
+                scaled_data[-self.seq_length:]
+            ).unsqueeze(0).to(self.device)
+
+            if self.use_amp:
+                with torch.amp.autocast('cuda'):
+                    predicted_scaled = self.model(last_seq)
+            else:
+                predicted_scaled = self.model(last_seq)
+
+            predicted_price = self.scaler.inverse_transform(
+                predicted_scaled.cpu().float().numpy())[0][0]
+
+        # 7. GPU memory cleanup + Ollama reload
+        if is_gpu:
+            # Release the training tensors
+            del x_all, y_all, x_train, y_train, x_val, y_val
+            torch.cuda.empty_cache()
+            _log_gpu_memory("post-train")
+            _preload_ollama()
+
        current_price = prices[-1]
        trend = "UP" if predicted_price > current_price else "DOWN"
        change_rate = ((predicted_price - current_price) / current_price) * 100
-        
-        # 신뢰도 점수 (Loss가 낮을수록 높음, 0~1)
-        # Loss가 0.001이면 0.99, 0.01이면 0.9 정도 나오게 조정
        confidence = 1.0 / (1.0 + (final_loss * 100))
-        
+
        return {
            "current": current_price,
            "predicted": float(predicted_price),
            "change_rate": round(change_rate, 2),
            "trend": trend,
            "loss": final_loss,
-            "confidence": round(confidence, 2)
+            "confidence": round(confidence, 2),
+            "epochs": actual_epochs,
+            "device": str(self.device)
        }
+
+    def batch_predict(self, prices_dict):
+        """Predict the next price for several tickers in one batched forward pass (no training).
+
+        Returns {ticker: result dict}; tickers with fewer than seq_length + 10 prices map to None.
+        NOTE(review): every ticker is scored with the weights currently in self.model; no
+        per-ticker checkpoint is loaded here - confirm that is intended.
+        """
+        results, seqs, metas = {}, [], []
+        for ticker, prices in prices_dict.items():
+            if len(prices) < (self.seq_length + 10):
+                results[ticker] = None
+                continue
+            # Separate scaler per ticker so each prediction is mapped back with its own range.
+            scaler = MinMaxScaler(feature_range=(0, 1))
+            scaled_data = scaler.fit_transform(np.array(prices).reshape(-1, 1))
+            seqs.append(torch.FloatTensor(scaled_data[-self.seq_length:]).unsqueeze(0))
+            metas.append((ticker, scaler, prices[-1]))
+
+        if not seqs:
+            return results
+
+        # Concatenate all sequences and run a single forward pass on the device.
+        batch = torch.cat(seqs, dim=0).to(self.device)
+        self.model.eval()
+        with torch.no_grad():
+            if self.use_amp:
+                with torch.amp.autocast('cuda'):
+                    preds = self.model(batch)
+            else:
+                preds = self.model(batch)
+            preds_cpu = preds.cpu().float().numpy()
+
+        for i, (ticker, scaler, current_price) in enumerate(metas):
+            # Convert once so the arithmetic below uses a Python float, not the scaler's NumPy scalar.
+            predicted_price = float(scaler.inverse_transform(preds_cpu[i:i+1])[0][0])
+            # A zero current price would make the percentage change undefined; report 0.0.
+            change_rate = 0.0
+            if current_price:
+                change_rate = ((predicted_price - current_price) / current_price) * 100
+            results[ticker] = {
+                "current": current_price,
+                "predicted": predicted_price,
+                "change_rate": round(change_rate, 2),
+                "trend": "UP" if predicted_price > current_price else "DOWN"
+            }
+
+        if self.device.type == 'cuda':
+            torch.cuda.empty_cache()
+
+        return results