주식자동매매 AI 프로그램 초기 모델

2026-02-04 23:29:06 +09:00
parent 41df1a38d3
commit 7d5f62f844
20 changed files with 2987 additions and 0 deletions
--- a/modules/analysis/deep_learning.py
+++ b/modules/analysis/deep_learning.py
@@ -0,0 +1,206 @@
+import torch
+import torch.nn as nn
+import numpy as np
+from sklearn.preprocessing import MinMaxScaler
+
+class Attention(nn.Module):
+    """Soft attention pooling over the time axis of an LSTM output.
+
+    A single linear layer scores every time step, the scores are normalised
+    with a softmax along the sequence dimension, and the hidden states are
+    summed using those weights.
+    """
+
+    def __init__(self, hidden_size):
+        super().__init__()
+        self.hidden_size = hidden_size
+        # Maps each hidden vector to one unnormalised relevance score.
+        self.attn = nn.Linear(hidden_size, 1)
+
+    def forward(self, lstm_output):
+        """Collapse the sequence dimension into a single context vector.
+
+        Args:
+            lstm_output: tensor laid out as [batch_size, seq_len, hidden_size].
+
+        Returns:
+            Tuple ``(context, attn_weights)`` with ``context`` shaped
+            [batch_size, hidden_size] and ``attn_weights`` shaped
+            [batch_size, seq_len, 1].
+        """
+        scores = self.attn(lstm_output)
+        # Normalise across time steps (dim=1), not across the batch.
+        attn_weights = scores.softmax(dim=1)
+        weighted_states = attn_weights * lstm_output
+        context = weighted_states.sum(dim=1)
+        return context, attn_weights
+
+class AdvancedLSTM(nn.Module):
+    """
+    [RTX 5070 Ti Optimized] High-Capacity LSTM with Attention
+    - Hidden Size: 512 (Rich Feature Extraction)
+    - Layers: 4 (Deep Reasoning)
+    - Attention: Focus on critical time steps
+
+    Pipeline: stacked LSTM -> attention pooling over time -> 3-layer MLP head.
+
+    Args:
+        input_size: number of features per time step.
+        hidden_size: LSTM hidden width; the head narrows it to 1/2 and 1/4.
+        num_layers: number of stacked LSTM layers.
+        output_size: number of values predicted per sample.
+        dropout: dropout probability used between LSTM layers and in the head.
+    """
+    def __init__(self, input_size=1, hidden_size=512, num_layers=4, output_size=1, dropout=0.3):
+        super().__init__()
+        self.hidden_size = hidden_size
+        self.num_layers = num_layers
+
+        # nn.LSTM only applies dropout *between* stacked layers; with a single
+        # layer a non-zero value has no effect and just emits a UserWarning.
+        lstm_dropout = dropout if num_layers > 1 else 0.0
+        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
+                            batch_first=True, dropout=lstm_dropout)
+
+        self.attention = Attention(hidden_size)
+
+        # Layer order is kept stable so existing checkpoints still load.
+        self.fc = nn.Sequential(
+            nn.Linear(hidden_size, hidden_size // 2),
+            nn.ReLU(),
+            nn.Dropout(dropout),
+            nn.Linear(hidden_size // 2, hidden_size // 4),
+            nn.ReLU(),
+            nn.Linear(hidden_size // 4, output_size)
+        )
+
+    def forward(self, x):
+        """Run a forward pass.
+
+        Args:
+            x: input tensor laid out as [batch, seq, feature].
+
+        Returns:
+            Tensor shaped [batch, output_size].
+        """
+        # No explicit (h0, c0): nn.LSTM defaults both to zeros that match the
+        # input's device and dtype, so the module also works after
+        # .double()/.half(), where hand-built float32 states would be rejected.
+        lstm_out, _ = self.lstm(x)  # [batch, seq, hidden]
+
+        # Attention pooling over the time axis.
+        context, _ = self.attention(lstm_out)  # [batch, hidden]
+
+        # Final prediction head.
+        return self.fc(context)
+
+class PricePredictor:
+    """
+    High-performance deep learning model for stock price prediction
+    (RTX 5070 Ti Edition).
+
+    Wraps an AdvancedLSTM together with a MinMaxScaler, an MSE loss and an
+    AdamW optimizer. The same model and optimizer instances are reused by every
+    call to train_and_predict, while the scaler is refit on each call.
+    """
+    def __init__(self):
+        self.scaler = MinMaxScaler(feature_range=(0, 1))
+
+        # [Hardware Spec] sized for an RTX 5070 Ti (16GB VRAM)
+        self.hidden_size = 512
+        self.num_layers = 4
+
+        self.model = AdvancedLSTM(input_size=1, hidden_size=self.hidden_size,
+                                  num_layers=self.num_layers, dropout=0.3)
+        self.criterion = nn.MSELoss()
+
+        # Training hyper-parameters
+        self.batch_size = 64
+        self.epochs = 200      # full passes over the windows per call
+        self.seq_length = 60   # look-back window: 60 days (about 3 months)
+
+        # CUDA setup: start on CPU and upgrade only if the GPU really works.
+        self.device = torch.device('cpu')
+
+        if torch.cuda.is_available():
+            try:
+                gpu_name = torch.cuda.get_device_name(0)
+                vram_gb = torch.cuda.get_device_properties(0).total_memory / 1024**3
+
+                # Move the model to the GPU
+                self.device = torch.device('cuda')
+                self.model.to(self.device)
+
+                # Warm-up forward pass; no autograd graph is needed for it.
+                with torch.no_grad():
+                    dummy = torch.zeros(1, self.seq_length, 1).to(self.device)
+                    _ = self.model(dummy)
+
+                print(f"🚀 [AI] Powered by {gpu_name} ({vram_gb:.1f}GB) - High Performance Mode On")
+
+            except Exception as e:
+                print(f"⚠️ [AI] GPU Init Failed: {e}")
+                # Keep model and self.device in agreement: without moving the
+                # weights back, later CPU tensors would hit a CUDA-resident model.
+                self.device = torch.device('cpu')
+                self.model.to(self.device)
+        else:
+            print("⚠️ [AI] Running on CPU (Low Performance)")
+
+        # Optimizer is created after the model has reached its final device.
+        self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=0.0005, weight_decay=1e-4)
+
+        # Progress info that other code can poll.
+        self.training_status = {
+            "is_training": False,
+            "loss": 0.0
+        }
+
+    @staticmethod
+    def verify_hardware():
+        """Check at server start-up whether CUDA acceleration is usable.
+
+        Prints a short report and returns True when a GPU was detected and its
+        properties could be read, False otherwise.
+        """
+        if not torch.cuda.is_available():
+            print("⚠️ [AI Check] No GPU Detected. Running in CPU Mode.")
+            return False
+
+        try:
+            gpu_name = torch.cuda.get_device_name(0)
+            vram_gb = torch.cuda.get_device_properties(0).total_memory / 1024**3
+            print(f"🚀 [AI Check] Hardware Detected: {gpu_name} ({vram_gb:.1f}GB VRAM)")
+            print(f"   ✅ High Performance Mode is READY.")
+            return True
+        except Exception as e:
+            print(f"⚠️ [AI Check] GPU Error: {e}")
+            return False
+
+    def train_and_predict(self, prices, forecast_days=1):
+        """
+        Online Learning & Prediction
+
+        Fits the scaler on ``prices``, trains the shared model for
+        ``self.epochs`` epochs on sliding windows of ``self.seq_length`` values
+        and predicts the value that follows the last window.
+
+        Args:
+            prices: sequence of prices; ``prices[-1]`` is reported as "current".
+            forecast_days: accepted for interface compatibility; the current
+                implementation ignores it and always predicts one step ahead.
+
+        Returns:
+            None when fewer than ``seq_length + 10`` prices are given, otherwise
+            a dict with the keys "current", "predicted", "change_rate" (percent),
+            "trend" ("UP"/"DOWN"), "loss" and "confidence".
+        """
+        # Refuse to predict without the window length plus a small margin.
+        if len(prices) < (self.seq_length + 10):
+            return None
+
+        # 1. Pre-processing: scale to [0, 1] and cut sliding windows.
+        data = np.array(prices).reshape(-1, 1)
+        scaled_data = self.scaler.fit_transform(data)
+
+        window = self.seq_length
+        x_train = [scaled_data[i:i + window] for i in range(len(scaled_data) - window)]
+        y_train = [scaled_data[i + window] for i in range(len(scaled_data) - window)]
+
+        x_train_t = torch.FloatTensor(np.array(x_train)).to(self.device)
+        y_train_t = torch.FloatTensor(np.array(y_train)).to(self.device)
+
+        # 2. Training
+        self.model.train()
+        self.training_status["is_training"] = True
+
+        dataset_size = len(x_train_t)
+        final_loss = 0.0
+
+        try:
+            for _ in range(self.epochs):
+                # Fresh shuffle every epoch.
+                perm = torch.randperm(dataset_size).to(self.device)
+                x_shuffled = x_train_t[perm]
+                y_shuffled = y_train_t[perm]
+
+                epoch_loss = 0.0
+                steps = 0
+
+                for start in range(0, dataset_size, self.batch_size):
+                    stop = min(start + self.batch_size, dataset_size)
+                    batch_x = x_shuffled[start:stop]
+                    batch_y = y_shuffled[start:stop]
+
+                    self.optimizer.zero_grad()
+                    outputs = self.model(batch_x)
+                    loss = self.criterion(outputs, batch_y)
+                    loss.backward()
+                    self.optimizer.step()
+
+                    epoch_loss += loss.item()
+                    steps += 1
+
+                # Mean batch loss of the latest epoch.
+                final_loss = epoch_loss / max(1, steps)
+        finally:
+            # Never leave the flag stuck at True if training raises
+            # (e.g. CUDA out-of-memory).
+            self.training_status["is_training"] = False
+
+        self.training_status["loss"] = final_loss
+
+        # 3. Prediction from the most recent window.
+        self.model.eval()
+        with torch.no_grad():
+            last_seq = torch.FloatTensor(scaled_data[-self.seq_length:]).unsqueeze(0).to(self.device)
+            predicted_scaled = self.model(last_seq)
+            # Convert to a plain float so derived values are not NumPy scalars.
+            predicted_price = float(
+                self.scaler.inverse_transform(predicted_scaled.cpu().numpy())[0][0]
+            )
+
+        current_price = prices[-1]
+        trend = "UP" if predicted_price > current_price else "DOWN"
+        # Guard against a zero price, which would otherwise divide by zero.
+        if current_price:
+            change_rate = float(((predicted_price - current_price) / current_price) * 100)
+        else:
+            change_rate = 0.0
+
+        # Confidence in (0, 1], higher for lower loss:
+        # 1 / (1 + 100 * loss) -> loss 0.001 gives ~0.91, loss 0.01 gives 0.5.
+        confidence = 1.0 / (1.0 + (final_loss * 100))
+
+        return {
+            "current": current_price,
+            "predicted": predicted_price,
+            "change_rate": round(change_rate, 2),
+            "trend": trend,
+            "loss": final_loss,
+            "confidence": round(confidence, 2)
+        }
--- a/modules/analysis/macro.py
+++ b/modules/analysis/macro.py
@@ -0,0 +1,139 @@
+from datetime import datetime
+import os
+from dotenv import load_dotenv
+from modules.services.kis import KISClient
+
+class MacroAnalyzer:
+    """
+    Macro (market index) analysis module built on the KIS API.
+    Uses the Korea Investment & Securities API instead of yfinance to collect
+    KOSPI and KOSDAQ data.
+    """
+
+    @staticmethod
+    def get_macro_status(kis_client):
+        """
+        Query the main market indices (KOSPI, KOSDAQ) and rate market risk.
+
+        Risk points: each index adds 2 for a change of -2.0 or lower and 1 for
+        -1.0 or lower; the KOSPI stress index adds 2 at >= 50 and 1 at >= 30.
+        A total of 2 or more is "DANGER", 1 is "CAUTION", 0 is "SAFE".
+
+        Args:
+            kis_client (KISClient): client exposing ``get_current_index(code)``
+                and ``get_daily_index_price(code, period=...)``.
+
+        Returns:
+            dict: {"status", "risk_score", "indicators"} where "indicators"
+            holds a price/change pair per index plus the "MSI" value.
+        """
+        indicators = {
+            "KOSPI": "0001",
+            "KOSDAQ": "1001"
+        }
+
+        results = {}
+        risk_score = 0
+
+        print("🌍 [Macro] Fetching market indices via KIS API...")
+
+        for name, code in indicators.items():
+            data = kis_client.get_current_index(code)
+            if not data:
+                # Lookup failed: record zeros and add no risk for this index.
+                results[name] = {"price": 0, "change": 0}
+                continue
+
+            price = data['price']
+            change = data['change']
+            results[name] = {"price": price, "change": change}
+            print(f"   - {name}: {price} ({change}%)")
+
+            # Simplified risk rule based on the day's change.
+            if change <= -2.0:
+                risk_score += 2  # panic
+            elif change <= -1.0:
+                risk_score += 1  # caution
+
+        # Market Stress Index (MSI) for KOSPI; reuse the code declared above
+        # rather than repeating the literal.
+        kospi_stress = MacroAnalyzer.calculate_stress_index(kis_client, indicators["KOSPI"])
+        results['MSI'] = kospi_stress
+        print(f"   - Market Stress Index: {kospi_stress}")
+
+        if kospi_stress >= 50:
+            risk_score += 2  # very high stress
+        elif kospi_stress >= 30:
+            risk_score += 1  # elevated stress
+
+        # Map the accumulated points to a market state.
+        if risk_score >= 2:
+            status = "DANGER"   # recommend halting buys
+        elif risk_score >= 1:
+            status = "CAUTION"  # trade conservatively
+        else:
+            status = "SAFE"
+
+        return {
+            "status": status,
+            "risk_score": risk_score,
+            "indicators": results
+        }
+
+    @staticmethod
+    def calculate_stress_index(kis_client, market_code="0001"):
+        """
+        Compute the Market Stress Index (MSI).
+        - Value between 0 and 100 (higher means riskier)
+        - Components: volatility (up to 60 points) and negative divergence
+          from the 20-day moving average (up to 40 points)
+
+        Returns 0 when fewer than 20 prices are available or when the series
+        contains non-finite or non-positive values, because log returns are
+        undefined for such data and would otherwise produce NaN.
+        """
+        import numpy as np
+
+        # Daily index prices.
+        # NOTE(review): assumes the series is ordered oldest -> newest so that
+        # prices[-1] is the latest close - confirm against KISClient.
+        prices = kis_client.get_daily_index_price(market_code, period="D")
+        if prices is None or len(prices) < 20:
+            return 0
+
+        prices = np.asarray(prices, dtype=float)
+        if not np.all(np.isfinite(prices)) or np.any(prices <= 0):
+            return 0
+
+        # 1. Historical volatility from the most recent (up to 20) log returns,
+        #    annualised with 252 trading days and expressed in percent.
+        returns = np.diff(np.log(prices))
+        volatility = np.std(returns[-20:]) * np.sqrt(252) * 100
+
+        # 2. Divergence of the latest price from the 20-day moving average (%).
+        ma20 = np.mean(prices[-20:])
+        current_price = prices[-1]
+        disparity = (current_price - ma20) / ma20 * 100
+
+        # 3. Stress score.
+        # Volatility part: 0 points at 10% or below, capped at 60 points
+        # (reached at 40%).
+        v_score = min(max((volatility - 10) * 2, 0), 60)
+
+        # Down-trend part: 4 points per percent below the MA, capped at 40
+        # (reached at -10%).
+        d_score = 0
+        if disparity < 0:
+            d_score = min(abs(disparity) * 4, 40)
+
+        total_stress = v_score + d_score
+        return round(total_stress, 2)
+
+if __name__ == "__main__":
+    # Manual smoke test: build a KIS client from .env settings and print the
+    # macro report.
+    load_dotenv()
+
+    # KIS_ENV_TYPE == "real" selects the live-account credentials; any other
+    # value (or none) selects the virtual-account ones.
+    use_real = os.getenv("KIS_ENV_TYPE") == "real"
+    if use_real:
+        key_var, secret_var, account_var = (
+            "KIS_REAL_APP_KEY", "KIS_REAL_APP_SECRET", "KIS_REAL_ACCOUNT")
+    else:
+        key_var, secret_var, account_var = (
+            "KIS_VIRTUAL_APP_KEY", "KIS_VIRTUAL_APP_SECRET", "KIS_VIRTUAL_ACCOUNT")
+
+    app_key = os.getenv(key_var)
+    app_secret = os.getenv(secret_var)
+    account = os.getenv(account_var)
+    is_virtual = not use_real
+
+    kis = KISClient(app_key, app_secret, account, is_virtual)
+
+    # Obtain an access token before any query.
+    kis.ensure_token()
+
+    # Run the analysis and show the result.
+    report = MacroAnalyzer.get_macro_status(kis)
+    print("\n📊 [Macro Report]")
+    print(f"Status: {report['status']}")
+    print(f"Data: {report['indicators']}")
--- a/modules/analysis/technical.py
+++ b/modules/analysis/technical.py
@@ -0,0 +1,190 @@
+import pandas as pd
+import numpy as np
+
+class TechnicalAnalyzer:
+    """
+    Technical-indicator calculations built on pandas.
+
+    All methods are static and take plain price/volume sequences. The original
+    author's note: the heavier numeric work is kept in this module so it runs
+    on the CPU (9800X3D).
+    """
+
+    @staticmethod
+    def calculate_rsi(prices, period=14):
+        """Return the RSI (Relative Strength Index) of the latest point.
+
+        Uses simple rolling means of gains and losses over ``period`` steps.
+        Returns the neutral value 50.0 when there is too little data or when
+        the window contains neither gains nor losses (flat prices), and 100.0
+        when it contains gains but no losses.
+        """
+        if len(prices) < period:
+            return 50.0  # not enough data: neutral
+
+        delta = pd.Series(prices).diff()
+        avg_gain = delta.where(delta > 0, 0).rolling(window=period).mean().iloc[-1]
+        avg_loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean().iloc[-1]
+
+        if pd.isna(avg_gain) or pd.isna(avg_loss):
+            return 50.0
+        if avg_loss <= 0:
+            # gain / 0 is undefined; 0 / 0 used to yield NaN, which later
+            # slipped through the score clipping as a perfect 1.0.
+            return 100.0 if avg_gain > 0 else 50.0
+
+        rs = avg_gain / avg_loss
+        return float(100 - (100 / (1 + rs)))
+
+    @staticmethod
+    def calculate_ma(prices, period=20):
+        """Return the simple moving average of the last ``period`` prices.
+
+        With fewer than ``period`` prices the latest price is returned, or 0
+        for an empty input.
+        """
+        if len(prices) < period:
+            # len() instead of truthiness so NumPy arrays are accepted too.
+            return prices[-1] if len(prices) > 0 else 0
+        return pd.Series(prices).rolling(window=period).mean().iloc[-1]
+
+    @staticmethod
+    def calculate_macd(prices, fast=12, slow=26, signal=9):
+        """Return (macd, signal_line, histogram) for the latest point.
+
+        Returns (0, 0, 0) when fewer than ``slow + signal`` prices are given.
+        """
+        if len(prices) < slow + signal:
+            return 0, 0, 0  # not enough data
+
+        s = pd.Series(prices)
+        ema_fast = s.ewm(span=fast, adjust=False).mean()
+        ema_slow = s.ewm(span=slow, adjust=False).mean()
+        macd = ema_fast - ema_slow
+        signal_line = macd.ewm(span=signal, adjust=False).mean()
+        histogram = macd - signal_line
+
+        return macd.iloc[-1], signal_line.iloc[-1], histogram.iloc[-1]
+
+    @staticmethod
+    def calculate_bollinger_bands(prices, period=20, num_std=2):
+        """Return Bollinger Bands as (upper, middle, lower) for the latest point.
+
+        Returns (0, 0, 0) when fewer than ``period`` prices are given.
+        """
+        if len(prices) < period:
+            return 0, 0, 0
+
+        s = pd.Series(prices)
+        sma = s.rolling(window=period).mean()
+        std = s.rolling(window=period).std()
+
+        upper = sma + (std * num_std)
+        lower = sma - (std * num_std)
+
+        return upper.iloc[-1], sma.iloc[-1], lower.iloc[-1]
+
+    @staticmethod
+    def calculate_stochastic(prices, high_prices=None, low_prices=None, n=14, k=3, d=3):
+        """Return the slow stochastic oscillator as (slow %K, slow %D).
+
+        When high/low series are missing or empty, closing prices stand in for
+        them (less accurate). The neutral value 50 is returned for any
+        component that cannot be computed yet because the rolling windows are
+        not filled.
+        """
+        if len(prices) < n:
+            return 50, 50
+
+        close = pd.Series(prices)
+
+        def _series_or_close(values):
+            # Explicit None/len checks: truth-testing a NumPy array raises.
+            if values is None or len(values) == 0:
+                return close
+            return pd.Series(values)
+
+        high = _series_or_close(high_prices)
+        low = _series_or_close(low_prices)
+
+        # Highest high / lowest low of the last n periods.
+        highest_high = high.rolling(window=n).max()
+        lowest_low = low.rolling(window=n).min()
+
+        # Fast %K; the epsilon avoids 0/0 when the range is flat.
+        fast_k = ((close - lowest_low) / (highest_high - lowest_low + 1e-9)) * 100
+        # Slow %K (= Fast %D)
+        slow_k = fast_k.rolling(window=k).mean()
+        # Slow %D
+        slow_d = slow_k.rolling(window=d).mean()
+
+        k_last = slow_k.iloc[-1]
+        d_last = slow_d.iloc[-1]
+        return (50 if pd.isna(k_last) else k_last,
+                50 if pd.isna(d_last) else d_last)
+
+    @staticmethod
+    def get_technical_score(current_price, prices_history, volume_history=None):
+        """
+        Combined technical score in [0.0, 1.0]; higher means more buy-leaning.
+
+        Weights: RSI 30%, MA20 disparity 20%, MACD 20%, Bollinger Bands 15%,
+        Stochastic 15%. A volume spike (latest volume >= 3x the prior 5-day
+        average) adds 0.1 before clipping.
+
+        Returns:
+            tuple: (score, rsi, volatility_percent, volume_ratio). With fewer
+            than 30 prices the neutral tuple (0.5, 50.0, 0.0, 1.0) is returned,
+            so the result always has four elements.
+        """
+        if prices_history is None or len(prices_history) < 30:
+            return 0.5, 50.0, 0.0, 1.0  # not enough data: neutral
+
+        scores = []
+
+        # 1. RSI (weight 30%): <= 30 (oversold) -> 1.0, >= 70 (overbought) -> 0.0
+        rsi = TechnicalAnalyzer.calculate_rsi(prices_history)
+        if rsi <= 30:
+            rsi_score = 1.0
+        elif rsi >= 70:
+            rsi_score = 0.0
+        else:
+            rsi_score = 1.0 - ((rsi - 30) / 40.0)  # linear in between
+        scores.append(rsi_score * 0.3)
+
+        # 2. Disparity from MA20 (weight 20%): the further below, the higher.
+        ma20 = TechnicalAnalyzer.calculate_ma(prices_history, 20)
+        disparity = (current_price - ma20) / ma20 if ma20 else 0.0
+        if disparity < -0.05:
+            disp_score = 1.0     # more than 5% below
+        elif disparity > 0.05:
+            disp_score = 0.0     # more than 5% above
+        else:
+            disp_score = 0.5 - (disparity * 10)
+        scores.append(disp_score * 0.2)
+
+        # 3. MACD (weight 20%): histogram sign gives direction, MACD sign the regime.
+        macd, _signal, hist = TechnicalAnalyzer.calculate_macd(prices_history)
+        if hist > 0:
+            macd_score = 0.8 if macd > 0 else 0.6  # accelerating / early reversal up
+        elif hist < 0:
+            macd_score = 0.4 if macd > 0 else 0.2  # early reversal down / downtrend
+        else:
+            # hist == 0 includes the (0, 0, 0) "not enough data" result that
+            # calculate_macd returns for 30-34 prices; score it as neutral
+            # instead of as a downtrend.
+            macd_score = 0.5
+        scores.append(macd_score * 0.2)
+
+        # 4. Bollinger Bands (weight 15%): at/below lower band -> 1.0,
+        #    at/above upper band -> 0.0, otherwise inverted position in band.
+        up, _mid, low = TechnicalAnalyzer.calculate_bollinger_bands(prices_history)
+        if current_price <= low:
+            bb_score = 1.0
+        elif current_price >= up:
+            bb_score = 0.0
+        else:
+            pos = (current_price - low) / (up - low + 1e-9)
+            bb_score = 1.0 - pos
+        scores.append(bb_score * 0.15)
+
+        # 5. Stochastic (weight 15%): %K < 20 oversold -> 1.0, > 80 overbought -> 0.0
+        slow_k, _slow_d = TechnicalAnalyzer.calculate_stochastic(prices_history)
+        if slow_k < 20:
+            st_score = 1.0
+        elif slow_k > 80:
+            st_score = 0.0
+        else:
+            st_score = 1.0 - (slow_k / 100.0)
+        scores.append(st_score * 0.15)
+
+        total_score = sum(scores)
+
+        # Volume spike analysis (whale tracking)
+        volume_ratio = 1.0
+        if volume_history is not None and len(volume_history) >= 5:
+            vol_s = pd.Series(volume_history)
+            # 5-day mean ending yesterday; NaN with exactly 5 entries, in
+            # which case the ratio stays at 1.0.
+            avg_vol = vol_s.rolling(window=5).mean().iloc[-2]
+            current_vol = volume_history[-1]
+            if avg_vol > 0:
+                volume_ratio = current_vol / avg_vol
+
+            # 3x the usual volume or more: treated as a strong buy signal.
+            if volume_ratio >= 3.0:
+                total_score += 0.1
+
+        # Clip to 0.0 ~ 1.0. NaN must be handled first: max/min comparisons
+        # with NaN are always False and would turn it into 1.0.
+        if np.isnan(total_score):
+            total_score = 0.5
+        total_score = max(0.0, min(1.0, total_score))
+
+        # Volatility: standard deviation of day-over-day percentage changes
+        # across the whole supplied history, in percent.
+        prices_np = np.asarray(prices_history, dtype=float)
+        previous = prices_np[:-1]
+        usable = previous != 0  # skip steps that would divide by zero
+        changes = np.diff(prices_np)[usable] / previous[usable]
+        volatility = float(np.std(changes) * 100) if changes.size else 0.0
+
+        return (round(float(total_score), 4), round(float(rsi), 2),
+                round(volatility, 2), round(float(volume_ratio), 1))