refactor: web-ai V1 assets → signal_v1/ (graduation prep)

Atomic mv of root V1 assets (main_server.py + modules/ + data/ +
tests/ + entry scripts + docs + logs) into signal_v1/ subdirectory.
load_dotenv() updated to load web-ai/.env explicitly via Path.

Adds web-ai/CLAUDE.md (workspace guide) and web-ai/start.bat
(signal_v1 entry wrapper). Prepares for signal_v2/ Phase 2.

Tests: signal_v1/tests/unit baseline preserved (no regression).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-16 03:00:11 +09:00
parent 42b91d03cf
commit 7ea1a21487
39 changed files with 722 additions and 691 deletions

View File

@@ -0,0 +1,110 @@
import psutil
from datetime import datetime
from modules.config import Config
class SystemMonitor:
def __init__(self, messenger, ollama_manager):
self.messenger = messenger
self.ollama_monitor = ollama_manager
self.last_health_check = datetime.now()
# CPU 서킷 브레이커 상태
self._cpu_overload_count = 0 # 연속 과부하 횟수
self._circuit_open = False # 서킷 브레이커 발동 여부
self._circuit_open_since = None
def is_cpu_critical(self):
"""서킷 브레이커가 발동 상태인지 반환 (True이면 분석 사이클 스킵)"""
return self._circuit_open
def reset_circuit(self):
"""서킷 브레이커 수동 리셋"""
if self._circuit_open:
print("[Monitor] CPU Circuit Breaker RESET")
self._circuit_open = False
self._cpu_overload_count = 0
self._circuit_open_since = None
def check_health(self):
"""시스템 상태 점검 및 알림 (CPU, RAM, GPU) - 3분마다 실행"""
now = datetime.now()
if (now - self.last_health_check).total_seconds() < 180:
return
self.last_health_check = now
alerts = []
# 1. CPU Check
cpu_usage = psutil.cpu_percent(interval=1) # 1초 측정 (더 정확)
if cpu_usage > Config.CPU_CIRCUIT_BREAKER_THRESHOLD:
self._cpu_overload_count += 1
# 상위 프로세스 조회
top_processes = []
for proc in psutil.process_iter(['pid', 'name', 'cpu_percent']):
try:
if proc.info['name'] in ('System Idle Process', 'Idle'):
continue
top_processes.append(proc.info)
except (psutil.NoSuchProcess, psutil.AccessDenied):
pass
top_processes.sort(key=lambda x: x['cpu_percent'], reverse=True)
top_3_str = ""
for p in top_processes[:3]:
top_3_str += f"\n- {p['name']} ({p['cpu_percent']}%)"
# 서킷 브레이커 발동 조건
if self._cpu_overload_count >= Config.CPU_CIRCUIT_BREAKER_CONSECUTIVE:
if not self._circuit_open:
self._circuit_open = True
self._circuit_open_since = now
alerts.append(
f"🔴 [CPU Circuit Breaker OPEN] {cpu_usage}% × {self._cpu_overload_count}회 연속\n"
f"⛔ 분석 사이클 일시 중단 (5분 후 자동 복구)\nTop Processes:{top_3_str}"
)
print(f"[Monitor] CPU Circuit Breaker OPEN! CPU={cpu_usage}%")
else:
alerts.append(
f"⚠️ [CPU Overload] Usage: {cpu_usage}% ({self._cpu_overload_count}회)\nTop Processes:{top_3_str}"
)
else:
# CPU 정상 → 카운터 리셋
if self._cpu_overload_count > 0:
print(f"[Monitor] CPU 정상화 ({cpu_usage}%). 카운터 리셋.")
self._cpu_overload_count = 0
# 서킷 브레이커가 열린 후 5분 경과 시 자동 복구
if self._circuit_open and self._circuit_open_since:
elapsed = (now - self._circuit_open_since).total_seconds()
if elapsed >= 300: # 5분
self._circuit_open = False
self._circuit_open_since = None
alerts.append("✅ [CPU Circuit Breaker CLOSED] 시스템 안정화. 분석 재개.")
print("[Monitor] CPU Circuit Breaker CLOSED. 분석 재개.")
# 2. RAM Check
ram = psutil.virtual_memory()
if ram.percent > 90:
alerts.append(f"⚠️ [RAM High] Usage: {ram.percent}% (Free: {ram.available / 1024**3:.1f}GB)")
# 3. GPU Check
if self.ollama_monitor:
gpu_status = self.ollama_monitor.get_gpu_status()
temp = gpu_status.get('temp', 0)
if temp > 80:
alerts.append(f"🔥 [GPU Overheat] Temp: {temp}°C")
# 알림 전송 (텔레그램 비활성화 - 콘솔 로그만 사용)
if alerts:
# 콘솔에만 출력
for alert in alerts:
print(f"[Monitor] {alert}")
# [비활성화] 텔레그램 알림 - 필요시 재활성화
# msg = "🔔 <b>[System Health Alert]</b>\n" + "\n".join(alerts)
# if self.messenger:
# self.messenger.send_message(msg)