""" 프로세스 생명주기 관리 - 메모리 기반 PID 관리 (pids.txt 폐기) - Watchdog 헬스체크 - 자동 재시작 (최대 3회) """ import os import time import threading from multiprocessing.shared_memory import SharedMemory from modules.config import Config class ProcessTracker: """메모리 기반 프로세스 추적기""" # 클래스 변수: 등록된 프로세스 정보 _processes = {} # {name: pid} _lock = threading.Lock() # 하위 호환: 기존 pids.txt 정리용 FILE_PATH = "pids.txt" @staticmethod def register(name): """현재 프로세스 등록 (메모리 기반)""" pid = os.getpid() with ProcessTracker._lock: ProcessTracker._processes[name] = pid print(f"[Process] Registered: {name} (PID: {pid})") @staticmethod def unregister(name): """프로세스 등록 해제""" with ProcessTracker._lock: ProcessTracker._processes.pop(name, None) @staticmethod def get_all(): """등록된 모든 프로세스 반환""" with ProcessTracker._lock: return dict(ProcessTracker._processes) @staticmethod def check_and_kill_zombies(): """이전 실행의 좀비 프로세스 정리 + stale SharedMemory 정리""" # 1. pids.txt 기반 좀비 정리 (하위 호환) if os.path.exists(ProcessTracker.FILE_PATH): try: import psutil current_pid = os.getpid() with open(ProcessTracker.FILE_PATH, "r", encoding="utf-8") as f: lines = f.readlines() killed_count = 0 for line in lines: if ":" not in line or "Running Processes" in line: continue try: pid = int(line.split(":")[0].strip()) if pid == current_pid: continue if psutil.pid_exists(pid): proc = psutil.Process(pid) if "python" in proc.name().lower(): print(f"[Process] Killing zombie: PID {pid} ({line.strip()})") proc.kill() killed_count += 1 except (ValueError, psutil.NoSuchProcess, psutil.AccessDenied): continue if killed_count > 0: print(f"[Process] Cleaned up {killed_count} zombie processes.") except Exception as e: print(f"[Process] Zombie cleanup failed: {e}") # pids.txt 삭제 (더 이상 사용하지 않음) try: os.remove(ProcessTracker.FILE_PATH) except Exception: pass # 2. Stale SharedMemory 정리 try: shm = SharedMemory(name=Config.SHM_NAME, create=False) shm.close() shm.unlink() print(f"[Process] Cleaned stale SharedMemory: {Config.SHM_NAME}") except FileNotFoundError: pass except Exception: pass @staticmethod def clear(): """등록 정보 초기화""" with ProcessTracker._lock: ProcessTracker._processes.clear() class ProcessWatchdog: """자식 프로세스 감시 및 자동 재시작""" def __init__(self, shutdown_event=None): self.shutdown_event = shutdown_event self._watched = {} # {name: {process, target, args, restart_count}} self._thread = None self._running = False def watch(self, name, process, target, args=()): """프로세스를 감시 대상에 등록""" self._watched[name] = { 'process': process, 'target': target, 'args': args, 'restart_count': 0 } def start(self): """Watchdog 스레드 시작""" self._running = True self._thread = threading.Thread(target=self._watchdog_loop, daemon=True) self._thread.start() print(f"[Watchdog] Started (interval: {Config.WATCHDOG_INTERVAL}s)") def stop(self): """Watchdog 중지""" self._running = False if self._thread: self._thread.join(timeout=5) def get_process(self, name): """감시 중인 프로세스 반환""" entry = self._watched.get(name) return entry['process'] if entry else None def _watchdog_loop(self): """주기적으로 자식 프로세스 상태 확인""" import multiprocessing while self._running: if self.shutdown_event and self.shutdown_event.is_set(): break for name, entry in list(self._watched.items()): proc = entry['process'] if proc.is_alive(): continue # 프로세스가 죽었음 exit_code = proc.exitcode restart_count = entry['restart_count'] if restart_count >= Config.MAX_RESTART_COUNT: print(f"[Watchdog] {name} crashed (exit={exit_code}). " f"Max restarts ({Config.MAX_RESTART_COUNT}) reached. Giving up.") continue print(f"[Watchdog] {name} crashed (exit={exit_code}). " f"Restarting... ({restart_count + 1}/{Config.MAX_RESTART_COUNT})") try: new_proc = multiprocessing.Process( target=entry['target'], args=entry['args'] ) new_proc.start() entry['process'] = new_proc entry['restart_count'] = restart_count + 1 print(f"[Watchdog] {name} restarted (new PID: {new_proc.pid})") except Exception as e: print(f"[Watchdog] Failed to restart {name}: {e}") # 인터벌 대기 (shutdown_event 체크하면서) for _ in range(Config.WATCHDOG_INTERVAL): if not self._running: break if self.shutdown_event and self.shutdown_event.is_set(): break time.sleep(1)