diff --git a/agent-office/app/node_monitor.py b/agent-office/app/node_monitor.py index f7d89af..954cd6e 100644 --- a/agent-office/app/node_monitor.py +++ b/agent-office/app/node_monitor.py @@ -107,7 +107,11 @@ async def check_and_alert(status=None) -> list[str]: """ from .telegram.messaging import send_raw from .db import add_log - st = status or await collect_status() + try: + st = status or await collect_status() + except Exception: + logger.exception("collect_status 예외") + return [] sent: list[str] = [] for w in st["workers"]: name, alive = w["name"], w.get("alive", False) @@ -129,7 +133,7 @@ async def check_and_alert(status=None) -> list[str]: if (await send_raw(text=text)).get("ok"): add_log("node_monitor", f"{name} dead-letter {dl}", "warning") sent.append(text) - _dl_notified[name] = dl + _dl_notified[name] = dl elif dl == 0: _dl_notified.pop(name, None) return sent diff --git a/agent-office/tests/test_node_monitor.py b/agent-office/tests/test_node_monitor.py index 4287cea..c2f0918 100644 --- a/agent-office/tests/test_node_monitor.py +++ b/agent-office/tests/test_node_monitor.py @@ -1,6 +1,7 @@ # agent-office/tests/test_node_monitor.py import json, pytest from app import node_monitor +import app.node_monitor as nm class FakeRedis: """worker heartbeat + queue llen + scan_iter 흉내.""" @@ -98,8 +99,6 @@ async def test_llen_exception_returns_redis_ok_false(): assert st["redis_ok"] is False -import app.node_monitor as nm - @pytest.mark.asyncio async def test_alert_on_alive_to_dead(monkeypatch): sent = [] @@ -124,3 +123,25 @@ async def test_alert_on_dead_letter_growth(monkeypatch): s = {"workers": [{"name":"video-render","alive":True,"dead_letter":2}], "links": []} await nm.check_and_alert(status=s) assert any("dead-letter" in t for t in sent) + +@pytest.mark.asyncio +async def test_dl_notified_not_updated_on_telegram_failure(monkeypatch): + """텔레그램 실패(ok=False) 시 _dl_notified 갱신 안 됨 → 다음 사이클에서 재시도.""" + calls = [] + async def fake_send_raw(text, **kw): + calls.append(text) + if len(calls) == 1: + return {"ok": False} # 첫 호출: 텔레그램 다운 + return {"ok": True} # 두 번째 호출: 성공 + monkeypatch.setattr("app.telegram.messaging.send_raw", fake_send_raw) + monkeypatch.setattr("app.db.add_log", lambda *a, **k: None) + nm._node_state.clear(); nm._dl_notified.clear() + s = {"workers": [{"name": "video-render", "alive": True, "dead_letter": 2}], "links": []} + # 첫 호출: 텔레그램 다운 → ok=False → _dl_notified 갱신 안 됨 + result1 = await nm.check_and_alert(status=s) + assert result1 == [] + assert nm._dl_notified.get("video-render", 0) == 0 + # 두 번째 호출: 같은 dl=2 → _dl_notified 미갱신으로 조건 재만족 → 재시도 발송 + result2 = await nm.check_and_alert(status=s) + assert any("dead-letter" in t for t in result2) + assert nm._dl_notified.get("video-render") == 2