From b49cc14ef38664d88a896bce4179ac40887a7b63 Mon Sep 17 00:00:00 2001 From: gahusb Date: Mon, 29 Jun 2026 18:13:33 +0900 Subject: [PATCH] =?UTF-8?q?fix(agent-office):=20dead-letter=20=5Fdl=5Fnoti?= =?UTF-8?q?fied=20=EA=B0=B1=EC=8B=A0=EC=9D=84=20=EB=B0=9C=EC=86=A1?= =?UTF-8?q?=EC=84=B1=EA=B3=B5=20=EC=8B=9C=EB=A1=9C=20=ED=95=9C=EC=A0=95=20?= =?UTF-8?q?+=20collect=5Fstatus=20=EC=98=88=EC=99=B8=EB=B0=A9=EC=96=B4=20(?= =?UTF-8?q?B4=20=EB=A6=AC=EB=B7=B0)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - _dl_notified[name] = dl을 if ok: 블록 안으로 이동 — 텔레그램 실패 시 갱신 방지 - check_and_alert에 collect_status try/except 추가 — 스케줄러 잡 생존 보장 - tests: import app.node_monitor as nm 최상단 이동 - tests: test_dl_notified_not_updated_on_telegram_failure 회귀 테스트 추가 Co-Authored-By: Claude Sonnet 4.6 Claude-Session: https://claude.ai/code/session_019LV86jBozkNhSFXJA412fq --- agent-office/app/node_monitor.py | 8 ++++++-- agent-office/tests/test_node_monitor.py | 25 +++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/agent-office/app/node_monitor.py b/agent-office/app/node_monitor.py index f7d89af..954cd6e 100644 --- a/agent-office/app/node_monitor.py +++ b/agent-office/app/node_monitor.py @@ -107,7 +107,11 @@ async def check_and_alert(status=None) -> list[str]: """ from .telegram.messaging import send_raw from .db import add_log - st = status or await collect_status() + try: + st = status or await collect_status() + except Exception: + logger.exception("collect_status 예외") + return [] sent: list[str] = [] for w in st["workers"]: name, alive = w["name"], w.get("alive", False) @@ -129,7 +133,7 @@ async def check_and_alert(status=None) -> list[str]: if (await send_raw(text=text)).get("ok"): add_log("node_monitor", f"{name} dead-letter {dl}", "warning") sent.append(text) - _dl_notified[name] = dl + _dl_notified[name] = dl elif dl == 0: _dl_notified.pop(name, None) return sent diff --git a/agent-office/tests/test_node_monitor.py b/agent-office/tests/test_node_monitor.py index 4287cea..c2f0918 100644 --- a/agent-office/tests/test_node_monitor.py +++ b/agent-office/tests/test_node_monitor.py @@ -1,6 +1,7 @@ # agent-office/tests/test_node_monitor.py import json, pytest from app import node_monitor +import app.node_monitor as nm class FakeRedis: """worker heartbeat + queue llen + scan_iter 흉내.""" @@ -98,8 +99,6 @@ async def test_llen_exception_returns_redis_ok_false(): assert st["redis_ok"] is False -import app.node_monitor as nm - @pytest.mark.asyncio async def test_alert_on_alive_to_dead(monkeypatch): sent = [] @@ -124,3 +123,25 @@ async def test_alert_on_dead_letter_growth(monkeypatch): s = {"workers": [{"name":"video-render","alive":True,"dead_letter":2}], "links": []} await nm.check_and_alert(status=s) assert any("dead-letter" in t for t in sent) + +@pytest.mark.asyncio +async def test_dl_notified_not_updated_on_telegram_failure(monkeypatch): + """텔레그램 실패(ok=False) 시 _dl_notified 갱신 안 됨 → 다음 사이클에서 재시도.""" + calls = [] + async def fake_send_raw(text, **kw): + calls.append(text) + if len(calls) == 1: + return {"ok": False} # 첫 호출: 텔레그램 다운 + return {"ok": True} # 두 번째 호출: 성공 + monkeypatch.setattr("app.telegram.messaging.send_raw", fake_send_raw) + monkeypatch.setattr("app.db.add_log", lambda *a, **k: None) + nm._node_state.clear(); nm._dl_notified.clear() + s = {"workers": [{"name": "video-render", "alive": True, "dead_letter": 2}], "links": []} + # 첫 호출: 텔레그램 다운 → ok=False → _dl_notified 갱신 안 됨 + result1 = await nm.check_and_alert(status=s) + assert result1 == [] + assert nm._dl_notified.get("video-render", 0) == 0 + # 두 번째 호출: 같은 dl=2 → _dl_notified 미갱신으로 조건 재만족 → 재시도 발송 + result2 = await nm.check_and_alert(status=s) + assert any("dead-letter" in t for t in result2) + assert nm._dl_notified.get("video-render") == 2