fix(agent-office): dead-letter _dl_notified 갱신을 발송성공 시로 한정 + collect_status 예외방어 (B4 리뷰)

- _dl_notified[name] = dl을 if ok: 블록 안으로 이동 — 텔레그램 실패 시 갱신 방지
- check_and_alert에 collect_status try/except 추가 — 스케줄러 잡 생존 보장
- tests: import app.node_monitor as nm 최상단 이동
- tests: test_dl_notified_not_updated_on_telegram_failure 회귀 테스트 추가

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_019LV86jBozkNhSFXJA412fq
This commit is contained in:
2026-06-29 18:13:33 +09:00
parent 5d5ff27d29
commit b49cc14ef3
2 changed files with 29 additions and 4 deletions

View File

@@ -107,7 +107,11 @@ async def check_and_alert(status=None) -> list[str]:
"""
from .telegram.messaging import send_raw
from .db import add_log
st = status or await collect_status()
try:
st = status or await collect_status()
except Exception:
logger.exception("collect_status 예외")
return []
sent: list[str] = []
for w in st["workers"]:
name, alive = w["name"], w.get("alive", False)
@@ -129,7 +133,7 @@ async def check_and_alert(status=None) -> list[str]:
if (await send_raw(text=text)).get("ok"):
add_log("node_monitor", f"{name} dead-letter {dl}", "warning")
sent.append(text)
_dl_notified[name] = dl
_dl_notified[name] = dl
elif dl == 0:
_dl_notified.pop(name, None)
return sent

View File

@@ -1,6 +1,7 @@
# agent-office/tests/test_node_monitor.py
import json, pytest
from app import node_monitor
import app.node_monitor as nm
class FakeRedis:
"""worker heartbeat + queue llen + scan_iter 흉내."""
@@ -98,8 +99,6 @@ async def test_llen_exception_returns_redis_ok_false():
assert st["redis_ok"] is False
import app.node_monitor as nm
@pytest.mark.asyncio
async def test_alert_on_alive_to_dead(monkeypatch):
sent = []
@@ -124,3 +123,25 @@ async def test_alert_on_dead_letter_growth(monkeypatch):
s = {"workers": [{"name":"video-render","alive":True,"dead_letter":2}], "links": []}
await nm.check_and_alert(status=s)
assert any("dead-letter" in t for t in sent)
@pytest.mark.asyncio
async def test_dl_notified_not_updated_on_telegram_failure(monkeypatch):
    """A failed Telegram send (ok=False) must not update _dl_notified.

    Because the recorded count stays untouched, the next check cycle sees the
    same dead-letter condition again and retries the notification.
    """
    worker = "video-render"
    attempts = []

    async def fake_send_raw(text, **kw):
        attempts.append(text)
        # Only the very first attempt fails (Telegram down); later ones succeed.
        return {"ok": len(attempts) > 1}

    monkeypatch.setattr("app.telegram.messaging.send_raw", fake_send_raw)
    monkeypatch.setattr("app.db.add_log", lambda *a, **k: None)
    nm._node_state.clear()
    nm._dl_notified.clear()
    snapshot = {
        "workers": [{"name": worker, "alive": True, "dead_letter": 2}],
        "links": [],
    }

    # First cycle: the send reports ok=False, so nothing is returned as sent
    # and no dead-letter count is recorded for the worker.
    first = await nm.check_and_alert(status=snapshot)
    assert first == []
    assert nm._dl_notified.get(worker, 0) == 0

    # Second cycle with the same dead_letter=2: the condition holds again,
    # the send succeeds, and the count is now recorded.
    second = await nm.check_and_alert(status=snapshot)
    assert any("dead-letter" in t for t in second)
    assert nm._dl_notified.get(worker) == 2