fix(agent-office): dead-letter _dl_notified 갱신을 발송성공 시로 한정 + collect_status 예외방어 (B4 리뷰)
- _dl_notified[name] = dl을 if ok: 블록 안으로 이동 — 텔레그램 실패 시 갱신 방지
- check_and_alert에 collect_status try/except 추가 — 스케줄러 잡 생존 보장
- tests: import app.node_monitor as nm 최상단 이동
- tests: test_dl_notified_not_updated_on_telegram_failure 회귀 테스트 추가

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_019LV86jBozkNhSFXJA412fq
This commit is contained in:
@@ -107,7 +107,11 @@ async def check_and_alert(status=None) -> list[str]:
|
|||||||
"""
|
"""
|
||||||
from .telegram.messaging import send_raw
|
from .telegram.messaging import send_raw
|
||||||
from .db import add_log
|
from .db import add_log
|
||||||
st = status or await collect_status()
|
try:
|
||||||
|
st = status or await collect_status()
|
||||||
|
except Exception:
|
||||||
|
logger.exception("collect_status 예외")
|
||||||
|
return []
|
||||||
sent: list[str] = []
|
sent: list[str] = []
|
||||||
for w in st["workers"]:
|
for w in st["workers"]:
|
||||||
name, alive = w["name"], w.get("alive", False)
|
name, alive = w["name"], w.get("alive", False)
|
||||||
@@ -129,7 +133,7 @@ async def check_and_alert(status=None) -> list[str]:
|
|||||||
if (await send_raw(text=text)).get("ok"):
|
if (await send_raw(text=text)).get("ok"):
|
||||||
add_log("node_monitor", f"{name} dead-letter {dl}", "warning")
|
add_log("node_monitor", f"{name} dead-letter {dl}", "warning")
|
||||||
sent.append(text)
|
sent.append(text)
|
||||||
_dl_notified[name] = dl
|
_dl_notified[name] = dl
|
||||||
elif dl == 0:
|
elif dl == 0:
|
||||||
_dl_notified.pop(name, None)
|
_dl_notified.pop(name, None)
|
||||||
return sent
|
return sent
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
# agent-office/tests/test_node_monitor.py
|
# agent-office/tests/test_node_monitor.py
|
||||||
import json, pytest
|
import json, pytest
|
||||||
from app import node_monitor
|
from app import node_monitor
|
||||||
|
import app.node_monitor as nm
|
||||||
|
|
||||||
class FakeRedis:
|
class FakeRedis:
|
||||||
"""worker heartbeat + queue llen + scan_iter 흉내."""
|
"""worker heartbeat + queue llen + scan_iter 흉내."""
|
||||||
@@ -98,8 +99,6 @@ async def test_llen_exception_returns_redis_ok_false():
|
|||||||
assert st["redis_ok"] is False
|
assert st["redis_ok"] is False
|
||||||
|
|
||||||
|
|
||||||
import app.node_monitor as nm
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_alert_on_alive_to_dead(monkeypatch):
|
async def test_alert_on_alive_to_dead(monkeypatch):
|
||||||
sent = []
|
sent = []
|
||||||
@@ -124,3 +123,25 @@ async def test_alert_on_dead_letter_growth(monkeypatch):
|
|||||||
s = {"workers": [{"name":"video-render","alive":True,"dead_letter":2}], "links": []}
|
s = {"workers": [{"name":"video-render","alive":True,"dead_letter":2}], "links": []}
|
||||||
await nm.check_and_alert(status=s)
|
await nm.check_and_alert(status=s)
|
||||||
assert any("dead-letter" in t for t in sent)
|
assert any("dead-letter" in t for t in sent)
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_dl_notified_not_updated_on_telegram_failure(monkeypatch):
    """A failed Telegram send (ok=False) must leave _dl_notified untouched.

    Since the count is not recorded on failure, the next cycle with the same
    dead-letter count is expected to send the alert again.
    """
    attempts = []

    async def fake_send_raw(text, **_kwargs):
        # Only the very first delivery fails (Telegram down); later ones succeed.
        attempts.append(text)
        return {"ok": len(attempts) > 1}

    monkeypatch.setattr("app.telegram.messaging.send_raw", fake_send_raw)
    monkeypatch.setattr("app.db.add_log", lambda *_a, **_k: None)

    # Start from a clean slate so earlier tests cannot influence the outcome.
    nm._node_state.clear()
    nm._dl_notified.clear()

    worker = {"name": "video-render", "alive": True, "dead_letter": 2}
    status = {"workers": [worker], "links": []}

    # Cycle 1: Telegram is down -> nothing reported, nothing recorded.
    first = await nm.check_and_alert(status=status)
    assert first == []
    assert nm._dl_notified.get("video-render", 0) == 0

    # Cycle 2: same dead_letter=2 -> condition holds again, alert is retried.
    second = await nm.check_and_alert(status=status)
    assert any("dead-letter" in msg for msg in second)
    assert nm._dl_notified.get("video-render") == 2
|
||||||
|
|||||||
Reference in New Issue
Block a user