feat(render-workers): 4 render 워커 heartbeat 배선 + poll_once 카운터
- services/_shared/heartbeat.py (A1) WorkerStats/utc_now_iso/heartbeat_loop 소비
- image-render / video-render / music-render / insta-render 각 worker.py: stats = WorkerStats() 모듈 레벨 추가, poll_once에서 dispatch 전 busy=True, ack 후 jobs_done+1 / fail 후 jobs_failed+1 + last_job_at + busy=False
- 각 main.py: lifespan에 aioredis(decode_responses=False) + heartbeat_loop 태스크 spawn, 종료 시 cancel + aclose
- 각 tests/test_worker.py: test_poll_once_increments_jobs_done 추가 (image:flux / video:sora / music:suno / insta:_process_one mock)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_019LV86jBozkNhSFXJA412fq
This commit is contained in:
@@ -7,12 +7,15 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
import redis.asyncio as aioredis
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
import worker
|
||||
from _shared.heartbeat import heartbeat_loop
|
||||
from providers.sync_ops import (
|
||||
generate_lyrics, get_credits,
|
||||
get_timestamped_lyrics, generate_style_boost,
|
||||
@@ -25,15 +28,19 @@ logger = logging.getLogger(__name__)
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run the queue worker and the heartbeat publisher for the app's lifetime.

    On startup this spawns ``worker.worker_loop()`` and a ``heartbeat_loop``
    task that reports ``worker.stats`` through a dedicated Redis client.
    On shutdown both tasks are cancelled and awaited, and the Redis client
    is always closed, even if one of the tasks had already crashed.

    Args:
        app: The FastAPI application (unused; required by the lifespan
            protocol).
    """
    worker_task = asyncio.create_task(worker.worker_loop())
    hb_redis = aioredis.from_url(
        os.getenv("REDIS_URL", "redis://192.168.45.54:6379"),
        decode_responses=False,
    )
    hb_task = asyncio.create_task(
        heartbeat_loop(hb_redis, "music-render", "render", worker.stats)
    )
    logger.info("music-render lifespan 시작")
    try:
        yield
    finally:
        tasks = (worker_task, hb_task)
        # Cancel everything first so one slow task cannot delay the other.
        for task in tasks:
            task.cancel()
        try:
            # return_exceptions=True keeps a task that already died with a
            # real error from aborting shutdown before cleanup has run.
            results = await asyncio.gather(*tasks, return_exceptions=True)
            for task, result in zip(tasks, results):
                # CancelledError is a BaseException, so only genuine
                # failures are reported here.
                if isinstance(result, Exception):
                    logger.error(
                        "music-render background task %r exited with error",
                        task,
                        exc_info=result,
                    )
        finally:
            # Always release the heartbeat connection.
            await hb_redis.aclose()
        logger.info("music-render lifespan 종료")
|
||||
|
||||
|
||||
|
||||
@@ -167,3 +167,25 @@ async def test_poll_once_returns_false_on_timeout(monkeypatch):
|
||||
dispatch_mock.assert_not_called()
|
||||
fake_queue.ack.assert_not_awaited()
|
||||
fake_queue.fail.assert_not_awaited()
|
||||
|
||||
|
||||
# ----- heartbeat stats 카운터 -----
|
||||
|
||||
class _OneJobQueue:
|
||||
def __init__(self): self.acked = False
|
||||
async def dequeue(self, timeout=5):
|
||||
if self.acked: return None
|
||||
return ({"job_type": "suno_generation", "task_id": "t1", "params": {}}, b"raw")
|
||||
async def ack(self, raw): self.acked = True
|
||||
async def fail(self, raw, payload): pass
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_poll_once_increments_jobs_done(monkeypatch):
    """A successfully handled job bumps jobs_done and clears the busy flag."""
    # Reset every field this test asserts on. Going through monkeypatch
    # restores the module-level stats afterwards, so state left by other
    # tests cannot satisfy the assertions and this test leaks nothing.
    monkeypatch.setattr(worker.stats, "jobs_done", 0)
    monkeypatch.setattr(worker.stats, "last_job_at", None)
    monkeypatch.setattr(worker.stats, "busy", False)
    monkeypatch.setattr(worker, "run_suno_generation", lambda task_id, params: None)

    queue = _OneJobQueue()
    handled = await worker.poll_once(queue)

    assert handled is True
    # The success path must ack the job.
    assert queue.acked is True
    assert worker.stats.jobs_done == 1
    assert worker.stats.busy is False
    assert worker.stats.last_job_at is not None
|
||||
|
||||
@@ -21,6 +21,7 @@ from providers.suno import (
|
||||
)
|
||||
from providers.local import run_local_generation
|
||||
from _shared.reliable_queue import ReliableQueue
|
||||
from _shared.heartbeat import WorkerStats, utc_now_iso
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -28,6 +29,8 @@ REDIS_URL = os.getenv("REDIS_URL", "redis://192.168.45.54:6379")
|
||||
QUEUE_KEY = "queue:music-render"
|
||||
PAUSED_KEY = "queue:paused"
|
||||
|
||||
stats = WorkerStats()
|
||||
|
||||
# Maps job_type → module-level function name (string).
|
||||
# _dispatch resolves the name via globals() at call time so unittest.mock.patch
|
||||
# on "worker.<name>" is correctly intercepted.
|
||||
@@ -74,6 +77,7 @@ async def poll_once(queue: ReliableQueue) -> bool:
|
||||
if result is None:
|
||||
return False
|
||||
payload, raw = result
|
||||
stats.busy = True
|
||||
try:
|
||||
# sync provider 함수 — thread로 실행해서 이벤트 루프 블로킹 방지
|
||||
await asyncio.to_thread(_dispatch, payload)
|
||||
@@ -81,8 +85,14 @@ async def poll_once(queue: ReliableQueue) -> bool:
|
||||
logger.exception("dispatch unhandled exception task_id=%s",
|
||||
payload.get("task_id"))
|
||||
await queue.fail(raw, payload)
|
||||
stats.jobs_failed += 1
|
||||
stats.last_job_at = utc_now_iso()
|
||||
stats.busy = False
|
||||
return True
|
||||
await queue.ack(raw)
|
||||
stats.jobs_done += 1
|
||||
stats.last_job_at = utc_now_iso()
|
||||
stats.busy = False
|
||||
return True
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user