fix(insta-render): F6 ReliableQueue 적용 — BLMOVE + ack/fail (F6 part 2)

- worker.py: BLPOP → ReliableQueue.dequeue / ack / fail / startup recovery
- _process_one: 예외 시 webhook(failed) 후 raise — poll_once가 fail(raw, payload)
  로 retry/dead-letter 처리
- poll_once 함수 추가 (테스트 단위)
- Dockerfile: build context=services/ 로 올리고 _shared 포함, PYTHONPATH=/app
- docker-compose.yml: insta-render build context 갱신

기존 webhook 호출 동작은 그대로 유지한다 (멱등). retry마다 NAS에 failed가 통보되더라도
NAS에서는 마지막 상태만 보인다. dead-letter 항목은 운영 모니터링에서 별도로 처리한다.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-25 20:13:24 +09:00
parent 32308bede6
commit 1e6638a64b
5 changed files with 128 additions and 23 deletions

View File

@@ -1,11 +1,10 @@
"""Redis BLPOP worker — queue:insta-render → render_slate → NAS webhook.
"""Redis ReliableQueue worker — F6 신뢰성 패턴 (BLMOVE + ack/fail + recovery).
queue:paused가 set이면 대기 (task-watcher가 박재오 활동 감지 시 set).
"""
from __future__ import annotations
import asyncio
import json
import logging
import os
from typing import Any
@@ -14,6 +13,7 @@ import httpx
import redis.asyncio as aioredis
from card_renderer import render_slate
from _shared.reliable_queue import ReliableQueue
logger = logging.getLogger(__name__)
@@ -57,7 +57,10 @@ async def _fetch_slate(client: httpx.AsyncClient, slate_id: int) -> dict:
async def _process_one(client: httpx.AsyncClient, payload: dict) -> None:
"""단일 작업 처리: fetch slate → render → webhook."""
"""단일 작업 처리: fetch slate → render → webhook. 예외 발생 시 webhook(failed) 호출 후 raise.
F6: webhook 통신 외 예외는 poll_once가 fail(raw, payload)로 retry/dead-letter 처리.
"""
task_id = payload["task_id"]
params = payload.get("params", {})
slate_id = params.get("slate_id")
@@ -69,7 +72,6 @@ async def _process_one(client: httpx.AsyncClient, payload: dict) -> None:
slate = await _fetch_slate(client, slate_id)
await _post_update(client, task_id, "processing", 50)
paths = await render_slate(slate, slate_id, template=template)
# 결과 URL은 첫 페이지의 nginx 경로
first_url = f"{INSTA_MEDIA_URL_PREFIX}/{slate_id}/01.png"
await _post_update(
client, task_id, "succeeded", 100, result_path=first_url
@@ -78,29 +80,46 @@ async def _process_one(client: httpx.AsyncClient, payload: dict) -> None:
except Exception as e:
logger.exception("render task=%s 실패", task_id)
await _post_update(client, task_id, "failed", 0, error=str(e))
raise
async def poll_once(queue: ReliableQueue, client: httpx.AsyncClient) -> bool:
    """Run one dequeue -> process -> ack/fail cycle.

    Waits up to 5 seconds on ``queue.dequeue``. If an item arrives, its
    payload is handed to ``_process_one``. On success the raw item is acked;
    if ``_process_one`` raises ``Exception`` the raw item and payload are
    passed to ``queue.fail`` instead.

    Args:
        queue: Reliable queue providing ``dequeue``, ``ack`` and ``fail``.
        client: HTTP client forwarded unchanged to ``_process_one``.

    Returns:
        ``False`` when nothing was dequeued, ``True`` when an item was
        handled (regardless of whether processing succeeded or failed).
    """
    dequeued = await queue.dequeue(timeout=5)
    if dequeued is None:
        return False

    job, raw_item = dequeued
    try:
        await _process_one(client, job)
    except Exception:
        # Failure path: hand the item back to the queue, which owns the
        # retry / dead-letter decision.
        await queue.fail(raw_item, job)
    else:
        await queue.ack(raw_item)
    return True
async def worker_loop():
"""무한 루프 — paused 체크 → BLPOP → process_one."""
"""무한 루프 — paused 체크 → ReliableQueue.dequeue → process_one → ack/fail."""
redis = aioredis.from_url(REDIS_URL, decode_responses=False)
queue = ReliableQueue(redis, queue_key=QUEUE_KEY)
async with httpx.AsyncClient() as client:
logger.info("insta-render worker started (queue=%s)", QUEUE_KEY)
logger.info("insta-render worker started worker_id=%s queue=%s",
queue.worker_id, QUEUE_KEY)
# F6: startup recovery — 이전 crash 시 잔존 orphan 재큐
try:
recovered = await queue.recover()
if recovered:
logger.info("recovered %d orphaned items at startup", recovered)
except Exception:
logger.exception("startup recover failed")
while True:
try:
paused = await redis.get(PAUSED_KEY)
if paused == b"1":
await asyncio.sleep(10)
continue
item = await redis.blpop(QUEUE_KEY, timeout=1)
if item is None:
continue
_, raw = item
try:
payload = json.loads(raw)
except json.JSONDecodeError:
logger.error("invalid queue payload: %r", raw[:200])
continue
await _process_one(client, payload)
await poll_once(queue, client)
except asyncio.CancelledError:
logger.info("worker_loop cancelled")
raise