fix(tarot): Claude 응답 시간 단축 + nginx timeout 정리

504 Gateway Timeout의 근본 원인은 DSM Reverse Proxy의 기본 timeout(60s)임
(agent-office는 200 OK로 정상 응답했으나, 응답이 client에 도달하기 전에 DSM이 연결을 끊음).
이 커밋과 별개로, 사용자 측에서 DSM Reverse Proxy timeout을 늘려야 함.

코드 측 대응:
- pipeline.py max_tokens 2048 → 1400 (응답 시간 단축; 3-card spread 응답에는 충분)
- pipeline.py에 latency_ms·tokens 로그 출력 (모니터링)
- nginx /api/agent-office/에 proxy_send_timeout 300s, proxy_connect_timeout 60s를
  추가 (proxy_read_timeout은 WebSocket을 위해 기존 86400s 그대로 유지)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-24 15:43:27 +09:00
parent 3bf87a93fb
commit 20691b5057
2 changed files with 13 additions and 4 deletions

View File

@@ -1,5 +1,6 @@
"""Tarot 파이프라인 — Claude Sonnet 호출 + 파싱 폴백 + reroll 1회.""" """Tarot 파이프라인 — Claude Sonnet 호출 + 파싱 폴백 + reroll 1회."""
import json import json
import logging
import time import time
from typing import Any, Dict from typing import Any, Dict
@@ -12,6 +13,9 @@ from ..config import (
TAROT_COST_OUTPUT_PER_M, TAROT_COST_OUTPUT_PER_M,
TAROT_TIMEOUT_SEC, TAROT_TIMEOUT_SEC,
) )
logger = logging.getLogger("agent-office.tarot")
from ..models import TarotInterpretRequest from ..models import TarotInterpretRequest
from .prompt import SYSTEM_PROMPT, build_user_message from .prompt import SYSTEM_PROMPT, build_user_message
from .schema import validate_interpretation from .schema import validate_interpretation
@@ -62,7 +66,7 @@ async def _call_claude(user_text: str, feedback: str = "") -> tuple[dict, dict,
user_text = f"이전 응답이 다음 이유로 거절됨: {feedback}\n올바른 스키마(시스템 지침)로 다시 응답.\n\n{user_text}" user_text = f"이전 응답이 다음 이유로 거절됨: {feedback}\n올바른 스키마(시스템 지침)로 다시 응답.\n\n{user_text}"
payload = { payload = {
"model": TAROT_MODEL, "model": TAROT_MODEL,
"max_tokens": 2048, "max_tokens": 1400, # 응답 시간 단축 — 3-card spread evidence·interactions 포함 충분
"system": [{"type": "text", "text": SYSTEM_PROMPT, "system": [{"type": "text", "text": SYSTEM_PROMPT,
"cache_control": {"type": "ephemeral"}}], "cache_control": {"type": "ephemeral"}}],
"messages": [{"role": "user", "content": [{"type": "text", "text": user_text}]}], "messages": [{"role": "user", "content": [{"type": "text", "text": user_text}]}],
@@ -82,11 +86,14 @@ async def _call_claude(user_text: str, feedback: str = "") -> tuple[dict, dict,
raw_text = "".join( raw_text = "".join(
b.get("text", "") for b in resp.get("content", []) if b.get("type") == "text" b.get("text", "") for b in resp.get("content", []) if b.get("type") == "text"
) )
parsed = _extract_json(raw_text)
usage = resp.get("usage", {}) or {} usage = resp.get("usage", {}) or {}
tokens_in = int(usage.get("input_tokens", 0) or 0)
tokens_out = int(usage.get("output_tokens", 0) or 0)
logger.info("tarot claude call: latency=%dms, in=%d, out=%d", latency_ms, tokens_in, tokens_out)
parsed = _extract_json(raw_text)
meta = { meta = {
"tokens_in": int(usage.get("input_tokens", 0) or 0), "tokens_in": tokens_in,
"tokens_out": int(usage.get("output_tokens", 0) or 0), "tokens_out": tokens_out,
"latency_ms": latency_ms, "latency_ms": latency_ms,
} }
return parsed, meta, raw_text return parsed, meta, raw_text

View File

@@ -357,6 +357,8 @@ server {
proxy_set_header Upgrade $http_upgrade; proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade"; proxy_set_header Connection "upgrade";
proxy_read_timeout 86400s; proxy_read_timeout 86400s;
proxy_send_timeout 300s;
proxy_connect_timeout 60s;
proxy_pass http://$agent_office_backend$request_uri; proxy_pass http://$agent_office_backend$request_uri;
} }