diff --git a/tarot-lab/app/pipeline.py b/tarot-lab/app/pipeline.py
index d5dc21c..0704a67 100644
--- a/tarot-lab/app/pipeline.py
+++ b/tarot-lab/app/pipeline.py
@@ -28,6 +28,15 @@ class TarotError(Exception):
     pass
 
 
+class TarotTruncated(TarotError):
+    """Claude reply cut off at max_tokens (a TarotError); reroll feedback must request a shorter answer."""
+
+    def __init__(self, raw_text: str, limit: int):
+        self.raw_text = raw_text
+        self.limit = limit
+        super().__init__(f"Claude 응답이 max_tokens={limit}에서 잘림")
+
+
 def calc_cost(tokens_in: int, tokens_out: int) -> float:
     return (
         tokens_in / 1_000_000 * TAROT_COST_INPUT_PER_M
@@ -64,9 +73,10 @@ async def _call_claude(user_text: str, feedback: str = "") -> tuple[dict, dict,
         raise TarotError("ANTHROPIC_API_KEY missing")
     if feedback:
         user_text = f"이전 응답이 다음 이유로 거절됨: {feedback}\n올바른 스키마(시스템 지침)로 다시 응답.\n\n{user_text}"
+    max_tokens = 2800  # safety margin for the 3-card spread JSON (summary + evidence for 3 cards + interactions + advice)
     payload = {
         "model": TAROT_MODEL,
-        "max_tokens": 1400,  # 응답 시간 단축 — 3-card spread evidence·interactions 포함 충분
+        "max_tokens": max_tokens,
         "system": [{"type": "text", "text": SYSTEM_PROMPT,
                     "cache_control": {"type": "ephemeral"}}],
         "messages": [{"role": "user", "content": [{"type": "text", "text": user_text}]}],
@@ -89,7 +99,15 @@ async def _call_claude(user_text: str, feedback: str = "") -> tuple[dict, dict,
     usage = resp.get("usage", {}) or {}
     tokens_in = int(usage.get("input_tokens", 0) or 0)
     tokens_out = int(usage.get("output_tokens", 0) or 0)
-    logger.info("tarot claude call: latency=%dms, in=%d, out=%d", latency_ms, tokens_in, tokens_out)
+    stop_reason = resp.get("stop_reason", "")
+    logger.info(
+        "tarot claude call: latency=%dms, in=%d, out=%d, stop=%s",
+        latency_ms, tokens_in, tokens_out, stop_reason,
+    )
+    if stop_reason == "max_tokens":
+        # Short-circuit on truncation before trying to parse JSON: _extract_json almost always fails here,
+        # and a reroll driven by a generic JSONDecodeError gives the model no hint to shorten its output.
+        raise TarotTruncated(raw_text, max_tokens)
     parsed = _extract_json(raw_text)
     meta = {
         "tokens_in": tokens_in,
@@ -116,8 +134,15 @@ async def interpret(req: TarotInterpretRequest) -> Dict[str, Any]:
             parsed, meta, _raw = await _call_claude(user_text, feedback=last_error)
         except httpx.HTTPError as e:
             raise TarotError(f"Claude HTTP error: {e}") from e
+        except TarotTruncated as e:
+            last_error = (
+                f"이전 응답이 max_tokens={e.limit}에서 잘렸습니다. "
+                "각 카드의 interpretation·evidence·advice를 1~2문장으로 축약해 "
+                "전체 JSON 길이를 줄여 다시 응답하세요."
+            )
+            continue
         except json.JSONDecodeError as e:
-            last_error = f"JSON 파싱 실패: {e}"
+            last_error = f"JSON 파싱 실패: {e}. 모든 문자열을 닫고 유효한 JSON으로 재작성."
             continue
         total_in += meta["tokens_in"]
         total_out += meta["tokens_out"]
diff --git a/tarot-lab/tests/test_pipeline.py b/tarot-lab/tests/test_pipeline.py
index 49f1983..878e30e 100644
--- a/tarot-lab/tests/test_pipeline.py
+++ b/tarot-lab/tests/test_pipeline.py
@@ -47,10 +47,11 @@ def _valid_response_json():
     }
 
 
-def _claude_envelope(text: str, in_tok=100, out_tok=200):
+def _claude_envelope(text: str, in_tok=100, out_tok=200, stop_reason="end_turn"):
     return {
         "content": [{"type": "text", "text": text}],
         "usage": {"input_tokens": in_tok, "output_tokens": out_tok},
+        "stop_reason": stop_reason,
     }
 
 
@@ -112,3 +113,33 @@ async def test_interpret_http_error():
 def test_calc_cost():
     cost = pipeline.calc_cost(1_000_000, 1_000_000)
     assert cost == pipeline.TAROT_COST_INPUT_PER_M + pipeline.TAROT_COST_OUTPUT_PER_M
+
+
+@respx.mock
+async def test_interpret_truncated_then_success():
+    """First reply is cut off at max_tokens; the second attempt returns valid JSON."""
+    truncated_text = '{"summary": "흐름이 있음", "cards": [{"position": "과거", "card": "the-fool", "reversed": false, "interpretation": "끝나지 않은 문장'
+    valid = json.dumps(_valid_response_json())
+    respx.post("https://api.anthropic.com/v1/messages").mock(
+        side_effect=[
+            httpx.Response(200, json=_claude_envelope(truncated_text, stop_reason="max_tokens")),
+            httpx.Response(200, json=_claude_envelope(valid)),
+        ]
+    )
+    result = await pipeline.interpret(_req())
+    assert result["reroll_count"] == 1
+    assert "interpretation_json" in result
+
+
+@respx.mock
+async def test_interpret_truncated_twice_raises():
+    """Both replies are cut off at max_tokens: expect TarotError whose message contains 'max_tokens'."""
+    truncated_text = '{"summary": "...", "cards": [{"position":'
+    respx.post("https://api.anthropic.com/v1/messages").mock(
+        return_value=httpx.Response(
+            200, json=_claude_envelope(truncated_text, stop_reason="max_tokens")
+        )
+    )
+    with pytest.raises(pipeline.TarotError) as exc_info:
+        await pipeline.interpret(_req())
+    assert "max_tokens" in str(exc_info.value)