# web-page-backend/agent-office/app/agents/classify_intent.py

"""텔레그램 사용자 응답 자연어 분류 — 화이트리스트 우선, 모호 시 LLM."""
import os
import json
import logging
import httpx

# Module-level logger; classify-by-LLM failures are reported through it.
logger = logging.getLogger("agent-office.classify_intent")

# Model name used when the CLAUDE_HAIKU_MODEL environment variable is not set.
CLAUDE_HAIKU_DEFAULT = "claude-haiku-4-5-20251001"

# Replies (compared after strip + lower-casing) that count as an approval.
APPROVE_WORDS = {
    "승인", "시작", "진행", "ok", "okay", "agree",
    "네", "예", "좋아", "좋아요", "go", "yes", "y",
}
# Replies (compared after strip + lower-casing) that count as a rejection.
# These are also used as prefixes: text following one of them becomes feedback.
REJECT_WORDS = {"반려", "거절", "취소", "no", "nope", "n"}


def _get_api_key() -> str:
    """Return the Anthropic API key from ``ANTHROPIC_API_KEY``.

    Returns:
        The value of the environment variable, or an empty string when the
        variable is not set.
    """
    api_key = os.environ.get("ANTHROPIC_API_KEY", "")
    return api_key


def _get_model() -> str:
    """Return the model name to use for LLM classification.

    Returns:
        The value of the ``CLAUDE_HAIKU_MODEL`` environment variable, or
        ``CLAUDE_HAIKU_DEFAULT`` when the variable is not set. A variable that
        is set to an empty string is returned as-is.
    """
    model_name = os.environ.get("CLAUDE_HAIKU_MODEL", CLAUDE_HAIKU_DEFAULT)
    return model_name


def classify(text: str) -> tuple[str, str | None]:
    """Classify a free-form user reply as approve / reject / unclear.

    Whitelisted keywords are checked first; only replies that match none of
    them are handed to ``_llm_classify``.

    Args:
        text: Raw reply text. ``None`` or an empty string is accepted.

    Returns:
        ``(intent, feedback)`` where ``intent`` is ``"approve"``, ``"reject"``
        or ``"unclear"``. ``feedback`` is the text that followed a reject
        keyword (or whatever ``_llm_classify`` returned), otherwise ``None``.
    """
    if not text:
        return ("unclear", None)
    stripped = text.strip()
    if not stripped:
        # Whitespace-only input carries no intent; do not spend an LLM call on it.
        return ("unclear", None)
    t = stripped.lower()
    if t in APPROVE_WORDS:
        return ("approve", None)
    if t in REJECT_WORDS:
        return ("reject", None)

    # Reject keyword followed by more text: the remainder is the feedback.
    # Longest keyword first (ties broken alphabetically) so "nope, ..." is
    # consumed as "nope" rather than "n"/"no", and so the result never depends
    # on set iteration order.
    for w in sorted(REJECT_WORDS, key=lambda k: (-len(k), k)):
        if not t.startswith(w):
            continue
        following = t[len(w):len(w) + 1]
        # An ASCII keyword glued to more ASCII letters/digits is a different
        # word ("nice", "now", "note"), not a rejection. Non-ASCII keywords
        # keep plain prefix matching because Korean attaches endings directly
        # to the stem (e.g. "반려합니다").
        if w.isascii() and following.isascii() and following.isalnum():
            continue
        rest = stripped[len(w):].lstrip(" ,.-:").strip()
        # Keyword followed only by separators ("no.", "반려 -") is a bare reject.
        return ("reject", rest or None)

    # Approve keyword followed by a space: positive intent, extra text ignored.
    if t.startswith(tuple(w + " " for w in APPROVE_WORDS)):
        return ("approve", None)
    return _llm_classify(text)


# The only intent values callers of classify()/_llm_classify() may receive.
_VALID_INTENTS = frozenset({"approve", "reject", "unclear"})


def _parse_llm_reply(text_out: object) -> tuple[str, str | None]:
    """Extract ``(intent, feedback)`` from the model's raw text reply.

    The outermost ``{...}`` span is parsed as JSON. Anything that is not one
    of the three known intents is mapped to ``"unclear"``; a missing, blank or
    non-string feedback becomes ``None``.

    Raises:
        ValueError: If the ``{...}`` span is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    if not isinstance(text_out, str):
        return ("unclear", None)
    start = text_out.find("{")
    end = text_out.rfind("}") + 1
    if start < 0 or end <= start:
        return ("unclear", None)
    data = json.loads(text_out[start:end])

    raw_intent = data.get("intent")
    intent = raw_intent.strip().lower() if isinstance(raw_intent, str) else "unclear"
    if intent not in _VALID_INTENTS:
        # e.g. the model echoed the template "approve|reject|unclear".
        intent = "unclear"

    raw_feedback = data.get("feedback")
    feedback = raw_feedback.strip() if isinstance(raw_feedback, str) else ""
    return (intent, feedback or None)


def _llm_classify(text: str) -> tuple[str, str | None]:
    """Ask the Anthropic Messages API to classify an ambiguous reply.

    Best-effort: when no API key is configured, or when the request fails or
    the response cannot be interpreted, the failure is logged (for request /
    parsing errors) and ``("unclear", None)`` is returned instead of raising.

    Args:
        text: The user's reply, embedded in the prompt as a quoted string.

    Returns:
        ``(intent, feedback)`` with ``intent`` always one of ``"approve"``,
        ``"reject"`` or ``"unclear"`` and ``feedback`` a non-empty string or
        ``None``.
    """
    api_key = _get_api_key()
    if not api_key:
        return ("unclear", None)
    # json.dumps quotes and escapes the reply, so quotes or newlines typed by
    # the user cannot terminate the quoted field and rewrite the instructions.
    quoted_reply = json.dumps(text, ensure_ascii=False)
    prompt = (
        "사용자 응답을 분류하세요. JSON으로만 응답.\n"
        f"응답: {quoted_reply}\n\n"
        '출력: {"intent":"approve|reject|unclear","feedback":"반려면 수정 방향, 아니면 빈 문자열"}'
    )
    try:
        resp = httpx.post(
            "https://api.anthropic.com/v1/messages",
            headers={"x-api-key": api_key, "anthropic-version": "2023-06-01"},
            json={"model": _get_model(), "max_tokens": 200,
                  "messages": [{"role": "user", "content": prompt}]},
            timeout=15,
        )
        resp.raise_for_status()
        text_out = resp.json()["content"][0]["text"]
        return _parse_llm_reply(text_out)
    except (httpx.HTTPError, httpx.TimeoutException,
            KeyError, IndexError, TypeError, ValueError) as e:
        # IndexError/TypeError cover an unexpected response shape (empty or
        # non-list "content"); ValueError covers invalid JSON at either level.
        logger.warning("LLM 분류 실패: %s", e)
        return ("unclear", None)