File size: 4,776 Bytes
c318017
f338c84
c318017
cf207fa
 
 
 
f338c84
 
 
 
c318017
f338c84
 
 
 
c318017
cf207fa
 
 
f338c84
c318017
 
 
 
 
 
 
 
cf207fa
c318017
 
 
 
 
 
f338c84
cf207fa
 
 
f338c84
 
 
cf207fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f338c84
 
 
 
 
 
 
cf207fa
f338c84
 
 
 
c318017
f338c84
cf207fa
 
f338c84
 
cf207fa
 
 
f338c84
 
cf207fa
c318017
f338c84
 
cf207fa
 
 
 
 
 
 
 
 
 
f338c84
 
 
c318017
f338c84
 
 
 
 
 
 
 
 
 
 
 
 
 
cf207fa
c318017
f338c84
 
 
 
 
 
c318017
 
f338c84
 
 
 
 
 
 
 
 
c318017
f338c84
cf207fa
c318017
f338c84
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
"""제목 교열 단일 호출 러너 — solar-pro2 고정.

production 환경(extension) 이 solar-pro2 를 사용하므로 데모도 동일 모델로 고정.

본문 입력 여부에 따라 자동 분기:
  - body 가 비어있으면  → `prompt_dev_v1` (제목만, 현 production 동작)
  - body 가 있으면      → `prompt_dev_v2` (본문 컨텍스트 + PRIORITY 7)
"""

from __future__ import annotations

import re
import time
from pathlib import Path
from typing import Any

# Model identifier sent with every request; fixed so the demo matches production.
MODEL = "solar-pro2"
# Prompt templates live in a "prompts" directory that sits next to this file.
PROMPT_ROOT = Path(__file__).resolve().with_name("prompts")
# Title-only prompt set.
PROMPT_V1_DIR = PROMPT_ROOT.joinpath("prompt_dev_v1")
# Prompt set that additionally receives the article body as context.
PROMPT_V2_DIR = PROMPT_ROOT.joinpath("prompt_dev_v2")

# solar-pro2 occasionally emits a reasoning trace ahead of the actual answer and
# closes it with `</think>`. Normally the trace is a paired `<think>...</think>`,
# but an unpaired closing tag also occurs. That case slips past the upstage
# provider's paired-tag strip as well, so it is removed deterministically here
# at the demo layer.
_PAIRED_THINK = re.compile(r"<think>.*?</think>", re.DOTALL)
_ORPHAN_THINK_PREFIX = re.compile(r"^.*?</think>\s*", re.DOTALL)


def _strip_think(raw: str) -> str:
    """Remove reasoning-trace markup from a raw model response.

    Steps, in order:
      1. Drop every paired ``<think>...</think>`` span.
      2. If a closing tag is still present, drop everything up to and
         including the first one (the unpaired-trace case).
      3. Delete any tag text that is still left over.

    Returns:
        The remaining text with surrounding whitespace trimmed.
    """
    text = _PAIRED_THINK.sub("", raw)
    if "</think>" in text:
        # Only the first orphan closing tag delimits a prefix to discard.
        text = _ORPHAN_THINK_PREFIX.sub("", text, count=1)
    # Closing tags are removed before opening tags, as a final sweep.
    for leftover_tag in ("</think>", "<think>"):
        text = text.replace(leftover_tag, "")
    return text.strip()


def _load_prompts(prompt_dir: Path) -> tuple[str, str]:
    """Read the prompt pair stored in ``prompt_dir``.

    Args:
        prompt_dir: Directory containing ``system.txt`` and ``user.txt``.

    Returns:
        ``(system, user)`` — the UTF-8 decoded contents of the two files,
        read in that order.
    """
    system_text, user_text = (
        (prompt_dir / filename).read_text(encoding="utf-8")
        for filename in ("system.txt", "user.txt")
    )
    return system_text, user_text


def load_default_prompts() -> tuple[str, str]:
    """Return the ``(system, user)`` prompt pair for title-only mode (prompt_dev_v1)."""
    system_prompt, user_template = _load_prompts(PROMPT_V1_DIR)
    return system_prompt, user_template


def load_body_prompts() -> tuple[str, str]:
    """Return the ``(system, user)`` prompt pair for body-context mode (prompt_dev_v2)."""
    system_prompt, user_template = _load_prompts(PROMPT_V2_DIR)
    return system_prompt, user_template


def render_user_message(user_template: str, original: str, category: str, body: str = "") -> str:
    """Fill the ``{{original}}``, ``{{category}}`` and ``{{body}}`` placeholders.

    Substitution is done in a single pass over ``user_template`` only, so
    placeholder-looking text inside the supplied values (for example a title
    that literally contains ``{{body}}``) is inserted verbatim and is never
    substituted a second time.

    Args:
        user_template: Template text containing zero or more placeholders.
        original: Value inserted for ``{{original}}``.
        category: Value inserted for ``{{category}}``.
        body: Value inserted for ``{{body}}``; defaults to the empty string.

    Returns:
        The template with every recognised placeholder replaced. Any other
        ``{{...}}`` text is left untouched.
    """
    values = {"original": original, "category": category, "body": body}
    # A callable replacement keeps backslashes in the values literal
    # (a plain replacement string would be parsed for escapes/group refs).
    return re.sub(
        r"\{\{(original|category|body)\}\}",
        lambda match: values[match.group(1)],
        user_template,
    )


def run_title_proofread(
    *,
    client: Any,
    original: str,
    category: str,
    body: str = "",
    temperature: float = 0.0,
    reasoning_effort: str = "low",
    max_tokens: int = 2000,
) -> dict[str, Any]:
    """Run a single title-proofreading LLM call; the model is always ``MODEL``.

    When ``body`` is empty (or whitespace only) the v1 title-only prompt is
    used; otherwise the v2 prompt, which receives the body as context.

    Args:
        client: Object exposing ``chat.completions.create(**kwargs)``.
        original: The title to proofread.
        category: Category value substituted into the user template.
        body: Optional article body; ``None`` is treated as empty.
        temperature: Sampling temperature forwarded to the API as a float.
        reasoning_effort: Forwarded as ``reasoning_effort`` when truthy;
            omitted from the request otherwise.
        max_tokens: Completion token limit forwarded to the API as an int.

    Returns:
        A dict with these keys::

            "output":         str         # response text, think tags removed, stripped
            "user_message":   str         # user content after placeholder substitution
            "prompt_version": str         # "v1" | "v2"
            "model":          str
            "latency_ms":     int         # elapsed time around the API call
            "usage":          dict        # token counts that the response exposes
            "error":          str | None  # None on success

        Failures of the API call, and a response without any choices, are
        reported through ``"error"`` (with ``"output"`` left empty) rather
        than raised. Errors while loading the prompt files still propagate.
    """
    # Tolerate ``None`` coming from an unset input field.
    body_clean = (body or "").strip()
    if body_clean:
        system_prompt, user_template = load_body_prompts()
        prompt_version = "v2"
    else:
        system_prompt, user_template = load_default_prompts()
        prompt_version = "v1"

    user_msg = render_user_message(user_template, original, category, body_clean)

    result: dict[str, Any] = {
        "output": "",
        "user_message": user_msg,
        "prompt_version": prompt_version,
        "model": MODEL,
        "latency_ms": 0,
        "usage": {},
        "error": None,
    }

    # perf_counter is monotonic, so the measured latency cannot be skewed
    # (or go negative) when the system wall clock is adjusted mid-call.
    start = time.perf_counter()
    try:
        kwargs: dict[str, Any] = {
            "model": MODEL,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_msg},
            ],
            "temperature": float(temperature),
            "max_tokens": int(max_tokens),
        }
        if reasoning_effort:
            kwargs["reasoning_effort"] = reasoning_effort
        resp = client.chat.completions.create(**kwargs)
    except Exception as exc:  # noqa: BLE001
        result["latency_ms"] = int((time.perf_counter() - start) * 1000)
        result["error"] = f"{type(exc).__name__}: {exc}"
        return result

    result["latency_ms"] = int((time.perf_counter() - start) * 1000)

    # Collect whichever token counters the response object exposes.
    usage = getattr(resp, "usage", None)
    usage_dict: dict[str, int] = {}
    if usage:
        for key in ("prompt_tokens", "completion_tokens", "total_tokens"):
            value = getattr(usage, key, None)
            if value is not None:
                usage_dict[key] = value
    result["usage"] = usage_dict

    # A response without choices goes through the error field instead of
    # surfacing as an IndexError/TypeError from ``choices[0]``.
    choices = getattr(resp, "choices", None)
    if not choices:
        result["error"] = "EmptyResponse: completion response contained no choices"
        return result

    raw = choices[0].message.content or ""
    result["output"] = _strip_think(raw)
    return result