| """제목 교열 단일 호출 러너 — solar-pro2 고정. |
| |
| production 환경(extension) 이 solar-pro2 를 사용하므로 데모도 동일 모델로 고정. |
| """ |
|
|
| from __future__ import annotations |
|
|
| import re |
| import time |
| from pathlib import Path |
| from typing import Any |
|
|
| MODEL = "solar-pro2" |
| DEFAULT_PROMPT_DIR = Path(__file__).resolve().parent / "prompts" / "prompt_dev_v1" |
|
|
| |
| |
| |
| _PAIRED_THINK = re.compile(r"<think>.*?</think>", re.DOTALL) |
| _ORPHAN_THINK_PREFIX = re.compile(r"^.*?</think>\s*", re.DOTALL) |


def _strip_think(raw: str) -> str:
    """Remove `<think>...</think>` blocks and any prefix before an unpaired `</think>`.

    Guard order:
    1. Remove paired `<think>...</think>` blocks.
    2. If a `</think>` still remains, treat everything before its first
       occurrence as a leaked reasoning trace and cut it off (the most
       common leakage pattern).
    3. Remove any leftover stray `<think>` / `</think>` tokens.
    """
    s = _PAIRED_THINK.sub("", raw)
    if "</think>" in s:
        s = _ORPHAN_THINK_PREFIX.sub("", s, count=1)
    s = s.replace("</think>", "").replace("<think>", "")
    return s.strip()
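
# Worked examples on hypothetical strings (not from any real model output),
# covering both leakage shapes the guards above handle:
#   _strip_think("<think>plan</think>Revised title")       -> "Revised title"
#   _strip_think("leaked reasoning</think>Revised title")  -> "Revised title"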


def load_default_prompts() -> tuple[str, str]:
    """Return `prompt_dev_v1`'s system.txt and user.txt verbatim."""
    system = (DEFAULT_PROMPT_DIR / "system.txt").read_text(encoding="utf-8")
    user = (DEFAULT_PROMPT_DIR / "user.txt").read_text(encoding="utf-8")
    return system, user
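
# Expected layout next to this file (follows from DEFAULT_PROMPT_DIR above):
#   prompts/prompt_dev_v1/system.txt
#   prompts/prompt_dev_v1/user.txt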


def render_user_message(user_template: str, original: str, category: str) -> str:
    """Substitute the `{{original}}` and `{{category}}` placeholders."""
    return user_template.replace("{{original}}", original).replace("{{category}}", category)
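
# Example with a hypothetical template string:
#   render_user_message("Title: {{original}} / Category: {{category}}", "My Title", "news")
#   -> "Title: My Title / Category: news"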


def run_title_proofread(
    *,
    client: Any,
    original: str,
    category: str,
    system_prompt: str,
    user_template: str,
    temperature: float = 0.0,
    reasoning_effort: str = "low",
    max_tokens: int = 2000,
) -> dict[str, Any]:
    """Make a single LLM call. The model is always `solar-pro2`.

    Returns:
        {
            "output": str,        # model response (stripped, think tokens removed)
            "user_message": str,  # actual user content after placeholder substitution
            "model": str,
            "latency_ms": int,
            "usage": dict,        # {prompt_tokens, completion_tokens, total_tokens}
            "error": str | None,
        }
    """
    user_msg = render_user_message(user_template, original, category)
    start = time.time()
    try:
        kwargs: dict[str, Any] = {
            "model": MODEL,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_msg},
            ],
            "temperature": float(temperature),
            "max_tokens": int(max_tokens),
        }
        if reasoning_effort:
            kwargs["reasoning_effort"] = reasoning_effort
        resp = client.chat.completions.create(**kwargs)
    except Exception as exc:
        return {
            "output": "",
            "user_message": user_msg,
            "model": MODEL,
            "latency_ms": int((time.time() - start) * 1000),
            "usage": {},
            "error": f"{type(exc).__name__}: {exc}",
        }

    elapsed_ms = int((time.time() - start) * 1000)
    raw = resp.choices[0].message.content or ""
    cleaned = _strip_think(raw)
    usage = getattr(resp, "usage", None)
    usage_dict: dict[str, int] = {}
    if usage:
        for k in ("prompt_tokens", "completion_tokens", "total_tokens"):
            v = getattr(usage, k, None)
            if v is not None:
                usage_dict[k] = v

    return {
        "output": cleaned,
        "user_message": user_msg,
        "model": MODEL,
        "latency_ms": elapsed_ms,
        "usage": usage_dict,
        "error": None,
    }
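

if __name__ == "__main__":
    # A minimal usage sketch, not a confirmed setup: it assumes the `openai`
    # SDK as the OpenAI-compatible client and an Upstage-style endpoint for
    # solar-pro2. The base_url and env var name are illustrative assumptions,
    # not taken from this repo.
    import os

    from openai import OpenAI

    client = OpenAI(
        api_key=os.environ["UPSTAGE_API_KEY"],  # hypothetical env var name
        base_url="https://api.upstage.ai/v1",   # assumed OpenAI-compatible endpoint
    )
    system_prompt, user_template = load_default_prompts()
    result = run_title_proofread(
        client=client,
        original="Example headline to proofread",  # hypothetical input
        category="news",                           # hypothetical category value
        system_prompt=system_prompt,
        user_template=user_template,
    )
    print(result["output"] or result["error"])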