Spaces:
Sleeping
Sleeping
File size: 4,776 Bytes
c318017 f338c84 c318017 cf207fa f338c84 c318017 f338c84 c318017 cf207fa f338c84 c318017 cf207fa c318017 f338c84 cf207fa f338c84 cf207fa f338c84 cf207fa f338c84 c318017 f338c84 cf207fa f338c84 cf207fa f338c84 cf207fa c318017 f338c84 cf207fa f338c84 c318017 f338c84 cf207fa c318017 f338c84 c318017 f338c84 c318017 f338c84 cf207fa c318017 f338c84 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 | """제목 교열 단일 호출 러너 — solar-pro2 고정.
production 환경(extension) 이 solar-pro2 를 사용하므로 데모도 동일 모델로 고정.
본문 입력 여부에 따라 자동 분기:
- body 가 비어있으면 → `prompt_dev_v1` (제목만, 현 production 동작)
- body 가 있으면 → `prompt_dev_v2` (본문 컨텍스트 + PRIORITY 7)
"""
from __future__ import annotations
import re
import time
from pathlib import Path
from typing import Any
# Model identifier sent with every request. Per the module docstring it is
# pinned so the demo uses the same model as the production extension.
MODEL = "solar-pro2"
# Prompt templates live in a "prompts" directory next to this source file.
PROMPT_ROOT = Path(__file__).resolve().parent / "prompts"
# Title-only prompt set (used when no article body is supplied).
PROMPT_V1_DIR = PROMPT_ROOT / "prompt_dev_v1"
# Body-context prompt set (used when an article body is supplied).
PROMPT_V2_DIR = PROMPT_ROOT / "prompt_dev_v2"
# solar-pro2 occasionally emits a reasoning trace ahead of the real answer and
# closes it with `</think>`. Normally the trace is a paired
# `<think>...</think>`, but an unpaired closing tag also occurs, and that form
# slips past the upstage provider's paired-tag stripping. The demo therefore
# removes both forms deterministically on its own side.
_PAIRED_THINK = re.compile(r"<think>.*?</think>", re.DOTALL)
_ORPHAN_THINK_PREFIX = re.compile(r"^.*?</think>\s*", re.DOTALL)


def _strip_think(raw: str) -> str:
    """Return *raw* with reasoning-trace markup removed and whitespace trimmed.

    Steps, in order:
      1. delete every paired ``<think>...</think>`` span;
      2. if a closing tag is still present, delete everything from the start
         of the text through the first ``</think>`` (plus trailing whitespace);
      3. delete any leftover literal ``</think>`` / ``<think>`` tags;
      4. strip leading and trailing whitespace.
    """
    text = _PAIRED_THINK.sub("", raw)
    if "</think>" in text:
        # An orphan closing tag means the text before it is the trace.
        text = _ORPHAN_THINK_PREFIX.sub("", text, count=1)
    # Closing tag first, then opening tag: same order as a chained replace.
    for leftover_tag in ("</think>", "<think>"):
        text = text.replace(leftover_tag, "")
    return text.strip()
def _load_prompts(prompt_dir: Path) -> tuple[str, str]:
    """Read ``system.txt`` and ``user.txt`` from *prompt_dir* as UTF-8 text.

    Returns:
        ``(system_text, user_text)`` in that order.
    """
    system_text, user_text = (
        (prompt_dir / filename).read_text(encoding="utf-8")
        for filename in ("system.txt", "user.txt")
    )
    return system_text, user_text
def load_default_prompts() -> tuple[str, str]:
    """Load the default, title-only prompt pair (prompt_dev_v1).

    Returns:
        ``(system_text, user_template)`` read from ``PROMPT_V1_DIR``.
    """
    system_text, user_template = _load_prompts(PROMPT_V1_DIR)
    return system_text, user_template
def load_body_prompts() -> tuple[str, str]:
    """Load the body-aware prompt pair (prompt_dev_v2).

    Returns:
        ``(system_text, user_template)`` read from ``PROMPT_V2_DIR``.
    """
    system_text, user_template = _load_prompts(PROMPT_V2_DIR)
    return system_text, user_template
def render_user_message(user_template: str, original: str, category: str, body: str = "") -> str:
    """Fill the ``{{original}}``, ``{{category}}`` and ``{{body}}`` placeholders.

    All placeholders are substituted in a single pass over *user_template*, so
    inserted values are never rescanned. A title such as
    ``"what is {{body}}?"`` is therefore kept verbatim instead of having the
    article body spliced into it, which is what chained ``str.replace`` calls
    would do.

    Args:
        user_template: Template text containing zero or more placeholders.
        original: Value substituted for ``{{original}}``.
        category: Value substituted for ``{{category}}``.
        body: Value substituted for ``{{body}}``; defaults to an empty string.

    Returns:
        The template with every recognised placeholder replaced. Any other
        text, including unknown ``{{...}}`` tokens, is left untouched.
    """
    values = {"original": original, "category": category, "body": body}
    # A callable replacement also keeps backslashes in the values literal;
    # a replacement *string* would be parsed for escapes and group references.
    return re.sub(
        r"\{\{(original|category|body)\}\}",
        lambda match: values[match.group(1)],
        user_template,
    )
def run_title_proofread(
    *,
    client: Any,
    original: str,
    category: str,
    body: str = "",
    temperature: float = 0.0,
    reasoning_effort: str = "low",
    max_tokens: int = 2000,
) -> dict[str, Any]:
    """Run a single title-proofreading chat completion with the fixed ``MODEL``.

    The prompt set is chosen from *body*: when it is empty (or only
    whitespace) the title-only "v1" prompts are used, otherwise the
    body-context "v2" prompts are used.

    Args:
        client: Object exposing ``chat.completions.create(**kwargs)``.
        original: The title to proofread.
        category: Category string substituted into the user prompt.
        body: Optional article body; ``None`` is treated like an empty string.
        temperature: Sampling temperature forwarded to the API (as ``float``).
        reasoning_effort: Forwarded as ``reasoning_effort`` when truthy;
            omitted from the request otherwise.
        max_tokens: Completion token limit forwarded to the API (as ``int``).

    Returns:
        A dict with these keys:
            "output":         model reply after think-token removal and strip
                              ("" on error)
            "user_message":   the user content after placeholder substitution
            "prompt_version": "v1" or "v2"
            "model":          the model identifier used
            "latency_ms":     integer milliseconds spent on the API call
            "usage":          subset of prompt/completion/total token counts
                              reported by the response ({} on error)
            "error":          None on success, else "<ExceptionType>: <message>"

    Failures of the API call, and responses that lack
    ``choices[0].message.content``, are reported through the "error" key
    rather than raised. Errors raised while loading or rendering the prompts
    are not caught here.
    """
    # Tolerate ``None`` the same as "" instead of failing on ``.strip()``.
    body_clean = (body or "").strip()
    if body_clean:
        system_prompt, user_template = load_body_prompts()
        prompt_version = "v2"
    else:
        system_prompt, user_template = load_default_prompts()
        prompt_version = "v1"
    user_msg = render_user_message(user_template, original, category, body_clean)

    # perf_counter is monotonic, so wall-clock adjustments cannot skew latency.
    start = time.perf_counter()

    def _elapsed_ms() -> int:
        return int((time.perf_counter() - start) * 1000)

    def _result(output: str, latency_ms: int, usage: dict, error: Any) -> dict[str, Any]:
        # Single place that defines the shape of the returned record.
        return {
            "output": output,
            "user_message": user_msg,
            "prompt_version": prompt_version,
            "model": MODEL,
            "latency_ms": latency_ms,
            "usage": usage,
            "error": error,
        }

    try:
        kwargs: dict[str, Any] = {
            "model": MODEL,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_msg},
            ],
            "temperature": float(temperature),
            "max_tokens": int(max_tokens),
        }
        if reasoning_effort:
            kwargs["reasoning_effort"] = reasoning_effort
        resp = client.chat.completions.create(**kwargs)
    except Exception as exc:  # noqa: BLE001
        # Boundary with the provider: report any failure through "error".
        return _result("", _elapsed_ms(), {}, f"{type(exc).__name__}: {exc}")

    # Latency covers the API call only, not the post-processing below.
    elapsed_ms = _elapsed_ms()

    try:
        raw = resp.choices[0].message.content or ""
    except (AttributeError, IndexError, TypeError) as exc:
        # Empty or malformed response (e.g. no choices): keep the "error"
        # contract instead of letting the exception escape.
        return _result("", elapsed_ms, {}, f"{type(exc).__name__}: {exc}")
    cleaned = _strip_think(raw)

    usage = getattr(resp, "usage", None)
    usage_dict: dict[str, int] = {}
    if usage:
        for key in ("prompt_tokens", "completion_tokens", "total_tokens"):
            value = getattr(usage, key, None)
            if value is not None:
                usage_dict[key] = value

    return _result(cleaned, elapsed_ms, usage_dict, None)
|