| """제목 교열 단일 호출 러너 — solar-pro2 고정. |
| |
| production 환경(extension) 이 solar-pro2 를 사용하므로 데모도 동일 모델로 고정. |
| """ |
|
|
| from __future__ import annotations |
|
|
| import re |
| import time |
| from pathlib import Path |
| from typing import Any |
|
|
| MODEL = "solar-pro2" |
| DEFAULT_PROMPT_DIR = Path(__file__).resolve().parent / "prompts" / "prompt_dev_v1" |
|
|
| |
| |
| |
| _PAIRED_THINK = re.compile(r"<think>.*?</think>", re.DOTALL) |
| _ORPHAN_THINK_PREFIX = re.compile(r"^.*?</think>\s*", re.DOTALL) |


def _strip_think(raw: str) -> str:
    """Remove `<think>...</think>` blocks and any prefix before an unpaired `</think>`.

    Guard order:
    1. Remove paired `<think>...</think>` blocks.
    2. If a `</think>` still remains, treat everything before its first
       occurrence as a leaked reasoning trace and cut it off (the most
       common leakage pattern).
    3. Remove any leftover stray `<think>` / `</think>` tokens.
    """
    s = _PAIRED_THINK.sub("", raw)
    if "</think>" in s:
        s = _ORPHAN_THINK_PREFIX.sub("", s, count=1)
    s = s.replace("</think>", "").replace("<think>", "")
    return s.strip()
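
# Worked examples on hypothetical strings (not from any real model output),
# covering both leakage shapes the guards above handle:
#   _strip_think("<think>plan</think>Revised title")       -> "Revised title"
#   _strip_think("leaked reasoning</think>Revised title")  -> "Revised title"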


def load_default_prompts() -> tuple[str, str]:
    """Return `prompt_dev_v1`'s system.txt and user.txt verbatim."""
    system = (DEFAULT_PROMPT_DIR / "system.txt").read_text(encoding="utf-8")
    user = (DEFAULT_PROMPT_DIR / "user.txt").read_text(encoding="utf-8")
    return system, user
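
# Expected layout next to this file (follows from DEFAULT_PROMPT_DIR above):
#   prompts/prompt_dev_v1/system.txt
#   prompts/prompt_dev_v1/user.txt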


def render_user_message(user_template: str, original: str, category: str) -> str:
    """Substitute the `{{original}}` and `{{category}}` placeholders."""
    return user_template.replace("{{original}}", original).replace("{{category}}", category)
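
# Example with a hypothetical template string:
#   render_user_message("Title: {{original}} / Category: {{category}}", "My Title", "news")
#   -> "Title: My Title / Category: news"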


def run_title_proofread(
    *,
    client: Any,
    original: str,
    category: str,
    system_prompt: str,
    user_template: str,
    temperature: float = 0.0,
    reasoning_effort: str = "low",
    max_tokens: int = 2000,
) -> dict[str, Any]:
    """Make a single LLM call. The model is always `solar-pro2`.

    Returns:
        {
            "output": str,        # model response (stripped, think tokens removed)
            "user_message": str,  # actual user content after placeholder substitution
            "model": str,
            "latency_ms": int,
            "usage": dict,        # {prompt_tokens, completion_tokens, total_tokens}
            "error": str | None,
        }
    """
    user_msg = render_user_message(user_template, original, category)
    start = time.time()
    try:
        kwargs: dict[str, Any] = {
            "model": MODEL,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_msg},
            ],
            "temperature": float(temperature),
            "max_tokens": int(max_tokens),
        }
        if reasoning_effort:
            kwargs["reasoning_effort"] = reasoning_effort
        resp = client.chat.completions.create(**kwargs)
    except Exception as exc:
        return {
            "output": "",
            "user_message": user_msg,
            "model": MODEL,
            "latency_ms": int((time.time() - start) * 1000),
            "usage": {},
            "error": f"{type(exc).__name__}: {exc}",
        }

    elapsed_ms = int((time.time() - start) * 1000)
    raw = resp.choices[0].message.content or ""
    cleaned = _strip_think(raw)
    usage = getattr(resp, "usage", None)
    usage_dict: dict[str, int] = {}
    if usage:
        for k in ("prompt_tokens", "completion_tokens", "total_tokens"):
            v = getattr(usage, k, None)
            if v is not None:
                usage_dict[k] = v

    return {
        "output": cleaned,
        "user_message": user_msg,
        "model": MODEL,
        "latency_ms": elapsed_ms,
        "usage": usage_dict,
        "error": None,
    }
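

if __name__ == "__main__":
    # A minimal usage sketch, not a confirmed setup: it assumes the `openai`
    # SDK as the OpenAI-compatible client and an Upstage-style endpoint for
    # solar-pro2. The base_url and env var name are illustrative assumptions,
    # not taken from this repo.
    import os

    from openai import OpenAI

    client = OpenAI(
        api_key=os.environ["UPSTAGE_API_KEY"],  # hypothetical env var name
        base_url="https://api.upstage.ai/v1",   # assumed OpenAI-compatible endpoint
    )
    system_prompt, user_template = load_default_prompts()
    result = run_title_proofread(
        client=client,
        original="Example headline to proofread",  # hypothetical input
        category="news",                           # hypothetical category value
        system_prompt=system_prompt,
        user_template=user_template,
    )
    print(result["output"] or result["error"])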