Spaces:

RamsesCamas
/

DocAgentSystem

Sleeping

App Files Files Community

DocAgentSystem / prompting /promptkit.py

RamsesCamas

Initial clean commit for HF Space deployment

d0d2f42 about 1 month ago

raw

history blame contribute delete

10.2 kB

	"""Prompt engineering toolkit: templates, chains, registry, and evaluation."""

	from __future__ import annotations

	import json
	import re
	import time
	import unicodedata
	from dataclasses import dataclass, field
	from typing import Any

	from core.logger import get_logger

	logger = get_logger(__name__)


	# ---------------------------------------------------------------------------
	# Helpers
	# ---------------------------------------------------------------------------

	def _normalize(s: str) -> str:
	"""Lowercase, strip, and remove accents for comparison."""
	s = s.strip().lower()
	nfkd = unicodedata.normalize("NFKD", s)
	return "".join(c for c in nfkd if not unicodedata.combining(c))


	def _extract_json(text: str) -> dict \| None:
	"""Try to parse JSON from LLM output, handling markdown wrappers."""
	text = text.strip()
	# 1. Direct parse
	try:
	obj = json.loads(text)
	if isinstance(obj, dict):
	return obj
	except json.JSONDecodeError:
	pass
	# 2. Markdown code block
	match = re.search(r"```(?:json)?\s\n?(.?)\n?\s*```", text, re.DOTALL)
	if match:
	try:
	obj = json.loads(match.group(1).strip())
	if isinstance(obj, dict):
	return obj
	except json.JSONDecodeError:
	pass
	# 3. Find first {...} (non-greedy, flat)
	match = re.search(r"\{[^{}]*\}", text)
	if match:
	try:
	obj = json.loads(match.group(0))
	if isinstance(obj, dict):
	return obj
	except json.JSONDecodeError:
	pass
	return None


	# ---------------------------------------------------------------------------
	# PromptTemplate
	# ---------------------------------------------------------------------------

	class PromptTemplate:
	"""A reusable prompt with variable substitution and metadata."""

	def __init__(
	self,
	name: str,
	template: str,
	metadata: dict \| None = None,
	) -> None:
	self.name = name
	self.template = template
	self.metadata = metadata or {}

	def render(self, **kwargs: Any) -> str:
	"""Substitute variables in the template."""
	return self.template.format(**kwargs)

	def render_with_examples(
	self, examples: list[dict], **kwargs: Any
	) -> str:
	"""Format few-shot examples and inject them into the template.

	Each example dict should have ``input`` and ``output`` keys.
	The formatted block is passed as the ``{examples}`` variable.
	"""
	lines: list[str] = []
	for ex in examples:
	lines.append(f"Input: {ex['input']}\nOutput: {ex['output']}")
	kwargs["examples"] = "\n\n".join(lines)
	return self.template.format(**kwargs)

	def __repr__(self) -> str:
	version = self.metadata.get("version", "?")
	return f"PromptTemplate(name={self.name!r}, version={version!r})"


	# ---------------------------------------------------------------------------
	# PromptRegistry
	# ---------------------------------------------------------------------------

	class PromptRegistry:
	"""Centralized dictionary of PromptTemplates."""

	def __init__(self) -> None:
	self._templates: dict[str, PromptTemplate] = {}

	def register(self, template: PromptTemplate) -> None:
	self._templates[template.name] = template

	def get(self, name: str) -> PromptTemplate:
	if name not in self._templates:
	raise KeyError(f"Template '{name}' not found in registry.")
	return self._templates[name]

	def list_all(self) -> list[str]:
	return list(self._templates.keys())

	def get_version(self, name: str) -> str:
	return self.get(name).metadata.get("version", "unknown")


	# ---------------------------------------------------------------------------
	# PromptChain
	# ---------------------------------------------------------------------------

	@dataclass
	class ChainResult:
	"""Result of running a PromptChain."""
	steps: list[dict]
	final_response: str
	total_tokens: int
	total_latency_s: float


	class PromptChain:
	"""Execute a sequence of PromptTemplates, piping outputs forward."""

	def __init__(self, templates: list[PromptTemplate]) -> None:
	self.templates = templates

	def run(self, llm_client: Any, initial_input: dict) -> ChainResult:
	steps: list[dict] = []
	current_vars = dict(initial_input)
	total_tokens = 0
	total_latency_s = 0.0

	for i, template in enumerate(self.templates):
	prompt = template.render(**current_vars)
	result = llm_client.chat(prompt)

	response_text = result["response"]
	metadata = result["metadata"]
	tokens = metadata["usage"]["total_tokens"]
	latency_s = metadata["latency_ms"] / 1000

	steps.append({
	"step": i + 1,
	"template": template.name,
	"prompt": prompt,
	"response": response_text,
	"tokens": tokens,
	"latency_s": latency_s,
	})

	total_tokens += tokens
	total_latency_s += latency_s

	# Make output available for subsequent steps
	current_vars[f"step_{i + 1}_output"] = response_text
	current_vars["extraction_result"] = response_text

	return ChainResult(
	steps=steps,
	final_response=steps[-1]["response"] if steps else "",
	total_tokens=total_tokens,
	total_latency_s=total_latency_s,
	)


	# ---------------------------------------------------------------------------
	# evaluate_prompt
	# ---------------------------------------------------------------------------

	@dataclass
	class EvalMetrics:
	"""Metrics returned by evaluate_prompt."""
	accuracy: float
	json_parse_rate: float
	campos_correctos_rate: float
	tokens_promedio: float
	latencia_promedio: float
	details: list[dict] = field(default_factory=list)


	def evaluate_prompt(
	prompt_or_chain: PromptTemplate \| PromptChain,
	llm_client: Any,
	golden_set: list[dict],
	*,
	input_key: str = "ticket",
	delay: float = 1.0,
	verbose: bool = False,
	) -> EvalMetrics:
	"""Run prompt_or_chain against every example in golden_set and score.

	Returns accuracy, JSON parse rate, per-field correctness, average tokens,
	and average latency.
	"""
	total = len(golden_set)
	json_ok = 0
	correct = 0
	campos_correctos = 0
	campos_total = 0
	total_tokens = 0
	total_latency = 0.0
	details: list[dict] = []

	for idx, item in enumerate(golden_set):
	input_text = item["input"]
	expected = item["expected"]

	# -- call the LLM ------------------------------------------------
	try:
	if isinstance(prompt_or_chain, PromptChain):
	chain_result = prompt_or_chain.run(
	llm_client, {input_key: input_text}
	)
	response_text = chain_result.final_response
	tokens = chain_result.total_tokens
	latency_s = chain_result.total_latency_s
	else:
	rendered = prompt_or_chain.render(**{input_key: input_text})
	result = llm_client.chat(rendered)
	response_text = result["response"]
	tokens = result["metadata"]["usage"]["total_tokens"]
	latency_s = result["metadata"]["latency_ms"] / 1000
	except Exception as exc:
	logger.warning("LLM error on item %d: %s", idx, exc)
	details.append({
	"input": input_text,
	"expected": expected,
	"response": str(exc),
	"parsed": None,
	"correct": False,
	"json_valid": False,
	})
	campos_total += 2
	if delay > 0 and idx < total - 1:
	time.sleep(delay)
	continue

	total_tokens += tokens
	total_latency += latency_s

	# -- parse & compare ---------------------------------------------
	parsed = _extract_json(response_text)
	json_valid = parsed is not None
	if json_valid:
	json_ok += 1

	cat_match = False
	pri_match = False

	if parsed:
	cat_match = (
	_normalize(str(parsed.get("categoria", "")))
	== _normalize(str(expected.get("categoria", "")))
	)
	pri_match = (
	_normalize(str(parsed.get("prioridad", "")))
	== _normalize(str(expected.get("prioridad", "")))
	)
	if cat_match:
	campos_correctos += 1
	if pri_match:
	campos_correctos += 1

	campos_total += 2

	if cat_match and pri_match:
	correct += 1

	detail = {
	"input": input_text,
	"expected": expected,
	"response": response_text,
	"parsed": parsed,
	"correct": cat_match and pri_match,
	"json_valid": json_valid,
	"cat_match": cat_match,
	"pri_match": pri_match,
	"tokens": tokens,
	"latency_s": latency_s,
	}
	details.append(detail)

	if verbose:
	status = "OK" if detail["correct"] else "FAIL"
	print(f" [{idx + 1}/{total}] {status} \| {input_text[:60]}")
	if parsed:
	print(f" Esperado : {expected}")
	print(f" Obtenido : {parsed}")
	else:
	print(f" Respuesta (no JSON): {response_text[:120]}")

	if delay > 0 and idx < total - 1:
	time.sleep(delay)

	return EvalMetrics(
	accuracy=correct / total if total else 0,
	json_parse_rate=json_ok / total if total else 0,
	campos_correctos_rate=campos_correctos / campos_total if campos_total else 0,
	tokens_promedio=total_tokens / total if total else 0,
	latencia_promedio=total_latency / total if total else 0,
	details=details,
	)