Spaces:

Justinroy
/

ups-contract-faq2

Sleeping

Justin Tippins

Use dynamic retrieval for answering and excerpt preview

2876211 3 months ago

2.94 kB

	import json
	from typing import Dict, List, Optional

	from openai_client import llm_text
	from retriever import retrieve_top_chunks

	def _format_excerpts(chunks: List[Dict], max_chars: int = 3200) -> List[Dict]:
	excerpts = []
	for c in chunks:
	excerpts.append({
	"chunk_id": c.get("chunk_id"),
	"article": c.get("article"),
	"section": c.get("section"),
	"page_start": c.get("page_start"),
	"page_end": c.get("page_end"),
	"text": (c.get("text") or "")[:max_chars],
	})
	return excerpts

	def answer_question_dynamic(
	    question: str,
	    top_k: int = 10,
	    model: str = "gpt-4.1-mini",
	) -> str:
	    """
	    Answer a question from dynamically retrieved contract excerpts.

	    Fetches up to ``top_k`` chunks for ``question`` via ``retrieve_top_chunks``,
	    embeds them as JSON in a prompt that instructs the model to answer using
	    ONLY those excerpts, and returns whatever ``llm_text`` produces for that
	    prompt with the given ``model``.

	    If retrieval yields nothing, a fixed guidance message is returned and the
	    model is not called.
	    """
	    retrieved = retrieve_top_chunks(question, top_k=top_k)
	    if not retrieved:
	        return "No relevant contract chunks were retrieved. Try rephrasing the question or confirm KB was built."

	    # Serialize once up front; ensure_ascii=False keeps non-ASCII text readable.
	    excerpts_json = json.dumps(_format_excerpts(retrieved), ensure_ascii=False)

	    # The surrounding .strip() drops the leading/trailing whitespace of the literal.
	    prompt = f"""
	You are a contract-grounded assistant. Audience: UPS supervisors/on-road sups.
	Scope: UPS/Teamsters National Agreement ONLY.

	RULES (non-negotiable):
	- Answer using ONLY the excerpts provided.
	- Every material claim must be supported by a citation to one or more chunk_ids.
	- If the excerpts do not explicitly address the question, say: "Not explicitly addressed in the provided National Agreement excerpts."
	- Do not add external policy, practice, or legal advice.

	OUTPUT FORMAT (markdown):
	### Decision Question
	{question}

	### Short Answer
	(1–3 sentences)

	### Mandatory Conditions / Triggers
	- ...

	### Procedural Requirements
	- ...

	### Common Failure Points (Supervisor Risk)
	- ...

	### Contract Citations
	- chunk_id (Article/Section/Page)

	EXCERPTS (chunk_id + metadata + text):
	{excerpts_json}
	""".strip()

	    return llm_text(prompt, model=model)

	def render_excerpts_markdown(question: str, top_k: int = 8, max_chars: int = 1400) -> str:
	    """
	    Show retrieved excerpts so the user can validate what will ground the answer.

	    Each chunk returned by ``retrieve_top_chunks`` is rendered as a markdown
	    section: a ``###`` heading with the chunk_id, a metadata line (article,
	    optional section, optional page or page range), and the chunk text as a
	    blockquote. Text is stripped and cut to ``max_chars`` characters, with an
	    ellipsis appended when it was truncated. Sections are joined with
	    horizontal rules.

	    Returns a fixed italic notice when nothing was retrieved.
	    """
	    chunks = retrieve_top_chunks(question, top_k=top_k)
	    if not chunks:
	        return "_No retrieved excerpts. Try another query or rebuild KB._"

	    blocks = []
	    for c in chunks:
	        cid = c.get("chunk_id")
	        article = c.get("article")
	        section = c.get("section")
	        p1 = c.get("page_start")
	        p2 = c.get("page_end")

	        meta = f"Article {article}" if article else "Article —"
	        if section:
	            meta += f", Section {section}"
	        if p1 is not None:
	            meta += f" (pp. {p1}-{p2})" if p2 is not None else f" (p. {p1})"

	        text = (c.get("text") or "").strip()
	        if len(text) > max_chars:
	            text = text[:max_chars] + "…"

	        # Prefix every continuation line with "> " so a multi-line excerpt
	        # stays inside one blockquote. This is done outside the f-string:
	        # backslashes inside an f-string expression are a SyntaxError before
	        # Python 3.12, and the previous '\\n' pattern matched a literal
	        # backslash + "n" rather than a real newline.
	        quoted = text.replace("\n", "\n> ")
	        blocks.append(f"### {cid}\n{meta}\n\n> {quoted}")

	    return "\n\n---\n\n".join(blocks)