ups-contract-faq2 / answer_generator.py
Justin Tippins
Use dynamic retrieval for answering and excerpt preview
2876211
import json
from typing import Dict, List, Optional
from openai_client import llm_text
from retriever import retrieve_top_chunks
def _format_excerpts(chunks: List[Dict], max_chars: int = 3200) -> List[Dict]:
excerpts = []
for c in chunks:
excerpts.append({
"chunk_id": c.get("chunk_id"),
"article": c.get("article"),
"section": c.get("section"),
"page_start": c.get("page_start"),
"page_end": c.get("page_end"),
"text": (c.get("text") or "")[:max_chars],
})
return excerpts
def answer_question_dynamic(
    question: str,
    top_k: int = 10,
    model: str = "gpt-4.1-mini",
) -> str:
    """
    Answer a question from freshly retrieved contract excerpts.

    The question is passed to ``retrieve_top_chunks``; the resulting chunks
    are trimmed by ``_format_excerpts``, serialized as JSON, and embedded in
    a prompt that instructs the model to answer using only those excerpts.

    Args:
        question: The user's question; interpolated into the prompt verbatim.
        top_k: Number of chunks requested from the retriever.
        model: Model name forwarded to ``llm_text``.

    Returns:
        Whatever ``llm_text`` returns for the prompt (presumably the model's
        markdown answer - confirm against ``openai_client``), or a fixed
        notice when the retriever yields nothing.
    """
    retrieved = retrieve_top_chunks(question, top_k=top_k)
    if not retrieved:
        # Nothing to ground an answer on, so the model is never called.
        return "No relevant contract chunks were retrieved. Try rephrasing the question or confirm KB was built."

    # The prompt template below refers to this local by name.
    excerpts = _format_excerpts(retrieved)
    prompt = f"""
You are a contract-grounded assistant. Audience: UPS supervisors/on-road sups.
Scope: UPS/Teamsters National Agreement ONLY.
RULES (non-negotiable):
- Answer using ONLY the excerpts provided.
- Every material claim must be supported by a citation to one or more chunk_ids.
- If the excerpts do not explicitly address the question, say: "Not explicitly addressed in the provided National Agreement excerpts."
- Do not add external policy, practice, or legal advice.
OUTPUT FORMAT (markdown):
### Decision Question
{question}
### Short Answer
(1–3 sentences)
### Mandatory Conditions / Triggers
- ...
### Procedural Requirements
- ...
### Common Failure Points (Supervisor Risk)
- ...
### Contract Citations
- chunk_id (Article/Section/Page)
EXCERPTS (chunk_id + metadata + text):
{json.dumps(excerpts, ensure_ascii=False)}
"""
    # Drop the newlines that frame the template before sending it.
    return llm_text(prompt.strip(), model=model)
def render_excerpts_markdown(question: str, top_k: int = 8, max_chars: int = 1400) -> str:
    """
    Render the retrieved excerpts for a question as a markdown preview.

    Shows the retrieved excerpts so the user can validate what will ground
    the answer.

    Args:
        question: Query passed to ``retrieve_top_chunks``.
        top_k: Number of chunks requested from the retriever.
        max_chars: Maximum excerpt characters shown per chunk; longer text is
            cut at this length and suffixed with an ellipsis.

    Returns:
        One markdown section per chunk - a heading with the chunk id, a bold
        article/section/page line, and the excerpt as a blockquote - joined
        by horizontal rules. A fixed italic notice is returned when nothing
        was retrieved.
    """
    chunks = retrieve_top_chunks(question, top_k=top_k)
    if not chunks:
        return "_No retrieved excerpts. Try another query or rebuild KB._"

    blocks = []
    for c in chunks:
        cid = c.get("chunk_id")
        article = c.get("article")
        section = c.get("section")
        p1 = c.get("page_start")
        p2 = c.get("page_end")

        meta = f"Article {article}" if article else "Article —"
        if section:
            meta += f", Section {section}"
        if p1 is not None:
            # A page range is shown only when the end page is known.
            meta += f" (pp. {p1}-{p2})" if p2 is not None else f" (p. {p1})"

        text = (c.get("text") or "").strip()
        if len(text) > max_chars:
            text = text[:max_chars] + "…"

        # Prefix every continuation line with "> " so a multi-line excerpt
        # stays inside the blockquote. This is done outside the f-string
        # because backslashes inside replacement fields are a SyntaxError
        # before Python 3.12, and it must match real newlines rather than a
        # literal backslash followed by "n".
        quoted = text.replace("\n", "\n> ")
        blocks.append(f"### {cid}\n**{meta}**\n\n> {quoted}")

    return "\n\n---\n\n".join(blocks)