import json
from typing import Dict, List

from openai_client import llm_text
from retriever import retrieve_top_chunks


def _format_excerpts(chunks: List[Dict], max_chars: int = 3200) -> List[Dict]:
    """Trim retrieved chunks down to the metadata and text the prompt needs."""
    excerpts = []
    for c in chunks:
        excerpts.append({
            "chunk_id": c.get("chunk_id"),
            "article": c.get("article"),
            "section": c.get("section"),
            "page_start": c.get("page_start"),
            "page_end": c.get("page_end"),
            "text": (c.get("text") or "")[:max_chars],
        })
    return excerpts
def answer_question_dynamic(
    question: str,
    top_k: int = 10,
    model: str = "gpt-4.1-mini",
) -> str:
    """
    Retrieve top chunks for the question, then answer using ONLY those excerpts.
    """
    chunks = retrieve_top_chunks(question, top_k=top_k)
    if not chunks:
        return "No relevant contract chunks were retrieved. Try rephrasing the question or confirm the KB was built."

    excerpts = _format_excerpts(chunks)
    prompt = f"""
You are a contract-grounded assistant. Audience: UPS supervisors/on-road sups.
Scope: UPS/Teamsters National Agreement ONLY.

RULES (non-negotiable):
- Answer using ONLY the excerpts provided.
- Every material claim must be supported by a citation to one or more chunk_ids.
- If the excerpts do not explicitly address the question, say: "Not explicitly addressed in the provided National Agreement excerpts."
- Do not add external policy, practice, or legal advice.

OUTPUT FORMAT (markdown):
### Decision Question
{question}

### Short Answer
(1–3 sentences)

### Mandatory Conditions / Triggers
- ...

### Procedural Requirements
- ...

### Common Failure Points (Supervisor Risk)
- ...

### Contract Citations
- chunk_id (Article/Section/Page)

EXCERPTS (chunk_id + metadata + text):
{json.dumps(excerpts, ensure_ascii=False)}
""".strip()
    return llm_text(prompt, model=model)
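
# Usage sketch (hypothetical question; assumes the knowledge base behind
# retrieve_top_chunks has already been built and the OpenAI client used by
# llm_text is configured):
#
#     answer = answer_question_dynamic(
#         "What steps are required before discharging an employee?",
#         top_k=10,
#     )
#     print(answer)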
def render_excerpts_markdown(question: str, top_k: int = 8, max_chars: int = 1400) -> str:
    """
    Show retrieved excerpts so the user can validate what will ground the answer.
    """
    chunks = retrieve_top_chunks(question, top_k=top_k)
    if not chunks:
        return "_No retrieved excerpts. Try another query or rebuild the KB._"

    blocks = []
    for c in chunks:
        cid = c.get("chunk_id")
        article = c.get("article")
        section = c.get("section")
        p1 = c.get("page_start")
        p2 = c.get("page_end")

        meta = f"Article {article}" if article else "Article —"
        if section:
            meta += f", Section {section}"
        if p1 is not None:
            meta += f" (pp. {p1}-{p2})" if p2 is not None else f" (p. {p1})"

        text = (c.get("text") or "").strip()
        text = text[:max_chars] + ("…" if len(text) > max_chars else "")
        # Build the blockquote outside the f-string: backslash escapes are not
        # allowed inside f-string expressions before Python 3.12, and the intent
        # is to continue the "> " quote prefix on every line of the excerpt.
        quoted = text.replace("\n", "\n> ")
        blocks.append(f"### {cid}\n**{meta}**\n\n> {quoted}")

    return "\n\n---\n\n".join(blocks)
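

if __name__ == "__main__":
    # Minimal smoke test (hypothetical question): prints the retrieved excerpts
    # first so a reviewer can validate the grounding, then the grounded answer.
    # Assumes the KB has been built and the OpenAI client is configured.
    demo_question = "When must a steward be present during a disciplinary meeting?"
    print(render_excerpts_markdown(demo_question, top_k=5))
    print("\n\n====\n\n")
    print(answer_question_dynamic(demo_question, top_k=5))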