import json
from typing import Dict, List, Optional

from openai_client import llm_text
from retriever import retrieve_top_chunks


def _format_excerpts(chunks: List[Dict], max_chars: int = 3200) -> List[Dict]:
    """Project retrieved chunks onto the fields that are sent to the model.

    Args:
        chunks: Retrieved chunk dicts. Keys that are absent come through
            as ``None``.
        max_chars: Maximum number of characters of each chunk's text to keep.

    Returns:
        One dict per input chunk holding ``chunk_id``, ``article``,
        ``section``, ``page_start``, ``page_end`` and the truncated ``text``
        (an empty string when the chunk has no text).
    """
    return [
        {
            "chunk_id": chunk.get("chunk_id"),
            "article": chunk.get("article"),
            "section": chunk.get("section"),
            "page_start": chunk.get("page_start"),
            "page_end": chunk.get("page_end"),
            # `or ""` also covers an explicit None stored under "text".
            "text": (chunk.get("text") or "")[:max_chars],
        }
        for chunk in chunks
    ]


def answer_question_dynamic(
    question: str,
    top_k: int = 10,
    model: str = "gpt-4.1-mini",
) -> str:
    """Retrieve top chunks for the question, then answer from those excerpts.

    Args:
        question: The question to answer; it is embedded verbatim in the
            prompt.
        top_k: Number of chunks requested from ``retrieve_top_chunks``.
        model: Model name forwarded to ``llm_text``.

    Returns:
        Whatever ``llm_text`` returns for the assembled prompt, or a fixed
        fallback message when retrieval yields no chunks (in which case the
        model is not called).
    """
    chunks = retrieve_top_chunks(question, top_k=top_k)
    if not chunks:
        return "No relevant contract chunks were retrieved. Try rephrasing the question or confirm KB was built."

    excerpts = _format_excerpts(chunks)

    # The prompt body is kept flush-left so that no indentation leaks into
    # the text sent to the model; .strip() removes the outer blank lines.
    prompt = f"""
You are a contract-grounded assistant.
Audience: UPS supervisors/on-road sups.
Scope: UPS/Teamsters National Agreement ONLY.

RULES (non-negotiable):
- Answer using ONLY the excerpts provided.
- Every material claim must be supported by a citation to one or more chunk_ids.
- If the excerpts do not explicitly address the question, say:
  "Not explicitly addressed in the provided National Agreement excerpts."
- Do not add external policy, practice, or legal advice.

OUTPUT FORMAT (markdown):
### Decision Question
{question}

### Short Answer
(1–3 sentences)

### Mandatory Conditions / Triggers
- ...

### Procedural Requirements
- ...

### Common Failure Points (Supervisor Risk)
- ...

### Contract Citations
- chunk_id (Article/Section/Page)

EXCERPTS (chunk_id + metadata + text):
{json.dumps(excerpts, ensure_ascii=False)}
""".strip()

    return llm_text(prompt, model=model)


def _location_label(chunk: Dict) -> str:
    """Build the "Article …, Section … (pp. …)" label for one chunk."""
    article = chunk.get("article")
    section = chunk.get("section")
    first_page = chunk.get("page_start")
    last_page = chunk.get("page_end")

    label = f"Article {article}" if article else "Article —"
    if section:
        label += f", Section {section}"
    # Pages are compared against None (not truthiness) so page 0 still shows.
    if first_page is not None:
        if last_page is not None:
            label += f" (pp. {first_page}-{last_page})"
        else:
            label += f" (p. {first_page})"
    return label


def render_excerpts_markdown(question: str, top_k: int = 8, max_chars: int = 1400) -> str:
    """Render the retrieved excerpts as markdown for manual validation.

    Each chunk becomes a section headed by its ``chunk_id``, followed by a
    bold article/section/page label and the excerpt text as a blockquote.
    Sections are separated by horizontal rules.

    Args:
        question: Query passed to ``retrieve_top_chunks``.
        top_k: Number of chunks requested from the retriever.
        max_chars: Maximum characters of stripped text shown per chunk; an
            ellipsis is appended when the text is cut.

    Returns:
        The markdown document, or a short italic notice when nothing was
        retrieved.
    """
    chunks = retrieve_top_chunks(question, top_k=top_k)
    if not chunks:
        return "_No retrieved excerpts. Try another query or rebuild KB._"

    blocks = []
    for chunk in chunks:
        chunk_id = chunk.get("chunk_id")
        label = _location_label(chunk)

        text = (chunk.get("text") or "").strip()
        if len(text) > max_chars:
            text = text[:max_chars] + "…"

        # Prefix every continuation line with "> " so a multi-line excerpt
        # stays inside the blockquote. This is done outside the f-string
        # because backslashes in f-string expressions are a SyntaxError
        # before Python 3.12, and it must match real newlines, not the
        # two-character sequence backslash + "n".
        quoted = text.replace("\n", "\n> ")
        blocks.append(f"### {chunk_id}\n**{label}**\n\n> {quoted}")

    return "\n\n---\n\n".join(blocks)