Spaces:
Sleeping
Sleeping
File size: 2,943 Bytes
3fe16f1 2876211 3fe16f1 2876211 3fe16f1 2876211 3fe16f1 2876211 3fe16f1 2876211 3fe16f1 2876211 3fe16f1 2876211 3fe16f1 2876211 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 | import json
from typing import Dict, List, Optional
from openai_client import llm_text
from retriever import retrieve_top_chunks
def _format_excerpts(chunks: List[Dict], max_chars: int = 3200) -> List[Dict]:
excerpts = []
for c in chunks:
excerpts.append({
"chunk_id": c.get("chunk_id"),
"article": c.get("article"),
"section": c.get("section"),
"page_start": c.get("page_start"),
"page_end": c.get("page_end"),
"text": (c.get("text") or "")[:max_chars],
})
return excerpts
def answer_question_dynamic(
    question: str,
    top_k: int = 10,
    model: str = "gpt-4.1-mini",
) -> str:
    """
    Answer a question using only retrieved contract excerpts.

    The question and ``top_k`` are passed to ``retrieve_top_chunks``. When
    nothing comes back, a fixed hint message is returned and no LLM call is
    made. Otherwise the chunks are trimmed by ``_format_excerpts``, serialized
    to JSON, embedded in a fixed-format prompt, and the result of
    ``llm_text(prompt, model=model)`` is returned.
    """
    retrieved = retrieve_top_chunks(question, top_k=top_k)
    if not retrieved:
        return "No relevant contract chunks were retrieved. Try rephrasing the question or confirm KB was built."

    # ensure_ascii=False keeps non-ASCII characters readable in the prompt.
    excerpts_json = json.dumps(_format_excerpts(retrieved), ensure_ascii=False)

    grounded_prompt = f"""
You are a contract-grounded assistant. Audience: UPS supervisors/on-road sups.
Scope: UPS/Teamsters National Agreement ONLY.
RULES (non-negotiable):
- Answer using ONLY the excerpts provided.
- Every material claim must be supported by a citation to one or more chunk_ids.
- If the excerpts do not explicitly address the question, say: "Not explicitly addressed in the provided National Agreement excerpts."
- Do not add external policy, practice, or legal advice.
OUTPUT FORMAT (markdown):
### Decision Question
{question}
### Short Answer
(1–3 sentences)
### Mandatory Conditions / Triggers
- ...
### Procedural Requirements
- ...
### Common Failure Points (Supervisor Risk)
- ...
### Contract Citations
- chunk_id (Article/Section/Page)
EXCERPTS (chunk_id + metadata + text):
{excerpts_json}
""".strip()
    return llm_text(grounded_prompt, model=model)
def render_excerpts_markdown(question: str, top_k: int = 8, max_chars: int = 1400) -> str:
    """
    Render the retrieved excerpts as markdown so the user can validate them.

    The question and ``top_k`` are passed to ``retrieve_top_chunks``. Each
    returned chunk is read via ``.get`` for ``chunk_id``, ``article``,
    ``section``, ``page_start``, ``page_end`` and ``text``, and becomes one
    section: a ``###`` heading with the chunk id, a bold metadata line, and
    the excerpt text as a blockquote. Sections are separated by ``---``.

    Args:
        question: Query forwarded to the retriever.
        top_k: Forwarded to the retriever as ``top_k``.
        max_chars: Maximum number of excerpt characters shown per chunk; an
            ellipsis is appended when the text is cut.

    Returns:
        The markdown document, or a fixed italic hint when nothing was
        retrieved.
    """
    chunks = retrieve_top_chunks(question, top_k=top_k)
    if not chunks:
        return "_No retrieved excerpts. Try another query or rebuild KB._"

    blocks = []
    for c in chunks:
        cid = c.get("chunk_id")
        article = c.get("article")
        section = c.get("section")
        p1 = c.get("page_start")
        p2 = c.get("page_end")

        meta = f"Article {article}" if article else "Article —"
        if section:
            meta += f", Section {section}"
        if p1 is not None:
            meta += f" (pp. {p1}-{p2})" if p2 is not None else f" (p. {p1})"

        text = (c.get("text") or "").strip()
        if len(text) > max_chars:
            text = text[:max_chars] + "…"

        # Prefix every continuation line with "> " so a multi-line excerpt
        # stays inside the blockquote. This is done outside the f-string on
        # purpose: a backslash inside an f-string replacement field is a
        # SyntaxError before Python 3.12, and the previous '\\n' spelling
        # matched a literal backslash + "n" instead of a newline.
        quoted = text.replace("\n", "\n> ")
        blocks.append(f"### {cid}\n**{meta}**\n\n> {quoted}")

    return "\n\n---\n\n".join(blocks)
|