Spaces:

Justinroy
/

ups-contract-faq2

Sleeping

File size: 2,943 Bytes

import json
from typing import Dict, List, Optional

from openai_client import llm_text
from retriever import retrieve_top_chunks

def _format_excerpts(chunks: List[Dict], max_chars: int = 3200) -> List[Dict]:
    excerpts = []
    for c in chunks:
        excerpts.append({
            "chunk_id": c.get("chunk_id"),
            "article": c.get("article"),
            "section": c.get("section"),
            "page_start": c.get("page_start"),
            "page_end": c.get("page_end"),
            "text": (c.get("text") or "")[:max_chars],
        })
    return excerpts

def answer_question_dynamic(
    question: str,
    top_k: int = 10,
    model: str = "gpt-4.1-mini",
) -> str:
    """
    Answer ``question`` from retrieved contract excerpts.

    Calls ``retrieve_top_chunks`` with ``top_k``; if nothing comes back, a fixed
    notice is returned and the model is never called. Otherwise the chunks are
    reduced with ``_format_excerpts``, embedded as JSON in a prompt that tells
    the model to use only those excerpts and cite chunk_ids, and the result of
    ``llm_text`` for that prompt and ``model`` is returned.
    """
    hits = retrieve_top_chunks(question, top_k=top_k)
    if not hits:
        # Nothing to ground an answer on, so skip the model call entirely.
        return "No relevant contract chunks were retrieved. Try rephrasing the question or confirm KB was built."

    # The prompt template below reads this name when serializing the excerpts.
    excerpts = _format_excerpts(hits)

    grounded_prompt = f"""
You are a contract-grounded assistant. Audience: UPS supervisors/on-road sups.
Scope: UPS/Teamsters National Agreement ONLY.

RULES (non-negotiable):
- Answer using ONLY the excerpts provided.
- Every material claim must be supported by a citation to one or more chunk_ids.
- If the excerpts do not explicitly address the question, say: "Not explicitly addressed in the provided National Agreement excerpts."
- Do not add external policy, practice, or legal advice.

OUTPUT FORMAT (markdown):
### Decision Question
{question}

### Short Answer
(1–3 sentences)

### Mandatory Conditions / Triggers
- ...

### Procedural Requirements
- ...

### Common Failure Points (Supervisor Risk)
- ...

### Contract Citations
- chunk_id (Article/Section/Page)

EXCERPTS (chunk_id + metadata + text):
{json.dumps(excerpts, ensure_ascii=False)}
""".strip()

    return llm_text(grounded_prompt, model=model)

def render_excerpts_markdown(question: str, top_k: int = 8, max_chars: int = 1400) -> str:
    """
    Show retrieved excerpts so the user can validate what will ground the answer.

    Each chunk returned by ``retrieve_top_chunks`` is rendered as a
    ``### <chunk_id>`` heading, a bold metadata line (article, optional
    section, optional page or page range) and the chunk text as a markdown
    blockquote. Blocks are separated by horizontal rules.

    Args:
        question: Query passed to ``retrieve_top_chunks``.
        top_k: Number of chunks requested from the retriever.
        max_chars: Maximum number of text characters shown per chunk; longer
            text is cut at this length and suffixed with an ellipsis.

    Returns:
        The rendered markdown, or a short italic notice when no chunks were
        retrieved.
    """
    chunks = retrieve_top_chunks(question, top_k=top_k)
    if not chunks:
        return "_No retrieved excerpts. Try another query or rebuild KB._"

    blocks = []
    for c in chunks:
        cid = c.get("chunk_id")
        article = c.get("article")
        section = c.get("section")
        p1 = c.get("page_start")
        p2 = c.get("page_end")

        meta = f"Article {article}" if article else "Article —"
        if section:
            meta += f", Section {section}"
        if p1 is not None:
            meta += f" (pp. {p1}-{p2})" if p2 is not None else f" (p. {p1})"

        text = (c.get("text") or "").strip()
        if len(text) > max_chars:
            text = text[:max_chars] + "…"

        # Prefix every continuation line with "> " so multi-line text stays
        # inside the blockquote. This is done outside the f-string on purpose:
        # a backslash inside an f-string expression is a SyntaxError before
        # Python 3.12, and an escaped '\\n' there matches a literal
        # backslash-n rather than a real newline.
        quoted = text.replace("\n", "\n> ")
        blocks.append(f"### {cid}\n**{meta}**\n\n> {quoted}")

    return "\n\n---\n\n".join(blocks)