Spaces:

arjun10g
/

RAG-PSYCH

Sleeping

File size: 7,194 Bytes

"""
Generation: call Claude with retrieved chunks, force citations, validate.

The retrieval layer (api/hybrid.py) returns either a ranked list of
RerankedHit objects or `[]` when the best chunk falls below the refusal
threshold. We mirror that contract:

  - Empty hits → return the canonical refusal string without an API call.
    Saves money and guarantees identical refusal behavior across paths.
  - Non-empty → ask Claude to answer from those chunks only, with
    `[chunk_id]` citations after every factual claim. Post-generation we
    parse the cited IDs and flag any that don't appear in the retrieved
    set — that's our hallucination tripwire.

Polarity handling lives in the system prompt as defense-in-depth on top of
the retrieval-time NegEx filter (`api/negation.py`): if a denied/negated
chunk somehow survives RRF + rerank + NegEx, the model is still instructed
not to cite it as positive evidence.

Single-turn for now; Phase 4 wraps this in a FastAPI endpoint with audit
logging. Phase 6 will call it many times from the eval harness — the
system prompt is well below Haiku 4.5's 4096-token cache minimum so
prompt caching isn't worth wiring up here.
"""

from __future__ import annotations

import os
import re
import time
from dataclasses import dataclass

import anthropic

from .hybrid import RerankedHit

# Canonical refusal sentence. `generate` returns it verbatim when it receives
# no hits and uses it to detect a refusal in the model's reply. SYSTEM_PROMPT
# rule 5 spells out the same sentence, so the two must be kept in sync.
REFUSAL_STRING = "The provided notes do not contain information to answer this."

# Model id used when neither the `model` argument of `generate` nor the
# ANTHROPIC_MODEL environment variable supplies one.
DEFAULT_MODEL = "claude-haiku-4-5"
# Default for the `max_tokens` parameter of `generate`.
DEFAULT_MAX_TOKENS = 2048

# Captures the digits of a bracketed integer such as "[42]". Each bracket pair
# must contain digits only, so a combined form like "[42, 57]" does not match.
_CITATION_RE = re.compile(r"\[(\d+)\]")

# System prompt passed as `system=` whenever `generate` calls the API. It lays
# out six ordered rules: cite chunk ids in square brackets after every factual
# claim, treat hypotheticals as pattern-matching rather than diagnosis, list
# differentials with distinguishing features, check polarity before citing,
# refuse with one exact sentence, and hedge when coverage is thin.
# NOTE: the sentence quoted in rule 5 duplicates REFUSAL_STRING; refusal
# detection in `generate` depends on the two staying identical.
SYSTEM_PROMPT = """You are a clinical reference assistant for a portfolio RAG demo.
You answer questions strictly from the numbered context chunks the user provides — not
from outside knowledge — but you may reason about hypothetical scenarios by applying
the definitions and criteria in the chunks to the facts the user describes.

RULES (follow exactly, in order):

1. Ground every factual claim in the chunks. Definitions, criteria, symptom lists,
   prevalence figures, treatment options, and any clinical fact you state must be
   followed by a citation in square brackets giving the chunk id, e.g.
   "Generalised anxiety disorder is characterised by marked symptoms of anxiety [42]."
   If multiple chunks support one claim, cite all of them: "[42][57]". Do not import
   facts from training knowledge that are not supported by a chunk.

2. Hypothetical scenarios are allowed. When the user describes a hypothetical patient
   (e.g. "a patient presents with X, Y, and Z — what could this be?"), match the
   described features against the criteria and descriptions in the chunks, and
   report the conditions whose criteria are consistent with that presentation.
   Frame the answer as pattern-matching, not diagnosis. Phrases like "these features
   are consistent with X per [N]", "this presentation could meet criteria for Y [M]",
   or "the described symptoms overlap with the criteria for Z [K]" are appropriate.
   Do NOT write "the patient has" or "the diagnosis is" — the user has not supplied
   a real patient, and this is not a clinical consultation.

3. Differential-style questions. When asked for a differential or "what else could
   this be?", list every candidate condition supported by the chunks, with the
   defining feature(s) that would distinguish each, cited.

4. Polarity check before citing. If a chunk states a patient does NOT have, denies,
   or has no history of a condition, do NOT cite it as evidence FOR that condition.
   "Negative for X", "ruled out X", "without X", and "denies X" are evidence about
   absence, not presence.

5. Refuse only when the chunks genuinely do not cover the topic at all. Respond with
   EXACTLY this string and nothing else:
   "The provided notes do not contain information to answer this."
   Use the refusal when no chunk addresses the question domain, or when the chunks
   discuss only tangential topics. Do NOT refuse merely because the chunks lack the
   exact phrasing of the user's question — if the chunks contain the criteria or
   features the question is about, answer with what the chunks support.

6. Hedge where the chunks are thin. If only one or two chunks marginally address the
   question, say so briefly ("the retrieved material only partially covers this")
   and give the partial answer with citations, rather than refusing.

Output format: prose, complete sentences, citations inline. Match response length
to the question. Simple factual questions get a short answer (2-4 sentences). When
the user asks for criteria, definitions, a full description, a differential, a
symptom list, treatment options, or poses a hypothetical that calls for working
through multiple possibilities, give the full answer the chunks support — do not
truncate. Use short paragraphs or bulleted lists when that makes long answers
easier to scan. Every claim still needs a citation."""


@dataclass(frozen=True)
class Generation:
    """Result of one `generate` call: the answer text plus its citation audit.

    Instances are frozen, so fields cannot be reassigned after construction
    (the list fields themselves are still ordinary mutable lists).
    """

    # Answer text: the model's reply, or REFUSAL_STRING when no API call was made.
    answer: str
    # Integer ids found in bracketed citations, de-duplicated in first-seen order.
    cited_ids: list[int]
    # Members of cited_ids that were not among the retrieved chunk ids.
    invalid_cited_ids: list[int]
    # True when the answer is the refusal string (from the model or the short-circuit).
    refused: bool
    # Model id resolved for this call (also filled in on the short-circuit path).
    model: str
    # Elapsed time around the API call in milliseconds; 0.0 when no call was made.
    latency_ms: float


def generate(
    query: str,
    reranked_hits: list[RerankedHit],
    *,
    model: str | None = None,
    max_tokens: int = DEFAULT_MAX_TOKENS,
) -> Generation:
    """Call Claude with retrieved chunks; return answer + citation audit.

    Args:
        query: The user's question, placed after the context chunks in the
            user message.
        reranked_hits: Retrieved chunks to answer from. An empty list
            short-circuits to the refusal path without an API call.
        model: Model id. When falsy, the ANTHROPIC_MODEL environment variable
            is used if it is set and non-empty, otherwise DEFAULT_MODEL.
        max_tokens: Forwarded to the Messages API as `max_tokens`.

    Returns:
        A `Generation`. `cited_ids` holds the bracketed integer ids found in
        the answer (unique, first-seen order); `invalid_cited_ids` is the
        subset that is not among the retrieved chunk ids. `refused` is True
        when we short-circuited or when the model replied with the refusal
        sentence; in both cases `answer` is exactly REFUSAL_STRING.

    Raises:
        Exceptions raised by the Anthropic client are not caught here and
        propagate to the caller.
    """
    # Chain with `or` instead of using a `.get` default: an ANTHROPIC_MODEL
    # that is set but empty must fall back to DEFAULT_MODEL rather than send
    # an empty model id to the API.
    model = model or os.environ.get("ANTHROPIC_MODEL") or DEFAULT_MODEL
    if not reranked_hits:
        return Generation(
            answer=REFUSAL_STRING,
            cited_ids=[],
            invalid_cited_ids=[],
            refused=True,
            model=model,
            latency_ms=0.0,
        )

    user_msg = _build_user_message(query, reranked_hits)
    client = anthropic.Anthropic()
    t0 = time.perf_counter()
    response = client.messages.create(
        model=model,
        max_tokens=max_tokens,
        system=SYSTEM_PROMPT,
        messages=[{"role": "user", "content": user_msg}],
    )
    latency_ms = (time.perf_counter() - t0) * 1000

    # Only text blocks carry answer prose; other block types are skipped.
    answer = "".join(b.text for b in response.content if b.type == "text").strip()

    # The system prompt displays the refusal sentence inside double quotes, so
    # a reply may echo those quotes. Compare with wrapping quotes/whitespace
    # removed, and hand back the canonical string so both refusal paths
    # (short-circuit and model-produced) yield identical output.
    refused = answer.strip(" \t\r\n\"'\u201c\u201d") == REFUSAL_STRING
    if refused:
        answer = REFUSAL_STRING

    retrieved_ids = {h.hit.chunk_id for h in reranked_hits}
    cited = [int(m) for m in _CITATION_RE.findall(answer)]
    # dict.fromkeys de-duplicates while keeping first-seen order.
    cited_unique = list(dict.fromkeys(cited))
    # Any cited id that was never retrieved is the hallucination tripwire.
    invalid = [c for c in cited_unique if c not in retrieved_ids]

    return Generation(
        answer=answer,
        cited_ids=cited_unique,
        invalid_cited_ids=invalid,
        refused=refused,
        model=model,
        latency_ms=latency_ms,
    )


def _build_user_message(query: str, hits: list[RerankedHit]) -> str:
    """Render the retrieved chunks and the question as one user-message string.

    Each hit becomes a block of the form ``[chunk_id] (provenance)`` followed
    by the chunk text on the next line. Provenance starts with the hit's
    ``source_type`` and appends `` / section`` and `` / title`` for whichever
    of those are truthy. Blocks are separated by a blank line, and the
    question follows under a ``QUESTION:`` label.
    """

    def _render(reranked: RerankedHit) -> str:
        chunk = reranked.hit
        label = chunk.source_type
        # Section comes before title; falsy values contribute nothing.
        suffix = "".join(
            f" / {part}" for part in (chunk.section, chunk.title) if part
        )
        if suffix:
            label += suffix
        return f"[{chunk.chunk_id}] ({label})\n{chunk.chunk_text}"

    rendered = "\n\n".join(_render(item) for item in hits)
    return f"CONTEXT CHUNKS:\n\n{rendered}\n\nQUESTION: {query}"