Spaces:
Sleeping
Sleeping
"""LLM provider interface with a deterministic offline stub.

Every LLM call in the app goes through :class:`LLMProvider`. In production you set
``ANTHROPIC_API_KEY`` or ``OPENAI_API_KEY`` and a real model answers. With no key set
(CI, tests, local demo) the :class:`StubProvider` produces a grounded answer from the
retrieved context using simple extractive logic — no network, fully reproducible.

The stub is intentionally not a toy: it composes a real answer out of the retrieved
policy/catalog snippets so the end-to-end product (citations, refund decisions,
escalation) is demonstrable with zero paid keys.
"""
from __future__ import annotations

import logging
import re
from typing import Protocol, Sequence

from .config import Settings, get_settings
class LLMProvider(Protocol):
    """Structural interface every LLM backend must satisfy.

    Deliberately tiny (one attribute, one method) so a backend can be swapped
    out or replaced by a test double without touching the callers.
    """

    # Short identifier of the backend, e.g. "stub", "anthropic", "openai".
    name: str

    def answer(self, question: str, context: Sequence[str]) -> str:
        """Return a reply to ``question`` grounded in the ``context`` snippets."""
        ...
# Sentence boundary: whitespace that directly follows ".", "!" or "?". The
# look-behind keeps the punctuation attached to the sentence it terminates.
_SENT_SPLIT = re.compile(r"(?<=[.!?])\s+")

# A token is a maximal run of lowercase ASCII letters and digits; callers are
# expected to lowercase the text before matching.
_WORD = re.compile(r"[a-z0-9]+")
def _tokens(text: str) -> list[str]:
    """Lowercase ``text`` and return its ``_WORD`` matches in order of appearance."""
    lowered = text.lower()
    return _WORD.findall(lowered)
class StubProvider:
    """Deterministic, offline, network-free provider.

    Strategy: rank the sentences inside the retrieved context by lexical overlap
    with the question and stitch the best ones into a concise grounded reply. This
    keeps answers faithful to the knowledge base (no hallucination) which is exactly
    what you want from a support agent.

    Blank context blocks are ignored, so a context consisting only of empty or
    whitespace strings is handled like a missing context instead of producing an
    empty answer.
    """

    name = "stub"

    # Sentences with fewer tokens than this are treated as headings/fragments.
    _MIN_SENTENCE_TOKENS = 4
    # Upper bound on the number of sentences stitched into one reply.
    _MAX_SENTENCES = 2

    def answer(self, question: str, context: Sequence[str]) -> str:
        """Compose a reply to ``question`` out of sentences found in ``context``.

        Args:
            question: The customer's question.
            context: Retrieved snippets, in retrieval order.

        Returns:
            * The escalation message when there is no non-blank context.
            * Otherwise, up to two distinct sentences sharing at least one term
              with the question, best match first, joined by a single space.
            * Otherwise (no sentence is long enough, or none shares a term with
              the question) the first non-blank context block, stripped.
        """
        # Drop blank blocks up front: they can never contribute a sentence, and
        # keeping them would let the fallbacks below return an empty string.
        blocks = [text for text in (raw.strip() for raw in (context or ())) if text]
        if not blocks:
            return (
                "I couldn't find that in our policies or catalog, so I'm escalating "
                "this to a human teammate."
            )

        q_terms = set(_tokens(question))

        # Flatten context into candidate sentences. Each is scored by how many query
        # terms it covers (overlap). Trivially short fragments (e.g. a bare section
        # heading like "Shipping.") are skipped so they can't win on a single keyword
        # while carrying no actual answer.
        candidates: list[tuple[int, int, int, str]] = []
        for src_idx, block in enumerate(blocks):
            for sent in _SENT_SPLIT.split(block):
                sent = sent.strip()
                sent_tokens = _tokens(sent)
                if len(sent_tokens) < self._MIN_SENTENCE_TOKENS:
                    continue
                overlap = len(q_terms.intersection(sent_tokens))
                # The source index is negated so that, under the descending sort
                # below, earlier blocks win ties.
                candidates.append((overlap, len(sent_tokens), -src_idx, sent))

        if not candidates:
            return blocks[0]

        # Highest overlap first; then longer (more informative); then earlier source.
        # The sentence text itself is excluded from the key; the stable sort keeps
        # the result deterministic.
        candidates.sort(key=lambda c: (c[0], c[1], c[2]), reverse=True)

        if candidates[0][0] == 0:
            # No lexical match — surface the first retrieved block rather than
            # fabricate an answer.
            return blocks[0]

        chosen: list[str] = []
        for overlap, _len, _src, sent in candidates:
            if overlap == 0:
                break
            if sent not in chosen:
                chosen.append(sent)
            if len(chosen) >= self._MAX_SENTENCES:
                break
        return " ".join(chosen)
class AnthropicProvider:
    """LLM provider that answers through Anthropic's Messages API."""

    name = "anthropic"

    def __init__(self, settings: Settings):
        """Create the API client from ``settings`` (API key and model name)."""
        # Deferred import: the SDK is an optional dependency, so the offline
        # path must work without it being installed.
        import anthropic  # type: ignore

        self._client = anthropic.Anthropic(api_key=settings.anthropic_api_key)
        self._model = settings.anthropic_model

    def answer(self, question: str, context: Sequence[str]) -> str:
        """Ask the model to answer ``question`` using only ``context``.

        Each snippet is labelled ``[doc N]`` (1-based) and sent in a single user
        message. The text blocks of the response are concatenated and stripped.
        """
        labelled = [
            f"[doc {number}]\n{snippet}"
            for number, snippet in enumerate(context, start=1)
        ]
        joined = "\n\n".join(labelled)
        system = (
            "You are an e-commerce support agent. Answer ONLY from the provided "
            "context. If the answer is not in the context, say you don't know. Be "
            "concise and cite the relevant policy heading or product name."
        )
        user_message = {
            "role": "user",
            "content": f"Context:\n{joined}\n\nCustomer question: {question}",
        }
        msg = self._client.messages.create(
            model=self._model,
            max_tokens=400,
            system=system,
            messages=[user_message],
        )
        # Only text blocks carry the reply; any other block type is ignored.
        pieces = [part.text for part in msg.content if part.type == "text"]
        return "".join(pieces).strip()
class OpenAIProvider:
    """LLM provider that answers through OpenAI's Chat Completions API."""

    name = "openai"

    def __init__(self, settings: Settings):
        """Create the API client from ``settings`` (API key and model name)."""
        # Imported here rather than at module level so the SDK is only needed
        # when this provider is actually constructed.
        from openai import OpenAI  # type: ignore

        self._client = OpenAI(api_key=settings.openai_api_key)
        self._model = settings.openai_model

    def answer(self, question: str, context: Sequence[str]) -> str:
        """Ask the model to answer ``question`` using only ``context``.

        Each snippet is labelled ``[doc N]`` (1-based). The first choice's
        content is returned stripped; missing content yields an empty string.
        """
        labelled = [
            f"[doc {number}]\n{snippet}"
            for number, snippet in enumerate(context, start=1)
        ]
        joined = "\n\n".join(labelled)
        system = (
            "You are an e-commerce support agent. Answer ONLY from the provided "
            "context. If the answer is not in the context, say you don't know. Be "
            "concise and cite the relevant policy heading or product name."
        )
        conversation = [
            {"role": "system", "content": system},
            {
                "role": "user",
                "content": f"Context:\n{joined}\n\nCustomer question: {question}",
            },
        ]
        resp = self._client.chat.completions.create(
            model=self._model,
            max_tokens=400,
            messages=conversation,
        )
        content = resp.choices[0].message.content
        if not content:
            # The API may return no content; normalise that to an empty reply.
            return ""
        return content.strip()
class SafeProvider:
    """Wrap a real provider so any runtime failure degrades to the offline stub.

    A bad API key, rate limit, or network blip on a live call should never 500 a
    support request — we fall back to a grounded extractive answer instead. The
    reported ``name`` is the wrapped provider's so observability stays accurate.
    Each failure is logged with its traceback before falling back, so a
    deployment that is silently running on the stub is still visible in the logs.
    """

    def __init__(self, inner: LLMProvider):
        """Wrap ``inner`` and mirror its ``name`` on this wrapper."""
        self._inner = inner
        self._fallback = StubProvider()
        self.name = inner.name

    def answer(self, question: str, context: Sequence[str]) -> str:
        """Return the wrapped provider's answer, or the stub's when it fails.

        The stub is used when the wrapped provider raises any ``Exception`` or
        returns an empty (falsy) answer. Errors from the wrapped provider are
        logged and never propagate to the caller.
        """
        # Keep the try body to the one call that may fail, so the handler only
        # ever covers the wrapped provider.
        try:
            text = self._inner.answer(question, context)
        except Exception:
            # Deliberate best-effort boundary: log the failure, then degrade.
            logging.getLogger(__name__).warning(
                "LLM provider %r failed; falling back to the offline stub",
                self.name,
                exc_info=True,
            )
            return self._fallback.answer(question, context)
        return text or self._fallback.answer(question, context)
def get_provider(settings: Settings | None = None) -> LLMProvider:
    """Resolve a provider from settings.

    ``auto`` (default) uses a real provider only when its key is present, otherwise
    the offline stub. Explicit values (``anthropic``/``openai``/``stub``) are honored.
    Any failure to construct a real provider degrades gracefully to the stub so the
    app never hard-crashes on a missing optional dependency, and any failure at call
    time is caught by :class:`SafeProvider`.

    Args:
        settings: Settings to resolve from; ``get_settings()`` is used when ``None``.

    Returns:
        A :class:`SafeProvider` around the selected real provider, or a
        :class:`StubProvider` when no real provider is selected or it cannot
        be constructed. Construction failures are logged, not raised.
    """
    # Explicit None check: a caller-supplied settings object is always honored,
    # even if it happens to evaluate as falsy.
    if settings is None:
        settings = get_settings()
    choice = settings.llm_provider.lower()

    def _try(builder) -> LLMProvider | None:
        """Build ``builder(settings)`` wrapped in SafeProvider; None on failure."""
        try:
            return SafeProvider(builder(settings))
        except Exception:
            # Typically a missing optional SDK or a bad client configuration.
            # Log it so the silent downgrade to the stub can be diagnosed.
            logging.getLogger(__name__).warning(
                "Could not construct %s; falling back to the offline stub",
                getattr(builder, "__name__", builder),
                exc_info=True,
            )
            return None

    if choice == "stub":
        return StubProvider()

    if choice == "anthropic" and settings.anthropic_api_key:
        return _try(AnthropicProvider) or StubProvider()

    if choice == "openai" and settings.openai_api_key:
        return _try(OpenAIProvider) or StubProvider()

    if choice == "auto":
        # Anthropic is tried before OpenAI when both keys are configured.
        if settings.anthropic_api_key:
            provider = _try(AnthropicProvider)
            if provider:
                return provider
        if settings.openai_api_key:
            provider = _try(OpenAIProvider)
            if provider:
                return provider

    # Unknown choice, missing key, or every construction attempt failed.
    return StubProvider()