# Hugging Face Spaces status: Running on Zero
"""Sanity tests for the deterministic (non-LLM) surface of the app.

Covers parse_response() — the JSON-extraction contract between the model
response and the Gradio UI — plus provider auto-detection, backend
dispatch, diagnose() input validation and error wrapping, and the call
shape of each backend wrapper (exercised against fakes, never the
network). The LLM call itself is exempt from unit tests (Principle VII).

Run: pytest gradio-apps/compounding-test/test_diagnose.py -v
"""
| from __future__ import annotations | |
| import pytest | |
| import json | |
| import os | |
| from unittest.mock import MagicMock | |
| from app import ( | |
| ANTHROPIC_MODEL_ID, | |
| HF_MODEL_ID, | |
| MalformedResponseError, | |
| PROVIDERS, | |
| _call_anthropic, | |
| _call_huggingface, | |
| _call_model, | |
| _call_zerogpu, | |
| _detect_provider, | |
| _zerogpu_available, | |
| _zerogpu_invoke, | |
| diagnose, | |
| parse_response, | |
| ) | |
| import app as app_module | |
| # --- Fixtures --------------------------------------------------------------- | |
# Canonical well-formed model response: a fenced ```json block followed by
# the free-form markdown writeup. The sad-path tests below derive their
# inputs from this string via str.replace(), so the exact spelling of each
# key/value pair inside the JSON must not be changed.
VALID_JSON_BLOCK = """```json
{
"constraint": "Underwriting at the quote screen.",
"scores": {
"proprietary_data": { "score": 4, "rationale": "First-party policy data.", "quoted_span": "claim outcomes Progressive observes directly" },
"self_labeling": { "score": 4, "rationale": "Every policy term self-labels.", "quoted_span": "every policy term produces a claim" },
"decreasing_marginal_cost": { "score": 3, "rationale": "Pipeline amortized.", "quoted_span": "17 years of amortized pipeline" },
"defensible_asymmetry": { "score": 3, "rationale": "Integration depth capped.", "quoted_span": "behavior data integration depth" }
},
"quadrant": "compounder",
"closest_portrait": "progressive",
"closest_portrait_paragraph": "Your case tracks Progressive most closely because the labeling loop runs through claim outcomes you directly observe.",
"warnings": []
}
```
# The Verdict
Your initiative is a compounder. Here's why this lands cleanly on all
four conditions and what to do Monday morning to keep it compounding.
"""
| # --- Happy path ------------------------------------------------------------- | |
def test_valid_response_parses_top_level_fields():
    """The well-formed fixture yields the expected top-level fields."""
    parsed = parse_response(VALID_JSON_BLOCK)
    assert parsed.constraint == "Underwriting at the quote screen."
    assert parsed.quadrant == "compounder"
    assert parsed.closest_portrait == "progressive"
    paragraph = parsed.closest_portrait_paragraph
    assert any(word in paragraph for word in ("compounder", "Progressive"))
    assert parsed.warnings == []
def test_valid_response_captures_writeup_after_json_block():
    """Markdown following the fenced JSON block ends up in ``writeup``."""
    writeup = parse_response(VALID_JSON_BLOCK).writeup
    for fragment in ("# The Verdict", "Monday morning"):
        assert fragment in writeup
def test_valid_response_extracts_all_four_scores():
    """All four conditions are present and carry their score details."""
    scores = parse_response(VALID_JSON_BLOCK).scores
    expected_conditions = {
        "proprietary_data",
        "self_labeling",
        "decreasing_marginal_cost",
        "defensible_asymmetry",
    }
    assert set(scores.keys()) == expected_conditions
    assert scores["proprietary_data"].score == 4
    assert scores["decreasing_marginal_cost"].score == 3
    asymmetry = scores["defensible_asymmetry"]
    assert asymmetry.quoted_span == "behavior data integration depth"
| # --- Sad path: missing or malformed JSON ------------------------------------ | |
def test_no_json_block_raises():
    """Plain prose with no fenced JSON block is rejected."""
    prose_only = "Hi there, no JSON here at all, just prose explaining the verdict."
    with pytest.raises(MalformedResponseError, match="json"):
        parse_response(prose_only)
def test_invalid_json_inside_block_raises():
    """A fenced block whose payload is not valid JSON is rejected."""
    broken_payload = "```json\n{ this is not valid json }\n```\n# Writeup"
    with pytest.raises(MalformedResponseError):
        parse_response(broken_payload)
def test_missing_required_top_level_field_raises():
    """Omitting required top-level keys is reported by field name."""
    # The payload lacks closest_portrait, closest_portrait_paragraph and
    # warnings; the error is expected to mention closest_portrait.
    incomplete = (
        "```json\n"
        "{\n"
        '"constraint": "...",\n'
        '"scores": {},\n'
        '"quadrant": "compounder"\n'
        "}\n"
        "```"
    )
    with pytest.raises(MalformedResponseError, match="closest_portrait"):
        parse_response(incomplete)
| # --- Sad path: enum validation ---------------------------------------------- | |
def test_invalid_quadrant_raises():
    """A quadrant value outside the allowed set is rejected."""
    valid_pair = '"quadrant": "compounder"'
    bogus_pair = '"quadrant": "bogus-quadrant"'
    tampered = VALID_JSON_BLOCK.replace(valid_pair, bogus_pair)
    with pytest.raises(MalformedResponseError, match="quadrant"):
        parse_response(tampered)
def test_invalid_closest_portrait_raises():
    """A closest_portrait value outside the allowed set is rejected."""
    valid_pair = '"closest_portrait": "progressive"'
    bogus_pair = '"closest_portrait": "wells-fargo"'
    tampered = VALID_JSON_BLOCK.replace(valid_pair, bogus_pair)
    with pytest.raises(MalformedResponseError, match="closest_portrait"):
        parse_response(tampered)
| # --- Sad path: score-range validation -------------------------------------- | |
def test_score_below_zero_raises():
    """A negative score is rejected."""
    valid_pair = '"score": 4, "rationale": "First-party policy data."'
    bad_pair = '"score": -1, "rationale": "First-party policy data."'
    tampered = VALID_JSON_BLOCK.replace(valid_pair, bad_pair)
    with pytest.raises(MalformedResponseError, match="score"):
        parse_response(tampered)
def test_score_above_four_raises():
    """A score above the maximum of four is rejected."""
    valid_pair = '"score": 4, "rationale": "First-party policy data."'
    bad_pair = '"score": 7, "rationale": "First-party policy data."'
    tampered = VALID_JSON_BLOCK.replace(valid_pair, bad_pair)
    with pytest.raises(MalformedResponseError, match="score"):
        parse_response(tampered)
def test_score_not_integer_raises():
    """A non-integer score (here a string) is rejected."""
    valid_pair = '"score": 4, "rationale": "First-party policy data."'
    bad_pair = '"score": "high", "rationale": "First-party policy data."'
    tampered = VALID_JSON_BLOCK.replace(valid_pair, bad_pair)
    with pytest.raises(MalformedResponseError, match="score"):
        parse_response(tampered)
| # --- Sad path: quoted_span validation -------------------------------------- | |
def test_empty_quoted_span_raises():
    """An empty quoted_span is rejected."""
    populated = '"quoted_span": "claim outcomes Progressive observes directly"'
    blank = '"quoted_span": ""'
    tampered = VALID_JSON_BLOCK.replace(populated, blank)
    with pytest.raises(MalformedResponseError, match="quoted_span"):
        parse_response(tampered)
def test_quoted_span_over_400_chars_raises():
    """A 401-character quoted_span trips the runaway-output guard.

    The 400-char limit is a generous ceiling — Phi-4-mini consistently
    generates ~200-220 char quoted_spans when asked for 5-15 words, so
    the cap was raised from 200 to 400 to accommodate normal model output
    without losing the guard against runaway output.
    """
    populated = '"quoted_span": "claim outcomes Progressive observes directly"'
    oversized = '"quoted_span": "' + "x" * 401 + '"'
    tampered = VALID_JSON_BLOCK.replace(populated, oversized)
    with pytest.raises(MalformedResponseError, match="quoted_span"):
        parse_response(tampered)
@pytest.mark.parametrize("span_length", [250, 400])
def test_quoted_span_up_to_400_chars_accepted(span_length):
    """quoted_spans up to and including the 400-char ceiling are accepted.

    Two lengths are exercised:
      * 250 — typical Phi-4-mini output, well above the prior 200-char cap;
      * 400 — the ceiling itself, complementing the 401-char rejection
        test so the boundary is pinned from both sides.

    The previous version only tried 250 characters (in a variable named
    ``at_limit``), so an off-by-one at the real limit would go unnoticed.
    """
    span = "x" * span_length
    raw = VALID_JSON_BLOCK.replace(
        '"quoted_span": "claim outcomes Progressive observes directly"',
        f'"quoted_span": "{span}"',
    )
    parsed = parse_response(raw)
    assert len(parsed.scores["proprietary_data"].quoted_span) == span_length
| # --- Tolerance: forward-compat and whitespace ------------------------------ | |
def test_extra_unknown_fields_tolerated():
    """Unknown top-level keys are ignored rather than rejected."""
    known_tail = '"warnings": []'
    extended_tail = '"warnings": [], "future_field": "ignored", "another": 42'
    extended = VALID_JSON_BLOCK.replace(known_tail, extended_tail)
    parsed = parse_response(extended)  # must not raise
    assert parsed.quadrant == "compounder"
| # --- Provider auto-detection (multi-backend support) ---------------------- | |
def test_detect_provider_explicit_anthropic_wins():
    """Explicit MODEL_PROVIDER=anthropic beats a present HF token."""
    detected = _detect_provider(
        {"MODEL_PROVIDER": "anthropic", "HF_TOKEN": "hf-xxx"}
    )
    assert detected == "anthropic"
def test_detect_provider_explicit_huggingface_wins():
    """Explicit MODEL_PROVIDER=huggingface beats a present Anthropic key."""
    detected = _detect_provider(
        {"MODEL_PROVIDER": "huggingface", "ANTHROPIC_API_KEY": "sk-xxx"}
    )
    assert detected == "huggingface"
def test_detect_provider_case_insensitive():
    """MODEL_PROVIDER is matched without regard to letter case."""
    mixed_case_env = {"MODEL_PROVIDER": "HuggingFace"}
    assert _detect_provider(mixed_case_env) == "huggingface"
def test_detect_provider_invalid_explicit_falls_through():
    """An unrecognised MODEL_PROVIDER is ignored and auto-detect applies."""
    detected = _detect_provider(
        {"MODEL_PROVIDER": "bogus", "ANTHROPIC_API_KEY": "sk-xxx"}
    )
    assert detected == "anthropic"
def test_detect_provider_anthropic_when_only_anthropic_key_set():
    """Only an Anthropic key in the environment selects anthropic."""
    only_anthropic = {"ANTHROPIC_API_KEY": "sk-xxx"}
    assert _detect_provider(only_anthropic) == "anthropic"
def test_detect_provider_huggingface_when_only_hf_token_set():
    """Only an HF token in the environment selects huggingface."""
    only_hf = {"HF_TOKEN": "hf-xxx"}
    assert _detect_provider(only_hf) == "huggingface"
def test_detect_provider_huggingface_when_running_on_hf_space_without_zerogpu(monkeypatch):
    """On a Space without ZeroGPU deps, fall back to the inference API."""
    monkeypatch.setattr(app_module, "_zerogpu_available", lambda: False)
    space_env = {"SPACE_ID": "mile-hi-ai/compounding-test"}
    assert _detect_provider(space_env) == "huggingface"
def test_detect_provider_prefers_zerogpu_on_pro_space_with_deps(monkeypatch):
    """On a Space with ZeroGPU deps (transformers + torch + spaces),
    default to the free GPU backend rather than burning inference credits."""
    monkeypatch.setattr(app_module, "_zerogpu_available", lambda: True)
    space_env = {"SPACE_ID": "mile-hi-ai/compounding-test"}
    assert _detect_provider(space_env) == "zerogpu"
def test_detect_provider_explicit_anthropic_wins_over_zerogpu(monkeypatch):
    """Explicit MODEL_PROVIDER beats zerogpu auto-detect, even on a Pro Space."""
    monkeypatch.setattr(app_module, "_zerogpu_available", lambda: True)
    detected = _detect_provider(
        {"MODEL_PROVIDER": "anthropic", "SPACE_ID": "mile-hi-ai/compounding-test"}
    )
    assert detected == "anthropic"
def test_detect_provider_explicit_zerogpu_wins():
    """Explicit MODEL_PROVIDER=zerogpu is honoured."""
    explicit_env = {"MODEL_PROVIDER": "zerogpu"}
    assert _detect_provider(explicit_env) == "zerogpu"
def test_zerogpu_is_in_providers_dict():
    """The zerogpu key is registered even when its deps are absent locally,
    so the UI dropdown can list it (the stub raises a clear error if
    invoked)."""
    registered = "zerogpu" in PROVIDERS
    assert registered
def test_detect_provider_alt_hf_token_var():
    """HUGGING_FACE_HUB_TOKEN (also recognised by HuggingFace SDKs) counts
    as an HF credential."""
    legacy_env = {"HUGGING_FACE_HUB_TOKEN": "hf-xxx"}
    assert _detect_provider(legacy_env) == "huggingface"
def test_detect_provider_default_when_nothing_set():
    """With no credentials anywhere the default is anthropic (clearest
    error at call time)."""
    empty_env = {}
    assert _detect_provider(empty_env) == "anthropic"
| # --- Provider dispatch (_call_model routes to the right backend) ----------- | |
def test_call_model_routes_to_anthropic_backend(monkeypatch):
    """_call_model forwards (system, user) to the anthropic backend."""
    recorded = []

    def fake_backend(system, user):
        recorded.append(("anthropic", system, user))
        return "anth-out"

    monkeypatch.setitem(PROVIDERS, "anthropic", fake_backend)
    result = _call_model("system-text", "user-text", "anthropic")
    assert result == "anth-out"
    assert recorded == [("anthropic", "system-text", "user-text")]
def test_call_model_routes_to_huggingface_backend(monkeypatch):
    """_call_model forwards (system, user) to the huggingface backend."""
    recorded = []

    def fake_backend(system, user):
        recorded.append(("hf", system, user))
        return "hf-out"

    monkeypatch.setitem(PROVIDERS, "huggingface", fake_backend)
    result = _call_model("system-text", "user-text", "huggingface")
    assert result == "hf-out"
    assert recorded == [("hf", "system-text", "user-text")]
def test_call_model_unknown_provider_raises():
    """An unregistered provider name raises ValueError mentioning 'provider'."""
    unknown = "bogus-provider"
    with pytest.raises(ValueError, match="provider"):
        _call_model("s", "u", unknown)
| # --- diagnose() input validation ------------------------------------------- | |
# Shared by the diagnose() tests: 250 repetitions of "word", which is long
# enough to clear the 200-word minimum those tests check for. The content
# itself is irrelevant because the backend is always mocked.
_LONG_DESCRIPTION = " ".join(["word"] * 250)
def test_diagnose_empty_description_returns_friendly_error():
    """An empty description yields a prompt to describe, and no JSON."""
    result = diagnose("", None, None, None, provider="zerogpu")
    message, payload = result
    assert "Please describe" in message
    assert payload == ""
def test_diagnose_short_description_returns_word_count_error():
    """A 50-word description is rejected with the minimum and actual count."""
    fifty_words = " ".join("word" for _ in range(50))
    message, payload = diagnose(fifty_words, None, None, None, provider="zerogpu")
    assert "at least 200 words" in message
    assert "50" in message  # the current word count is echoed back
    assert payload == ""
def test_diagnose_long_description_returns_word_count_error():
    """A 5001-word description is rejected with the upper bound."""
    oversized = " ".join("word" for _ in range(5001))
    message, payload = diagnose(oversized, None, None, None, provider="zerogpu")
    assert "under 5000 words" in message
    assert payload == ""
def test_diagnose_unknown_provider_returns_friendly_error():
    """An unknown provider is reported by name instead of raising."""
    message, payload = diagnose(
        _LONG_DESCRIPTION, None, None, None, provider="bogus"
    )
    for expected in ("Unknown model provider", "bogus"):
        assert expected in message
    assert payload == ""
| # --- diagnose() Premium (Anthropic) path ----------------------------------- | |
def test_diagnose_premium_without_any_key_returns_friendly_error(monkeypatch):
    """Premium with neither an env key nor a user key explains what is missing."""
    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
    message, payload = diagnose(
        _LONG_DESCRIPTION, None, None, None,
        provider="anthropic",
        anthropic_api_key=None,
    )
    for expected in ("Premium", "API key"):
        assert expected in message
    assert payload == ""
def test_diagnose_premium_with_empty_string_key_returns_friendly_error(monkeypatch):
    """A whitespace-only key must not count as a supplied key."""
    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
    result = diagnose(
        _LONG_DESCRIPTION, None, None, None,
        provider="anthropic",
        anthropic_api_key=" ",
    )
    message = result[0]
    for expected in ("Premium", "API key"):
        assert expected in message
def test_diagnose_premium_with_env_key_dispatches_to_anthropic(monkeypatch):
    """With only the env key set, diagnose() reaches the anthropic backend
    and the parsed response populates the JSON output."""
    monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-env-xxx")
    seen = {}

    def fake_anthropic(system, user):
        seen.update(
            system=system,
            user=user,
            env_key_at_call_time=os.environ.get("ANTHROPIC_API_KEY"),
        )
        return _VALID_BACKEND_RESPONSE

    monkeypatch.setitem(PROVIDERS, "anthropic", fake_anthropic)
    writeup, json_str = diagnose(
        _LONG_DESCRIPTION, "insurance", "enterprise", "$1M–$10M",
        provider="anthropic",
        anthropic_api_key=None,
    )
    # The backend ran (dispatch worked) and saw the env key.
    assert seen.get("env_key_at_call_time") == "sk-env-xxx"
    # The response passed the parser, so the JSON tab is populated.
    assert json_str
    assert json.loads(json_str)["quadrant"] == "compounder"
def test_diagnose_premium_user_key_passed_directly_not_via_env(monkeypatch):
    """The page's API-key field takes precedence over the Space owner's
    ANTHROPIC_API_KEY env var.

    The visitor's key must reach _call_anthropic DIRECTLY as a kwarg and
    never be written to os.environ; otherwise concurrent requests from
    other visitors could pick up the wrong key from the shared process
    environment. See the _call_anthropic docstring.
    """
    monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-space-owner-xxx")
    seen = {}

    def fake_call_anthropic(system, user, *, api_key=None):
        seen["api_key_kwarg"] = api_key
        seen["env_at_call_time"] = os.environ.get("ANTHROPIC_API_KEY")
        return _VALID_BACKEND_RESPONSE

    monkeypatch.setattr(app_module, "_call_anthropic", fake_call_anthropic)
    diagnose(
        _LONG_DESCRIPTION, None, None, None,
        provider="anthropic",
        anthropic_api_key="sk-user-yyy",
    )
    # The user key arrived via the kwarg (the override mechanism).
    assert seen["api_key_kwarg"] == "sk-user-yyy"
    # CRITICAL: the env was not clobbered with the user's key; the Space
    # owner's key is still what any concurrent request would read.
    assert seen["env_at_call_time"] == "sk-space-owner-xxx"
def test_diagnose_premium_does_not_mutate_env_with_user_key(monkeypatch):
    """Cross-tenant key-leak regression test.

    On a public Space two concurrent visitors may both submit Premium
    requests. Each must use only their own key and must never see the
    other's via os.environ, so user-supplied keys are never written to
    the environment.
    """
    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
    seen = {}

    def fake_call_anthropic(system, user, *, api_key=None):
        seen["api_key_kwarg"] = api_key
        seen["env_at_call_time"] = os.environ.get("ANTHROPIC_API_KEY")
        return _VALID_BACKEND_RESPONSE

    monkeypatch.setattr(app_module, "_call_anthropic", fake_call_anthropic)
    diagnose(
        _LONG_DESCRIPTION, None, None, None,
        provider="anthropic",
        anthropic_api_key="sk-visitor-A-secret",
    )
    # The key travelled as a kwarg, not through the environment.
    assert seen["api_key_kwarg"] == "sk-visitor-A-secret"
    # The env var was unset while the backend ran...
    assert seen["env_at_call_time"] is None
    # ...and is still unset afterwards, leaving no residue for the next
    # visitor's request to pick up.
    assert os.environ.get("ANTHROPIC_API_KEY") is None
def test_diagnose_redacts_user_key_from_error_messages(monkeypatch):
    """Defense-in-depth: a backend exception that echoes the user-supplied
    Anthropic key must have it redacted by the F14 wrapper before the error
    reaches the UI. Symmetric with redactKey() in
    src/lib/anthropic-direct.ts."""
    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
    secret = "sk-ant-very-secret-12345"

    # The class name is asserted on below, so it must stay "_LeakyError".
    class _LeakyError(Exception):
        pass

    def leaky_anthropic(system, user, *, api_key=None):
        # Worst case: the SDK echoes the key in its error text.
        raise _LeakyError(f"auth fail with key {api_key} rejected")

    monkeypatch.setattr(app_module, "_call_anthropic", leaky_anthropic)
    result = diagnose(
        _LONG_DESCRIPTION, None, None, None,
        provider="anthropic",
        anthropic_api_key=secret,
    )
    message = result[0]
    assert secret not in message
    assert "[redacted]" in message
    # The remaining error detail is still surfaced.
    assert "LeakyError" in message
def test_call_anthropic_passes_api_key_to_sdk_constructor(monkeypatch):
    """An api_key given to _call_anthropic must be handed to the Anthropic()
    constructor — not stored in os.environ, not discarded, not exposed
    elsewhere."""
    ctor_kwargs = {}

    class FakeContentBlock:
        text = "ok"

    class FakeMessage:
        content = [FakeContentBlock()]

    class FakeClient:
        class messages:  # noqa: N801
            @staticmethod
            def create(**kwargs):
                return FakeMessage()

    def fake_anthropic_ctor(**kwargs):
        ctor_kwargs.update(kwargs)
        return FakeClient()

    import anthropic as anthropic_module

    monkeypatch.setattr(anthropic_module, "Anthropic", fake_anthropic_ctor)
    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
    _call_anthropic("sys", "usr", api_key="sk-direct-yyy")
    assert ctor_kwargs.get("api_key") == "sk-direct-yyy"
    # The environment was left untouched.
    assert os.environ.get("ANTHROPIC_API_KEY") is None
def test_call_anthropic_without_api_key_uses_env_via_sdk(monkeypatch):
    """Without an api_key the SDK constructor receives no api_key kwarg,
    leaving it to read ANTHROPIC_API_KEY from the environment (its normal
    default). api_key=None is deliberately not passed because the SDK
    treats that differently from 'not supplied'."""
    ctor_kwargs = {}

    class FakeContentBlock:
        text = "ok"

    class FakeMessage:
        content = [FakeContentBlock()]

    class FakeClient:
        class messages:  # noqa: N801
            @staticmethod
            def create(**kwargs):
                return FakeMessage()

    def fake_anthropic_ctor(**kwargs):
        ctor_kwargs.update(kwargs)
        return FakeClient()

    import anthropic as anthropic_module

    monkeypatch.setattr(anthropic_module, "Anthropic", fake_anthropic_ctor)
    monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-env-default")
    _call_anthropic("sys", "usr")  # no api_key kwarg
    # The constructor got no api_key, so the SDK resolves it from env itself.
    assert "api_key" not in ctor_kwargs
def test_diagnose_premium_backend_exception_returns_friendly_error(monkeypatch):
    """When the backend raises (auth fail, rate limit, network), F14 wraps
    it in a markdown message naming the provider, model, exception class and
    exception detail — never a raw trace."""
    monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-test")

    # The class name is asserted on below, so it must stay "FakeAuthError".
    class FakeAuthError(Exception):
        pass

    def failing_anthropic(system, user):
        raise FakeAuthError("invalid x-api-key header")

    monkeypatch.setitem(PROVIDERS, "anthropic", failing_anthropic)
    message, payload = diagnose(
        _LONG_DESCRIPTION, None, None, None,
        provider="anthropic",
        anthropic_api_key=None,
    )
    expected_fragments = (
        "anthropic",
        ANTHROPIC_MODEL_ID,
        "FakeAuthError",
        "invalid x-api-key header",
    )
    for fragment in expected_fragments:
        assert fragment in message
    assert "stack" not in message.lower()  # no stack trace leaked
    assert payload == ""
def test_diagnose_premium_backend_returns_malformed_response(monkeypatch):
    """Backend output that fails schema validation surfaces the parser's
    error message instead of crashing."""
    monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-test")

    def garbage_anthropic(system, user):
        # No fenced JSON block at all.
        return "Sorry, I cannot help with that request."

    monkeypatch.setitem(PROVIDERS, "anthropic", garbage_anthropic)
    message, payload = diagnose(
        _LONG_DESCRIPTION, None, None, None,
        provider="anthropic",
        anthropic_api_key=None,
    )
    assert "malformed output" in message
    assert payload == ""
| # --- _call_anthropic: Anthropic SDK call shape ----------------------------- | |
| # | |
| # Per Principle VII the actual API call is exempt from automated tests | |
| # (the SDK and the remote API are not our code). But the SHAPE of the | |
| # call we make IS our code: model id, system block, cache_control flag, | |
| # messages structure, response-unwrap path. These are easy to typo and | |
| # easy to miss in review. Shape tests catch that without hitting the | |
| # network. | |
def test_call_anthropic_passes_system_block_with_cache_control(monkeypatch):
    """Pin the kwargs _call_anthropic sends to messages.create() and the
    path it uses to unwrap the response text."""
    create_kwargs = {}

    class FakeContentBlock:
        text = "raw response text"

    class FakeMessage:
        content = [FakeContentBlock()]

    class FakeClient:
        class messages:  # noqa: N801 — mirroring SDK's nested .messages.create
            @staticmethod
            def create(**kwargs):
                create_kwargs.update(kwargs)
                return FakeMessage()

    import anthropic as anthropic_module

    monkeypatch.setattr(anthropic_module, "Anthropic", FakeClient)
    returned = _call_anthropic("MY SYSTEM BLOCK", "MY USER PROMPT")

    # content[0].text was unwrapped correctly.
    assert returned == "raw response text"

    # Model and token budget.
    assert create_kwargs["model"] == ANTHROPIC_MODEL_ID
    assert create_kwargs["max_tokens"] == 2500

    # The system prompt is a one-element list of a text block with
    # cache_control set.
    system_blocks = create_kwargs["system"]
    assert isinstance(system_blocks, list)
    assert len(system_blocks) == 1
    only_block = system_blocks[0]
    assert only_block["type"] == "text"
    assert only_block["text"] == "MY SYSTEM BLOCK"
    assert only_block["cache_control"] == {"type": "ephemeral"}

    # The user prompt sits in the messages array.
    assert create_kwargs["messages"] == [
        {"role": "user", "content": "MY USER PROMPT"}
    ]
| # --- _call_huggingface: token resolution + call shape ---------------------- | |
def _install_fake_inference_client(monkeypatch, captured: dict, *,
                                   response_text: str = "hf response",
                                   raises: Exception | None = None):
    """Swap huggingface_hub.InferenceClient for a recording fake.

    The fake stores its constructor kwargs under ``captured["init_kwargs"]``
    and its chat_completion kwargs under ``captured["chat_kwargs"]``.
    chat_completion returns an object whose ``choices[0].message.content``
    is ``response_text``, unless ``raises`` is given, in which case that
    exception is raised instead.
    """
    import huggingface_hub

    class _FakeMsg:
        content = response_text

    class _FakeChoice:
        message = _FakeMsg()

    class _FakeResponse:
        choices = [_FakeChoice()]

    class _FakeClient:
        def __init__(self, **init_kwargs):
            captured["init_kwargs"] = init_kwargs

        def chat_completion(self, **chat_kwargs):
            captured["chat_kwargs"] = chat_kwargs
            if raises is None:
                return _FakeResponse()
            raise raises

    monkeypatch.setattr(huggingface_hub, "InferenceClient", _FakeClient)
def test_call_huggingface_no_token_anywhere_raises_actionable_error(monkeypatch):
    """With no env token and no cached login token, a RuntimeError explains
    that no HuggingFace token was found."""
    import huggingface_hub

    for var in ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN"):
        monkeypatch.delenv(var, raising=False)
    monkeypatch.setattr(huggingface_hub, "get_token", lambda: None)
    with pytest.raises(RuntimeError, match="No HuggingFace token"):
        _call_huggingface("sys", "usr")
def test_call_huggingface_uses_HF_TOKEN_env(monkeypatch):
    """HF_TOKEN from the environment is passed to the client."""
    monkeypatch.setenv("HF_TOKEN", "hf_from_env")
    recorded = {}
    _install_fake_inference_client(monkeypatch, recorded)
    _call_huggingface("sys", "usr")
    init_kwargs = recorded["init_kwargs"]
    assert init_kwargs["token"] == "hf_from_env"
def test_call_huggingface_uses_HUGGING_FACE_HUB_TOKEN_env_as_fallback(monkeypatch):
    """HUGGING_FACE_HUB_TOKEN is used when HF_TOKEN is absent."""
    monkeypatch.delenv("HF_TOKEN", raising=False)
    monkeypatch.setenv("HUGGING_FACE_HUB_TOKEN", "hf_legacy_var")
    recorded = {}
    _install_fake_inference_client(monkeypatch, recorded)
    _call_huggingface("sys", "usr")
    init_kwargs = recorded["init_kwargs"]
    assert init_kwargs["token"] == "hf_legacy_var"
def test_call_huggingface_uses_get_token_when_no_env(monkeypatch):
    """With neither env var set, huggingface_hub.get_token() supplies the token."""
    import huggingface_hub

    for var in ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN"):
        monkeypatch.delenv(var, raising=False)
    monkeypatch.setattr(huggingface_hub, "get_token", lambda: "hf_from_cli_login")
    recorded = {}
    _install_fake_inference_client(monkeypatch, recorded)
    _call_huggingface("sys", "usr")
    init_kwargs = recorded["init_kwargs"]
    assert init_kwargs["token"] == "hf_from_cli_login"
def test_call_huggingface_HF_TOKEN_wins_over_other_sources(monkeypatch):
    """HF_TOKEN takes priority over HUGGING_FACE_HUB_TOKEN and get_token()."""
    import huggingface_hub

    monkeypatch.setenv("HF_TOKEN", "hf_winner")
    monkeypatch.setenv("HUGGING_FACE_HUB_TOKEN", "hf_loser_1")
    monkeypatch.setattr(huggingface_hub, "get_token", lambda: "hf_loser_2")
    recorded = {}
    _install_fake_inference_client(monkeypatch, recorded)
    _call_huggingface("sys", "usr")
    init_kwargs = recorded["init_kwargs"]
    assert init_kwargs["token"] == "hf_winner"
def test_call_huggingface_init_shape_model_provider_timeout(monkeypatch):
    """Pin the model, provider and timeout passed to InferenceClient."""
    monkeypatch.setenv("HF_TOKEN", "hf_test")
    recorded = {}
    _install_fake_inference_client(monkeypatch, recorded)
    _call_huggingface("sys", "usr")
    init_kwargs = recorded["init_kwargs"]
    assert init_kwargs["model"] == HF_MODEL_ID
    # provider="auto" is the critical setting that enables the modern HF
    # Inference Providers routing layer; without it the client falls back
    # to the legacy hf-inference-only path. Guard against its removal.
    assert init_kwargs["provider"] == "auto"
    assert init_kwargs["timeout"] == 120
def test_call_huggingface_chat_completion_call_shape(monkeypatch):
    """Pin the chat_completion kwargs and the response-unwrap path."""
    monkeypatch.setenv("HF_TOKEN", "hf_test")
    recorded = {}
    _install_fake_inference_client(monkeypatch, recorded)
    returned = _call_huggingface("MY SYSTEM BLOCK", "MY USER PROMPT")
    chat_kwargs = recorded["chat_kwargs"]
    expected_messages = [
        {"role": "system", "content": "MY SYSTEM BLOCK"},
        {"role": "user", "content": "MY USER PROMPT"},
    ]
    assert chat_kwargs["messages"] == expected_messages
    assert chat_kwargs["max_tokens"] == 2500
    # Low temperature is intentional: smaller open models can produce
    # looser JSON at higher temperatures. Catch any drift.
    assert chat_kwargs["temperature"] == 0.2
    # Unwrapped from choices[0].message.content.
    assert returned == "hf response"
def test_call_huggingface_model_not_supported_error_wrapped(monkeypatch):
    """A model_not_supported error from HF is re-raised as a RuntimeError
    with the friendlier "isn't available through any" wording."""
    monkeypatch.setenv("HF_TOKEN", "hf_test")
    hf_failure = Exception(
        "Bad request: {'message': \"The requested model is not supported "
        "by any provider you have enabled.\", 'code': 'model_not_supported'}"
    )
    recorded = {}
    _install_fake_inference_client(monkeypatch, recorded, raises=hf_failure)
    with pytest.raises(RuntimeError, match="isn't available through any"):
        _call_huggingface("sys", "usr")
def test_call_huggingface_model_not_supported_alternate_phrasing_wrapped(monkeypatch):
    """The wrapping is triggered by the error code alone, not the full
    human-readable message."""
    monkeypatch.setenv("HF_TOKEN", "hf_test")
    hf_failure = Exception("...'code': 'model_not_supported'...")
    recorded = {}
    _install_fake_inference_client(monkeypatch, recorded, raises=hf_failure)
    with pytest.raises(RuntimeError, match="isn't available through any"):
        _call_huggingface("sys", "usr")
def test_call_huggingface_other_exception_passes_through(monkeypatch):
    """Errors other than model_not_supported (auth fail, network timeout,
    malformed response) propagate unchanged, so the F14 wrapper in
    diagnose() can surface the original class name and detail."""
    monkeypatch.setenv("HF_TOKEN", "hf_test")
    unrelated_failure = ValueError("Invalid API key")
    recorded = {}
    _install_fake_inference_client(
        monkeypatch, recorded, raises=unrelated_failure
    )
    with pytest.raises(ValueError, match="Invalid API key"):
        _call_huggingface("sys", "usr")
| # --- _call_zerogpu: stub path + invocation shape -------------------------- | |
def test_call_zerogpu_stub_raises_clear_error_when_deps_unavailable():
    """Without the ZeroGPU deps (spaces/torch/transformers), _call_zerogpu
    raises a RuntimeError that names the requirement. Skipped when the deps
    are installed, since the stub path is then not in use."""
    deps_installed = _zerogpu_available()
    if deps_installed:
        pytest.skip("Test only meaningful when zerogpu deps are NOT installed")
    with pytest.raises(RuntimeError, match="ZeroGPU backend requires"):
        _call_zerogpu("sys", "usr")
def test_zerogpu_available_reflects_dep_state():
    """_zerogpu_available() gates the zerogpu branch of _detect_provider;
    it must hand back the module-level _ZEROGPU_DEPS_AVAILABLE flag itself
    rather than re-trying the imports on every call."""
    cached_flag = app_module._ZEROGPU_DEPS_AVAILABLE
    assert _zerogpu_available() is cached_flag
def _install_fake_zerogpu_model(monkeypatch, captured: dict, *,
                                prompt_len: int = 5,
                                decoded_text: str = "model output"):
    """Patch app._zerogpu_tokenizer and app._zerogpu_model with recording
    fakes so _zerogpu_invoke() can run end-to-end without torch.

    Every call the fakes receive is written into ``captured``:
    ``apply_chat_template``, ``decode``, ``inputs_moved_to_device``,
    ``model_moved_to_device``, ``generate_inputs`` and ``generate_kwargs``.
    ``generate`` returns a single sequence of ``prompt_len + 10`` token ids
    and ``decode`` always returns ``decoded_text``.
    """
    import app as app_module

    class _FakeInputs:
        def __init__(self):
            self.shape = (1, prompt_len)

        def to(self, device):
            captured["inputs_moved_to_device"] = device
            return self  # .to() returns self so the caller can keep using it

    tokenized = _FakeInputs()
    # Prompt tokens followed by 10 newly generated tokens.
    generated = [list(range(prompt_len + 10))]

    class _FakeTokenizer:
        eos_token_id = 99

        def apply_chat_template(self, messages, **kwargs):
            captured["apply_chat_template"] = {
                "messages": messages,
                "kwargs": kwargs,
            }
            return tokenized

        def decode(self, token_ids, **kwargs):
            captured["decode"] = {
                "token_ids": list(token_ids),
                "kwargs": kwargs,
            }
            return decoded_text

    class _FakeModel:
        device = "cpu"  # starts on CPU; .to() records where it is moved

        def to(self, device):
            captured["model_moved_to_device"] = device
            self.device = device
            return self

        def generate(self, inputs, **kwargs):
            captured["generate_inputs"] = inputs
            captured["generate_kwargs"] = kwargs
            return generated

    # Only the two module-level objects are patched; there is no loader
    # function to stub here.
    monkeypatch.setattr(app_module, "_zerogpu_tokenizer", _FakeTokenizer())
    monkeypatch.setattr(app_module, "_zerogpu_model", _FakeModel())
def test_zerogpu_invoke_builds_chat_template_with_system_and_user(monkeypatch):
    """_zerogpu_invoke must pass a system message followed by a user
    message to apply_chat_template, requesting "pt" tensors and a
    generation prompt."""
    recorded = {}
    _install_fake_zerogpu_model(monkeypatch, recorded)

    _zerogpu_invoke("MY SYSTEM BLOCK", "MY USER PROMPT")

    template_call = recorded["apply_chat_template"]
    expected_messages = [
        {"role": "system", "content": "MY SYSTEM BLOCK"},
        {"role": "user", "content": "MY USER PROMPT"},
    ]
    assert template_call["messages"] == expected_messages
    assert template_call["kwargs"]["return_tensors"] == "pt"
    assert template_call["kwargs"]["add_generation_prompt"] is True
def test_zerogpu_invoke_moves_model_and_inputs_to_cuda(monkeypatch):
    """With the pre-load pattern the model sits on CPU after module
    init, so _zerogpu_invoke itself has to move both the model and the
    tokenized inputs to cuda (inside the @spaces.GPU context)."""
    recorded = {}
    _install_fake_zerogpu_model(monkeypatch, recorded)

    _zerogpu_invoke("sys", "usr")

    # Checked in this order: first the model's CPU -> cuda move, then
    # the tokenized inputs' move to cuda for inference.
    for device_key in ("model_moved_to_device", "inputs_moved_to_device"):
        assert recorded[device_key] == "cuda"
def test_zerogpu_invoke_generate_call_shape(monkeypatch):
    """Pin the keyword arguments handed to .generate().

    They are easy to mistype and each one matters:
      max_new_tokens=2500        caps output length
      temperature=0.2            keeps JSON output stable for small models
      do_sample=True             required for a non-zero temperature to apply
      pad_token_id=eos_token_id  avoids warning spam on short prompts
    Any drift in these should fail here.
    """
    recorded = {}
    _install_fake_zerogpu_model(monkeypatch, recorded)

    _zerogpu_invoke("sys", "usr")

    generate_kwargs = recorded["generate_kwargs"]
    fake_eos_token_id = 99  # matches _FakeTokenizer.eos_token_id
    assert generate_kwargs["max_new_tokens"] == 2500
    assert generate_kwargs["temperature"] == 0.2
    assert generate_kwargs["do_sample"] is True
    assert generate_kwargs["pad_token_id"] == fake_eos_token_id
def test_zerogpu_invoke_strips_prompt_tokens_before_decode(monkeypatch):
    """Only the newly generated tokens may reach decode(); the prompt
    must not be echoed back. _zerogpu_invoke is expected to slice
    outputs[0][prompt_len:] before decoding, and this verifies the
    slice boundaries."""
    recorded = {}
    # With prompt_len=5 the fake model returns ids 0..14 (5 prompt
    # tokens + 10 generated), so decode() should receive ids 5..14.
    prompt_len = 5
    generated_count = 10
    _install_fake_zerogpu_model(monkeypatch, recorded, prompt_len=prompt_len)

    _zerogpu_invoke("sys", "usr")

    decode_call = recorded["decode"]
    expected_ids = list(range(prompt_len, prompt_len + generated_count))
    assert decode_call["token_ids"] == expected_ids
    # skip_special_tokens must be on so markers such as </s> are dropped.
    assert decode_call["kwargs"]["skip_special_tokens"] is True
def test_zerogpu_invoke_returns_decoded_text(monkeypatch):
    """Whatever the tokenizer's decode() produces is what
    _zerogpu_invoke returns to its caller."""
    expected_text = "my generated answer"
    recorded = {}
    _install_fake_zerogpu_model(monkeypatch, recorded, decoded_text=expected_text)

    assert _zerogpu_invoke("sys", "usr") == expected_text
| # --- Integration test (opt-in; hits the real Anthropic API) ---------------- | |
| # | |
| # Skipped unless ANTHROPIC_API_KEY is set AND ANTHROPIC_INTEGRATION=1 is | |
| # set. Costs money to run (~$0.05 per call to Opus 4.7). Use this when | |
| # you want to verify end-to-end that the key works and the model is | |
| # reachable; routine CI should leave this skipped. | |
| import pytest as _pytest # already imported above as pytest, but kept explicit | |
def test_call_anthropic_real_api_returns_text():
    """Opt-in smoke test: one real round trip through _call_anthropic().

    Skipped unless ANTHROPIC_API_KEY is set AND ANTHROPIC_INTEGRATION=1,
    so an ordinary ``pytest`` run (local or CI) never makes a billable
    call to the real API.
    """
    has_key = bool(os.environ.get("ANTHROPIC_API_KEY"))
    opted_in = os.environ.get("ANTHROPIC_INTEGRATION") == "1"
    if not (has_key and opted_in):
        pytest.skip(
            "Real-API test: set ANTHROPIC_API_KEY and "
            "ANTHROPIC_INTEGRATION=1 to run"
        )

    result = _call_anthropic(
        "You are a one-word echo. Reply with exactly one word.",
        "Say hello.",
    )
    assert isinstance(result, str)
    assert len(result) > 0
    # The prompt asks for a single word; the bound is deliberately
    # generous so minor model chattiness does not fail the test.
    assert len(result.split()) < 20
| # --------------------------------------------------------------------------- | |
| # Fixture: a backend response that satisfies parse_response() so the | |
| # diagnose-Premium happy-path tests can assert on parser output without | |
| # duplicating the full JSON shape per test. | |
| # --------------------------------------------------------------------------- | |
# Layout: a fenced ```json block holding the structured fields
# (constraint, four scores, quadrant, closest portrait, warnings),
# followed by a Markdown "# The Verdict" narrative section.
_VALID_BACKEND_RESPONSE = """```json
{
"constraint": "Underwriting at the quote screen.",
"scores": {
"proprietary_data": { "score": 4, "rationale": "first-party.", "quoted_span": "12-year claims database" },
"self_labeling": { "score": 4, "rationale": "policies self-label.", "quoted_span": "Every policy we write" },
"decreasing_marginal_cost": { "score": 3, "rationale": "amortized pipeline.", "quoted_span": "feed back into the next quarter" },
"defensible_asymmetry": { "score": 3, "rationale": "no data sharing.", "quoted_span": "we don't share data with" }
},
"quadrant": "compounder",
"closest_portrait": "progressive",
"closest_portrait_paragraph": "Your case tracks Progressive most closely.",
"warnings": []
}
```
# The Verdict
Your initiative is a compounder. Here's why.
"""
def test_warnings_populated_for_failure_quadrant():
    """A response whose quadrant is "roman-candle" and which carries one
    warning entry must parse into a result exposing that quadrant and
    the warning's citation source and URL."""
    quadrant_before = '"quadrant": "compounder"'
    quadrant_after = '"quadrant": "roman-candle"'
    warnings_before = '"warnings": []'
    warnings_after = '"warnings": [{"text": "Wrong place, conditions weak.", "citation_source": "Buffett 2007", "citation_url": "https://www.berkshirehathaway.com/letters/2007ltr.pdf"}]'

    raw = VALID_JSON_BLOCK.replace(quadrant_before, quadrant_after)
    raw = raw.replace(warnings_before, warnings_after)

    r = parse_response(raw)

    assert r.quadrant == "roman-candle"
    assert len(r.warnings) == 1
    assert r.warnings[0].citation_source == "Buffett 2007"
    assert "berkshirehathaway" in r.warnings[0].citation_url