"""Smoke test: confirm Gemma 4 free tier on OpenRouter supports the two
features FlutIQ depends on:

1. reasoning mode (risk-analyst agent)
2. OpenAI-format tool calling (data agents)

Run:
    cd backend && set -a && source .env && set +a && \
        .venv/bin/python scripts/smoke_test.py
"""

import asyncio
import json
import os
import sys

import httpx

API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
BASE = "https://openrouter.ai/api/v1/chat/completions"
PRIMARY = "google/gemma-4-31b-it:free"
FALLBACK = "google/gemma-4-26b-a4b-it:free"

HEADERS = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json",
    "HTTP-Referer": "https://flutiq.pages.dev",
    "X-Title": "FlutIQ smoke test",
}


def section(title: str) -> None:
    """Print a banner separating one test's console output from the next."""
    print("\n" + "=" * 70)
    print(title)
    print("=" * 70)


def _message(data: dict) -> dict:
    """Safely extract ``choices[0].message`` from a chat-completion response.

    ``data.get("choices", [{}])[0]`` is NOT safe: the default only applies
    when the key is missing, so an explicit ``"choices": []`` (seen on some
    provider refusals) would raise IndexError. Returns {} in every
    malformed case so callers simply report FAIL.
    """
    choices = data.get("choices") or [{}]
    first = choices[0]
    if not isinstance(first, dict):
        return {}
    return first.get("message", {}) or {}


async def call(payload: dict) -> dict:
    """POST *payload* to the OpenRouter chat-completions endpoint.

    Returns the parsed JSON body on HTTP 200, or {} on any non-200 status
    or transport error — a transient network failure should mark one leg
    of the smoke test FAIL, not abort the whole run with a traceback.
    """
    try:
        async with httpx.AsyncClient(timeout=120) as client:
            resp = await client.post(BASE, headers=HEADERS, json=payload)
    except httpx.HTTPError as exc:
        # Timeouts, DNS failures, connection resets, etc.
        print(f" transport error: {exc!r}")
        return {}
    print(f" HTTP {resp.status_code}")
    if resp.status_code != 200:
        print(f" body: {resp.text[:600]}")
        return {}
    return resp.json()


async def test_basic(model: str) -> bool:
    """TEST 1 — a plain completion returns non-empty text."""
    section(f"TEST 1 — basic completion ({model})")
    data = await call({
        "model": model,
        "messages": [{"role": "user", "content": "Say only: pong"}],
        "max_tokens": 16,
        "temperature": 0,
    })
    if not data:
        return False
    text = _message(data).get("content", "") or ""
    print(f" response: {text!r}")
    return bool(text)


async def test_reasoning(model: str) -> bool:
    """TEST 2 — reasoning mode yields ``reasoning_details`` or ``reasoning``.

    Passes if either field is non-empty; FlutIQ's risk-analyst agent can
    consume whichever shape the provider emits.
    """
    section(f"TEST 2 — reasoning mode ({model})")
    data = await call({
        "model": model,
        "messages": [{
            "role": "user",
            "content": (
                "If the annual exceedance probability of a flood is 0.01, "
                "what is the probability of at least one flood in 30 years? "
                "Show your work, then return only the final number."
            ),
        }],
        "reasoning": {"enabled": True},
        "max_tokens": 1024,
        "temperature": 0,
    })
    if not data:
        return False
    msg = _message(data)
    # ``or``-guards: providers may send explicit nulls for any of these,
    # and slicing / len() on None would crash the test.
    text = msg.get("content", "") or ""
    reasoning_details = msg.get("reasoning_details", []) or []
    reasoning_field = msg.get("reasoning", "") or ""
    print(f" content (first 300 chars): {text[:300]!r}")
    print(f" reasoning_details present: {bool(reasoning_details)} (len={len(reasoning_details)})")
    print(f" reasoning field present: {bool(reasoning_field)} (len={len(reasoning_field) if isinstance(reasoning_field, str) else 'n/a'})")
    if reasoning_details:
        first = reasoning_details[0]
        print(f" reasoning_details[0] keys: {list(first.keys()) if isinstance(first, dict) else type(first).__name__}")
        sample = json.dumps(first)[:300] if isinstance(first, dict) else str(first)[:300]
        print(f" reasoning_details[0] sample: {sample}")
    elif reasoning_field:
        sample = reasoning_field[:300] if isinstance(reasoning_field, str) else str(reasoning_field)[:300]
        print(f" reasoning sample: {sample!r}")
    print(f" usage: {data.get('usage', {})}")
    return bool(reasoning_details or reasoning_field)


async def test_tools(model: str) -> bool:
    """TEST 3 — the model emits an OpenAI-format tool call."""
    section(f"TEST 3 — function calling ({model})")
    tools = [{
        "type": "function",
        "function": {
            "name": "lookup_fema_flood_zone",
            "description": "Look up FEMA flood zone for coordinates.",
            "parameters": {
                "type": "object",
                "properties": {
                    "latitude": {"type": "number"},
                    "longitude": {"type": "number"},
                },
                "required": ["latitude", "longitude"],
            },
        },
    }]
    data = await call({
        "model": model,
        "messages": [{
            "role": "user",
            "content": "What is the FEMA flood zone for 41.8087, -87.6062?",
        }],
        "tools": tools,
        "tool_choice": "auto",
        "max_tokens": 512,
        "temperature": 0,
    })
    if not data:
        return False
    msg = _message(data)
    tool_calls = msg.get("tool_calls", []) or []
    text = msg.get("content", "") or ""
    print(f" content: {text[:200]!r}")
    print(f" tool_calls count: {len(tool_calls)}")
    if tool_calls:
        tc = tool_calls[0]
        print(f" tool_call[0]: {json.dumps(tc)[:400]}")
    return bool(tool_calls)


async def main() -> int:
    """Run all three tests against the primary and fallback models.

    Exit codes: 0 = all critical features available, 1 = blocked,
    2 = OPENROUTER_API_KEY not configured.
    """
    if not API_KEY:
        print("ERROR: OPENROUTER_API_KEY not set", file=sys.stderr)
        return 2

    results: dict[tuple[str, str], bool] = {}
    for model in (PRIMARY, FALLBACK):
        results[(model, "basic")] = await test_basic(model)
        results[(model, "reasoning")] = await test_reasoning(model)
        results[(model, "tools")] = await test_tools(model)

    section("SUMMARY")
    for (model, name), ok in results.items():
        mark = "PASS" if ok else "FAIL"
        print(f" [{mark}] {model:42s} {name}")

    # Basic must pass on the primary model; reasoning and tool calling are
    # acceptable on either the primary or the fallback.
    all_critical = all([
        results.get((PRIMARY, "basic"), False),
        results.get((PRIMARY, "reasoning"), False) or results.get((FALLBACK, "reasoning"), False),
        results.get((PRIMARY, "tools"), False) or results.get((FALLBACK, "tools"), False),
    ])
    print()
    print("Overall:", "OK to proceed" if all_critical else "BLOCKED — adjust spec before building")
    return 0 if all_critical else 1


if __name__ == "__main__":
    sys.exit(asyncio.run(main()))