# FlutIQ — scripts/smoke_test.py
# (commit f632c81, kredd25: "rename FloodIQ → FlutIQ across the codebase")
"""
Smoke test: confirm Gemma 4 free tier on OpenRouter supports
the two features FlutIQ depends on:
1. reasoning mode (risk-analyst agent)
2. OpenAI-format tool calling (data agents)
Run:
cd backend && set -a && source .env && set +a && .venv/bin/python scripts/smoke_test.py
"""
import asyncio
import json
import os
import sys
import httpx
API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
BASE = "https://openrouter.ai/api/v1/chat/completions"
PRIMARY = "google/gemma-4-31b-it:free"
FALLBACK = "google/gemma-4-26b-a4b-it:free"
HEADERS = {
"Authorization": f"Bearer {API_KEY}",
"Content-Type": "application/json",
"HTTP-Referer": "https://flutiq.pages.dev",
"X-Title": "FlutIQ smoke test",
}
def section(title: str) -> None:
print("\n" + "=" * 70)
print(title)
print("=" * 70)
async def call(payload: dict) -> dict:
async with httpx.AsyncClient(timeout=120) as client:
resp = await client.post(BASE, headers=HEADERS, json=payload)
print(f" HTTP {resp.status_code}")
if resp.status_code != 200:
print(f" body: {resp.text[:600]}")
return {}
return resp.json()
async def test_basic(model: str) -> bool:
section(f"TEST 1 β€” basic completion ({model})")
data = await call({
"model": model,
"messages": [{"role": "user", "content": "Say only: pong"}],
"max_tokens": 16,
"temperature": 0,
})
if not data:
return False
text = data.get("choices", [{}])[0].get("message", {}).get("content", "")
print(f" response: {text!r}")
return bool(text)
async def test_reasoning(model: str) -> bool:
section(f"TEST 2 β€” reasoning mode ({model})")
data = await call({
"model": model,
"messages": [{
"role": "user",
"content": (
"If the annual exceedance probability of a flood is 0.01, "
"what is the probability of at least one flood in 30 years? "
"Show your work, then return only the final number."
),
}],
"reasoning": {"enabled": True},
"max_tokens": 1024,
"temperature": 0,
})
if not data:
return False
msg = data.get("choices", [{}])[0].get("message", {})
text = msg.get("content", "")
reasoning_details = msg.get("reasoning_details", [])
reasoning_field = msg.get("reasoning", "")
print(f" content (first 300 chars): {text[:300]!r}")
print(f" reasoning_details present: {bool(reasoning_details)} (len={len(reasoning_details)})")
print(f" reasoning field present: {bool(reasoning_field)} (len={len(reasoning_field) if isinstance(reasoning_field, str) else 'n/a'})")
if reasoning_details:
first = reasoning_details[0]
print(f" reasoning_details[0] keys: {list(first.keys()) if isinstance(first, dict) else type(first).__name__}")
sample = json.dumps(first)[:300] if isinstance(first, dict) else str(first)[:300]
print(f" reasoning_details[0] sample: {sample}")
elif reasoning_field:
sample = reasoning_field[:300] if isinstance(reasoning_field, str) else str(reasoning_field)[:300]
print(f" reasoning sample: {sample!r}")
print(f" usage: {data.get('usage', {})}")
return bool(reasoning_details or reasoning_field)
async def test_tools(model: str) -> bool:
section(f"TEST 3 β€” function calling ({model})")
tools = [{
"type": "function",
"function": {
"name": "lookup_fema_flood_zone",
"description": "Look up FEMA flood zone for coordinates.",
"parameters": {
"type": "object",
"properties": {
"latitude": {"type": "number"},
"longitude": {"type": "number"},
},
"required": ["latitude", "longitude"],
},
},
}]
data = await call({
"model": model,
"messages": [{
"role": "user",
"content": "What is the FEMA flood zone for 41.8087, -87.6062?",
}],
"tools": tools,
"tool_choice": "auto",
"max_tokens": 512,
"temperature": 0,
})
if not data:
return False
msg = data.get("choices", [{}])[0].get("message", {})
tool_calls = msg.get("tool_calls", []) or []
text = msg.get("content", "") or ""
print(f" content: {text[:200]!r}")
print(f" tool_calls count: {len(tool_calls)}")
if tool_calls:
tc = tool_calls[0]
print(f" tool_call[0]: {json.dumps(tc)[:400]}")
return bool(tool_calls)
async def main() -> int:
if not API_KEY:
print("ERROR: OPENROUTER_API_KEY not set", file=sys.stderr)
return 2
results = {}
for model in (PRIMARY, FALLBACK):
results[(model, "basic")] = await test_basic(model)
results[(model, "reasoning")] = await test_reasoning(model)
results[(model, "tools")] = await test_tools(model)
section("SUMMARY")
for (model, name), ok in results.items():
mark = "PASS" if ok else "FAIL"
print(f" [{mark}] {model:42s} {name}")
all_critical = all([
results.get((PRIMARY, "basic"), False),
results.get((PRIMARY, "reasoning"), False) or results.get((FALLBACK, "reasoning"), False),
results.get((PRIMARY, "tools"), False) or results.get((FALLBACK, "tools"), False),
])
print()
print("Overall:", "OK to proceed" if all_critical else "BLOCKED β€” adjust spec before building")
return 0 if all_critical else 1
if __name__ == "__main__":
sys.exit(asyncio.run(main()))