File size: 5,632 Bytes
577ea9f
 
f632c81
577ea9f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f632c81
 
577ea9f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
"""
Smoke test: confirm Gemma 4 free tier on OpenRouter supports
the two features FlutIQ depends on:
  1. reasoning mode (risk-analyst agent)
  2. OpenAI-format tool calling (data agents)

Run:
    cd backend && set -a && source .env && set +a && .venv/bin/python scripts/smoke_test.py
"""
import asyncio
import json
import os
import sys

import httpx

# OpenRouter credentials and endpoint. API key is read from the environment;
# main() aborts with exit code 2 when it is empty.
API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
BASE = "https://openrouter.ai/api/v1/chat/completions"
# Primary model under test, plus the fallback accepted for reasoning/tools.
PRIMARY = "google/gemma-4-31b-it:free"
FALLBACK = "google/gemma-4-26b-a4b-it:free"

# Shared request headers. HTTP-Referer / X-Title identify the app to
# OpenRouter (used for attribution on free-tier requests).
HEADERS = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json",
    "HTTP-Referer": "https://flutiq.pages.dev",
    "X-Title": "FlutIQ smoke test",
}


def section(title: str) -> None:
    """Print *title* framed above and below by a 70-char separator rule."""
    rule = "=" * 70
    print(f"\n{rule}\n{title}\n{rule}")


async def call(payload: dict) -> dict:
    """POST *payload* to the OpenRouter chat-completions endpoint.

    Returns the parsed JSON body on success, or {} on any failure
    (non-200 status or an unparseable 200 body). Errors are printed
    rather than raised so the remaining smoke tests still run.
    """
    async with httpx.AsyncClient(timeout=120) as client:
        resp = await client.post(BASE, headers=HEADERS, json=payload)
    print(f"  HTTP {resp.status_code}")
    if resp.status_code != 200:
        print(f"  body: {resp.text[:600]}")
        return {}
    try:
        return resp.json()
    except ValueError:
        # A 200 with a non-JSON body (proxy error page, truncated response)
        # should report FAIL for this test, not crash the whole suite.
        print(f"  body not JSON: {resp.text[:600]}")
        return {}


async def test_basic(model: str) -> bool:
    """Check that *model* answers a trivial prompt with non-empty text."""
    section(f"TEST 1 — basic completion ({model})")
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": "Say only: pong"}],
        "max_tokens": 16,
        "temperature": 0,
    }
    data = await call(payload)
    if not data:
        return False
    message = data.get("choices", [{}])[0].get("message", {})
    text = message.get("content", "")
    print(f"  response: {text!r}")
    return bool(text)


async def test_reasoning(model: str) -> bool:
    """Check that *model* emits reasoning output when reasoning is enabled.

    Passes if the response carries either a ``reasoning_details`` list or a
    plain ``reasoning`` field on the message.
    """
    section(f"TEST 2 — reasoning mode ({model})")
    question = (
        "If the annual exceedance probability of a flood is 0.01, "
        "what is the probability of at least one flood in 30 years? "
        "Show your work, then return only the final number."
    )
    data = await call({
        "model": model,
        "messages": [{"role": "user", "content": question}],
        "reasoning": {"enabled": True},
        "max_tokens": 1024,
        "temperature": 0,
    })
    if not data:
        return False

    msg = data.get("choices", [{}])[0].get("message", {})
    text = msg.get("content", "")
    details = msg.get("reasoning_details", [])
    field = msg.get("reasoning", "")
    field_len = len(field) if isinstance(field, str) else "n/a"

    print(f"  content (first 300 chars): {text[:300]!r}")
    print(f"  reasoning_details present: {bool(details)} (len={len(details)})")
    print(f"  reasoning field present: {bool(field)} (len={field_len})")
    if details:
        first = details[0]
        keys = list(first.keys()) if isinstance(first, dict) else type(first).__name__
        print(f"  reasoning_details[0] keys: {keys}")
        sample = json.dumps(first)[:300] if isinstance(first, dict) else str(first)[:300]
        print(f"  reasoning_details[0] sample: {sample}")
    elif field:
        sample = field[:300] if isinstance(field, str) else str(field)[:300]
        print(f"  reasoning sample: {sample!r}")
    print(f"  usage: {data.get('usage', {})}")
    return bool(details or field)


async def test_tools(model: str) -> bool:
    """Check that *model* emits an OpenAI-format tool call for a geo lookup."""
    section(f"TEST 3 — function calling ({model})")
    flood_zone_tool = {
        "type": "function",
        "function": {
            "name": "lookup_fema_flood_zone",
            "description": "Look up FEMA flood zone for coordinates.",
            "parameters": {
                "type": "object",
                "properties": {
                    "latitude": {"type": "number"},
                    "longitude": {"type": "number"},
                },
                "required": ["latitude", "longitude"],
            },
        },
    }
    data = await call({
        "model": model,
        "messages": [{
            "role": "user",
            "content": "What is the FEMA flood zone for 41.8087, -87.6062?",
        }],
        "tools": [flood_zone_tool],
        "tool_choice": "auto",
        "max_tokens": 512,
        "temperature": 0,
    })
    if not data:
        return False

    msg = data.get("choices", [{}])[0].get("message", {})
    tool_calls = msg.get("tool_calls") or []
    text = msg.get("content") or ""
    print(f"  content: {text[:200]!r}")
    print(f"  tool_calls count: {len(tool_calls)}")
    if tool_calls:
        print(f"  tool_call[0]: {json.dumps(tool_calls[0])[:400]}")
    return bool(tool_calls)


async def main() -> int:
    """Run all three smoke tests against both models.

    Returns a process exit code: 0 when the critical combination passes
    (primary basic, plus reasoning and tools on either model), 1 when it
    fails, 2 when the API key is missing.
    """
    if not API_KEY:
        print("ERROR: OPENROUTER_API_KEY not set", file=sys.stderr)
        return 2

    checks = (("basic", test_basic), ("reasoning", test_reasoning), ("tools", test_tools))
    results = {}
    for model in (PRIMARY, FALLBACK):
        for name, check in checks:
            results[(model, name)] = await check(model)

    section("SUMMARY")
    for (model, name), ok in results.items():
        print(f"  [{'PASS' if ok else 'FAIL'}] {model:42s} {name}")

    # Basic completion must work on the primary; reasoning and tool calling
    # may be satisfied by either model.
    primary_basic = results.get((PRIMARY, "basic"), False)
    reasoning_ok = results.get((PRIMARY, "reasoning"), False) or results.get((FALLBACK, "reasoning"), False)
    tools_ok = results.get((PRIMARY, "tools"), False) or results.get((FALLBACK, "tools"), False)
    all_critical = primary_basic and reasoning_ok and tools_ok

    print()
    print("Overall:", "OK to proceed" if all_critical else "BLOCKED — adjust spec before building")
    return 0 if all_critical else 1


if __name__ == "__main__":
    # Propagate main()'s return value (0/1/2) as the process exit code.
    sys.exit(asyncio.run(main()))