smoke test: verify Gemma 4 vision on OpenRouter free tier (BYOK)
Browse files
Both google/gemma-4-31b-it:free and google/gemma-4-26b-a4b-it:free
support multimodal input through the OpenAI-compatible image_url
content-part. Cost stays at $0 with BYOK.
Critically, the model declines to fabricate flood-risk indicators
when given an irrelevant image (test used a Picsum patio shot;
Gemma returned indicators=[] with high confidence and an honest
'insufficient data' summary). That's the property we need before
building a Street View agent on top.
Image-fetch pattern in the smoke test is the production pattern:
download server-side with httpx, base64-encode, send inline as a
data: URL. Avoids upstream-fetcher 404s/UA-blocks (Wikimedia bit
us when we tried passing a public URL directly).
Unblocks the Street View flood-indicator agent from the Gemma 4
depth review.
- scripts/smoke_test_vision.py +212 -0
|
@@ -0,0 +1,212 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Smoke test: does Gemma 4 vision work on OpenRouter free tier?
|
| 3 |
+
|
| 4 |
+
If this passes we can build a Street View flood-indicator agent.
|
| 5 |
+
If it fails (or returns garbage), we either need paid OpenRouter or
|
| 6 |
+
a Google AI Studio direct-call code path.
|
| 7 |
+
|
| 8 |
+
Run:
|
| 9 |
+
cd backend && set -a && source .env && set +a && \\
|
| 10 |
+
PYTHONPATH=. .venv/bin/python scripts/smoke_test_vision.py
|
| 11 |
+
"""
|
| 12 |
+
import asyncio
|
| 13 |
+
import base64
|
| 14 |
+
import json
|
| 15 |
+
import os
|
| 16 |
+
import sys
|
| 17 |
+
|
| 18 |
+
import httpx
|
| 19 |
+
|
| 20 |
+
# API key comes from the environment; surrounding whitespace is stripped and a
# missing variable yields "" (main() treats an empty key as "not set").
API_KEY = os.environ.get("OPENROUTER_API_KEY", "").strip()
# Chat-completions endpoint that every request in this script is POSTed to.
BASE = "https://openrouter.ai/api/v1/chat/completions"
# Model ids exercised by the smoke test, in the order they are tried.
PRIMARY = "google/gemma-4-31b-it:free"
FALLBACK = "google/gemma-4-26b-a4b-it:free"

# Headers sent with every chat-completions request. Built once at import
# time, so the Authorization value reflects API_KEY as read above.
HEADERS = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json",
    "HTTP-Referer": "https://flutiq.pages.dev",
    "X-Title": "FlutIQ vision smoke test",
}

# Hotlink-friendly source. Picsum gives us a real photograph (random
# subject) at our requested size — fine for "does vision work at all".
# We test flood-indicator extraction separately with a known building
# image.
TEST_IMAGE_URL = "https://picsum.photos/seed/flutiq-vision-test/640/480"
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def section(title: str) -> None:
    """Print *title* framed by two 70-character ``=`` rules.

    A blank line is emitted before the first rule so consecutive sections
    are visually separated in the console output.

    Args:
        title: Heading text printed between the two rules.
    """
    rule = "=" * 70
    print("\n" + rule)
    print(title)
    print(rule)
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
async def fetch_image_as_data_url(url: str) -> str:
    """Download an image and return it as a data: URL (base64-encoded).

    Mirrors the production pattern: we'll fetch Google Street View
    images server-side, then send them inline to Gemma 4. This is
    more reliable than passing a public URL (which the upstream
    fetcher may not be able to reach due to User-Agent rules).

    Args:
        url: Address of the image to download; redirects are followed.

    Returns:
        A string of the form ``data:<media type>;base64,<payload>``. The
        media type is taken from the response's Content-Type header with
        any parameters (e.g. ``; charset=...``) removed, and defaults to
        ``image/jpeg`` when the header is missing or empty.

    Raises:
        Whatever ``client.get`` or ``resp.raise_for_status()`` raises for
        a transport failure or an error status; nothing is caught here.
    """
    # Realistic UA — Wikimedia in particular rejects clients without one.
    headers = {"User-Agent": "FlutIQ/1.0 (smoke test)"}
    async with httpx.AsyncClient(
        timeout=30, headers=headers, follow_redirects=True
    ) as client:
        resp = await client.get(url)
        resp.raise_for_status()
        # A header that is present but empty would otherwise produce an
        # invalid "data:;base64,..." URL, so fall back to the same default
        # that is used when the header is absent.
        ct = resp.headers.get("content-type", "").split(";")[0].strip() or "image/jpeg"
        b64 = base64.b64encode(resp.content).decode("ascii")
        return f"data:{ct};base64,{b64}"
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
async def call(model: str, prompt: str, image_url: str) -> dict:
    """POST one text+image user message to the chat-completions endpoint.

    The HTTP status is always printed. On a non-200 status the first 800
    characters of the body are printed as well and an empty dict is
    returned, so callers can treat a falsy result as "request failed".

    Args:
        model: Model id placed in the request payload.
        prompt: Text part of the user message.
        image_url: Value sent as the ``image_url`` content part; the
            callers in this script pass a ``data:`` URL.

    Returns:
        The decoded JSON body on HTTP 200, otherwise ``{}``.
    """
    payload = {
        "model": model,
        "messages": [{
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": image_url}},
            ],
        }],
        "max_tokens": 1024,
        "temperature": 0.2,
    }
    async with httpx.AsyncClient(timeout=120) as client:
        resp = await client.post(BASE, headers=HEADERS, json=payload)
        print(f" HTTP {resp.status_code}")
        if resp.status_code != 200:
            print(f" body (first 800 chars): {resp.text[:800]}")
            return {}
        return resp.json()
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
async def test_describe(model: str, image_url: str) -> bool:
    """Check that *model* returns a non-trivial description of the image.

    Args:
        model: Model id to query.
        image_url: Image reference forwarded unchanged to ``call``.

    Returns:
        True when the reply text, once stripped, is longer than five
        characters; False when the request failed or the reply carried no
        usable text.
    """
    section(f"TEST 1 — basic image describe ({model})")
    data = await call(
        model,
        "What kind of building is shown in this photo? Answer in one sentence.",
        image_url,
    )
    if not data:
        return False
    # `choices` may be absent, null, or an empty list, and `message` may be
    # null; fall back to empty containers so such a reply counts as a failed
    # check instead of raising IndexError / AttributeError.
    choices = data.get("choices") or [{}]
    msg = choices[0].get("message") or {}
    text = msg.get("content", "") or ""
    print(f" response: {text[:400]!r}")
    print(f" usage: {data.get('usage', {})}")
    return bool(text and len(text.strip()) > 5)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def _strip_code_fence(text: str) -> str:
    """Return *text* without a surrounding Markdown code fence or ``json`` tag."""
    clean = text.strip()
    if clean.startswith("```"):
        # Drop the whole opening fence line (which may carry a language
        # tag); with no newline at all, drop just the three backticks.
        clean = clean.split("\n", 1)[1] if "\n" in clean else clean[3:]
        if clean.endswith("```"):
            clean = clean.rsplit("```", 1)[0]
        clean = clean.strip()
    # Covers a single-line fence such as "```json {...}```" where the
    # language tag was not removed together with the fence line.
    if clean.startswith("json"):
        clean = clean[4:].strip()
    return clean


async def test_flood_indicators(model: str, image_url: str) -> bool:
    """Check that *model* answers the flood-indicator prompt with a JSON object.

    The first 800 characters of the raw reply are always printed; on
    success the parsed object is pretty-printed as well.

    Args:
        model: Model id to query.
        image_url: Image reference forwarded unchanged to ``call``.

    Returns:
        True when the reply (after removing an optional code fence) parses
        as a JSON object; False when the request failed, the reply is not
        valid JSON, or the JSON value is not an object.
    """
    section(f"TEST 2 — flood-risk indicator extraction ({model})")
    prompt = """You are a flood risk surveyor analyzing a street-level photo of a residential building.

Identify visible flood risk indicators. For each one you find, note its
location in the image and what it implies about flood vulnerability.

Look specifically for:
- Basement-level windows (vulnerable to surface flooding)
- Ground-floor HVAC units, water heaters, electrical panels
- Below-grade entries or stairwells
- Visible drainage infrastructure (downspouts, storm drains, swales)
- Evidence of prior water damage (staining, repairs)
- Property elevation relative to street grade
- Proximity to obvious water features

Return ONLY a JSON object with this shape:
{
"indicators": [
{"feature": "<short name>", "location": "<where in image>", "risk_implication": "<1 sentence>"}
],
"overall_visual_risk": "low" | "moderate" | "high",
"confidence": "low" | "medium" | "high",
"summary": "<1 sentence>"
}"""
    data = await call(model, prompt, image_url)
    if not data:
        return False
    # `choices` may be absent, null, or an empty list, and `message` may be
    # null; fall back to empty containers so such a reply counts as a failed
    # check instead of raising IndexError / AttributeError.
    choices = data.get("choices") or [{}]
    msg = choices[0].get("message") or {}
    text = msg.get("content", "") or ""
    print(f" raw response (first 800 chars): {text[:800]}")

    # Try to parse JSON
    clean = _strip_code_fence(text)
    try:
        parsed = json.loads(clean)
    except json.JSONDecodeError as e:
        print(f" JSON parse failed: {e}")
        return False
    # The prompt demands a JSON *object*; a bare string, number or list is
    # valid JSON but is not the structure being tested for.
    if not isinstance(parsed, dict):
        print(f" JSON parsed but is not an object: {type(parsed).__name__}")
        return False
    print()
    print(" PARSED JSON:")
    print(json.dumps(parsed, indent=4))
    return True
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
async def main() -> int:
    """Run both checks against both models and return a process exit code.

    Returns:
        2 when the API key is missing or the test image cannot be fetched,
        0 when the describe check passed for at least one model (with or
        without a passing flood-indicator check), and 1 otherwise.
    """
    if not API_KEY:
        print("ERROR: OPENROUTER_API_KEY not set", file=sys.stderr)
        return 2

    print(f"Fetching test image: {TEST_IMAGE_URL}")
    try:
        image = await fetch_image_as_data_url(TEST_IMAGE_URL)
    except Exception as e:
        # Top-level boundary: any fetch failure aborts the run with code 2.
        print(f" failed to fetch test image: {e}", file=sys.stderr)
        return 2

    # Size of the decoded payload: every 4 base64 characters encode 3 bytes,
    # minus one byte per trailing "=" padding character.
    b64_part = image.partition(",")[2]
    padding = len(b64_part) - len(b64_part.rstrip("="))
    decoded_bytes = len(b64_part) * 3 // 4 - padding
    # NOTE(review): the leading "β" looks like a mis-encoded glyph (possibly
    # a check mark or an arrow); confirm the intended character.
    print(f" β got {len(image)} chars of data URL ({decoded_bytes} bytes raw)")

    checks = (
        ("describe", test_describe),
        ("flood_indicators", test_flood_indicators),
    )
    results = {}
    for model in (PRIMARY, FALLBACK):
        for name, check in checks:
            try:
                results[(model, name)] = await check(model, image)
            except Exception as e:
                # One failing check must not stop the remaining ones.
                print(f" EXCEPTION: {type(e).__name__}: {e}")
                results[(model, name)] = False

            await asyncio.sleep(2)  # be polite to free tier

    section("SUMMARY")
    for (model, name), ok in results.items():
        mark = "PASS" if ok else "FAIL"
        print(f" [{mark}] {model:42s} {name}")

    any_describe_passed = any(
        results.get((m, "describe"), False) for m in (PRIMARY, FALLBACK)
    )
    any_flood_passed = any(
        results.get((m, "flood_indicators"), False) for m in (PRIMARY, FALLBACK)
    )

    print()
    if any_describe_passed and any_flood_passed:
        print("VERDICT: Vision works on free tier. Street View agent is buildable.")
        return 0
    elif any_describe_passed:
        print("VERDICT: Vision works for description but JSON-mode flood-indicator")
        print(" extraction is unreliable. Buildable but with retry logic.")
        return 0
    else:
        print("VERDICT: Vision does NOT work on this free-tier route.")
        print(" Need either paid OpenRouter or Google AI Studio direct.")
        return 1
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
# Script entry point: run the async main() and use its return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(asyncio.run(main()))
|