Spaces:

kredd25
/

FlutIQ

Sleeping

kredd25 commited on May 3

Commit

2d86174

1 Parent(s): da8d7b2

smoke test: verify Gemma 4 vision on OpenRouter free tier (BYOK)

Both google/gemma-4-31b-it:free and google/gemma-4-26b-a4b-it:free
support multimodal input through the OpenAI-compatible image_url
content-part. Cost stays at $0 with BYOK.

Critically, the model declines to fabricate flood-risk indicators
when given an irrelevant image (test used a Picsum patio shot;
Gemma returned indicators=[] with high confidence and an honest
'insufficient data' summary). That's the property we need before
building a Street View agent on top.

Image-fetch pattern in the smoke test is the production pattern:
download server-side with httpx, base64-encode, send inline as a
data: URL. Avoids upstream-fetcher 404s/UA-blocks (Wikimedia bit
us when we tried passing a public URL directly).

Unblocks the Street View flood-indicator agent from the Gemma 4
depth review.

Files changed (1) hide show

scripts/smoke_test_vision.py +212 -0

scripts/smoke_test_vision.py ADDED Viewed

	@@ -0,0 +1,212 @@

+"""
+Smoke test: does Gemma 4 vision work on OpenRouter free tier?
+If this passes we can build a Street View flood-indicator agent.
+If it fails (or returns garbage), we either need paid OpenRouter or
+a Google AI Studio direct-call code path.
+Run:
+    cd backend && set -a && source .env && set +a && \\
+    PYTHONPATH=. .venv/bin/python scripts/smoke_test_vision.py
+"""
+import asyncio
+import base64
+import json
+import os
+import sys
+import httpx
+API_KEY = os.environ.get("OPENROUTER_API_KEY", "").strip()
+BASE = "https://openrouter.ai/api/v1/chat/completions"
+PRIMARY = "google/gemma-4-31b-it:free"
+FALLBACK = "google/gemma-4-26b-a4b-it:free"
+HEADERS = {
+    "Authorization": f"Bearer {API_KEY}",
+    "Content-Type": "application/json",
+    "HTTP-Referer": "https://flutiq.pages.dev",
+    "X-Title": "FlutIQ vision smoke test",
+}
+# Hotlink-friendly source. Picsum gives us a real photograph (random
+# subject) at our requested size — fine for "does vision work at all".
+# We test flood-indicator extraction separately with a known building
+# image.
+TEST_IMAGE_URL = "https://picsum.photos/seed/flutiq-vision-test/640/480"
+def section(title: str) -> None:
+    print("\n" + "=" * 70)
+    print(title)
+    print("=" * 70)
+async def fetch_image_as_data_url(url: str) -> str:
+    """Download an image and return it as a data: URL (base64-encoded).
+    Mirrors the production pattern: we'll fetch Google Street View
+    images server-side, then send them inline to Gemma 4. This is
+    more reliable than passing a public URL (which the upstream
+    fetcher may not be able to reach due to User-Agent rules).
+    """
+    # Realistic UA — Wikimedia in particular rejects clients without one.
+    headers = {"User-Agent": "FlutIQ/1.0 (smoke test)"}
+    async with httpx.AsyncClient(timeout=30, headers=headers, follow_redirects=True) as client:
+        resp = await client.get(url)
+    resp.raise_for_status()
+    ct = resp.headers.get("content-type", "image/jpeg").split(";")[0].strip()
+    b64 = base64.b64encode(resp.content).decode("ascii")
+    return f"data:{ct};base64,{b64}"
+async def call(model: str, prompt: str, image_url: str) -> dict:
+    payload = {
+        "model": model,
+        "messages": [{
+            "role": "user",
+            "content": [
+                {"type": "text", "text": prompt},
+                {"type": "image_url", "image_url": {"url": image_url}},
+            ],
+        }],
+        "max_tokens": 1024,
+        "temperature": 0.2,
+    }
+    async with httpx.AsyncClient(timeout=120) as client:
+        resp = await client.post(BASE, headers=HEADERS, json=payload)
+    print(f"  HTTP {resp.status_code}")
+    if resp.status_code != 200:
+        print(f"  body (first 800 chars): {resp.text[:800]}")
+        return {}
+    return resp.json()
+async def test_describe(model: str, image_url: str) -> bool:
+    section(f"TEST 1 — basic image describe ({model})")
+    data = await call(
+        model,
+        "What kind of building is shown in this photo? Answer in one sentence.",
+        image_url,
+    )
+    if not data:
+        return False
+    msg = data.get("choices", [{}])[0].get("message", {})
+    text = msg.get("content", "") or ""
+    print(f"  response: {text[:400]!r}")
+    print(f"  usage: {data.get('usage', {})}")
+    return bool(text and len(text.strip()) > 5)
+async def test_flood_indicators(model: str, image_url: str) -> bool:
+    section(f"TEST 2 — flood-risk indicator extraction ({model})")
+    prompt = """You are a flood risk surveyor analyzing a street-level photo of a residential building.
+Identify visible flood risk indicators. For each one you find, note its
+location in the image and what it implies about flood vulnerability.
+Look specifically for:
+- Basement-level windows (vulnerable to surface flooding)
+- Ground-floor HVAC units, water heaters, electrical panels
+- Below-grade entries or stairwells
+- Visible drainage infrastructure (downspouts, storm drains, swales)
+- Evidence of prior water damage (staining, repairs)
+- Property elevation relative to street grade
+- Proximity to obvious water features
+Return ONLY a JSON object with this shape:
+{
+  "indicators": [
+    {"feature": "<short name>", "location": "<where in image>", "risk_implication": "<1 sentence>"}
+  ],
+  "overall_visual_risk": "low" | "moderate" | "high",
+  "confidence": "low" | "medium" | "high",
+  "summary": "<1 sentence>"
+}"""
+    data = await call(model, prompt, image_url)
+    if not data:
+        return False
+    msg = data.get("choices", [{}])[0].get("message", {})
+    text = msg.get("content", "") or ""
+    print(f"  raw response (first 800 chars): {text[:800]}")
+    # Try to parse JSON
+    clean = text.strip()
+    if clean.startswith("```"):
+        clean = clean.split("\n", 1)[1] if "\n" in clean else clean[3:]
+        if clean.endswith("```"):
+            clean = clean.rsplit("```", 1)[0]
+        clean = clean.strip()
+        if clean.startswith("json"):
+            clean = clean[4:].strip()
+    try:
+        parsed = json.loads(clean)
+        print()
+        print("  PARSED JSON:")
+        print(json.dumps(parsed, indent=4))
+        return True
+    except json.JSONDecodeError as e:
+        print(f"  JSON parse failed: {e}")
+        return False
+async def main() -> int:
+    if not API_KEY:
+        print("ERROR: OPENROUTER_API_KEY not set", file=sys.stderr)
+        return 2
+    print(f"Fetching test image: {TEST_IMAGE_URL}")
+    try:
+        image = await fetch_image_as_data_url(TEST_IMAGE_URL)
+        decoded_bytes = (len(image) - len(image.split(",")[0]) - 1) * 3 // 4
+        print(f"  → got {len(image)} chars of data URL ({decoded_bytes} bytes raw)")
+    except Exception as e:
+        print(f"  failed to fetch test image: {e}", file=sys.stderr)
+        return 2
+    results = {}
+    for model in (PRIMARY, FALLBACK):
+        try:
+            results[(model, "describe")] = await test_describe(model, image)
+        except Exception as e:
+            print(f"  EXCEPTION: {type(e).__name__}: {e}")
+            results[(model, "describe")] = False
+        await asyncio.sleep(2)  # be polite to free tier
+        try:
+            results[(model, "flood_indicators")] = await test_flood_indicators(model, image)
+        except Exception as e:
+            print(f"  EXCEPTION: {type(e).__name__}: {e}")
+            results[(model, "flood_indicators")] = False
+        await asyncio.sleep(2)
+    section("SUMMARY")
+    for (model, name), ok in results.items():
+        mark = "PASS" if ok else "FAIL"
+        print(f"  [{mark}] {model:42s} {name}")
+    any_describe_passed = any(
+        results.get((m, "describe"), False) for m in (PRIMARY, FALLBACK)
+    )
+    any_flood_passed = any(
+        results.get((m, "flood_indicators"), False) for m in (PRIMARY, FALLBACK)
+    )
+    print()
+    if any_describe_passed and any_flood_passed:
+        print("VERDICT: Vision works on free tier. Street View agent is buildable.")
+        return 0
+    elif any_describe_passed:
+        print("VERDICT: Vision works for description but JSON-mode flood-indicator")
+        print("         extraction is unreliable. Buildable but with retry logic.")
+        return 0
+    else:
+        print("VERDICT: Vision does NOT work on this free-tier route.")
+        print("         Need either paid OpenRouter or Google AI Studio direct.")
+        return 1
+if __name__ == "__main__":
+    sys.exit(asyncio.run(main()))