"""MicroLens HuggingFace Space โ Kaggle Gemma 4 Good Hackathon submission.
Layout:
- Header: italic-serif logo + ONLINE green + Kaggle/Gemma 4 hackathon meta line
- Toolbar: 3 mode buttons (MICROSCOPE / UPLOAD / SAMPLES) + 4 tool buttons
(CIRCLE/SQUARE segmented + GRID + CROSS)
- Main row: viewport (square/circle, optional grid+cross overlays) + right
control panel (mode-dependent: 5 categories ร 6 thumbs / upload zone /
camera enumeration)
- AI ANALYZE long oval cyanโred gradient button
- 3 result panels: UNTRAINED BASELINE / MICROLENS V2 BRIEF / MICROLENS V3 RICH
- Translate row with 28 languages (English default) + ORIGINAL button after
translation
- Footer with run-locally + APK + Legal links
SAMPLES tab uses cached answers from catalog.json (vanilla + v2 + v3 for all 30).
UPLOAD / MICROSCOPE tabs run LIVE inference against per-model backend URLs:
URL_VANILLA (default http://127.0.0.1:8085/v1/chat/completions)
URL_V2 (default http://127.0.0.1:8084/v1/chat/completions)
URL_V3 (default http://127.0.0.1:8083/v1/chat/completions)
On HF Space deployment configure these as Variables to point at a public tunnel
(e.g. Cloudflare โ llama-server). When unreachable the panel shows a clean
"backend unavailable" message instead of crashing.
"""
from __future__ import annotations
import base64
import json
import mimetypes
import os
import urllib.error
import urllib.request
from io import BytesIO
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import gradio as gr
from PIL import Image
ROOT = Path(__file__).parent
EXAMPLES_DIR = ROOT / "examples"      # bundled sample images shipped with the Space
CATALOG_PATH = ROOT / "catalog.json"  # per-sample metadata + cached model answers
# (category_id, UI label) pairs for the five sample folders, in display order.
CATEGORIES: List[Tuple[str, str]] = [
    ("diatom", "DIATOMS"),
    ("freshwater_zooplankton", "FRESHWATER"),
    ("marine_zooplankton", "MARINE"),
    ("fungal_spore", "FUNGAL SPORES"),
    ("fish", "FISH"),
]
CAT_LABELS = [lbl for _, lbl in CATEGORIES]           # labels in display order
CAT_BY_LABEL = {lbl: cid for cid, lbl in CATEGORIES}  # reverse lookup: label -> id
# (flag emoji, English name, ISO 639-1 code) for the translate dropdown,
# sorted alphabetically by language name. English is the default selection.
# NOTE(review): the flag strings below look mojibake-mangled in this copy of
# the file — verify the file encoding before shipping; kept byte-identical here.
LANGUAGES: List[Tuple[str, str, str]] = sorted([
    ("๐ธ๐ฆ", "Arabic", "ar"),
    ("๐ง๐ฉ", "Bengali", "bn"),
    ("๐จ๐ณ", "Chinese", "zh"),
    ("๐จ๐ฟ", "Czech", "cs"),
    ("๐ฉ๐ฐ", "Danish", "da"),
    ("๐ณ๐ฑ", "Dutch", "nl"),
    ("๐ฌ๐ง", "English", "en"),
    ("๐ซ๐ท", "French", "fr"),
    ("๐ฉ๐ช", "German", "de"),
    ("๐ฌ๐ท", "Greek", "el"),
    ("๐ฎ๐ณ", "Hindi", "hi"),
    ("๐ญ๐บ", "Hungarian", "hu"),
    ("๐ฎ๐ฉ", "Indonesian", "id"),
    ("๐ฎ๐น", "Italian", "it"),
    ("๐ฏ๐ต", "Japanese", "ja"),
    ("๐ฐ๐ท", "Korean", "ko"),
    ("๐ฒ๐พ", "Malay", "ms"),
    ("๐ณ๐ด", "Norwegian", "no"),
    ("๐ต๐ฑ", "Polish", "pl"),
    ("๐ต๐น", "Portuguese", "pt"),
    ("๐ท๐ด", "Romanian", "ro"),
    ("๐ท๐บ", "Russian", "ru"),
    ("๐ช๐ธ", "Spanish", "es"),
    ("๐ฐ๐ช", "Swahili", "sw"),
    ("๐ธ๐ช", "Swedish", "sv"),
    ("๐น๐ญ", "Thai", "th"),
    ("๐น๐ท", "Turkish", "tr"),
    ("๐บ๐ฆ", "Ukrainian", "uk"),
    ("๐ป๐ณ", "Vietnamese", "vi"),
], key=lambda x: x[1])
LANG_DISPLAY = [f"{flag} {name}" for flag, name, _ in LANGUAGES]                # dropdown labels
LANG_BY_DISPLAY = {f"{flag} {name}": code for flag, name, code in LANGUAGES}    # label -> ISO code
DEFAULT_LANG_DISPLAY = "๐ฌ๐ง English"
# Sample catalog: cached vanilla/v2/v3 answers + metadata for the SAMPLES tab.
CATALOG: List[Dict] = json.loads(CATALOG_PATH.read_text())
BY_FILENAME = {s["filename"]: s for s in CATALOG}
# Per-model backend endpoints; override via HF Space Variables to point at a
# public tunnel (see module docstring).
URL_VANILLA = os.environ.get("URL_VANILLA", "http://127.0.0.1:8085/v1/chat/completions")
URL_V2 = os.environ.get("URL_V2", "http://127.0.0.1:8084/v1/chat/completions")
URL_V3 = os.environ.get("URL_V3", "http://127.0.0.1:8083/v1/chat/completions")
INFERENCE_PROMPT = "What is shown in this microscope image?"
# ─────────────────────────────────────────────────────────────────────────────
# ZeroGPU runtime: when running on HF Space we replace HTTP llama-server calls
# with in-process transformers + PEFT multi-adapter inference on H200.
# Outside HF Space (local dev) the original HTTP path is preserved.
# ─────────────────────────────────────────────────────────────────────────────
IS_HF_SPACE = bool(os.environ.get("SPACE_ID"))  # SPACE_ID is set by the HF Space runtime
_HF_BASE = "unsloth/gemma-4-E2B-it"               # base VLM checkpoint
_HF_LORA_REPO = "Laborator/microlens-gemma4-e2b"  # repo holding lora/v2 and lora/v3 adapters
# Populated below only when running on an HF Space; None in local dev.
_zerogpu_processor = None
_zerogpu_model = None
if IS_HF_SPACE:
    import spaces
    import torch
    from transformers import AutoProcessor, AutoModelForImageTextToText
    from peft import PeftModel
    print("[ZeroGPU] loading processor + base model on cudaโฆ", flush=True)
    _zerogpu_processor = AutoProcessor.from_pretrained(_HF_BASE)
    _zerogpu_model = AutoModelForImageTextToText.from_pretrained(
        _HF_BASE, torch_dtype=torch.bfloat16, device_map="cuda",
    )

    # PEFT 0.19 cannot hook transformers' Gemma4ClippableLinear (vision tower
    # wrapper around nn.Linear with opt-in clamping). The clamp thresholds
    # default to ±inf so the wrapper is a behavioral no-op — replace each
    # occurrence with its inner .linear so PEFT sees a plain nn.Linear.
    def _unwrap_clippable(module):
        # Recursively swap every Gemma4ClippableLinear child for its wrapped
        # plain nn.Linear so LoRA hooks can attach to it.
        from torch import nn
        for name, child in list(module.named_children()):
            if type(child).__name__ == "Gemma4ClippableLinear" and isinstance(
                getattr(child, "linear", None), nn.Linear
            ):
                if getattr(child, "use_clipped_linears", False):
                    # Clipping is nominally enabled; warn but unwrap anyway
                    # since the default thresholds make it a no-op.
                    print(f"[ZeroGPU] WARN: clipped-linears active on {name}; "
                          "unwrapping anyway (thresholds are ยฑinf = no-op)", flush=True)
                setattr(module, name, child.linear)
            else:
                _unwrap_clippable(child)
    _unwrap_clippable(_zerogpu_model)
    print("[ZeroGPU] attaching v2 LoRAโฆ", flush=True)
    _zerogpu_model = PeftModel.from_pretrained(
        _zerogpu_model, _HF_LORA_REPO, subfolder="lora/v2", adapter_name="v2",
    )
    print("[ZeroGPU] attaching v3 LoRAโฆ", flush=True)
    _zerogpu_model.load_adapter(
        _HF_LORA_REPO, subfolder="lora/v3", adapter_name="v3",
    )
    _zerogpu_model.eval()
    print("[ZeroGPU] ready (vanilla / v2 / v3 share one base, swap adapters)", flush=True)

    # ── Batch path: run vanilla + v2 + v3 in a SINGLE GPU acquisition.
    # duration=60: vanilla can ramble for 20+s on long answers; v2+v3 add
    # another 15s. 60s budget guarantees all 3 finish without "GPU task
    # aborted". Anon (2min/day) gets 2 clicks; free (3.5min) ~3; PRO (25min) ~25.
    @spaces.GPU(duration=60)
    def _zerogpu_infer_all(image_data_uri: str, prompt: str):
        """Run one image+prompt through vanilla, v2 and v3 in a single GPU slot.

        Returns a dict {"vanilla": str, "v2": str, "v3": str} of decoded answers.
        """
        import time as _t
        t_total = _t.time()
        print(f"[infer-all] start cuda={torch.cuda.is_available()}", flush=True)
        # Accept either a full data: URI or a bare base64 payload.
        b64 = _strip_data_uri(image_data_uri) if image_data_uri.startswith("data:") else image_data_uri
        img = Image.open(BytesIO(base64.b64decode(b64))).convert("RGB")
        if max(img.size) > 768:
            # Downscale to keep vision preprocessing (and the token budget) cheap.
            img.thumbnail((768, 768))
        messages = [{"role": "user", "content": [
            {"type": "image", "image": img},
            {"type": "text", "text": prompt},
        ]}]
        inputs = _zerogpu_processor.apply_chat_template(
            messages, add_generation_prompt=True, tokenize=True,
            return_dict=True, return_tensors="pt",
        )
        # Move to model device; only float tensors get the bfloat16 cast.
        inputs = {k: (v.to(_zerogpu_model.device, dtype=torch.bfloat16) if v.is_floating_point()
                      else v.to(_zerogpu_model.device))
                  for k, v in inputs.items()}
        prompt_len = inputs["input_ids"].shape[1]
        results = {}
        for version in ("vanilla", "v2", "v3"):
            t0 = _t.time()
            if version == "vanilla":
                _zerogpu_model.disable_adapter_layers()
                # Vanilla rambles up to 1400+ chars on a microscope image which
                # blows the 60s ZeroGPU budget; cap it tighter.
                _max_tok = 256
            else:
                _zerogpu_model.enable_adapter_layers()
                _zerogpu_model.set_adapter(version)
                _max_tok = 512
            with torch.inference_mode():
                out = _zerogpu_model.generate(
                    **inputs, max_new_tokens=_max_tok, do_sample=False,
                )
            gen_ids = out[0][prompt_len:]  # drop the echoed prompt tokens
            text = _zerogpu_processor.decode(gen_ids, skip_special_tokens=True).strip()
            results[version] = text
            print(f"[infer-all] {version} t+{_t.time()-t0:.2f}s len={len(text)}", flush=True)
        print(f"[infer-all] DONE total t+{_t.time()-t_total:.2f}s", flush=True)
        return results

    # ── Single-version path (legacy / local fallback). Still used when
    # llama_server_call is invoked outside the do_analyze HF-Space
    # short-circuit (e.g. potential future paths).
    @spaces.GPU(duration=25)
    def _zerogpu_infer(version: str, image_data_uri: str, prompt: str) -> str:
        """Decode one answer for `version` ("vanilla" | "v2" | "v3")."""
        import time as _t
        t0 = _t.time()
        print(f"[infer] version={version} cuda={torch.cuda.is_available()} "
              f"dev={torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'cpu'}",
              flush=True)
        # Accept either a full data: URI or a bare base64 payload.
        b64 = _strip_data_uri(image_data_uri) if image_data_uri.startswith("data:") else image_data_uri
        img = Image.open(BytesIO(base64.b64decode(b64))).convert("RGB")
        if max(img.size) > 768:
            img.thumbnail((768, 768))
        print(f"[infer] image {img.size}", flush=True)
        if version == "vanilla":
            _zerogpu_model.disable_adapter_layers()
        else:
            _zerogpu_model.enable_adapter_layers()
            _zerogpu_model.set_adapter(version)
        messages = [{"role": "user", "content": [
            {"type": "image", "image": img},
            {"type": "text", "text": prompt},
        ]}]
        inputs = _zerogpu_processor.apply_chat_template(
            messages, add_generation_prompt=True, tokenize=True,
            return_dict=True, return_tensors="pt",
        )
        # Move to model device; only float tensors get bfloat16 cast.
        inputs = {k: (v.to(_zerogpu_model.device, dtype=torch.bfloat16) if v.is_floating_point()
                      else v.to(_zerogpu_model.device))
                  for k, v in inputs.items()}
        print(f"[infer] inputs ready, t+{_t.time()-t0:.2f}s, generatingโฆ", flush=True)
        with torch.inference_mode():
            out = _zerogpu_model.generate(
                **inputs, max_new_tokens=512, do_sample=False,
            )
        prompt_len = inputs["input_ids"].shape[1]
        gen_ids = out[0][prompt_len:]  # drop the echoed prompt tokens
        text = _zerogpu_processor.decode(gen_ids, skip_special_tokens=True)
        print(f"[infer] DONE t+{_t.time()-t0:.2f}s, gen_tokens={gen_ids.shape[0]}, "
              f"text_len={len(text)}, preview={text[:80]!r}", flush=True)
        return text.strip()
# Map each backend URL to the adapter name used by the in-process ZeroGPU path.
_URL_TO_KIND = {URL_VANILLA: "vanilla", URL_V2: "v2", URL_V3: "v3"}
# ─────────────────────────────────────────────────────────────────────────────
# QR codes for the footer install card. Generated once at module load.
# ─────────────────────────────────────────────────────────────────────────────
APK_URL = "https://huggingface.co/Laborator/microlens-gemma4-e2b/resolve/main/android/microlens-android-v1.0.0.apk"
GITHUB_URL = "https://github.com/SergheiBrinza/microlens"
def _qr_data_uri(data: str, dark: str = "#FFFFFF", light: str = "#000000",
                 alpha: float = 1.0) -> str:
    """Render `data` as a QR code and return it as a PNG data URI.

    `dark`/`light` set module/background colors; `alpha` < 1 fades the whole
    image. Returns "" (after logging) when the qrcode package is missing or
    rendering fails for any reason — the footer card simply omits the QR.
    """
    try:
        import qrcode
        maker = qrcode.QRCode(error_correction=qrcode.constants.ERROR_CORRECT_M,
                              box_size=10, border=2)
        maker.add_data(data)
        maker.make(fit=True)
        rgba = maker.make_image(fill_color=dark, back_color=light).convert("RGBA")
        if alpha < 1.0:
            # Scale only the alpha channel; RGB stays untouched.
            channels = list(rgba.split())
            channels[3] = channels[3].point(lambda px: int(px * alpha))
            rgba = Image.merge("RGBA", tuple(channels))
        out = BytesIO()
        rgba.save(out, "PNG")
        return "data:image/png;base64," + base64.b64encode(out.getvalue()).decode()
    except Exception as e:
        print(f"[qr] failed for {data[:40]}: {e}", flush=True)
        return ""
# White-on-black QR for the Android APK; dim gray-on-dark QR for the GitHub link.
QR_ANDROID = _qr_data_uri(APK_URL, dark="#FFFFFF", light="#000000")
QR_IOS = _qr_data_uri(GITHUB_URL, dark="#666666", light="#0a0a0a")
def _data_uri(path: Path) -> str:
    """Inline the file at `path` as a base64 data URI ("" if it doesn't exist).

    Falls back to image/png when the MIME type cannot be guessed from the name.
    """
    if not path.exists():
        return ""
    mime = mimetypes.guess_type(str(path))[0] or "image/png"
    payload = base64.b64encode(path.read_bytes()).decode()
    return f"data:{mime};base64,{payload}"
# Memoized 300px JPEG thumbnails, keyed by example filename ("" = missing file).
_THUMB_CACHE: Dict[str, str] = {}
def thumb_uri(filename: str) -> str:
    """Return a 300px JPEG thumbnail of an example image as a data URI.

    Results are cached per filename; a missing file caches and returns "".
    """
    cached = _THUMB_CACHE.get(filename)
    if cached is None:
        src = EXAMPLES_DIR / filename
        if not src.exists():
            cached = ""
        else:
            im = Image.open(src).convert("RGB")
            im.thumbnail((300, 300))
            raw = BytesIO()
            im.save(raw, "JPEG", quality=86)
            cached = "data:image/jpeg;base64," + base64.b64encode(raw.getvalue()).decode()
        _THUMB_CACHE[filename] = cached
    return cached
# Memoized full-resolution data URIs, keyed by example filename.
_FULL_CACHE: Dict[str, str] = {}
def full_uri(filename: str) -> str:
    """Full-resolution data URI for an example image, cached per filename."""
    try:
        return _FULL_CACHE[filename]
    except KeyError:
        uri = _data_uri(EXAMPLES_DIR / filename)
        _FULL_CACHE[filename] = uri
        return uri
SHAPE_CIRCLE = "circle"
SHAPE_SQUARE = "square"
MODE_SAMPLES = "samples"
MODE_UPLOAD = "upload"
MODE_MICRO = "micro"
ACCENT_RED = "#FF1744"
ACCENT_CYAN = "#7FE8E3"
ACCENT_GOLD = "#D4AF37"
def _strip_data_uri(data_uri: str) -> str:
    """Return the base64 payload of a data: URI; pass anything else through.

    A "data:" string without a comma (malformed URI) is returned unchanged.
    """
    if not data_uri.startswith("data:"):
        return data_uri
    _, sep, payload = data_uri.partition(",")
    return payload if sep else data_uri
def llama_server_call(url: str, image_data_uri: str,
                      prompt: str = INFERENCE_PROMPT,
                      timeout: int = 180) -> Tuple[str, Optional[str]]:
    """Return (answer_text, error_or_None) for one image+prompt request.

    On HF Space this routes to the in-process ZeroGPU inference path
    (transformers + PEFT adapters); locally it POSTs an OpenAI-compatible
    chat-completions request to a llama-server at `url`. All failures are
    reported as ("", message) instead of raising, so the UI can degrade.
    """
    if IS_HF_SPACE:
        kind = _URL_TO_KIND.get(url, "vanilla")
        try:
            return _zerogpu_infer(kind, image_data_uri, prompt), None
        except Exception as e:
            return "", f"{type(e).__name__}: {str(e)[:240]}"
    body = json.dumps({
        "messages": [{
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": image_data_uri}},
            ],
        }],
        "max_tokens": 600,
        "temperature": 0.2,
    }).encode("utf-8")
    request = urllib.request.Request(
        url,
        data=body,
        headers={"Content-Type": "application/json",
                 "User-Agent": "MicroLens-Space/1.0"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(request, timeout=timeout) as resp:
            parsed = json.loads(resp.read().decode("utf-8"))
        message = parsed.get("choices", [{}])[0].get("message", {}) or {}
        return (message.get("content") or "").strip(), None
    except urllib.error.HTTPError as e:
        try:
            detail = e.read().decode("utf-8", errors="replace")
        except Exception:
            detail = ""
        return "", f"HTTP {e.code}: {detail[:240]}"
    except Exception as e:
        return "", f"{type(e).__name__}: {str(e)[:240]}"
def matte_btn_style(active: bool, *, padding: str = "16px 20px",
                    radius: str = "18px", min_w: str = "100px",
                    font_size: str = "12px") -> str:
    """Inline CSS for a matte toolbar button.

    The active variant gets the red-glow gradient treatment; the inactive
    variant is a flat dark button. All size knobs are keyword-only.
    """
    head = f"cursor:pointer; padding:{padding}; border-radius:{radius};"
    tail = (f" font-size: {font_size}; font-family: 'Space Grotesk', sans-serif;"
            f" text-align:center; transition: all .15s ease; user-select:none;"
            f" min-width: {min_w};")
    if active:
        return (head
                + " background: linear-gradient(180deg, rgba(255,23,68,.18) 0%, rgba(255,23,68,.06) 100%);"
                + f" border: 1.5px solid {ACCENT_RED};"
                + " color: #fff; font-weight: 800; letter-spacing: 3px;"
                + tail
                + " box-shadow: 0 0 28px rgba(255,23,68,.45), inset 0 0 16px rgba(255,23,68,.12);")
    return (head
            + " background: #0a0a0a;"
            + " border: 1.5px solid #2a2a2a;"
            + " color: #ffffff; font-weight: 800; letter-spacing: 3px;"
            + tail
            + " box-shadow: inset 0 1px 0 rgba(255,255,255,0.04);")
# Grid overlay levels: index 0 = off; levels 1-4 draw 2/4/6/8 cells per side.
GRID_CELLS = [0, 2, 4, 6, 8]
N_GRID_LEVELS = len(GRID_CELLS)
def grid_overlay_html(level: int) -> str:
    """HTML for the viewport grid overlay at `level` (0 or out-of-range = none).

    NOTE(review): the overlay markup appears to have been stripped from this
    copy of the file — the returned f-string is empty apart from whitespace.
    Restore it from version control before shipping.
    """
    if level <= 0 or level >= len(GRID_CELLS):
        return ""
    n = GRID_CELLS[level]               # cells per side
    pct = 100 / n                       # cell size as % of the viewport
    op = 0.45 + level * 0.12            # line opacity grows with level
    line_w = 1 if level <= 2 else 1.5   # thicker lines at higher levels
    return f"""
"""
# Crosshair levels: per-level line opacity and tick count (index 0 = off).
CROSS_OPACITY = [0, 0.55, 0.75, 0.90, 1.0]
CROSS_TICKS = [0, 6, 10, 14, 18]
N_CROSS_LEVELS = len(CROSS_OPACITY)
def cross_overlay_html(level: int) -> str:
    """HTML for the viewport crosshair overlay at `level` (0 disables it).

    NOTE(review): the tick/line markup appears stripped in this copy of the
    file — every appended f-string literal is empty. Restore from version
    control before shipping.
    """
    if level <= 0 or level >= len(CROSS_OPACITY):
        return ""
    op = CROSS_OPACITY[level]
    n = CROSS_TICKS[level]
    col = f"rgba(255,255,255,{op})"
    col_strong = f"rgba(255,255,255,{min(op*1.10, 1.0)})"   # brighter major ticks
    halo_op = min(op * 0.55, 1.0)
    halo = f"rgba(0,0,0,{halo_op})"                         # dark halo behind white lines
    line_w = 2 if level >= 3 else 1
    halo_w = line_w + 2
    ticks = []
    for i in range(1, n + 1):
        pct = (i / (n + 1)) * 50        # distance from center, in percent
        is_major = (i % 4 == 0)         # every 4th tick is longer/brighter
        size = 11 if is_major else 6
        c = col_strong if is_major else col
        # Ticks above and below center on the vertical axis line.
        for axis_left, axis_top, w_h in [
            ("50%", f"{50-pct}%", "h"), ("50%", f"{50+pct}%", "h"),
        ]:
            tw, th = (size, 1) if w_h == "h" else (1, size)
            ticks.append(f'')
            ticks.append(f'')
        # Ticks left and right of center on the horizontal axis line.
        for axis_left, axis_top, w_h in [
            (f"{50-pct}%", "50%", "v"), (f"{50+pct}%", "50%", "v"),
        ]:
            tw, th = (1, size)
            ticks.append(f'')
            ticks.append(f'')
    return f"""
"""
def viewport_html(image_data_uri: Optional[str], shape: str = SHAPE_SQUARE,
                  grid: int = 0, cross: int = 0,
                  empty_text: str = "PICK A SAMPLE FROM A CATEGORY",
                  live_video: bool = False) -> str:
    """Render the main viewport (live video / image / empty hint) + overlays.

    NOTE(review): the HTML inside the f-strings below appears stripped in this
    copy of the file (several literals are empty or unterminated) — restore
    from version control before shipping.
    """
    if live_video and not image_data_uri:
        # Live camera stream — JS sets video.srcObject after camera selection
        inner = ('')
    elif image_data_uri:
        inner = f'
'
    else:
        inner = (
            f'{empty_text}
'
        )
    radius = "50%" if shape == SHAPE_CIRCLE else "22px"     # circle vs rounded square
    overlays = grid_overlay_html(grid) + cross_overlay_html(cross)
    return f"""
{inner}
{overlays}
"""
def folder_pills_html(active_label: str) -> str:
    """Category pill row; the pill matching `active_label` gets the red glow.

    NOTE(review): the wrapping markup in the f-strings at the bottom appears
    stripped in this copy of the file — restore before shipping.
    """
    pills = []
    for cid, lbl in CATEGORIES:
        is_active = (lbl == active_label)
        if is_active:
            pill_style = (
                "cursor:pointer; padding:10px 8px; border-radius:12px;"
                " background: linear-gradient(180deg, rgba(255,23,68,.20) 0%, rgba(255,23,68,.06) 100%);"
                f" border: 1.5px solid {ACCENT_RED};"
                " color: #fff; font-weight: 800; letter-spacing: 1.5px;"
                " font-size: 11px; font-family: 'Space Grotesk', sans-serif;"
                " text-align:center; transition: all .15s ease; user-select:none;"
                " white-space: nowrap;"
                " box-shadow: 0 0 22px rgba(255,23,68,.40), inset 0 0 12px rgba(255,23,68,.10);"
            )
        else:
            pill_style = (
                "cursor:pointer; padding:10px 8px; border-radius:12px;"
                " background: #0a0a0a;"
                f" border: 1.5px solid rgba(255,23,68,0.35);"
                " color: #cfcfcf; font-weight: 800; letter-spacing: 1.5px;"
                " font-size: 11px; font-family: 'Space Grotesk', sans-serif;"
                " text-align:center; transition: all .15s ease; user-select:none;"
                " white-space: nowrap;"
            )
        pills.append(f"""
{lbl}
""")
    return f'{"".join(pills)}
'
def folder_html(category_label: str, picked_filename: Optional[str]) -> str:
    """Thumbnail grid for one category; `picked_filename` gets the red ring.

    NOTE(review): the card markup in the f-strings below appears stripped in
    this copy of the file — restore before shipping.
    """
    cid = CAT_BY_LABEL.get(category_label, "diatom")
    samples = [s for s in CATALOG if s["category"] == cid]
    if not samples:
        return 'No samples
'
    cards = []
    for s in samples:
        is_sel = s["filename"] == picked_filename
        sel_style = (f"border-color: {ACCENT_RED}; "
                     f"box-shadow: 0 0 30px rgba(255,23,68,.55), 0 0 0 2px rgba(255,23,68,.6);"
                     if is_sel else "")
        sel_dot = (f''
                   if is_sel else "")
        # Prefer the genus name for the caption, fall back to the category id.
        caption = (s.get("genus") or s["category"])[:16]
        cards.append(f"""
})
{sel_dot}
{caption}
""")
    return f'{"".join(cards)}
'
def mode_buttons_html(active: str) -> str:
    """Mode-switcher row (MICROSCOPE / UPLOAD / SAMPLES); `active` highlighted.

    NOTE(review): the button labels below look mojibake-mangled (emoji) and
    the wrapping markup appears stripped in this copy of the file — verify
    encoding and restore markup before shipping.
    """
    btns = [
        (MODE_MICRO, "๐ฌ MICROSCOPE"),
        (MODE_UPLOAD, "๐ค UPLOAD"),
        (MODE_SAMPLES, "๐ SAMPLES"),
    ]
    out = []
    for m, lbl in btns:
        is_a = (m == active)
        style = matte_btn_style(is_a, padding="13px 28px", radius="16px",
                                min_w="170px", font_size="13px")
        out.append(f"""
{lbl}
""")
    return f'{"".join(out)}
'
def tool_buttons_html(shape: str, grid: int, cross: int) -> str:
    """Toolbar HTML: CIRCLE/SQUARE segmented control + GRID + CROSS toggles.

    NOTE(review): labels contain mojibake glyphs and the wrapping markup
    appears stripped in this copy of the file — verify before shipping.
    """
    out = []
    # Segmented shape control: exactly one of CIRCLE / SQUARE is active.
    seg_inner = []
    for s, lbl in [(SHAPE_CIRCLE, "โ CIRCLE"), (SHAPE_SQUARE, "โก SQUARE")]:
        is_a = (s == shape)
        if is_a:
            inner_style = ("background: linear-gradient(180deg, rgba(255,23,68,.22) 0%, rgba(255,23,68,.08) 100%);"
                           " color:#fff; box-shadow: 0 0 18px rgba(255,23,68,.45),"
                           " inset 0 0 12px rgba(255,23,68,.15); border: 1px solid rgba(255,23,68,.7);")
        else:
            inner_style = "background: transparent; color:#cfcfcf; border: 1px solid transparent;"
        seg_inner.append(f"""
{lbl}
""")
    out.append(f"""
{"".join(seg_inner)}
""")
    # GRID toggle: label shows the current cell count when enabled.
    grid_active = (grid > 0)
    grid_label = "โฆ GRID" if grid == 0 else f"โฆ GRID ยท {GRID_CELLS[grid]}ร{GRID_CELLS[grid]}"
    grid_style = matte_btn_style(grid_active, padding="13px 24px", radius="16px",
                                 min_w="140px", font_size="12px")
    out.append(f"""
{grid_label}
""")
    # CROSS toggle: label shows the current intensity level when enabled.
    cross_active = (cross > 0)
    cross_label = "โ CROSS" if cross == 0 else f"โ CROSS ยท {cross}/4"
    cross_style = matte_btn_style(cross_active, padding="13px 24px", radius="16px",
                                  min_w="140px", font_size="12px")
    out.append(f"""
{cross_label}
""")
    return f'{"".join(out)}
'
def upload_zone_html() -> str:
    """Static HTML for the UPLOAD mode drop zone.

    NOTE(review): the structural markup around these text lines appears
    stripped in this copy of the file — restore before shipping.
    """
    return f"""
Upload Microscope Image
&#128194;
DROP YOUR IMAGE HERE
or use the BROWSE button below
PNG &middot; JPG &middot; TIFF &middot; BMP &middot; up to 20 MB
"""
def camera_list_html() -> str:
# Initial state rendered directly in Python so it survives Gradio re-renders.
# The