Spaces:

prithivMLmods
/

Multimodal-Edge-Node

Running on Zero

App Files Files Community

prithivMLmods committed on 27 days ago

Commit

fd39af9

verified ·

1 Parent(s): 2969977

update app

Browse files

Files changed (1) hide show

app.py +508 -692

app.py CHANGED Viewed

@@ -4,13 +4,15 @@ import json
 import ast
 import re
 import uuid
 import threading
 from pathlib import Path
 from typing import Optional
 import spaces
 import torch
-from PIL import Image
 from gradio import Server
 from fastapi import Request, UploadFile, File, Form
@@ -51,35 +53,25 @@ QWEN25_VL_3B_NAME       = "Qwen/Qwen2.5-VL-3B-Instruct"
 print(f"Loading Qwen3-VL-2B model: {QWEN_VL_2B_MODEL_NAME} on {DEVICE}...")
 try:
     qwen_vl_2b_model = Qwen3VLForConditionalGeneration.from_pretrained(
-        QWEN_VL_2B_MODEL_NAME,
-        trust_remote_code=True,
-        torch_dtype=torch.bfloat16,
     ).to(DEVICE).eval()
-    qwen_vl_2b_processor = AutoProcessor.from_pretrained(
-        QWEN_VL_2B_MODEL_NAME, trust_remote_code=True
-    )
     print("Qwen3-VL-2B model loaded successfully.")
 except Exception as e:
     print(f"Warning: Qwen3-VL-2B model loading failed. Error: {e}")
-    qwen_vl_2b_model = None
-    qwen_vl_2b_processor = None
 # ── Qwen3-VL-4B-Instruct ────────────────────────────────
 print(f"Loading Qwen3-VL-4B model: {QWEN_VL_4B_MODEL_NAME} on {DEVICE}...")
 try:
     qwen_vl_4b_model = Qwen3VLForConditionalGeneration.from_pretrained(
-        QWEN_VL_4B_MODEL_NAME,
-        trust_remote_code=True,
-        torch_dtype=torch.bfloat16,
     ).to(DEVICE).eval()
-    qwen_vl_4b_processor = AutoProcessor.from_pretrained(
-        QWEN_VL_4B_MODEL_NAME, trust_remote_code=True
-    )
     print("Qwen3-VL-4B model loaded successfully.")
 except Exception as e:
     print(f"Warning: Qwen3-VL-4B model loading failed. Error: {e}")
-    qwen_vl_4b_model = None
-    qwen_vl_4b_processor = None
 # ── Qwen3.5-4B-Unredacted-MAX ───────────────────────────
 print(f"Loading Qwen3.5-4B-Unredacted-MAX: {QWEN_4B_UNREDACTED_NAME} on {DEVICE}...")
@@ -91,8 +83,7 @@ try:
     print("Qwen3.5-4B-Unredacted-MAX model loaded successfully.")
 except Exception as e:
     print(f"Warning: Qwen3.5-4B-Unredacted-MAX model loading failed. Error: {e}")
-    qwen_4b_unredacted_model = None
-    qwen_4b_unredacted_processor = None
 # ── Qwen3.5-4B ──────────────────────────────────────────
 print(f"Loading Qwen3.5-4B model: {QWEN_4B_MODEL_NAME} on {DEVICE}...")
@@ -104,8 +95,7 @@ try:
     print("Qwen3.5-4B model loaded successfully.")
 except Exception as e:
     print(f"Warning: Qwen3.5-4B model loading failed. Error: {e}")
-    qwen_4b_model = None
-    qwen_4b_processor = None
 # ── Qwen3.5-2B ──────────────────────────────────────────
 print(f"Loading Qwen3.5-2B model: {QWEN_2B_MODEL_NAME} on {DEVICE}...")
@@ -117,30 +107,25 @@ try:
     print("Qwen3.5-2B model loaded successfully.")
 except Exception as e:
     print(f"Warning: Qwen3.5-2B model loading failed. Error: {e}")
-    qwen_2b_model = None
-    qwen_2b_processor = None
 # ── LFM2.5-VL-450M ──────────────────────────────────────
 print(f"Loading LFM-450M model: {LFM_450_MODEL_NAME} on {DEVICE}...")
 try:
     lfm_450_model = AutoModelForImageTextToText.from_pretrained(
-        LFM_450_MODEL_NAME,
-        device_map="auto",
-        torch_dtype=torch.bfloat16,
     ).eval()
     lfm_450_processor = AutoProcessor.from_pretrained(LFM_450_MODEL_NAME)
     print("LFM-450M model loaded successfully.")
 except Exception as e:
     print(f"Warning: LFM-450M model loading failed. Error: {e}")
-    lfm_450_model = None
-    lfm_450_processor = None
 # ── Gemma4-E2B-it ───────────────────────────────────────
 print(f"Loading Gemma4-E2B-it: {GEMMA4_E2B_NAME} on {DEVICE}...")
 try:
     gemma4_e2b_model = Gemma4ForConditionalGeneration.from_pretrained(
-        GEMMA4_E2B_NAME,
-        torch_dtype=torch.bfloat16,
         device_map="auto" if torch.cuda.is_available() else None,
     ).eval()
     if not torch.cuda.is_available():
@@ -149,23 +134,19 @@ try:
     print("Gemma4-E2B-it model loaded successfully.")
 except Exception as e:
     print(f"Warning: Gemma4-E2B-it model loading failed. Error: {e}")
-    gemma4_e2b_model = None
-    gemma4_e2b_processor = None
 # ── LFM2.5-VL-1.6B ──────────────────────────────────────
 print(f"Loading LFM-1.6B model: {LFM_16_MODEL_NAME} on {DEVICE}...")
 try:
     lfm_16_model = AutoModelForImageTextToText.from_pretrained(
-        LFM_16_MODEL_NAME,
-        device_map="auto",
-        torch_dtype=torch.bfloat16,
     ).eval()
     lfm_16_processor = AutoProcessor.from_pretrained(LFM_16_MODEL_NAME)
     print("LFM-1.6B model loaded successfully.")
 except Exception as e:
     print(f"Warning: LFM-1.6B model loading failed. Error: {e}")
-    lfm_16_model = None
-    lfm_16_processor = None
 # ── Qwen3.5-2B-Unredacted-MAX ───────────────────────────
 print(f"Loading Qwen3.5-2B-Unredacted-MAX: {QWEN_UNREDACTED_NAME} on {DEVICE}...")
@@ -177,28 +158,54 @@ try:
     print("Qwen3.5-2B-Unredacted-MAX model loaded successfully.")
 except Exception as e:
     print(f"Warning: Qwen3.5-2B-Unredacted-MAX model loading failed. Error: {e}")
-    qwen_unredacted_model = None
-    qwen_unredacted_processor = None
 # ── Qwen2.5-VL-3B-Instruct ──────────────────────────────
 print(f"Loading Qwen2.5-VL-3B-Instruct: {QWEN25_VL_3B_NAME} on {DEVICE}...")
 try:
     qwen25_vl_3b_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-        QWEN25_VL_3B_NAME,
-        torch_dtype="auto",
-        device_map="auto",
     ).eval()
     qwen25_vl_3b_processor = AutoProcessor.from_pretrained(QWEN25_VL_3B_NAME)
     print("Qwen2.5-VL-3B-Instruct model loaded successfully.")
 except Exception as e:
     print(f"Warning: Qwen2.5-VL-3B-Instruct model loading failed. Error: {e}")
-    qwen25_vl_3b_model = None
-    qwen25_vl_3b_processor = None
-# --- Utility Functions ---
 def safe_parse_json(text: str):
     text = text.strip()
     text = re.sub(r"^```(json)?", "", text)
     text = re.sub(r"```$", "", text)
     text = text.strip()
@@ -206,13 +213,204 @@ def safe_parse_json(text: str):
         return json.loads(text)
     except json.JSONDecodeError:
         pass
     try:
         return ast.literal_eval(text)
     except Exception:
         return {}
-# --- Inference Generator (Streaming) ---
 @spaces.GPU(duration=120)
 def generate_inference_stream(
     image: Image.Image, category: str, prompt: str, model_id: str = "qwen_vl_2b"
@@ -232,315 +430,208 @@ def generate_inference_stream(
     if model_id == "qwen_vl_2b":
         if qwen_vl_2b_model is None or qwen_vl_2b_processor is None:
             yield f"data: {json.dumps({'chunk': '[Error] Qwen3-VL-2B model not loaded.'})}\n\n"
-            yield "data: [DONE]\n\n"
-            return
         messages = [{"role": "user", "content": [
-            {"type": "image", "image": image},
-            {"type": "text",  "text":  full_prompt},
-        ]}]
         text_input = qwen_vl_2b_processor.apply_chat_template(
-            messages, tokenize=False, add_generation_prompt=True
-        )
         inputs = qwen_vl_2b_processor(
             text=[text_input], images=[image], return_tensors="pt", padding=True
         ).to(qwen_vl_2b_model.device)
-        streamer = TextIteratorStreamer(
-            qwen_vl_2b_processor.tokenizer,
-            skip_prompt=True, skip_special_tokens=True, timeout=120,
-        )
-        thread = threading.Thread(
-            target=qwen_vl_2b_model.generate,
-            kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
-                        use_cache=True, temperature=1.0, do_sample=True),
-        )
-        thread.start()
         for tok in streamer:
-            if tok:
-                yield f"data: {json.dumps({'chunk': tok})}\n\n"
-        thread.join()
     # ── Qwen3-VL-4B ─────────────────────────────────────
     elif model_id == "qwen_vl_4b":
         if qwen_vl_4b_model is None or qwen_vl_4b_processor is None:
             yield f"data: {json.dumps({'chunk': '[Error] Qwen3-VL-4B model not loaded.'})}\n\n"
-            yield "data: [DONE]\n\n"
-            return
         messages = [{"role": "user", "content": [
-            {"type": "image", "image": image},
-            {"type": "text",  "text":  full_prompt},
-        ]}]
         text_input = qwen_vl_4b_processor.apply_chat_template(
-            messages, tokenize=False, add_generation_prompt=True
-        )
         inputs = qwen_vl_4b_processor(
             text=[text_input], images=[image], return_tensors="pt", padding=True
         ).to(qwen_vl_4b_model.device)
-        streamer = TextIteratorStreamer(
-            qwen_vl_4b_processor.tokenizer,
-            skip_prompt=True, skip_special_tokens=True, timeout=120,
-        )
-        thread = threading.Thread(
-            target=qwen_vl_4b_model.generate,
-            kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
-                        use_cache=True, temperature=1.0, do_sample=True),
-        )
-        thread.start()
         for tok in streamer:
-            if tok:
-                yield f"data: {json.dumps({'chunk': tok})}\n\n"
-        thread.join()
     # ── Qwen3.5-4B-Unredacted-MAX ───────────────────────
     elif model_id == "qwen_4b_unredacted":
         if qwen_4b_unredacted_model is None or qwen_4b_unredacted_processor is None:
             yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-4B-Unredacted-MAX model not loaded.'})}\n\n"
-            yield "data: [DONE]\n\n"
-            return
         messages = [{"role": "user", "content": [
-            {"type": "image", "image": image},
-            {"type": "text",  "text":  full_prompt},
-        ]}]
         text_input = qwen_4b_unredacted_processor.apply_chat_template(
-            messages, tokenize=False, add_generation_prompt=True
-        )
         inputs = qwen_4b_unredacted_processor(
             text=[text_input], images=[image], return_tensors="pt", padding=True
         ).to(qwen_4b_unredacted_model.device)
-        streamer = TextIteratorStreamer(
-            qwen_4b_unredacted_processor.tokenizer,
-            skip_prompt=True, skip_special_tokens=True, timeout=120,
-        )
-        thread = threading.Thread(
-            target=qwen_4b_unredacted_model.generate,
-            kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
-                        use_cache=True, temperature=1.5, min_p=0.1),
-        )
-        thread.start()
         for tok in streamer:
-            if tok:
-                yield f"data: {json.dumps({'chunk': tok})}\n\n"
-        thread.join()
     # ── Qwen3.5-4B ──────────────────────────────────────
     elif model_id == "qwen_4b":
         if qwen_4b_model is None or qwen_4b_processor is None:
             yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-4B model not loaded.'})}\n\n"
-            yield "data: [DONE]\n\n"
-            return
         messages = [{"role": "user", "content": [
-            {"type": "image", "image": image},
-            {"type": "text",  "text":  full_prompt},
-        ]}]
         text_input = qwen_4b_processor.apply_chat_template(
-            messages, tokenize=False, add_generation_prompt=True
-        )
         inputs = qwen_4b_processor(
             text=[text_input], images=[image], return_tensors="pt", padding=True
         ).to(qwen_4b_model.device)
-        streamer = TextIteratorStreamer(
-            qwen_4b_processor.tokenizer,
-            skip_prompt=True, skip_special_tokens=True, timeout=120,
-        )
-        thread = threading.Thread(
-            target=qwen_4b_model.generate,
-            kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
-                        use_cache=True, temperature=1.5, min_p=0.1),
-        )
-        thread.start()
         for tok in streamer:
-            if tok:
-                yield f"data: {json.dumps({'chunk': tok})}\n\n"
-        thread.join()
     # ── Qwen3.5-2B ──────────────────────────────────────
     elif model_id == "qwen_2b":
         if qwen_2b_model is None or qwen_2b_processor is None:
             yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-2B model not loaded.'})}\n\n"
-            yield "data: [DONE]\n\n"
-            return
         messages = [{"role": "user", "content": [
-            {"type": "image", "image": image},
-            {"type": "text",  "text":  full_prompt},
-        ]}]
         text_input = qwen_2b_processor.apply_chat_template(
-            messages, tokenize=False, add_generation_prompt=True
-        )
         inputs = qwen_2b_processor(
             text=[text_input], images=[image], return_tensors="pt", padding=True
         ).to(qwen_2b_model.device)
-        streamer = TextIteratorStreamer(
-            qwen_2b_processor.tokenizer,
-            skip_prompt=True, skip_special_tokens=True, timeout=120,
-        )
-        thread = threading.Thread(
-            target=qwen_2b_model.generate,
-            kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
-                        use_cache=True, temperature=1.5, min_p=0.1),
-        )
-        thread.start()
         for tok in streamer:
-            if tok:
-                yield f"data: {json.dumps({'chunk': tok})}\n\n"
-        thread.join()
     # ── LFM-450M ────────────────────────────────────────
     elif model_id == "lfm_450":
         if lfm_450_model is None or lfm_450_processor is None:
             yield f"data: {json.dumps({'chunk': '[Error] LFM-450M model not loaded.'})}\n\n"
-            yield "data: [DONE]\n\n"
-            return
         conversation = [{"role": "user", "content": [
-            {"type": "image", "image": image},
-            {"type": "text",  "text":  full_prompt},
-        ]}]
         inputs = lfm_450_processor.apply_chat_template(
             conversation, add_generation_prompt=True,
             return_tensors="pt", return_dict=True, tokenize=True,
         ).to(lfm_450_model.device)
-        streamer = TextIteratorStreamer(
-            lfm_450_processor.tokenizer,
-            skip_prompt=True, skip_special_tokens=True, timeout=120,
-        )
-        thread = threading.Thread(
-            target=lfm_450_model.generate,
-            kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024, use_cache=True),
-        )
-        thread.start()
         for tok in streamer:
-            if tok:
-                yield f"data: {json.dumps({'chunk': tok})}\n\n"
-        thread.join()
     # ── Gemma4-E2B-it ───────────────────────────────────
     elif model_id == "gemma4_e2b":
         if gemma4_e2b_model is None or gemma4_e2b_processor is None:
             yield f"data: {json.dumps({'chunk': '[Error] Gemma4-E2B-it model not loaded.'})}\n\n"
-            yield "data: [DONE]\n\n"
-            return
         messages = [{"role": "user", "content": [
-            {"type": "image", "image": image},
-            {"type": "text",  "text":  full_prompt},
-        ]}]
         text_input = gemma4_e2b_processor.apply_chat_template(
-            messages, tokenize=False, add_generation_prompt=True
-        )
         inputs = gemma4_e2b_processor(
             text=[text_input], images=[image], return_tensors="pt", padding=True,
         ).to(gemma4_e2b_model.device)
-        streamer = TextIteratorStreamer(
-            gemma4_e2b_processor.tokenizer,
-            skip_prompt=True, skip_special_tokens=True, timeout=120,
-        )
-        thread = threading.Thread(
-            target=gemma4_e2b_model.generate,
-            kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
-                        use_cache=True, temperature=1.0, do_sample=True),
-        )
-        thread.start()
         for tok in streamer:
-            if tok:
-                yield f"data: {json.dumps({'chunk': tok})}\n\n"
-        thread.join()
     # ── LFM-1.6B ────────────────────────────────────────
     elif model_id == "lfm_16":
         if lfm_16_model is None or lfm_16_processor is None:
             yield f"data: {json.dumps({'chunk': '[Error] LFM-1.6B model not loaded.'})}\n\n"
-            yield "data: [DONE]\n\n"
-            return
         conversation = [{"role": "user", "content": [
-            {"type": "image", "image": image},
-            {"type": "text",  "text":  full_prompt},
-        ]}]
         inputs = lfm_16_processor.apply_chat_template(
             conversation, add_generation_prompt=True,
             return_tensors="pt", return_dict=True, tokenize=True,
         ).to(lfm_16_model.device)
-        streamer = TextIteratorStreamer(
-            lfm_16_processor.tokenizer,
-            skip_prompt=True, skip_special_tokens=True, timeout=120,
-        )
-        thread = threading.Thread(
-            target=lfm_16_model.generate,
-            kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024, use_cache=True),
-        )
-        thread.start()
         for tok in streamer:
-            if tok:
-                yield f"data: {json.dumps({'chunk': tok})}\n\n"
-        thread.join()
     # ── Qwen3.5-2B-Unredacted-MAX ───────────────────────
     elif model_id == "qwen_unredacted":
         if qwen_unredacted_model is None or qwen_unredacted_processor is None:
             yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-2B-Unredacted-MAX model not loaded.'})}\n\n"
-            yield "data: [DONE]\n\n"
-            return
         messages = [{"role": "user", "content": [
-            {"type": "image", "image": image},
-            {"type": "text",  "text":  full_prompt},
-        ]}]
         text_input = qwen_unredacted_processor.apply_chat_template(
-            messages, tokenize=False, add_generation_prompt=True
-        )
         inputs = qwen_unredacted_processor(
             text=[text_input], images=[image], return_tensors="pt", padding=True
         ).to(qwen_unredacted_model.device)
-        streamer = TextIteratorStreamer(
-            qwen_unredacted_processor.tokenizer,
-            skip_prompt=True, skip_special_tokens=True, timeout=120,
-        )
-        thread = threading.Thread(
-            target=qwen_unredacted_model.generate,
-            kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
-                        use_cache=True, temperature=1.5, min_p=0.1),
-        )
-        thread.start()
         for tok in streamer:
-            if tok:
-                yield f"data: {json.dumps({'chunk': tok})}\n\n"
-        thread.join()
     # ── Qwen2.5-VL-3B-Instruct ──────────────────────────
     elif model_id == "qwen25_vl_3b":
         if qwen25_vl_3b_model is None or qwen25_vl_3b_processor is None:
             yield f"data: {json.dumps({'chunk': '[Error] Qwen2.5-VL-3B-Instruct model not loaded.'})}\n\n"
-            yield "data: [DONE]\n\n"
-            return
         messages = [{"role": "user", "content": [
-            {"type": "image", "image": image},
-            {"type": "text",  "text":  full_prompt},
-        ]}]
         text_input = qwen25_vl_3b_processor.apply_chat_template(
-            messages, tokenize=False, add_generation_prompt=True
-        )
         image_inputs, video_inputs = process_vision_info(messages)
         inputs = qwen25_vl_3b_processor(
-            text=[text_input],
-            images=image_inputs,
-            videos=video_inputs,
-            return_tensors="pt",
-            padding=True,
         ).to(qwen25_vl_3b_model.device)
-        streamer = TextIteratorStreamer(
-            qwen25_vl_3b_processor.tokenizer,
-            skip_prompt=True, skip_special_tokens=True, timeout=120,
-        )
-        thread = threading.Thread(
-            target=qwen25_vl_3b_model.generate,
-            kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
-                        use_cache=True, temperature=1.0, do_sample=True),
-        )
-        thread.start()
         for tok in streamer:
-            if tok:
-                yield f"data: {json.dumps({'chunk': tok})}\n\n"
-        thread.join()
     yield "data: [DONE]\n\n"
-# --- FastAPI Endpoints ---
 @app.post("/api/run")
 async def run_inference(
     image:    UploadFile = File(...),
@@ -560,7 +651,9 @@ async def run_inference(
         return JSONResponse({"error": str(e)}, status_code=500)
-# --- Frontend UI ---
 @app.get("/", response_class=HTMLResponse)
 async def homepage(request: Request):
     return """
@@ -614,10 +707,8 @@ async def homepage(request: Request):
         .top-bar .sub  { font-size: 11px; color: var(--muted); }
         .top-bar .badge {
             margin-left: auto;
-            background: rgba(124,106,247,0.15);
-            border: 1px solid rgba(124,106,247,0.3);
-            padding: 3px 10px; border-radius: 20px;
-            font-size: 10px; color: var(--accent);
         }
         /* ── Canvas ── */
         #canvas {
@@ -625,8 +716,7 @@ async def homepage(request: Request):
             min-height: calc(100vh - 42px); height: 900px; margin: 0 auto;
         }
         svg.wires {
-            position: absolute; top: 0; left: 0;
-            width: 100%; height: 100%;
             pointer-events: none; z-index: 2; overflow: visible;
         }
         path.wire { fill: none; stroke: var(--wire); stroke-width: 2.5; stroke-linecap: round; }
@@ -707,9 +797,9 @@ async def homepage(request: Request):
             border-radius: 5px; padding: 4px 8px; font-size: 9px; color: var(--muted); overflow: hidden;
         }
         .img-chip.visible { display: flex; }
-        .img-chip .chip-dot { width: 5px; height: 5px; border-radius: 50%; background: var(--accent2); flex-shrink: 0; box-shadow: 0 0 4px var(--accent2); }
-        .img-chip .chip-name { overflow: hidden; text-overflow: ellipsis; white-space: nowrap; flex: 1; color: var(--text); font-size: 9px; }
-        .img-chip .chip-size { color: var(--muted); flex-shrink: 0; font-size: 9px; }
         select, textarea {
             width: 100%; background: rgba(0,0,0,0.3); border: 1px solid var(--node-border);
             color: var(--text); padding: 7px 9px; border-radius: 5px; outline: none;
@@ -732,7 +822,7 @@ async def homepage(request: Request):
         /* ── Output node ── */
         .output-node-body { padding: 10px; display: flex; flex-direction: column; gap: 6px; flex: 1; overflow: hidden; }
         .output-header-row { display: flex; align-items: center; justify-content: space-between; flex-shrink: 0; }
-        /* ── Icon buttons (copy / download) ── */
         .icon-btn {
             display: flex; align-items: center; gap: 5px;
             background: rgba(124,106,247,0.10); border: 1px solid rgba(124,106,247,0.25);
@@ -740,16 +830,14 @@ async def homepage(request: Request):
             font-size: 9px; font-weight: 700; font-family: 'JetBrains Mono', monospace;
             color: var(--accent); cursor: pointer; letter-spacing: 0.05em;
             transition: background 0.18s, border-color 0.18s, transform 0.1s; flex-shrink: 0;
-            text-decoration: none;
         }
         .icon-btn:hover { background: rgba(124,106,247,0.22); border-color: var(--accent); }
         .icon-btn:active { transform: scale(0.95); }
-        .icon-btn.teal {
-            background: rgba(78,205,196,0.10); border-color: rgba(78,205,196,0.25); color: var(--accent2);
-        }
-        .icon-btn.teal:hover { background: rgba(78,205,196,0.22); border-color: var(--accent2); }
-        .icon-btn.copied { background: rgba(78,205,196,0.15); border-color: var(--accent2); color: var(--accent2); }
-        .icon-btn svg { pointer-events: none; flex-shrink: 0; }
         .output-box {
             background: rgba(0,0,0,0.4); border: 1px solid var(--node-border);
             border-radius: 5px; padding: 10px; flex: 1; overflow-y: auto;
@@ -759,20 +847,18 @@ async def homepage(request: Request):
         /* ── Grounding node ── */
         .ground-node-body { padding: 10px; display: flex; flex-direction: column; gap: 6px; flex: 1; overflow: hidden; }
         .ground-header-row { display: flex; align-items: center; justify-content: space-between; flex-shrink: 0; }
-        .ground-canvas-wrap {
             position: relative; flex: 1; border: 1px solid var(--node-border);
             border-radius: 5px; overflow: hidden; background: #111; min-height: 0;
         }
-        .ground-canvas-wrap canvas {
-            position: absolute; top: 0; left: 0;
-            width: 100%; height: 100%;
-            object-fit: contain; display: block;
-            image-rendering: auto;
         }
         .ground-placeholder {
             position: absolute; inset: 0; display: flex; align-items: center;
-            justify-content: center; font-size: 11px; color: var(--muted); text-align: center; padding: 10px;
-            pointer-events: none; z-index: 5;
         }
         .loader {
             width: 11px; height: 11px; border: 2px solid rgba(255,255,255,0.3);
@@ -780,36 +866,25 @@ async def homepage(request: Request):
             animation: spin 0.7s linear infinite; display: none;
         }
         @keyframes spin { to { transform: rotate(360deg); } }
-        .status-dot { width: 6px; height: 6px; border-radius: 50%; background: var(--muted); display: inline-block; margin-right: 6px; }
-        .status-dot.active { background: var(--accent2); box-shadow: 0 0 5px var(--accent2); }
         /* ── Model badges ── */
         .model-badge {
-            display: inline-block; padding: 2px 7px; border-radius: 4px;
-            font-size: 9px; font-weight: 700; letter-spacing: 0.06em; text-transform: uppercase;
         }
-        .model-badge.qvl2b    { background: rgba(255,150,50,0.15);  color: #ff9632;        border: 1px solid rgba(255,150,50,0.35); }
-        .model-badge.qvl4b    { background: rgba(255,100,80,0.15);  color: #ff6450;        border: 1px solid rgba(255,100,80,0.35); }
-        .model-badge.q4bunred { background: rgba(255,80,80,0.18);   color: #ff5050;        border: 1px solid rgba(255,80,80,0.40); }
-        .model-badge.q4b      { background: rgba(255,200,80,0.15);  color: #ffc850;        border: 1px solid rgba(255,200,80,0.35); }
-        .model-badge.q2b      { background: rgba(124,106,247,0.2);  color: var(--accent);  border: 1px solid rgba(124,106,247,0.3); }
-        .model-badge.lfm450   { background: rgba(78,205,196,0.15);  color: var(--accent2); border: 1px solid rgba(78,205,196,0.3); }
-        .model-badge.g4e2b    { background: rgba(66,197,107,0.15);  color: #42c56b;        border: 1px solid rgba(66,197,107,0.35); }
-        .model-badge.lfm16    { background: rgba(107,203,119,0.15); color: #6bcb77;        border: 1px solid rgba(107,203,119,0.35); }
-        .model-badge.qunred   { background: rgba(255,80,160,0.15);  color: #ff50a0;        border: 1px solid rgba(255,80,160,0.35); }
-        .model-badge.q25vl3b  { background: rgba(80,180,255,0.15);  color: #50b4ff;        border: 1px solid rgba(80,180,255,0.35); }
-        .model-info-box { border-radius: 6px; padding: 9px; font-size: 10px; color: var(--muted); line-height: 1.55; flex-shrink: 0; }
         .canvas-footer { height: 36px; }
-        /* ── Debug panel ── */
-        #debugPanel {
-            position: fixed; bottom: 12px; right: 12px; z-index: 9999;
-            background: rgba(13,13,15,0.95); border: 1px solid var(--node-border);
-            border-radius: 7px; padding: 8px 12px; font-size: 10px; color: var(--muted);
-            max-width: 340px; display: none; backdrop-filter: blur(8px);
-        }
-        #debugPanel.visible { display: block; }
-        #debugPanel .dbg-title { color: var(--accent2); font-weight: 700; margin-bottom: 4px; }
-        #debugPanel pre { white-space: pre-wrap; word-break: break-all; max-height: 120px; overflow-y: auto; color: #a0a0c0; }
     </style>
 </head>
 <body>
@@ -821,12 +896,6 @@ async def homepage(request: Request):
     <span class="badge">10x Vision Models</span>
 </div>
-<!-- Debug panel (toggle with D key) -->
-<div id="debugPanel">
-    <div class="dbg-title">⬡ GROUNDING DEBUG</div>
-    <pre id="debugPre"></pre>
-</div>
 <div id="canvas">
     <svg class="wires">
         <path id="wire-img-task"   class="wire" />
@@ -980,8 +1049,9 @@ async def homepage(request: Request):
                     SAVE
                 </a>
             </div>
-            <div class="ground-canvas-wrap" id="groundWrap">
-                <canvas id="groundCanvas"></canvas>
                 <div class="ground-placeholder" id="groundPlaceholder">
                     Active for Point / Detect tasks.<br>Run inference to visualise.
                 </div>
@@ -1033,7 +1103,8 @@ document.querySelectorAll('.node').forEach(node => {
     });
     document.addEventListener('mousemove', e => {
         if (!drag) return;
-        node.style.left=`${il+e.clientX-sx}px`; node.style.top=`${it+e.clientY-sy}px`;
         updateWires();
     });
     document.addEventListener('mouseup', () => { if(drag){drag=false;node.style.zIndex=10;} });
@@ -1058,22 +1129,33 @@ const chipSize    = document.getElementById('chipSize');
 const dotImg      = document.getElementById('dot-img');
 function formatBytes(b) {
-    if (b<1024) return b+' B'; if (b<1048576) return (b/1024).toFixed(1)+' KB';
     return (b/1048576).toFixed(1)+' MB';
 }
 function handleFile(file) {
-    if (!file||!file.type.startsWith('image/')) return;
-    currentFile=file; imgPreview.src=URL.createObjectURL(file);
-    previewWrap.classList.add('visible'); dropZone.style.display='none';
-    chipName.textContent=file.name; chipSize.textContent=formatBytes(file.size);
-    imgChip.classList.add('visible'); dotImg.classList.add('active');
     requestAnimationFrame(updateWires);
 }
 function clearImage() {
-    currentFile=null; imgPreview.src=''; previewWrap.classList.remove('visible');
-    dropZone.style.display=''; imgChip.classList.remove('visible');
-    chipName.textContent='—'; chipSize.textContent=''; fileInput.value='';
-    dotImg.classList.remove('active'); requestAnimationFrame(updateWires);
 }
 dropZone.onclick     = () => fileInput.click();
 fileInput.onchange   = e  => handleFile(e.target.files[0]);
@@ -1095,63 +1177,63 @@ dotModel.classList.add('active');
 const MODEL_INFO = {
     qwen_vl_2b: {
-        html:   `<span class="model-badge qvl2b">QWEN3-VL · 2B</span><br><br>
-                 Qwen3-VL-2B-Instruct — dedicated vision-language model by Alibaba Cloud.
-                 Strong spatial grounding, OCR &amp; instruction-following.`,
         bg: 'rgba(255,150,50,0.07)', border: 'rgba(255,150,50,0.30)',
     },
     qwen_vl_4b: {
-        html:   `<span class="model-badge qvl4b">QWEN3-VL · 4B</span><br><br>
-                 Qwen3-VL-4B-Instruct — enhanced vision-language model by Alibaba Cloud.
-                 Superior spatial grounding, richer OCR &amp; stronger multi-step reasoning.`,
         bg: 'rgba(255,100,80,0.07)', border: 'rgba(255,100,80,0.25)',
     },
     qwen_4b_unredacted: {
-        html:   `<span class="model-badge q4bunred">QWEN 3.5 · 4B UNREDACTED MAX</span><br><br>
-                 Qwen3.5-4B-Unredacted-MAX by prithivMLmods. Uncensored fine-tune of Qwen3.5-4B
-                 with extended instruction-following &amp; unrestricted reasoning.`,
         bg: 'rgba(255,80,80,0.07)', border: 'rgba(255,80,80,0.30)',
     },
     qwen_4b: {
-        html:   `<span class="model-badge q4b">QWEN 3.5 · 4B</span><br><br>
-                 Qwen3.5 4B multimodal model by Alibaba Cloud.
-                 Enhanced capacity — richer reasoning &amp; better instruction following.`,
         bg: 'rgba(255,200,80,0.07)', border: 'rgba(255,200,80,0.30)',
     },
     qwen_2b: {
-        html:   `<span class="model-badge q2b">QWEN 3.5 · 2B</span><br><br>
-                 Qwen3.5 2B multimodal model by Alibaba Cloud.
-                 Lightweight &amp; fast — ideal for quick Query, Caption, Point &amp; Detect tasks.`,
         bg: 'rgba(124,106,247,0.07)', border: 'rgba(124,106,247,0.25)',
     },
     lfm_450: {
-        html:   `<span class="model-badge lfm450">LFM · 450M</span><br><br>
-                 LFM2.5-VL 450M by LiquidAI. Ultra-lightweight edge model
-                 with solid grounding capabilities.`,
         bg: 'rgba(78,205,196,0.07)', border: 'rgba(78,205,196,0.25)',
     },
     gemma4_e2b: {
-        html:   `<span class="model-badge g4e2b">GEMMA 4 · E2B</span><br><br>
-                 Gemma4-E2B-it by Google DeepMind. Efficient 2B multimodal model
-                 with strong vision-language understanding &amp; instruction-following.`,
         bg: 'rgba(66,197,107,0.07)', border: 'rgba(66,197,107,0.25)',
     },
     lfm_16: {
-        html:   `<span class="model-badge lfm16">LFM · 1.6B</span><br><br>
-                 LFM2.5-VL 1.6B by LiquidAI. Larger liquid-state model offering
-                 enhanced reasoning &amp; richer visual understanding.`,
         bg: 'rgba(107,203,119,0.07)', border: 'rgba(107,203,119,0.25)',
     },
     qwen_unredacted: {
-        html:   `<span class="model-badge qunred">QWEN 3.5 · 2B UNREDACTED MAX</span><br><br>
-                 Qwen3.5-2B-Unredacted-MAX by prithivMLmods. Fine-tuned variant of Qwen3.5-2B
-                 with uncensored &amp; extended instruction-following capabilities.`,
         bg: 'rgba(255,80,160,0.07)', border: 'rgba(255,80,160,0.25)',
     },
     qwen25_vl_3b: {
-        html:   `<span class="model-badge q25vl3b">QWEN 2.5-VL · 3B</span><br><br>
-                 Qwen2.5-VL-3B-Instruct by Alibaba Cloud. Powerful 3B vision-language model
-                 with strong grounding, OCR &amp; multi-task visual reasoning.`,
         bg: 'rgba(80,180,255,0.07)', border: 'rgba(80,180,255,0.25)',
     },
 };
@@ -1174,307 +1256,9 @@ const PLACEHOLDERS = {
     Point:   'e.g., The gun held by the person.',
     Detect:  'e.g., The headlight of the car.',
 };
-categorySelect.onchange = e => { promptInput.placeholder = PLACEHOLDERS[e.target.value]||''; };
-// ══════════════════════════════════════════════
-//  DEBUG PANEL  (press D to toggle)
-// ══════════════════════════════════════════════
-// Debug panel state: the panel element, the element that receives debug text,
-// and whether the panel is currently shown.
-const debugPanel = document.getElementById('debugPanel');
-const debugPre   = document.getElementById('debugPre');
-let   debugVisible = false;
-// Toggle the panel's 'visible' class whenever 'd' or 'D' is pressed.
-// NOTE(review): the listener is on `document` with no target check, so typing
-// the letter "d" into any text field also toggles the panel.
-document.addEventListener('keydown', e => {
-    if (e.key === 'd' || e.key === 'D') {
-        debugVisible = !debugVisible;
-        debugPanel.classList.toggle('visible', debugVisible);
-    }
-});
-// Replace the debug text with `msg` (previous text is overwritten, not appended)
-// and mirror it to the console under the '[GROUNDING]' tag.
-function dbg(msg) {
-    debugPre.textContent = msg;
-    console.log('[GROUNDING]', msg);
-}
-// ══════════════════════════════════════════════
-//  ROBUST JSON EXTRACTOR  (handles all model output styles)
-// ══════════════════════════════════════════════
-// Extract a JSON value from raw model output.
-// Returns the parsed array/object, or null when nothing parseable is found.
-// Order of attempts: non-empty arrays (every '[' position, last to first),
-// then objects (every '{' position, last to first), then the whole cleaned text.
-// NOTE(review): the doubled backslashes below ('\\s', '\\\\') presumably exist
-// because this script is emitted from inside a Python string — confirm before
-// editing the escapes.
-function extractGroundingJSON(raw) {
-    // Step 1: strip <think>…</think> blocks completely
-    let text = raw;
-    // Repeat until a pass changes nothing, capped at 10 passes.
-    for (let i = 0; i < 10; i++) {
-        const next = text.replace(/<think>[\s\S]*?<\/think>/gi, '');
-        if (next === text) break;
-        text = next;
-    }
-    // Step 2: strip markdown fences
-    text = text.replace(/```(?:json)?\\s*/gi, '').replace(/```/g, '').trim();
-    dbg('Cleaned text (first 400):' + text.slice(0, 400));
-    // Step 3: Balanced bracket extractor
-    // Walks from startIdx tracking nesting depth while ignoring brackets inside
-    // double-quoted strings; when depth returns to 0 it JSON.parses that span.
-    // Returns null if the span is not valid JSON or the bracket is never closed.
-    function extractBalanced(str, startIdx, openCh, closeCh) {
-        let depth = 0, inStr = false, esc = false;
-        for (let i = startIdx; i < str.length; i++) {
-            const c = str[i];
-            if (esc)        { esc = false; continue; }
-            if (c === '\\\\') { esc = true;  continue; }
-            if (c === '"')  { inStr = !inStr; continue; }
-            if (inStr)      continue;
-            if (c === openCh)  depth++;
-            if (c === closeCh) {
-                depth--;
-                if (depth === 0) {
-                    try { return JSON.parse(str.slice(startIdx, i + 1)); }
-                    catch (_) { return null; }
-                }
-            }
-        }
-        return null;
-    }
-    // Step 4: scan for ALL '[' positions, try each from last to first
-    const bracketPositions = [];
-    const bracePositions   = [];
-    for (let i = 0; i < text.length; i++) {
-        if (text[i] === '[') bracketPositions.push(i);
-        if (text[i] === '{') bracePositions.push(i);
-    }
-    // Prefer arrays (most models return [{...}, {...}])
-    for (let i = bracketPositions.length - 1; i >= 0; i--) {
-        const r = extractBalanced(text, bracketPositions[i], '[', ']');
-        // Empty arrays are rejected so the search keeps looking for real data.
-        if (r !== null && Array.isArray(r) && r.length > 0) {
-            dbg('Found array at pos ' + bracketPositions[i] + ': ' + JSON.stringify(r).slice(0, 200));
-            return r;
-        }
-    }
-    // Try objects
-    for (let i = bracePositions.length - 1; i >= 0; i--) {
-        const r = extractBalanced(text, bracePositions[i], '{', '}');
-        if (r !== null) {
-            dbg('Found object at pos ' + bracePositions[i] + ': ' + JSON.stringify(r).slice(0, 200));
-            return r;
-        }
-    }
-    // Step 5: try whole-text parse
-    try { return JSON.parse(text); } catch (_) {}
-    dbg('No JSON found. Raw tail: ' + text.slice(-300));
-    return null;
-}
-// ══════════════════════════════════════════════
-//  COORDINATE NORMALISER
-//  Handles: absolute pixels, 0-1 fractions, 0-1000 Qwen scale
-// ══════════════════════════════════════════════
-// Scale a 4-element box or 2-element point to pixel coordinates for a W x H image.
-// The input scale is guessed from the largest value: (0, 1] is treated as a
-// fraction, (1, 1000] as the 0-1000 scale, anything else is returned unchanged.
-// Arrays of any other length are returned as numbers without scaling.
-// NOTE(review): genuine pixel coordinates whose maximum is <= 1000 cannot be
-// told apart from the 0-1000 scale and will be rescaled.
-function normaliseCoords(arr, W, H) {
-    // arr is [x1,y1,x2,y2] or [x,y]
-    const nums = arr.map(Number);
-    if (arr.length === 4) {
-        let [x1,y1,x2,y2] = nums;
-        // Qwen VL often uses 0-1000 normalised coords
-        const maxVal = Math.max(x1, y1, x2, y2);
-        if (maxVal <= 1.0 && maxVal > 0) {
-            // 0-1 fraction
-            return [x1*W, y1*H, x2*W, y2*H];
-        } else if (maxVal <= 1000 && maxVal > 1) {
-            // 0-1000 scale (Qwen VL convention)
-            return [x1/1000*W, y1/1000*H, x2/1000*W, y2/1000*H];
-        }
-        // Already in pixels
-        return [x1, y1, x2, y2];
-    }
-    if (arr.length === 2) {
-        let [x, y] = nums;
-        const maxVal = Math.max(x, y);
-        if (maxVal <= 1.0 && maxVal > 0) return [x*W, y*H];
-        if (maxVal <= 1000 && maxVal > 1) return [x/1000*W, y/1000*H];
-        return [x, y];
-    }
-    return nums;
-}
-// ══════════════════════════════════════════════
-//  GROUNDING VISUALIZER
-// ══════════════════════════════════════════════
-const groundCanvas      = document.getElementById('groundCanvas');
-const groundWrap        = document.getElementById('groundWrap');
-const groundPlaceholder = document.getElementById('groundPlaceholder');
-const gCtx              = groundCanvas.getContext('2d');
-const downloadBtn       = document.getElementById('downloadBtn');
-const PALETTE = ['#4ecdc4','#7c6af7','#ff6b6b','#ffd93d','#6bcb77','#ff922b','#cc5de8','#339af0'];
-// Convert a '#rrggbb' colour string plus an alpha value into an 'rgba(r,g,b,a)' string.
-// Only the six-digit form is handled: the channels are read from fixed offsets 1-2, 3-4, 5-6.
-function hexToRgba(hex, alpha) {
-    const r=parseInt(hex.slice(1,3),16), g=parseInt(hex.slice(3,5),16), b=parseInt(hex.slice(5,7),16);
-    return `rgba(${r},${g},${b},${alpha})`;
-}
-// Build a closed rounded-rectangle path on `ctx` with top-left (x, y), size w x h
-// and corner radius r. The path is only defined here; the caller fills or strokes it.
-function drawRoundRect(ctx, x, y, w, h, r) {
-    // Clamp the radius so opposite corners never overlap.
-    r = Math.min(r, w/2, h/2);
-    ctx.beginPath();
-    ctx.moveTo(x+r, y);
-    ctx.lineTo(x+w-r, y); ctx.quadraticCurveTo(x+w, y, x+w, y+r);
-    ctx.lineTo(x+w, y+h-r); ctx.quadraticCurveTo(x+w, y+h, x+w-r, y+h);
-    ctx.lineTo(x+r, y+h); ctx.quadraticCurveTo(x, y+h, x, y+h-r);
-    ctx.lineTo(x, y+r); ctx.quadraticCurveTo(x, y, x+r, y);
-    ctx.closePath();
-}
-// Point the download link at a PNG data URL of the current canvas contents,
-// give it a timestamped file name, and make the link visible.
-function updateDownloadBtn() {
-    const dataURL = groundCanvas.toDataURL('image/png');
-    // ISO timestamp with ':' and '.' replaced by '-', cut to its first 19 characters.
-    const ts      = new Date().toISOString().replace(/[:.]/g,'-').slice(0,19);
-    downloadBtn.href     = dataURL;
-    downloadBtn.download = `grounding_${ts}.png`;
-    downloadBtn.style.display = 'flex';
-}
-// Render grounding results on the canvas.
-// Parses `rawText` with extractGroundingJSON, loads `imgSrc`, resizes the canvas
-// to the image's natural size, draws the image, then overlays one box or one
-// point marker per parsed item and finally refreshes the download link.
-// When nothing can be parsed, or the image fails to load, only the placeholder
-// text is updated and nothing is drawn.
-function drawGrounding(imgSrc, rawText) {
-    const parsed = extractGroundingJSON(rawText);
-    if (!parsed) {
-        dbg('drawGrounding: no JSON parsed from output.');
-        groundPlaceholder.textContent = 'No grounding coordinates found in model output.';
-        groundPlaceholder.style.display = 'flex';
-        return;
-    }
-    const img = new Image();
-    img.crossOrigin = 'anonymous';
-    // All drawing happens in onload because the image dimensions are needed first.
-    img.onload = () => {
-        const W = img.naturalWidth  || img.width  || 512;
-        const H = img.naturalHeight || img.height || 512;
-        // Set canvas to image natural size for crisp drawing
-        groundCanvas.width  = W;
-        groundCanvas.height = H;
-        // Draw base image
-        gCtx.drawImage(img, 0, 0, W, H);
-        // Hide placeholder — canvas is now populated
-        groundPlaceholder.style.display = 'none';
-        // Line width and font size scale with the image width, within fixed bounds.
-        const lw = Math.max(2, W / 180);
-        const fs = Math.max(11, Math.min(W / 35, 22));
-        gCtx.lineWidth = lw;
-        // A single object is treated as a one-item list.
-        const items = Array.isArray(parsed) ? parsed : [parsed];
-        dbg('Drawing ' + items.length + ' item(s) on ' + W + 'x' + H);
-        items.forEach((item, i) => {
-            // Colours cycle through PALETTE by item index.
-            const col = PALETTE[i % PALETTE.length];
-            // ── Try to extract bbox ───────────────────────
-            let rawBbox = null;
-            if (Array.isArray(item?.bbox_2d) && item.bbox_2d.length === 4) rawBbox = item.bbox_2d;
-            else if (Array.isArray(item?.bbox)   && item.bbox.length   === 4) rawBbox = item.bbox;
-            else if (Array.isArray(item?.box)    && item.box.length    === 4) rawBbox = item.box;
-            // flat array of 4 numbers
-            else if (Array.isArray(item) && item.length === 4 && item.every(v => typeof v === 'number'))
-                rawBbox = item;
-            if (rawBbox) {
-                let [x1, y1, x2, y2] = normaliseCoords(rawBbox, W, H);
-                // Ensure x1<x2, y1<y2
-                if (x2 < x1) [x1, x2] = [x2, x1];
-                if (y2 < y1) [y1, y2] = [y2, y1];
-                const bw = x2 - x1, bh = y2 - y1;
-                // Fill
-                gCtx.fillStyle = hexToRgba(col, 0.18);
-                gCtx.fillRect(x1, y1, bw, bh);
-                // Border
-                gCtx.strokeStyle = col;
-                gCtx.lineWidth   = lw;
-                gCtx.strokeRect(x1, y1, bw, bh);
-                // Corner accent marks
-                const cLen = Math.min(bw, bh, 18);
-                gCtx.lineWidth = lw * 1.8;
-                [[x1,y1],[x2,y1],[x2,y2],[x1,y2]].forEach(([cx,cy]) => {
-                    // sx / sy point from the corner towards the inside of the box.
-                    const sx = cx === x1 ? 1 : -1, sy = cy === y1 ? 1 : -1;
-                    gCtx.beginPath();
-                    gCtx.moveTo(cx + sx*cLen, cy);
-                    gCtx.lineTo(cx, cy);
-                    gCtx.lineTo(cx, cy + sy*cLen);
-                    gCtx.strokeStyle = col;
-                    gCtx.stroke();
-                });
-                gCtx.lineWidth = lw;
-                // Label
-                const lbl = (item?.label ?? item?.class_name ?? item?.name ?? `obj ${i+1}`).toString();
-                gCtx.font = `bold ${fs}px JetBrains Mono, monospace`;
-                const tw  = gCtx.measureText(lbl).width;
-                const ph  = fs * 1.5, pw = tw + 14;
-                // Keep the label inside the canvas horizontally; place it above the
-                // box when there is room, otherwise just inside the top edge.
-                const lx  = Math.max(0, Math.min(x1, W - pw));
-                const ly  = y1 - ph > 0 ? y1 - ph : y1 + 2;
-                drawRoundRect(gCtx, lx, ly, pw, ph, 4);
-                gCtx.fillStyle = col; gCtx.fill();
-                gCtx.fillStyle = '#fff';
-                gCtx.fillText(lbl, lx + 7, ly + ph * 0.74);
-                // An item that yielded a box is never also drawn as a point.
-                return;
-            }
-            // ── Try to extract point ──────────────────────
-            let rawPt = null;
-            if (Array.isArray(item?.point_2d) && item.point_2d.length === 2) rawPt = item.point_2d;
-            else if (Array.isArray(item?.point)   && item.point.length   === 2) rawPt = item.point;
-            else if (Array.isArray(item?.coord)   && item.coord.length   === 2) rawPt = item.coord;
-            else if (Array.isArray(item) && item.length === 2 && item.every(v => typeof v === 'number'))
-                rawPt = item;
-            if (rawPt) {
-                let [x, y] = normaliseCoords(rawPt, W, H);
-                const r = Math.max(7, Math.min(W / 55, 18));
-                const lbl = (item?.label ?? item?.name ?? `pt ${i+1}`).toString();
-                // Outer glow ring
-                gCtx.beginPath(); gCtx.arc(x, y, r * 2.2, 0, Math.PI*2);
-                gCtx.fillStyle = hexToRgba(col, 0.15); gCtx.fill();
-                // Middle ring
-                gCtx.beginPath(); gCtx.arc(x, y, r * 1.4, 0, Math.PI*2);
-                gCtx.fillStyle = hexToRgba(col, 0.25); gCtx.fill();
-                // Core dot
-                gCtx.beginPath(); gCtx.arc(x, y, r, 0, Math.PI*2);
-                gCtx.fillStyle = col; gCtx.fill();
-                gCtx.strokeStyle = '#fff'; gCtx.lineWidth = Math.max(1.5, lw); gCtx.stroke();
-                // Centre dot
-                gCtx.beginPath(); gCtx.arc(x, y, r * 0.3, 0, Math.PI*2);
-                gCtx.fillStyle = '#fff'; gCtx.fill();
-                // Label
-                gCtx.font = `bold ${fs}px JetBrains Mono, monospace`;
-                const tw  = gCtx.measureText(lbl).width;
-                const ph  = fs * 1.45, pw = tw + 12;
-                const lx  = Math.min(x + r + 6, W - pw);
-                const ly  = Math.max(0, y - ph/2);
-                drawRoundRect(gCtx, lx, ly, pw, ph, 4);
-                gCtx.fillStyle = col; gCtx.fill();
-                gCtx.fillStyle = '#fff';
-                gCtx.fillText(lbl, lx + 6, ly + ph * 0.74);
-            }
-        });
-        updateDownloadBtn();
-    };
-    img.onerror = (e) => {
-        dbg('Image load error: ' + e);
-        groundPlaceholder.textContent = 'Failed to load image for overlay.';
-        groundPlaceholder.style.display = 'flex';
-    };
-    // Assigning src last ensures the handlers above are attached before loading starts.
-    img.src = imgSrc;
-}
 // ══════════════════════════════════════════════
 //  COPY BUTTON
@@ -1493,8 +1277,8 @@ function resetCopyBtn() {
         </svg> COPY`;
 }
 copyBtn.onclick = () => {
-    const txt = outputBox.innerText||'';
-    if (!txt||txt==='Results will stream here...') return;
     navigator.clipboard.writeText(txt).then(() => {
         copyBtn.classList.add('copied');
         copyBtn.innerHTML = `
@@ -1502,14 +1286,45 @@ copyBtn.onclick = () => {
                  stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round">
                 <polyline points="20 6 9 17 4 12"/>
             </svg> COPIED`;
-        clearTimeout(copyTimer); copyTimer=setTimeout(resetCopyBtn,2000);
     }).catch(() => {
-        const ta=document.createElement('textarea'); ta.value=txt;
-        ta.style.position='fixed'; ta.style.opacity='0';
-        document.body.appendChild(ta); ta.select(); document.execCommand('copy'); document.body.removeChild(ta);
     });
 };
 // ══════════════════════════════════════════════
 //  RUN INFERENCE
 // ══════════════════════════════════════════════
@@ -1518,41 +1333,38 @@ const btnLoader = document.getElementById('btnLoader');
 const allWires  = ['wire-img-task','wire-model-task','wire-task-out','wire-task-gnd'];
 const dotTask   = document.getElementById('dot-task');
 const dotOut    = document.getElementById('dot-out');
-const dotGnd    = document.getElementById('dot-gnd');
 runBtn.onclick = async () => {
     if (!currentFile) { alert('Please upload an image into the Input Node.'); return; }
     const promptStr = promptInput.value.trim();
     if (!promptStr)  { alert('Please enter a prompt directive.'); return; }
-    // Reset UI
     runBtn.disabled = true;
     btnLoader.style.display = 'inline-block';
     outputBox.innerText = '';
     outputBox.style.color = '';
-    groundPlaceholder.style.display = 'flex';
-    groundPlaceholder.textContent = 'Running inference…';
-    gCtx.clearRect(0, 0, groundCanvas.width, groundCanvas.height);
-    groundCanvas.width  = 1;   // reset canvas
-    groundCanvas.height = 1;
-    downloadBtn.style.display = 'none';
     dotTask.classList.add('active');
     dotOut.classList.remove('active');
-    dotGnd.classList.remove('active');
     allWires.forEach(id => document.getElementById(id)?.classList.add('active'));
     resetCopyBtn();
     const formData = new FormData();
     formData.append('image',    currentFile);
-    formData.append('category', categorySelect.value);
     formData.append('prompt',   promptStr);
     formData.append('model_id', modelSelect.value);
     let fullText = '';
-    // Create a stable object URL for this run
-    const imgObjectURL = URL.createObjectURL(currentFile);
     try {
         const response = await fetch('/api/run', { method: 'POST', body: formData });
         if (!response.ok) {
             const err = await response.json();
@@ -1561,18 +1373,18 @@ runBtn.onclick = async () => {
         const reader  = response.body.getReader();
         const decoder = new TextDecoder('utf-8');
-        let buffer = '';
         while (true) {
             const { value, done } = await reader.read();
             if (done) break;
             buffer += decoder.decode(value, { stream: true });
             const lines = buffer.split('\\n\\n');
-            buffer = lines.pop();  // keep incomplete chunk
             for (const line of lines) {
                 if (!line.startsWith('data: ')) continue;
-                const payload = line.slice(6);   // remove 'data: '
                 if (payload === '[DONE]') break;
                 try {
                     const data = JSON.parse(payload);
@@ -1587,43 +1399,47 @@ runBtn.onclick = async () => {
         dotOut.classList.add('active');
-        // ── Grounding overlay ─────────────────────────
-        const cat = categorySelect.value;
         if ((cat === 'Point' || cat === 'Detect') && fullText.trim()) {
-            groundPlaceholder.textContent = 'Parsing coordinates…';
-            groundPlaceholder.style.display = 'flex';
-            // Small delay so the UI updates before heavy canvas work
-            setTimeout(() => {
-                const parsed = extractGroundingJSON(fullText);
-                if (parsed !== null) {
-                    dotGnd.classList.add('active');
-                    drawGrounding(imgObjectURL, fullText);
                 } else {
-                    groundPlaceholder.textContent =
-                        'No grounding JSON detected in model output. ' +
-                        'Try rephrasing your prompt or use a VL model.';
-                    groundPlaceholder.style.display = 'flex';
-                    dbg('No JSON found. Full output: ' + fullText.slice(0, 500));
                 }
-            }, 50);
         } else if (cat !== 'Point' && cat !== 'Detect') {
-            groundPlaceholder.textContent = 'Active for Point / Detect tasks. Run inference to visualise.';
-            groundPlaceholder.style.display = 'flex';
         }
     } catch (err) {
         outputBox.innerText = `[Error] ${err.message}`;
         outputBox.style.color = '#ff6b6b';
-        groundPlaceholder.textContent = 'Inference error — see Output Stream node.';
-        groundPlaceholder.style.display = 'flex';
     } finally {
         runBtn.disabled = false;
         btnLoader.style.display = 'none';
         dotTask.classList.remove('active');
         allWires.forEach(id => document.getElementById(id)?.classList.remove('active'));
-        // Revoke object URL after a delay to allow canvas drawing
-        setTimeout(() => URL.revokeObjectURL(imgObjectURL), 10000);
     }
 };
 </script>

 import ast
 import re
 import uuid
+import base64
 import threading
+import numpy as np
 from pathlib import Path
 from typing import Optional
 import spaces
 import torch
+from PIL import Image, ImageDraw, ImageFont
 from gradio import Server
 from fastapi import Request, UploadFile, File, Form
 print(f"Loading Qwen3-VL-2B model: {QWEN_VL_2B_MODEL_NAME} on {DEVICE}...")
 try:
     qwen_vl_2b_model = Qwen3VLForConditionalGeneration.from_pretrained(
+        QWEN_VL_2B_MODEL_NAME, trust_remote_code=True, torch_dtype=torch.bfloat16,
     ).to(DEVICE).eval()
+    qwen_vl_2b_processor = AutoProcessor.from_pretrained(QWEN_VL_2B_MODEL_NAME, trust_remote_code=True)
     print("Qwen3-VL-2B model loaded successfully.")
 except Exception as e:
     print(f"Warning: Qwen3-VL-2B model loading failed. Error: {e}")
+    qwen_vl_2b_model = None; qwen_vl_2b_processor = None
 # ── Qwen3-VL-4B-Instruct ────────────────────────────────
 print(f"Loading Qwen3-VL-4B model: {QWEN_VL_4B_MODEL_NAME} on {DEVICE}...")
 try:
     qwen_vl_4b_model = Qwen3VLForConditionalGeneration.from_pretrained(
+        QWEN_VL_4B_MODEL_NAME, trust_remote_code=True, torch_dtype=torch.bfloat16,
     ).to(DEVICE).eval()
+    qwen_vl_4b_processor = AutoProcessor.from_pretrained(QWEN_VL_4B_MODEL_NAME, trust_remote_code=True)
     print("Qwen3-VL-4B model loaded successfully.")
 except Exception as e:
     print(f"Warning: Qwen3-VL-4B model loading failed. Error: {e}")
+    qwen_vl_4b_model = None; qwen_vl_4b_processor = None
 # ── Qwen3.5-4B-Unredacted-MAX ───────────────────────────
 print(f"Loading Qwen3.5-4B-Unredacted-MAX: {QWEN_4B_UNREDACTED_NAME} on {DEVICE}...")
     print("Qwen3.5-4B-Unredacted-MAX model loaded successfully.")
 except Exception as e:
     print(f"Warning: Qwen3.5-4B-Unredacted-MAX model loading failed. Error: {e}")
+    qwen_4b_unredacted_model = None; qwen_4b_unredacted_processor = None
 # ── Qwen3.5-4B ──────────────────────────────────────────
 print(f"Loading Qwen3.5-4B model: {QWEN_4B_MODEL_NAME} on {DEVICE}...")
     print("Qwen3.5-4B model loaded successfully.")
 except Exception as e:
     print(f"Warning: Qwen3.5-4B model loading failed. Error: {e}")
+    qwen_4b_model = None; qwen_4b_processor = None
 # ── Qwen3.5-2B ──────────────────────────────────────────
 print(f"Loading Qwen3.5-2B model: {QWEN_2B_MODEL_NAME} on {DEVICE}...")
     print("Qwen3.5-2B model loaded successfully.")
 except Exception as e:
     print(f"Warning: Qwen3.5-2B model loading failed. Error: {e}")
+    qwen_2b_model = None; qwen_2b_processor = None
 # ── LFM2.5-VL-450M ──────────────────────────────────────
 print(f"Loading LFM-450M model: {LFM_450_MODEL_NAME} on {DEVICE}...")
 try:
     lfm_450_model = AutoModelForImageTextToText.from_pretrained(
+        LFM_450_MODEL_NAME, device_map="auto", torch_dtype=torch.bfloat16,
     ).eval()
     lfm_450_processor = AutoProcessor.from_pretrained(LFM_450_MODEL_NAME)
     print("LFM-450M model loaded successfully.")
 except Exception as e:
     print(f"Warning: LFM-450M model loading failed. Error: {e}")
+    lfm_450_model = None; lfm_450_processor = None
 # ── Gemma4-E2B-it ───────────────────────────────────────
 print(f"Loading Gemma4-E2B-it: {GEMMA4_E2B_NAME} on {DEVICE}...")
 try:
     gemma4_e2b_model = Gemma4ForConditionalGeneration.from_pretrained(
+        GEMMA4_E2B_NAME, torch_dtype=torch.bfloat16,
         device_map="auto" if torch.cuda.is_available() else None,
     ).eval()
     if not torch.cuda.is_available():
     print("Gemma4-E2B-it model loaded successfully.")
 except Exception as e:
     print(f"Warning: Gemma4-E2B-it model loading failed. Error: {e}")
+    gemma4_e2b_model = None; gemma4_e2b_processor = None
 # ── LFM2.5-VL-1.6B ──────────────────────────────────────
 print(f"Loading LFM-1.6B model: {LFM_16_MODEL_NAME} on {DEVICE}...")
 try:
     lfm_16_model = AutoModelForImageTextToText.from_pretrained(
+        LFM_16_MODEL_NAME, device_map="auto", torch_dtype=torch.bfloat16,
     ).eval()
     lfm_16_processor = AutoProcessor.from_pretrained(LFM_16_MODEL_NAME)
     print("LFM-1.6B model loaded successfully.")
 except Exception as e:
     print(f"Warning: LFM-1.6B model loading failed. Error: {e}")
+    lfm_16_model = None; lfm_16_processor = None
 # ── Qwen3.5-2B-Unredacted-MAX ───────────────────────────
 print(f"Loading Qwen3.5-2B-Unredacted-MAX: {QWEN_UNREDACTED_NAME} on {DEVICE}...")
     print("Qwen3.5-2B-Unredacted-MAX model loaded successfully.")
 except Exception as e:
     print(f"Warning: Qwen3.5-2B-Unredacted-MAX model loading failed. Error: {e}")
+    qwen_unredacted_model = None; qwen_unredacted_processor = None
 # ── Qwen2.5-VL-3B-Instruct ──────────────────────────────
 print(f"Loading Qwen2.5-VL-3B-Instruct: {QWEN25_VL_3B_NAME} on {DEVICE}...")
 try:
     qwen25_vl_3b_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+        QWEN25_VL_3B_NAME, torch_dtype="auto", device_map="auto",
     ).eval()
     qwen25_vl_3b_processor = AutoProcessor.from_pretrained(QWEN25_VL_3B_NAME)
     print("Qwen2.5-VL-3B-Instruct model loaded successfully.")
 except Exception as e:
     print(f"Warning: Qwen2.5-VL-3B-Instruct model loading failed. Error: {e}")
+    qwen25_vl_3b_model = None; qwen25_vl_3b_processor = None
+# ─────────────────────────────────────────────────────────────────────────────
+#  SERVER-SIDE ANNOTATION  (mirrors the reference app exactly)
+# ─────────────────────────────────────────────────────────────────────────────
+PALETTE_RGB = [
+    (78,  205, 196),   # teal
+    (124, 106, 247),   # purple
+    (255, 107, 107),   # red
+    (255, 217,  61),   # yellow
+    (107, 203, 119),   # green
+    (255, 146,  43),   # orange
+    (204,  93, 232),   # violet
+    (51,  154, 240),   # blue
+]
+def _get_font(size: int = 14):
+    """Try to load a TrueType font; fall back to PIL default."""
+    for name in ["DejaVuSans-Bold.ttf", "arial.ttf", "Arial.ttf",
+                 "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"]:
+        try:
+            return ImageFont.truetype(name, size)
+        except (IOError, OSError):
+            pass
+    return ImageFont.load_default()
 def safe_parse_json(text: str):
+    """Strip markdown fences + <think> blocks, then parse JSON."""
+    # Remove <think>…</think>
+    text = re.sub(r"<think>[\s\S]*?</think>", "", text, flags=re.IGNORECASE)
     text = text.strip()
+    # Strip markdown fences
     text = re.sub(r"^```(json)?", "", text)
     text = re.sub(r"```$", "", text)
     text = text.strip()
         return json.loads(text)
     except json.JSONDecodeError:
         pass
+    # Try to find the first [...] or {...} block
+    # Arrays are tried before objects, and only the FIRST occurrence of each
+    # opening character is examined.
+    for ch_open, ch_close in [('[', ']'), ('{', '}')]:
+        idx = text.find(ch_open)
+        if idx != -1:
+            # Scan forward tracking nesting depth; brackets inside double-quoted
+            # strings are ignored.
+            depth, in_str, esc = 0, False, False
+            for i in range(idx, len(text)):
+                c = text[i]
+                # NOTE(review): a backslash sets the escape flag even outside a
+                # string, because this check runs before the in_str test.
+                if esc:          esc = False;  continue
+                if c == '\\':    esc = True;   continue
+                if c == '"':     in_str = not in_str; continue
+                if in_str:       continue
+                if c == ch_open: depth += 1
+                if c == ch_close:
+                    depth -= 1
+                    if depth == 0:
+                        try:
+                            return json.loads(text[idx:i+1])
+                        except Exception:
+                            # Balanced span is not valid JSON: give up on this
+                            # bracket kind and move on to the next one.
+                            break
     try:
         return ast.literal_eval(text)
     except Exception:
         return {}
+def annotate_detections(image: Image.Image, parsed) -> Image.Image:
+    """
+    Draw bounding boxes on image.
+    parsed: list of dicts with 'bbox_2d' ([x1,y1,x2,y2] in 0-1000 scale)
+            and optional 'label'.
+    Mirrors reference _run_detection_on_frame output → annotate_image.
+    """
+    image = image.convert("RGB")
+    ow, oh = image.size
+    draw = ImageDraw.Draw(image, "RGBA")
+    font_lbl = _get_font(max(12, min(ow // 35, 22)))
+    items = parsed if isinstance(parsed, list) else [parsed]
+    drawn = 0
+    for i, item in enumerate(items):
+        if not isinstance(item, dict):
+            continue
+        bbox = (item.get("bbox_2d") or item.get("bbox") or item.get("box"))
+        if not bbox or len(bbox) != 4:
+            continue
+        col = PALETTE_RGB[i % len(PALETTE_RGB)]
+        # ── Normalise coordinates (0-1000 → pixels) ──────────────────────
+        x1, y1, x2, y2 = [float(v) for v in bbox]
+        max_v = max(x1, y1, x2, y2)
+        if max_v <= 1.0:                        # 0-1 fraction
+            x1, y1, x2, y2 = x1*ow, y1*oh, x2*ow, y2*oh
+        elif max_v <= 1000.0:                   # 0-1000 Qwen scale
+            x1, y1, x2, y2 = x1/1000*ow, y1/1000*oh, x2/1000*ow, y2/1000*oh
+        # else already in pixels
+        if x2 < x1: x1, x2 = x2, x1
+        if y2 < y1: y1, y2 = y2, y1
+        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
+        # ── Fill (semi-transparent) ───────────────────────────────────────
+        draw.rectangle([x1, y1, x2, y2], fill=(*col, 46))
+        # ── Border ───────────────────────────────────────────────────────
+        lw = max(2, ow // 200)
+        for t in range(lw):
+            draw.rectangle([x1+t, y1+t, x2-t, y2-t], outline=(*col, 255))
+        # ── Corner accent marks ───────────────────────────────────────────
+        clen = max(10, min(int((x2-x1)*0.18), int((y2-y1)*0.18), 24))
+        corners = [(x1,y1,1,1),(x2,y1,-1,1),(x2,y2,-1,-1),(x1,y2,1,-1)]
+        for cx, cy, sx, sy in corners:
+            draw.line([(cx, cy),(cx+sx*clen, cy)], fill=col, width=lw+1)
+            draw.line([(cx, cy),(cx, cy+sy*clen)], fill=col, width=lw+1)
+        # ── Label ─────────────────────────────────────────────────────────
+        label = str(item.get("label") or item.get("class_name") or item.get("name") or f"obj {i+1}")
+        try:
+            bb = font_lbl.getbbox(label)
+            tw, th = bb[2]-bb[0], bb[3]-bb[1]
+        except AttributeError:
+            tw, th = font_lbl.getsize(label)
+        pad = 5
+        lx = max(0, min(x1, ow - tw - pad*2))
+        ly = max(0, y1 - th - pad*2) if y1 - th - pad*2 >= 0 else y1 + 2
+        draw.rectangle([lx, ly, lx+tw+pad*2, ly+th+pad*2], fill=(*col, 230))
+        draw.text((lx+pad, ly+pad), label, fill=(255,255,255,255), font=font_lbl)
+        drawn += 1
+    return image
+def annotate_points(image: Image.Image, parsed) -> Image.Image:
+    """
+    Draw point markers on image.
+    parsed: list of dicts with 'point_2d' ([x,y] in 0-1000 scale)
+            and optional 'label'.
+    Mirrors reference _run_point_detection_on_frame → annotate_image_red_points.
+    """
+    image = image.convert("RGB")
+    ow, oh = image.size
+    draw = ImageDraw.Draw(image, "RGBA")
+    font_lbl = _get_font(max(12, min(ow // 35, 22)))
+    items = parsed if isinstance(parsed, list) else [parsed]
+    drawn = 0
+    for i, item in enumerate(items):
+        if not isinstance(item, dict):
+            continue
+        pt = (item.get("point_2d") or item.get("point") or item.get("coord"))
+        if not pt or len(pt) != 2:
+            continue
+        col = PALETTE_RGB[i % len(PALETTE_RGB)]
+        # ── Normalise coordinates ─────────────────────────────────────────
+        x, y = float(pt[0]), float(pt[1])
+        max_v = max(x, y)
+        if max_v <= 1.0:
+            x, y = x*ow, y*oh
+        elif max_v <= 1000.0:
+            x, y = x/1000*ow, y/1000*oh
+        cx, cy = int(x), int(y)
+        r = max(7, min(ow // 55, 18))
+        # ── Glow rings ───────────────────────────────────────────────────
+        draw.ellipse([cx-r*2, cy-r*2, cx+r*2, cy+r*2], fill=(*col, 38))
+        draw.ellipse([cx-int(r*1.4), cy-int(r*1.4), cx+int(r*1.4), cy+int(r*1.4)],
+                     fill=(*col, 64))
+        # ── Core dot ─────────────────────────────────────────────────────
+        draw.ellipse([cx-r, cy-r, cx+r, cy+r], fill=(*col, 255),
+                     outline=(255,255,255,255), width=max(2, r//4))
+        # ── Centre pip ───────────────────────────────────────────────────
+        rp = max(2, r//4)
+        draw.ellipse([cx-rp, cy-rp, cx+rp, cy+rp], fill=(255,255,255,255))
+        # ── Label ─────────────────────────────────────────────────────────
+        label = str(item.get("label") or item.get("name") or f"pt {i+1}")
+        try:
+            bb = font_lbl.getbbox(label)
+            tw, th = bb[2]-bb[0], bb[3]-bb[1]
+        except AttributeError:
+            tw, th = font_lbl.getsize(label)
+        pad = 5
+        lx = min(cx + r + 8, ow - tw - pad*2)
+        ly = max(0, cy - th//2 - pad)
+        draw.rectangle([lx, ly, lx+tw+pad*2, ly+th+pad*2], fill=(*col, 220))
+        draw.text((lx+pad, ly+pad), label, fill=(255,255,255,255), font=font_lbl)
+        drawn += 1
+    return image
+def image_to_b64(img: Image.Image, fmt: str = "PNG") -> str:
+    """Convert PIL image → base64 data-URI."""
+    buf = io.BytesIO()
+    img.save(buf, format=fmt)
+    buf.seek(0)
+    return "data:image/png;base64," + base64.b64encode(buf.read()).decode()
+# ─────────────────────────────────────────────────────────────────────────────
+#  NEW ENDPOINT: /api/annotate
+#  Receives the image + raw model output text + category,
+#  runs server-side annotation, returns base64 PNG.
+# ─────────────────────────────────────────────────────────────────────────────
+@app.post("/api/annotate")
+async def annotate_endpoint(
+    image:    UploadFile = File(...),
+    text:     str        = Form(...),
+    category: str        = Form(...),
+):
+    try:
+        img_bytes = await image.read()
+        img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
+        img.thumbnail((512, 512))
+        parsed = safe_parse_json(text)
+        if not parsed:
+            return JSONResponse({"error": "no_json", "b64": None})
+        if category == "Detect":
+            annotated = annotate_detections(img, parsed)
+        elif category == "Point":
+            annotated = annotate_points(img, parsed)
+        else:
+            return JSONResponse({"error": "unsupported_category", "b64": None})
+        return JSONResponse({"b64": image_to_b64(annotated)})
+    except Exception as e:
+        return JSONResponse({"error": str(e), "b64": None}, status_code=500)
+# ─────────────────────────────────────────────────────────────────────────────
+#  STREAMING INFERENCE
+# ─────────────────────────────────────────────────────────────────────────────
+# generate_inference_stream: generator that yields Server-Sent-Event style
+# frames ("data: <json>" followed by a blank line) for one image + prompt,
+# dispatching on model_id. Each branch checks that its model/processor pair
+# is loaded, builds the model inputs, starts generate() on a background
+# thread with a TextIteratorStreamer, and forwards every non-empty text piece
+# as {"chunk": ...}. Normal completion ends with a "data: [DONE]" frame.
+# NOTE(review): this view is a diff excerpt; the end of the signature and the
+# definition of `full_prompt` are in lines not shown here.
 @spaces.GPU(duration=120)
 def generate_inference_stream(
     image: Image.Image, category: str, prompt: str, model_id: str = "qwen_vl_2b"
     if model_id == "qwen_vl_2b":
         if qwen_vl_2b_model is None or qwen_vl_2b_processor is None:
             yield f"data: {json.dumps({'chunk': '[Error] Qwen3-VL-2B model not loaded.'})}\n\n"
+            yield "data: [DONE]\n\n"; return
         messages = [{"role": "user", "content": [
+            {"type": "image", "image": image}, {"type": "text", "text": full_prompt}]}]
         text_input = qwen_vl_2b_processor.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True)
         inputs = qwen_vl_2b_processor(
             text=[text_input], images=[image], return_tensors="pt", padding=True
         ).to(qwen_vl_2b_model.device)
+        # generate() runs on a worker thread; the streamer is iterated below
+        # to hand the generated text back to this generator.
+        # NOTE(review): an exception raised inside generate() is not caught
+        # here; presumably the consumer only notices via the streamer's 120 s
+        # timeout. Applies to every branch; confirm.
+        streamer = TextIteratorStreamer(qwen_vl_2b_processor.tokenizer,
+                                       skip_prompt=True, skip_special_tokens=True, timeout=120)
+        threading.Thread(target=qwen_vl_2b_model.generate,
+                         kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
+                                     use_cache=True, temperature=1.0, do_sample=True)).start()
         for tok in streamer:
+            if tok: yield f"data: {json.dumps({'chunk': tok})}\n\n"
     # ── Qwen3-VL-4B ─────────────────────────────────────
     elif model_id == "qwen_vl_4b":
         if qwen_vl_4b_model is None or qwen_vl_4b_processor is None:
             yield f"data: {json.dumps({'chunk': '[Error] Qwen3-VL-4B model not loaded.'})}\n\n"
+            yield "data: [DONE]\n\n"; return
         messages = [{"role": "user", "content": [
+            {"type": "image", "image": image}, {"type": "text", "text": full_prompt}]}]
         text_input = qwen_vl_4b_processor.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True)
         inputs = qwen_vl_4b_processor(
             text=[text_input], images=[image], return_tensors="pt", padding=True
         ).to(qwen_vl_4b_model.device)
+        streamer = TextIteratorStreamer(qwen_vl_4b_processor.tokenizer,
+                                       skip_prompt=True, skip_special_tokens=True, timeout=120)
+        threading.Thread(target=qwen_vl_4b_model.generate,
+                         kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
+                                     use_cache=True, temperature=1.0, do_sample=True)).start()
         for tok in streamer:
+            if tok: yield f"data: {json.dumps({'chunk': tok})}\n\n"
     # ── Qwen3.5-4B-Unredacted-MAX ───────────────────────
     elif model_id == "qwen_4b_unredacted":
         if qwen_4b_unredacted_model is None or qwen_4b_unredacted_processor is None:
             yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-4B-Unredacted-MAX model not loaded.'})}\n\n"
+            yield "data: [DONE]\n\n"; return
         messages = [{"role": "user", "content": [
+            {"type": "image", "image": image}, {"type": "text", "text": full_prompt}]}]
         text_input = qwen_4b_unredacted_processor.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True)
         inputs = qwen_4b_unredacted_processor(
             text=[text_input], images=[image], return_tensors="pt", padding=True
         ).to(qwen_4b_unredacted_model.device)
+        # NOTE(review): temperature and min_p are passed without
+        # do_sample=True, unlike the qwen_vl_2b branch. Presumably sampling is
+        # intended; confirm against the model's generation config. The same
+        # applies to the qwen_4b, qwen_2b and qwen_unredacted branches.
+        streamer = TextIteratorStreamer(qwen_4b_unredacted_processor.tokenizer,
+                                       skip_prompt=True, skip_special_tokens=True, timeout=120)
+        threading.Thread(target=qwen_4b_unredacted_model.generate,
+                         kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
+                                     use_cache=True, temperature=1.5, min_p=0.1)).start()
         for tok in streamer:
+            if tok: yield f"data: {json.dumps({'chunk': tok})}\n\n"
     # ── Qwen3.5-4B ──────────────────────────────────────
     elif model_id == "qwen_4b":
         if qwen_4b_model is None or qwen_4b_processor is None:
             yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-4B model not loaded.'})}\n\n"
+            yield "data: [DONE]\n\n"; return
         messages = [{"role": "user", "content": [
+            {"type": "image", "image": image}, {"type": "text", "text": full_prompt}]}]
         text_input = qwen_4b_processor.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True)
         inputs = qwen_4b_processor(
             text=[text_input], images=[image], return_tensors="pt", padding=True
         ).to(qwen_4b_model.device)
+        streamer = TextIteratorStreamer(qwen_4b_processor.tokenizer,
+                                       skip_prompt=True, skip_special_tokens=True, timeout=120)
+        threading.Thread(target=qwen_4b_model.generate,
+                         kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
+                                     use_cache=True, temperature=1.5, min_p=0.1)).start()
         for tok in streamer:
+            if tok: yield f"data: {json.dumps({'chunk': tok})}\n\n"
     # ── Qwen3.5-2B ──────────────────────────────────────
     elif model_id == "qwen_2b":
         if qwen_2b_model is None or qwen_2b_processor is None:
             yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-2B model not loaded.'})}\n\n"
+            yield "data: [DONE]\n\n"; return
         messages = [{"role": "user", "content": [
+            {"type": "image", "image": image}, {"type": "text", "text": full_prompt}]}]
         text_input = qwen_2b_processor.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True)
         inputs = qwen_2b_processor(
             text=[text_input], images=[image], return_tensors="pt", padding=True
         ).to(qwen_2b_model.device)
+        streamer = TextIteratorStreamer(qwen_2b_processor.tokenizer,
+                                       skip_prompt=True, skip_special_tokens=True, timeout=120)
+        threading.Thread(target=qwen_2b_model.generate,
+                         kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
+                                     use_cache=True, temperature=1.5, min_p=0.1)).start()
         for tok in streamer:
+            if tok: yield f"data: {json.dumps({'chunk': tok})}\n\n"
     # ── LFM-450M ────────────────────────────────────────
     elif model_id == "lfm_450":
         if lfm_450_model is None or lfm_450_processor is None:
             yield f"data: {json.dumps({'chunk': '[Error] LFM-450M model not loaded.'})}\n\n"
+            yield "data: [DONE]\n\n"; return
         conversation = [{"role": "user", "content": [
+            {"type": "image", "image": image}, {"type": "text", "text": full_prompt}]}]
+        # In this branch apply_chat_template is called with tokenize=True and
+        # return_dict=True, and its result is used directly as the model
+        # inputs; there is no separate processor(...) call.
         inputs = lfm_450_processor.apply_chat_template(
             conversation, add_generation_prompt=True,
             return_tensors="pt", return_dict=True, tokenize=True,
         ).to(lfm_450_model.device)
+        streamer = TextIteratorStreamer(lfm_450_processor.tokenizer,
+                                       skip_prompt=True, skip_special_tokens=True, timeout=120)
+        threading.Thread(target=lfm_450_model.generate,
+                         kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
+                                     use_cache=True)).start()
         for tok in streamer:
+            if tok: yield f"data: {json.dumps({'chunk': tok})}\n\n"
     # ── Gemma4-E2B-it ───────────────────────────────────
     elif model_id == "gemma4_e2b":
         if gemma4_e2b_model is None or gemma4_e2b_processor is None:
             yield f"data: {json.dumps({'chunk': '[Error] Gemma4-E2B-it model not loaded.'})}\n\n"
+            yield "data: [DONE]\n\n"; return
         messages = [{"role": "user", "content": [
+            {"type": "image", "image": image}, {"type": "text", "text": full_prompt}]}]
         text_input = gemma4_e2b_processor.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True)
         inputs = gemma4_e2b_processor(
             text=[text_input], images=[image], return_tensors="pt", padding=True,
         ).to(gemma4_e2b_model.device)
+        streamer = TextIteratorStreamer(gemma4_e2b_processor.tokenizer,
+                                       skip_prompt=True, skip_special_tokens=True, timeout=120)
+        threading.Thread(target=gemma4_e2b_model.generate,
+                         kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
+                                     use_cache=True, temperature=1.0, do_sample=True)).start()
         for tok in streamer:
+            if tok: yield f"data: {json.dumps({'chunk': tok})}\n\n"
     # ── LFM-1.6B ────────────────────────────────────────
     elif model_id == "lfm_16":
         if lfm_16_model is None or lfm_16_processor is None:
             yield f"data: {json.dumps({'chunk': '[Error] LFM-1.6B model not loaded.'})}\n\n"
+            yield "data: [DONE]\n\n"; return
         conversation = [{"role": "user", "content": [
+            {"type": "image", "image": image}, {"type": "text", "text": full_prompt}]}]
         inputs = lfm_16_processor.apply_chat_template(
             conversation, add_generation_prompt=True,
             return_tensors="pt", return_dict=True, tokenize=True,
         ).to(lfm_16_model.device)
+        streamer = TextIteratorStreamer(lfm_16_processor.tokenizer,
+                                       skip_prompt=True, skip_special_tokens=True, timeout=120)
+        threading.Thread(target=lfm_16_model.generate,
+                         kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
+                                     use_cache=True)).start()
         for tok in streamer:
+            if tok: yield f"data: {json.dumps({'chunk': tok})}\n\n"
     # ── Qwen3.5-2B-Unredacted-MAX ───────────────────────
     elif model_id == "qwen_unredacted":
         if qwen_unredacted_model is None or qwen_unredacted_processor is None:
             yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-2B-Unredacted-MAX model not loaded.'})}\n\n"
+            yield "data: [DONE]\n\n"; return
         messages = [{"role": "user", "content": [
+            {"type": "image", "image": image}, {"type": "text", "text": full_prompt}]}]
         text_input = qwen_unredacted_processor.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True)
         inputs = qwen_unredacted_processor(
             text=[text_input], images=[image], return_tensors="pt", padding=True
         ).to(qwen_unredacted_model.device)
+        streamer = TextIteratorStreamer(qwen_unredacted_processor.tokenizer,
+                                       skip_prompt=True, skip_special_tokens=True, timeout=120)
+        threading.Thread(target=qwen_unredacted_model.generate,
+                         kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
+                                     use_cache=True, temperature=1.5, min_p=0.1)).start()
         for tok in streamer:
+            if tok: yield f"data: {json.dumps({'chunk': tok})}\n\n"
     # ── Qwen2.5-VL-3B-Instruct ──────────────────────────
     elif model_id == "qwen25_vl_3b":
         if qwen25_vl_3b_model is None or qwen25_vl_3b_processor is None:
             yield f"data: {json.dumps({'chunk': '[Error] Qwen2.5-VL-3B-Instruct model not loaded.'})}\n\n"
+            yield "data: [DONE]\n\n"; return
         messages = [{"role": "user", "content": [
+            {"type": "image", "image": image}, {"type": "text", "text": full_prompt}]}]
         text_input = qwen25_vl_3b_processor.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True)
+        # Unlike the other processor(...) branches, the image/video inputs are
+        # taken from process_vision_info(messages) rather than passed as [image].
         image_inputs, video_inputs = process_vision_info(messages)
         inputs = qwen25_vl_3b_processor(
+            text=[text_input], images=image_inputs, videos=video_inputs,
+            return_tensors="pt", padding=True,
         ).to(qwen25_vl_3b_model.device)
+        streamer = TextIteratorStreamer(qwen25_vl_3b_processor.tokenizer,
+                                       skip_prompt=True, skip_special_tokens=True, timeout=120)
+        threading.Thread(target=qwen25_vl_3b_model.generate,
+                         kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
+                                     use_cache=True, temperature=1.0, do_sample=True)).start()
         for tok in streamer:
+            if tok: yield f"data: {json.dumps({'chunk': tok})}\n\n"
+    # Terminal frame for every branch that streamed to completion (the
+    # "model not loaded" paths emit their own [DONE] and return early).
     yield "data: [DONE]\n\n"
+# ─────────────────────────────────────────────────────────────────────────────
+#  FastAPI Endpoints
+# ─────────────────────────────────────────────────────────────────────────────
 @app.post("/api/run")
 async def run_inference(
     image:    UploadFile = File(...),
         return JSONResponse({"error": str(e)}, status_code=500)
+# ─────────────────────────────────────────────────────────────────────────────
+#  Frontend UI
+# ─────────────────────────────────────────────────────────────────────────────
 @app.get("/", response_class=HTMLResponse)
 async def homepage(request: Request):
     return """
         .top-bar .sub  { font-size: 11px; color: var(--muted); }
         .top-bar .badge {
             margin-left: auto;
+            background: rgba(124,106,247,0.15); border: 1px solid rgba(124,106,247,0.3);
+            padding: 3px 10px; border-radius: 20px; font-size: 10px; color: var(--accent);
         }
         /* ── Canvas ── */
         #canvas {
             min-height: calc(100vh - 42px); height: 900px; margin: 0 auto;
         }
         svg.wires {
+            position: absolute; top: 0; left: 0; width: 100%; height: 100%;
             pointer-events: none; z-index: 2; overflow: visible;
         }
         path.wire { fill: none; stroke: var(--wire); stroke-width: 2.5; stroke-linecap: round; }
             border-radius: 5px; padding: 4px 8px; font-size: 9px; color: var(--muted); overflow: hidden;
         }
         .img-chip.visible { display: flex; }
+        .img-chip .chip-dot { width:5px;height:5px;border-radius:50%;background:var(--accent2);flex-shrink:0;box-shadow:0 0 4px var(--accent2); }
+        .img-chip .chip-name { overflow:hidden;text-overflow:ellipsis;white-space:nowrap;flex:1;color:var(--text);font-size:9px; }
+        .img-chip .chip-size { color:var(--muted);flex-shrink:0;font-size:9px; }
         select, textarea {
             width: 100%; background: rgba(0,0,0,0.3); border: 1px solid var(--node-border);
             color: var(--text); padding: 7px 9px; border-radius: 5px; outline: none;
         /* ── Output node ── */
         .output-node-body { padding: 10px; display: flex; flex-direction: column; gap: 6px; flex: 1; overflow: hidden; }
         .output-header-row { display: flex; align-items: center; justify-content: space-between; flex-shrink: 0; }
+        /* ── Icon buttons ── */
         .icon-btn {
             display: flex; align-items: center; gap: 5px;
             background: rgba(124,106,247,0.10); border: 1px solid rgba(124,106,247,0.25);
             font-size: 9px; font-weight: 700; font-family: 'JetBrains Mono', monospace;
             color: var(--accent); cursor: pointer; letter-spacing: 0.05em;
             transition: background 0.18s, border-color 0.18s, transform 0.1s; flex-shrink: 0;
+            text-decoration: none; border: 1px solid rgba(124,106,247,0.25);
         }
         .icon-btn:hover { background: rgba(124,106,247,0.22); border-color: var(--accent); }
         .icon-btn:active { transform: scale(0.95); }
+        .icon-btn.teal { background:rgba(78,205,196,0.10);border-color:rgba(78,205,196,0.25);color:var(--accent2); }
+        .icon-btn.teal:hover { background:rgba(78,205,196,0.22);border-color:var(--accent2); }
+        .icon-btn.copied { background:rgba(78,205,196,0.15);border-color:var(--accent2);color:var(--accent2); }
+        .icon-btn svg { pointer-events:none;flex-shrink:0; }
         .output-box {
             background: rgba(0,0,0,0.4); border: 1px solid var(--node-border);
             border-radius: 5px; padding: 10px; flex: 1; overflow-y: auto;
         /* ── Grounding node ─�� */
         .ground-node-body { padding: 10px; display: flex; flex-direction: column; gap: 6px; flex: 1; overflow: hidden; }
         .ground-header-row { display: flex; align-items: center; justify-content: space-between; flex-shrink: 0; }
+        .ground-img-wrap {
             position: relative; flex: 1; border: 1px solid var(--node-border);
             border-radius: 5px; overflow: hidden; background: #111; min-height: 0;
+            display: flex; align-items: center; justify-content: center;
         }
+        .ground-img-wrap img {
+            width: 100%; height: 100%; object-fit: contain; display: block;
         }
         .ground-placeholder {
             position: absolute; inset: 0; display: flex; align-items: center;
+            justify-content: center; font-size: 11px; color: var(--muted);
+            text-align: center; padding: 10px; pointer-events: none; z-index: 5;
         }
         .loader {
             width: 11px; height: 11px; border: 2px solid rgba(255,255,255,0.3);
             animation: spin 0.7s linear infinite; display: none;
         }
         @keyframes spin { to { transform: rotate(360deg); } }
+        .status-dot { width:6px;height:6px;border-radius:50%;background:var(--muted);display:inline-block;margin-right:6px; }
+        .status-dot.active { background:var(--accent2);box-shadow:0 0 5px var(--accent2); }
         /* ── Model badges ── */
         .model-badge {
+            display:inline-block;padding:2px 7px;border-radius:4px;
+            font-size:9px;font-weight:700;letter-spacing:0.06em;text-transform:uppercase;
         }
+        .model-badge.qvl2b    { background:rgba(255,150,50,0.15); color:#ff9632;        border:1px solid rgba(255,150,50,0.35); }
+        .model-badge.qvl4b    { background:rgba(255,100,80,0.15); color:#ff6450;        border:1px solid rgba(255,100,80,0.35); }
+        .model-badge.q4bunred { background:rgba(255,80,80,0.18);  color:#ff5050;        border:1px solid rgba(255,80,80,0.40); }
+        .model-badge.q4b      { background:rgba(255,200,80,0.15); color:#ffc850;        border:1px solid rgba(255,200,80,0.35); }
+        .model-badge.q2b      { background:rgba(124,106,247,0.2); color:var(--accent);  border:1px solid rgba(124,106,247,0.3); }
+        .model-badge.lfm450   { background:rgba(78,205,196,0.15); color:var(--accent2); border:1px solid rgba(78,205,196,0.3); }
+        .model-badge.g4e2b    { background:rgba(66,197,107,0.15); color:#42c56b;        border:1px solid rgba(66,197,107,0.35); }
+        .model-badge.lfm16    { background:rgba(107,203,119,0.15);color:#6bcb77;        border:1px solid rgba(107,203,119,0.35); }
+        .model-badge.qunred   { background:rgba(255,80,160,0.15); color:#ff50a0;        border:1px solid rgba(255,80,160,0.35); }
+        .model-badge.q25vl3b  { background:rgba(80,180,255,0.15); color:#50b4ff;        border:1px solid rgba(80,180,255,0.35); }
+        .model-info-box { border-radius:6px;padding:9px;font-size:10px;color:var(--muted);line-height:1.55;flex-shrink:0; }
         .canvas-footer { height: 36px; }
     </style>
 </head>
 <body>
     <span class="badge">10x Vision Models</span>
 </div>
 <div id="canvas">
     <svg class="wires">
         <path id="wire-img-task"   class="wire" />
                     SAVE
                 </a>
             </div>
+            <div class="ground-img-wrap">
+                <!-- Server-rendered annotated image displayed here -->
+                <img id="groundImg" src="" alt="" style="display:none;" />
                 <div class="ground-placeholder" id="groundPlaceholder">
                     Active for Point / Detect tasks.<br>Run inference to visualise.
                 </div>
     });
     document.addEventListener('mousemove', e => {
         if (!drag) return;
+        node.style.left=`${il+e.clientX-sx}px`;
+        node.style.top=`${it+e.clientY-sy}px`;
         updateWires();
     });
     document.addEventListener('mouseup', () => { if(drag){drag=false;node.style.zIndex=10;} });
 const dotImg      = document.getElementById('dot-img');
 function formatBytes(b) {
+    if (b<1024) return b+' B';
+    if (b<1048576) return (b/1024).toFixed(1)+' KB';
     return (b/1048576).toFixed(1)+' MB';
 }
 function handleFile(file) {
+    if (!file || !file.type.startsWith('image/')) return;
+    currentFile = file;
+    imgPreview.src = URL.createObjectURL(file);
+    previewWrap.classList.add('visible');
+    dropZone.style.display = 'none';
+    chipName.textContent = file.name;
+    chipSize.textContent = formatBytes(file.size);
+    imgChip.classList.add('visible');
+    dotImg.classList.add('active');
     requestAnimationFrame(updateWires);
 }
 function clearImage() {
+    currentFile = null;
+    imgPreview.src = '';
+    previewWrap.classList.remove('visible');
+    dropZone.style.display = '';
+    imgChip.classList.remove('visible');
+    chipName.textContent = '—';
+    chipSize.textContent = '';
+    fileInput.value = '';
+    dotImg.classList.remove('active');
+    requestAnimationFrame(updateWires);
 }
 dropZone.onclick     = () => fileInput.click();
 fileInput.onchange   = e  => handleFile(e.target.files[0]);
 const MODEL_INFO = {
     qwen_vl_2b: {
+        html: `<span class="model-badge qvl2b">QWEN3-VL · 2B</span><br><br>
+               Qwen3-VL-2B-Instruct — dedicated vision-language model by Alibaba Cloud.
+               Strong spatial grounding, OCR &amp; instruction-following.`,
         bg: 'rgba(255,150,50,0.07)', border: 'rgba(255,150,50,0.30)',
     },
     qwen_vl_4b: {
+        html: `<span class="model-badge qvl4b">QWEN3-VL · 4B</span><br><br>
+               Qwen3-VL-4B-Instruct — enhanced vision-language model by Alibaba Cloud.
+               Superior spatial grounding, richer OCR &amp; stronger multi-step reasoning.`,
         bg: 'rgba(255,100,80,0.07)', border: 'rgba(255,100,80,0.25)',
     },
     qwen_4b_unredacted: {
+        html: `<span class="model-badge q4bunred">QWEN 3.5 · 4B UNREDACTED MAX</span><br><br>
+               Qwen3.5-4B-Unredacted-MAX by prithivMLmods. Uncensored fine-tune of Qwen3.5-4B
+               with extended instruction-following &amp; unrestricted reasoning.`,
         bg: 'rgba(255,80,80,0.07)', border: 'rgba(255,80,80,0.30)',
     },
     qwen_4b: {
+        html: `<span class="model-badge q4b">QWEN 3.5 · 4B</span><br><br>
+               Qwen3.5 4B multimodal model by Alibaba Cloud.
+               Enhanced capacity — richer reasoning &amp; better instruction following.`,
         bg: 'rgba(255,200,80,0.07)', border: 'rgba(255,200,80,0.30)',
     },
     qwen_2b: {
+        html: `<span class="model-badge q2b">QWEN 3.5 · 2B</span><br><br>
+               Qwen3.5 2B multimodal model by Alibaba Cloud.
+               Lightweight &amp; fast — ideal for quick tasks.`,
         bg: 'rgba(124,106,247,0.07)', border: 'rgba(124,106,247,0.25)',
     },
     lfm_450: {
+        html: `<span class="model-badge lfm450">LFM · 450M</span><br><br>
+               LFM2.5-VL 450M by LiquidAI. Ultra-lightweight edge model
+               with solid grounding capabilities.`,
         bg: 'rgba(78,205,196,0.07)', border: 'rgba(78,205,196,0.25)',
     },
     gemma4_e2b: {
+        html: `<span class="model-badge g4e2b">GEMMA 4 · E2B</span><br><br>
+               Gemma4-E2B-it by Google DeepMind. Efficient 2B multimodal model
+               with strong vision-language understanding &amp; instruction-following.`,
         bg: 'rgba(66,197,107,0.07)', border: 'rgba(66,197,107,0.25)',
     },
     lfm_16: {
+        html: `<span class="model-badge lfm16">LFM · 1.6B</span><br><br>
+               LFM2.5-VL 1.6B by LiquidAI. Larger liquid-state model offering
+               enhanced reasoning &amp; richer visual understanding.`,
         bg: 'rgba(107,203,119,0.07)', border: 'rgba(107,203,119,0.25)',
     },
     qwen_unredacted: {
+        html: `<span class="model-badge qunred">QWEN 3.5 · 2B UNREDACTED MAX</span><br><br>
+               Qwen3.5-2B-Unredacted-MAX by prithivMLmods. Fine-tuned variant of Qwen3.5-2B
+               with uncensored &amp; extended instruction-following capabilities.`,
         bg: 'rgba(255,80,160,0.07)', border: 'rgba(255,80,160,0.25)',
     },
     qwen25_vl_3b: {
+        html: `<span class="model-badge q25vl3b">QWEN 2.5-VL · 3B</span><br><br>
+               Qwen2.5-VL-3B-Instruct by Alibaba Cloud. Powerful 3B vision-language model
+               with strong grounding, OCR &amp; multi-task visual reasoning.`,
         bg: 'rgba(80,180,255,0.07)', border: 'rgba(80,180,255,0.25)',
     },
 };
     Point:   'e.g., The gun held by the person.',
     Detect:  'e.g., The headlight of the car.',
 };
+categorySelect.onchange = e => {
+    promptInput.placeholder = PLACEHOLDERS[e.target.value] || '';
+};
 // ══════════════════════════════════════════════
 //  COPY BUTTON
         </svg> COPY`;
 }
 copyBtn.onclick = () => {
+    const txt = outputBox.innerText || '';
+    if (!txt || txt === 'Results will stream here...') return;
     navigator.clipboard.writeText(txt).then(() => {
         copyBtn.classList.add('copied');
         copyBtn.innerHTML = `
                  stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round">
                 <polyline points="20 6 9 17 4 12"/>
             </svg> COPIED`;
+        clearTimeout(copyTimer);
+        copyTimer = setTimeout(resetCopyBtn, 2000);
     }).catch(() => {
+        const ta = document.createElement('textarea');
+        ta.value = txt; ta.style.position = 'fixed'; ta.style.opacity = '0';
+        document.body.appendChild(ta); ta.select();
+        document.execCommand('copy'); document.body.removeChild(ta);
     });
 };
+// ══════════════════════════════════════════════
+//  GROUNDING IMAGE  (server-rendered, base64)
+// ══════════════════════════════════════════════
+const groundImg         = document.getElementById('groundImg');
+const groundPlaceholder = document.getElementById('groundPlaceholder');
+const downloadBtn       = document.getElementById('downloadBtn');
+const dotGnd            = document.getElementById('dot-gnd');
+function showGroundingImage(b64DataUri) {
+    groundImg.src = b64DataUri;
+    groundImg.style.display = 'block';
+    groundPlaceholder.style.display = 'none';
+    // Wire up download button
+    const ts = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19);
+    downloadBtn.href     = b64DataUri;
+    downloadBtn.download = `grounding_${ts}.png`;
+    downloadBtn.style.display = 'flex';
+    dotGnd.classList.add('active');
+}
+function resetGrounding(msg) {
+    groundImg.src = '';
+    groundImg.style.display = 'none';
+    groundPlaceholder.textContent = msg || 'Active for Point / Detect tasks. Run inference to visualise.';
+    groundPlaceholder.style.display = 'flex';
+    downloadBtn.style.display = 'none';
+    dotGnd.classList.remove('active');
+}
 // ══════════════════════════════════════════════
 //  RUN INFERENCE
 // ══════════════════════════════════════════════
 const allWires  = ['wire-img-task','wire-model-task','wire-task-out','wire-task-gnd'];
 const dotTask   = document.getElementById('dot-task');
 const dotOut    = document.getElementById('dot-out');
 runBtn.onclick = async () => {
     if (!currentFile) { alert('Please upload an image into the Input Node.'); return; }
     const promptStr = promptInput.value.trim();
     if (!promptStr)  { alert('Please enter a prompt directive.'); return; }
+    // ── Reset UI ─────────────────────────────────────────
     runBtn.disabled = true;
     btnLoader.style.display = 'inline-block';
     outputBox.innerText = '';
     outputBox.style.color = '';
     dotTask.classList.add('active');
     dotOut.classList.remove('active');
     allWires.forEach(id => document.getElementById(id)?.classList.add('active'));
     resetCopyBtn();
+    const cat = categorySelect.value;
+    if (cat === 'Point' || cat === 'Detect') {
+        resetGrounding('Running inference…');
+    }
+    // ── Build FormData ────────────────────────────────────
     const formData = new FormData();
     formData.append('image',    currentFile);
+    formData.append('category', cat);
     formData.append('prompt',   promptStr);
     formData.append('model_id', modelSelect.value);
     let fullText = '';
     try {
+        // ── 1. Stream inference ───────────────────────────
         const response = await fetch('/api/run', { method: 'POST', body: formData });
         if (!response.ok) {
             const err = await response.json();
         const reader  = response.body.getReader();
         const decoder = new TextDecoder('utf-8');
+        let   buffer  = '';
         while (true) {
             const { value, done } = await reader.read();
             if (done) break;
             buffer += decoder.decode(value, { stream: true });
             const lines = buffer.split('\\n\\n');
+            buffer = lines.pop();          // keep incomplete chunk
             for (const line of lines) {
                 if (!line.startsWith('data: ')) continue;
+                const payload = line.slice(6);
                 if (payload === '[DONE]') break;
                 try {
                     const data = JSON.parse(payload);
         dotOut.classList.add('active');
+        // ── 2. Server-side annotation for Point / Detect ──
         if ((cat === 'Point' || cat === 'Detect') && fullText.trim()) {
+            resetGrounding('Annotating image…');
+            try {
+                const annForm = new FormData();
+                annForm.append('image',    currentFile);
+                annForm.append('text',     fullText);
+                annForm.append('category', cat);
+                const annResp = await fetch('/api/annotate', {
+                    method: 'POST', body: annForm,
+                });
+                const annData = await annResp.json();
+                if (annData.b64) {
+                    showGroundingImage(annData.b64);
                 } else {
+                    resetGrounding(
+                        annData.error === 'no_json'
+                            ? 'No grounding coordinates found in model output.'
+                            : `Annotation error: ${annData.error || 'unknown'}`
+                    );
                 }
+            } catch (annErr) {
+                resetGrounding(`Annotation failed: ${annErr.message}`);
+            }
         } else if (cat !== 'Point' && cat !== 'Detect') {
+            resetGrounding('Active for Point / Detect tasks. Run inference to visualise.');
         }
     } catch (err) {
         outputBox.innerText = `[Error] ${err.message}`;
         outputBox.style.color = '#ff6b6b';
+        if (cat === 'Point' || cat === 'Detect') {
+            resetGrounding('Inference error — see Output Stream node.');
+        }
     } finally {
         runBtn.disabled = false;
         btnLoader.style.display = 'none';
         dotTask.classList.remove('active');
         allWires.forEach(id => document.getElementById(id)?.classList.remove('active'));
     }
 };
 </script>