prithivMLmods committed on
Commit
323b307
·
verified ·
1 Parent(s): 9ef8327

update app

Browse files
Files changed (1) hide show
  1. app.py +247 -231
app.py CHANGED
@@ -17,6 +17,7 @@ from fastapi import Request, UploadFile, File, Form
17
  from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse
18
  from transformers import (
19
  Qwen3_5ForConditionalGeneration,
 
20
  AutoProcessor,
21
  AutoModelForImageTextToText,
22
  TextIteratorStreamer,
@@ -32,22 +33,41 @@ DTYPE = (
32
  else torch.float16
33
  )
34
 
35
- QWEN_MODEL_NAME = "Qwen/Qwen3.5-2B"
36
- LFM_MODEL_NAME = "LiquidAI/LFM2.5-VL-450M"
37
- CATEGORIES = ["Query", "Caption", "Point", "Detect"]
38
 
39
- print(f"Loading Qwen model: {QWEN_MODEL_NAME} on {DEVICE}...")
 
40
  try:
41
  qwen_model = Qwen3_5ForConditionalGeneration.from_pretrained(
42
  QWEN_MODEL_NAME, torch_dtype=DTYPE, device_map=DEVICE,
43
  ).eval()
44
  qwen_processor = AutoProcessor.from_pretrained(QWEN_MODEL_NAME)
45
- print("Qwen model loaded successfully.")
46
  except Exception as e:
47
- print(f"Warning: Qwen model loading failed. Error: {e}")
48
  qwen_model = None
49
  qwen_processor = None
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  print(f"Loading LFM model: {LFM_MODEL_NAME} on {DEVICE}...")
52
  try:
53
  lfm_model = AutoModelForImageTextToText.from_pretrained(
@@ -81,7 +101,9 @@ def safe_parse_json(text: str):
81
 
82
  # --- Inference Generator (Streaming) ---
83
  @spaces.GPU(duration=120)
84
- def generate_inference_stream(image: Image.Image, category: str, prompt: str, model_id: str = "qwen"):
 
 
85
  if category == "Query":
86
  full_prompt = prompt
87
  elif category == "Caption":
@@ -93,7 +115,55 @@ def generate_inference_stream(image: Image.Image, category: str, prompt: str, mo
93
  else:
94
  full_prompt = prompt
95
 
96
- if model_id == "lfm":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  if lfm_model is None or lfm_processor is None:
98
  yield f"data: {json.dumps({'chunk': '[Error] LFM model not loaded.'})}\n\n"
99
  yield "data: [DONE]\n\n"
@@ -104,11 +174,10 @@ def generate_inference_stream(image: Image.Image, category: str, prompt: str, mo
104
  "role": "user",
105
  "content": [
106
  {"type": "image", "image": image},
107
- {"type": "text", "text": full_prompt},
108
  ],
109
  }
110
  ]
111
-
112
  inputs = lfm_processor.apply_chat_template(
113
  conversation,
114
  add_generation_prompt=True,
@@ -123,7 +192,6 @@ def generate_inference_stream(image: Image.Image, category: str, prompt: str, mo
123
  skip_special_tokens=True,
124
  timeout=120,
125
  )
126
-
127
  thread = threading.Thread(
128
  target=lfm_model.generate,
129
  kwargs=dict(
@@ -139,9 +207,10 @@ def generate_inference_stream(image: Image.Image, category: str, prompt: str, mo
139
  yield f"data: {json.dumps({'chunk': tok})}\n\n"
140
  thread.join()
141
 
 
142
  else:
143
  if qwen_model is None or qwen_processor is None:
144
- yield f"data: {json.dumps({'chunk': '[Error] Qwen model not loaded.'})}\n\n"
145
  yield "data: [DONE]\n\n"
146
  return
147
 
@@ -150,15 +219,13 @@ def generate_inference_stream(image: Image.Image, category: str, prompt: str, mo
150
  "role": "user",
151
  "content": [
152
  {"type": "image", "image": image},
153
- {"type": "text", "text": full_prompt},
154
  ],
155
  }
156
  ]
157
-
158
  text_input = qwen_processor.apply_chat_template(
159
  messages, tokenize=False, add_generation_prompt=True
160
  )
161
-
162
  inputs = qwen_processor(
163
  text=[text_input], images=[image], return_tensors="pt", padding=True
164
  ).to(qwen_model.device)
@@ -169,7 +236,6 @@ def generate_inference_stream(image: Image.Image, category: str, prompt: str, mo
169
  skip_special_tokens=True,
170
  timeout=120,
171
  )
172
-
173
  thread = threading.Thread(
174
  target=qwen_model.generate,
175
  kwargs=dict(
@@ -193,10 +259,10 @@ def generate_inference_stream(image: Image.Image, category: str, prompt: str, mo
193
  # --- FastAPI Endpoints ---
194
  @app.post("/api/run")
195
  async def run_inference(
196
- image: UploadFile = File(...),
197
- category: str = Form(...),
198
- prompt: str = Form(...),
199
- model_id: str = Form("qwen"),
200
  ):
201
  try:
202
  img_bytes = await image.read()
@@ -205,7 +271,7 @@ async def run_inference(
205
 
206
  return StreamingResponse(
207
  generate_inference_stream(img, category, prompt, model_id),
208
- media_type="text/event-stream"
209
  )
210
  except Exception as e:
211
  return JSONResponse({"error": str(e)}, status_code=500)
@@ -224,18 +290,18 @@ async def homepage(request: Request):
224
  <link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;600;700&family=Space+Mono:wght@400;700&display=swap" rel="stylesheet">
225
  <style>
226
  :root {
227
- --bg: #0d0d0f;
228
- --grid: #1a1a1f;
229
- --node-bg: #13131a;
230
  --node-header: #1c1c26;
231
  --node-border: #2a2a3a;
232
- --accent: #7c6af7;
233
- --accent2: #4ecdc4;
234
- --accent3: #ff6b6b;
235
- --text: #e8e8f0;
236
- --muted: #6b6b8a;
237
- --port: #4ecdc4;
238
- --wire: #2a2a4a;
239
  --wire-active: #7c6af7;
240
  }
241
 
@@ -259,10 +325,10 @@ async def homepage(request: Request):
259
  overflow-y: auto;
260
  }
261
 
 
262
  .top-bar {
263
- position: sticky;
264
- top: 0; left: 0; right: 0;
265
- height: 48px;
266
  background: rgba(13,13,15,0.95);
267
  border-bottom: 1px solid var(--node-border);
268
  display: flex; align-items: center; padding: 0 20px;
@@ -270,21 +336,22 @@ async def homepage(request: Request):
270
  backdrop-filter: blur(12px);
271
  }
272
  .top-bar .logo { font-size: 13px; font-weight: 700; color: var(--accent); letter-spacing: 0.05em; }
273
- .top-bar .sep { color: var(--node-border); }
274
- .top-bar .sub { font-size: 11px; color: var(--muted); }
275
  .top-bar .badge {
276
  margin-left: auto;
277
  background: rgba(124,106,247,0.15);
278
  border: 1px solid rgba(124,106,247,0.3);
279
- padding: 3px 10px;
280
- border-radius: 20px; font-size: 10px; color: var(--accent);
281
  }
282
 
 
283
  #canvas {
284
  position: relative;
285
  width: 1340px;
286
- min-height: calc(100vh - 48px);
287
- height: 1000px;
288
  margin: 0 auto;
289
  }
290
 
@@ -294,7 +361,6 @@ async def homepage(request: Request):
294
  pointer-events: none; z-index: 2;
295
  overflow: visible;
296
  }
297
-
298
  path.wire {
299
  fill: none; stroke: var(--wire); stroke-width: 2.5;
300
  stroke-linecap: round;
@@ -306,28 +372,27 @@ async def homepage(request: Request):
306
  }
307
  @keyframes flow { to { stroke-dashoffset: -24; } }
308
 
309
- /* ─── Nodes ─── */
310
  .node {
311
  position: absolute;
312
  width: 300px;
313
  background: var(--node-bg);
314
  border: 1px solid var(--node-border);
315
  border-radius: 10px;
316
- box-shadow: 0 12px 40px rgba(0,0,0,0.6), 0 0 0 0px rgba(124,106,247,0);
317
  z-index: 10;
318
- display: flex;
319
- flex-direction: column;
320
  transition: box-shadow 0.2s;
321
  }
322
  .node:hover {
323
- box-shadow: 0 12px 40px rgba(0,0,0,0.6), 0 0 0 1px rgba(124,106,247,0.3);
324
  }
325
-
326
- .node.fixed-height { height: 420px; }
327
 
328
  .node-header {
329
  background: var(--node-header);
330
- padding: 10px 14px;
331
  border-bottom: 1px solid var(--node-border);
332
  border-radius: 10px 10px 0 0;
333
  font-size: 11px; font-weight: 700;
@@ -344,27 +409,26 @@ async def homepage(request: Request):
344
  }
345
 
346
  .node-body {
347
- padding: 14px;
348
- display: flex; flex-direction: column; gap: 12px;
349
  flex: 1; overflow: hidden;
350
  }
351
 
352
- /* ─── Ports ─── */
353
  .port {
354
  position: absolute;
355
  width: 12px; height: 12px;
356
  background: var(--node-bg);
357
  border: 2px solid var(--port);
358
- border-radius: 50%;
359
- z-index: 30;
360
  }
361
  .port.out { right: -7px; }
362
- .port.in { left: -7px; }
363
 
364
- /* ─── Form Elements ─── */
365
  label {
366
  font-size: 10px; color: var(--muted);
367
- font-weight: 600; display: block; margin-bottom: 4px;
368
  letter-spacing: 0.08em; text-transform: uppercase;
369
  }
370
 
@@ -372,23 +436,21 @@ async def homepage(request: Request):
372
 
373
  .file-upload {
374
  border: 1.5px dashed var(--node-border);
375
- border-radius: 8px; padding: 16px 12px;
376
  text-align: center; cursor: pointer;
377
  font-size: 11px; color: var(--muted);
378
  transition: border-color 0.2s, background 0.2s;
379
  background: rgba(255,255,255,0.01);
380
- display: flex; flex-direction: column; align-items: center; gap: 8px;
381
  }
382
  .file-upload:hover { border-color: var(--accent); background: rgba(124,106,247,0.04); }
383
  .file-upload svg { opacity: 0.5; transition: opacity 0.2s; }
384
  .file-upload:hover svg { opacity: 0.9; }
385
 
386
  .img-preview {
387
- width: 100%; height: 230px;
388
- object-fit: contain;
389
- border-radius: 6px;
390
- display: none;
391
- background: #000;
392
  border: 1px solid var(--node-border);
393
  }
394
 
@@ -396,7 +458,7 @@ async def homepage(request: Request):
396
  width: 100%;
397
  background: rgba(0,0,0,0.3);
398
  border: 1px solid var(--node-border);
399
- color: var(--text); padding: 9px 11px;
400
  border-radius: 6px; outline: none;
401
  font-size: 12px; font-family: 'JetBrains Mono', monospace;
402
  resize: none; transition: border-color 0.2s;
@@ -407,7 +469,7 @@ async def homepage(request: Request):
407
  button.run-btn {
408
  background: linear-gradient(135deg, var(--accent), #9b59b6);
409
  color: #fff; border: none;
410
- padding: 10px; border-radius: 7px;
411
  font-weight: 700; font-size: 12px;
412
  font-family: 'JetBrains Mono', monospace;
413
  cursor: pointer;
@@ -415,47 +477,39 @@ async def homepage(request: Request):
415
  display: flex; justify-content: center; align-items: center; gap: 8px;
416
  letter-spacing: 0.04em;
417
  }
418
- button.run-btn:hover { opacity: 0.9; }
419
- button.run-btn:active { transform: scale(0.98); }
420
  button.run-btn:disabled { background: var(--node-border); cursor: not-allowed; color: #555; }
421
 
422
  .output-box {
423
  background: rgba(0,0,0,0.4);
424
  border: 1px solid var(--node-border);
425
- border-radius: 6px; padding: 12px;
426
- flex: 1;
427
- overflow-y: auto;
428
  font-size: 12px; line-height: 1.6;
429
  color: #c8c8e0; white-space: pre-wrap;
430
  user-select: text;
431
  font-family: 'JetBrains Mono', monospace;
432
  }
433
 
434
- /* Grounding canvas */
435
  .ground-canvas-wrap {
436
  position: relative; flex: 1;
437
  border: 1px solid var(--node-border);
438
  border-radius: 6px; overflow: hidden;
439
- background: #000;
440
- min-height: 0;
441
- }
442
- .ground-canvas-wrap canvas {
443
- width: 100%; height: 100%;
444
- object-fit: contain;
445
- display: block;
446
  }
 
447
  .ground-placeholder {
448
  position: absolute; inset: 0;
449
  display: flex; align-items: center; justify-content: center;
450
- font-size: 11px; color: var(--muted); text-align: center;
451
- padding: 12px;
452
  }
453
 
454
  .loader {
455
  width: 12px; height: 12px;
456
  border: 2px solid rgba(255,255,255,0.3);
457
- border-top-color: #fff;
458
- border-radius: 50%;
459
  animation: spin 0.7s linear infinite;
460
  display: none;
461
  }
@@ -463,8 +517,7 @@ async def homepage(request: Request):
463
 
464
  .status-dot {
465
  width: 7px; height: 7px; border-radius: 50%;
466
- background: var(--muted); display: inline-block;
467
- margin-right: 6px;
468
  }
469
  .status-dot.active { background: var(--accent2); box-shadow: 0 0 6px var(--accent2); }
470
 
@@ -473,13 +526,11 @@ async def homepage(request: Request):
473
  border-radius: 4px; font-size: 9px; font-weight: 700;
474
  letter-spacing: 0.06em; text-transform: uppercase;
475
  }
476
- .model-badge.qwen { background: rgba(124,106,247,0.2); color: var(--accent); border: 1px solid rgba(124,106,247,0.3); }
477
- .model-badge.lfm { background: rgba(78,205,196,0.15); color: var(--accent2); border: 1px solid rgba(78,205,196,0.3); }
 
478
 
479
- /* scroll hint at bottom */
480
- .canvas-footer {
481
- height: 40px;
482
- }
483
  </style>
484
  </head>
485
  <body>
@@ -488,7 +539,7 @@ async def homepage(request: Request):
488
  <span class="logo">MULTIMODAL EDGE</span>
489
  <span class="sep">|</span>
490
  <span class="sub">Node-Based Inference Canvas</span>
491
- <span class="badge">v2.0 — DUAL MODEL</span>
492
  </div>
493
 
494
  <div id="canvas">
@@ -500,7 +551,7 @@ async def homepage(request: Request):
500
  </svg>
501
 
502
  <!-- ─── ID 01 : Image Input ─── -->
503
- <div class="node fixed-height" id="node-img" style="left:40px; top:60px;">
504
  <div class="node-header">
505
  <span><span class="status-dot" id="dot-img"></span>Input Image</span>
506
  <span class="id">ID: 01</span>
@@ -509,7 +560,9 @@ async def homepage(request: Request):
509
  <div>
510
  <label>Upload Image</label>
511
  <div class="file-upload" id="dropZone">
512
- <svg width="36" height="36" viewBox="0 0 24 24" fill="none" stroke="#7c6af7" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round">
 
 
513
  <rect x="3" y="3" width="18" height="18" rx="2" ry="2"/>
514
  <circle cx="8.5" cy="8.5" r="1.5"/>
515
  <polyline points="21 15 16 10 5 21"/>
@@ -520,11 +573,11 @@ async def homepage(request: Request):
520
  <img id="imgPreview" class="img-preview" />
521
  </div>
522
  </div>
523
- <div class="port out" id="port-img-out" style="top:50%; transform:translateY(-50%);"></div>
524
  </div>
525
 
526
  <!-- ─── ID 02 : Model Selector ─── -->
527
- <div class="node fixed-height" id="node-model" style="left:40px; top:500px;">
528
  <div class="node-header">
529
  <span><span class="status-dot" id="dot-model"></span>Model Selector</span>
530
  <span class="id">ID: 02</span>
@@ -534,22 +587,25 @@ async def homepage(request: Request):
534
  <label>Active Model</label>
535
  <select id="modelSelect">
536
  <option value="qwen">Qwen3.5-2B (Vision-Language)</option>
 
537
  <option value="lfm">LFM2.5-VL-450M (LiquidAI)</option>
538
  </select>
539
  </div>
540
- <div id="modelInfoBox" style="background:rgba(124,106,247,0.07); border:1px solid rgba(124,106,247,0.2); border-radius:6px; padding:10px; font-size:10px; color:var(--muted); line-height:1.6;">
541
- <span class="model-badge qwen">QWEN</span>
542
- <br><br>
543
- Qwen3.5 2B parameter multimodal model by Alibaba Cloud. Supports Query, Caption, Point &amp; Detect tasks with streaming output.
 
 
544
  </div>
545
  <div style="flex:1;"></div>
546
  </div>
547
- <div class="port out" id="port-model-out" style="top:50%; transform:translateY(-50%);"></div>
548
  </div>
549
 
550
- <!-- ─── ID 03 : Task Node ─── -->
551
- <div class="node fixed-height" id="node-task" style="left:430px; top:60px;">
552
- <div class="port in" id="port-task-in" style="top:50%; transform:translateY(-50%);"></div>
553
  <div class="node-header">
554
  <span><span class="status-dot" id="dot-task"></span>Task Config</span>
555
  <span class="id">ID: 03</span>
@@ -566,19 +622,20 @@ async def homepage(request: Request):
566
  </div>
567
  <div>
568
  <label>Prompt Directive</label>
569
- <textarea id="promptInput" rows="5" placeholder="e.g., Count the total number of boats and describe the environment."></textarea>
 
570
  </div>
571
  <button class="run-btn" id="runBtn">
572
  <span>Execute</span>
573
  <span class="loader" id="btnLoader"></span>
574
  </button>
575
  </div>
576
- <div class="port out" id="port-task-out" style="top:50%; transform:translateY(-50%);"></div>
577
  </div>
578
 
579
- <!-- ─── ID 04 : Output Node ─── -->
580
- <div class="node fixed-height" id="node-out" style="left:820px; top:60px;">
581
- <div class="port in" id="port-out-in" style="top:50%; transform:translateY(-50%);"></div>
582
  <div class="node-header">
583
  <span><span class="status-dot" id="dot-out"></span>Output Stream</span>
584
  <span class="id">ID: 04</span>
@@ -590,8 +647,8 @@ async def homepage(request: Request):
590
  </div>
591
 
592
  <!-- ─── ID 05 : Grounding Visualiser ─── -->
593
- <div class="node fixed-height" id="node-gnd" style="left:820px; top:500px;">
594
- <div class="port in" id="port-gnd-in" style="top:50%; transform:translateY(-50%);"></div>
595
  <div class="node-header">
596
  <span><span class="status-dot" id="dot-gnd"></span>View Grounding</span>
597
  <span class="id">ID: 05</span>
@@ -612,7 +669,7 @@ async def homepage(request: Request):
612
 
613
  <script>
614
  // ══════════════════════════════════════════════
615
- // WIRE DRAWING (relative to #canvas)
616
  // ══════════════════════════════════════════════
617
  const canvasEl = document.getElementById('canvas');
618
 
@@ -651,13 +708,11 @@ function updateWires() {
651
  document.querySelectorAll('.node').forEach(node => {
652
  const header = node.querySelector('.node-header');
653
  let drag = false, sx, sy, il, it;
654
-
655
  header.addEventListener('mousedown', e => {
656
  drag = true; sx = e.clientX; sy = e.clientY;
657
  il = parseInt(node.style.left) || 0;
658
  it = parseInt(node.style.top) || 0;
659
- node.style.zIndex = 100;
660
- e.preventDefault();
661
  });
662
  document.addEventListener('mousemove', e => {
663
  if (!drag) return;
@@ -673,7 +728,7 @@ document.querySelectorAll('.node').forEach(node => {
673
  window.addEventListener('resize', updateWires);
674
  window.addEventListener('scroll', updateWires);
675
  document.addEventListener('scroll', updateWires, true);
676
- requestAnimationFrame(() => { updateWires(); });
677
 
678
  // ══════════════════════════════════════════════
679
  // FILE UPLOAD
@@ -704,7 +759,7 @@ dropZone.ondrop = e => {
704
  };
705
 
706
  // ══════════════════════════════════════════════
707
- // MODEL SELECTOR INFO
708
  // ══════════════════════════════════════════════
709
  const modelSelect = document.getElementById('modelSelect');
710
  const modelInfoBox = document.getElementById('modelInfoBox');
@@ -712,8 +767,18 @@ const dotModel = document.getElementById('dot-model');
712
  dotModel.classList.add('active');
713
 
714
  const MODEL_INFO = {
715
- qwen: `<span class="model-badge qwen">QWEN</span><br><br>Qwen3.5 2B parameter multimodal model by Alibaba Cloud. Supports Query, Caption, Point &amp; Detect tasks with streaming output.`,
716
- lfm: `<span class="model-badge lfm">LFM</span><br><br>LFM2.5-VL 450M parameter vision-language model by LiquidAI. Ultra-lightweight edge model with strong grounding capabilities.`,
 
 
 
 
 
 
 
 
 
 
717
  };
718
 
719
  modelSelect.onchange = () => {
@@ -721,7 +786,7 @@ modelSelect.onchange = () => {
721
  };
722
 
723
  // ══════════════════════════════════════════════
724
- // CATEGORY → PLACEHOLDER
725
  // ══════════════════════════════════════════════
726
  const categorySelect = document.getElementById('categorySelect');
727
  const promptInput = document.getElementById('promptInput');
@@ -736,24 +801,18 @@ categorySelect.onchange = e => {
736
  };
737
 
738
  // ══════════════════════════════════════════════
739
- // JSON PARSER (robust)
740
  // ══════════════════════════════════════════════
741
  function safeParseJSON(text) {
742
- // Strip markdown fences
743
  text = text.trim()
744
  .replace(/^```(json)?\\s*/i, '')
745
- .replace(/\\s*```$/, '')
746
  .trim();
747
  try { return JSON.parse(text); } catch(_) {}
748
- // Try to extract first JSON array or object
749
- const arrMatch = text.match(/\\[\\s*[\\s\\S]*?\\]/);
750
- if (arrMatch) {
751
- try { return JSON.parse(arrMatch[0]); } catch(_) {}
752
- }
753
  const objMatch = text.match(/\\{[\\s\\S]*?\\}/);
754
- if (objMatch) {
755
- try { return JSON.parse(objMatch[0]); } catch(_) {}
756
- }
757
  return null;
758
  }
759
 
@@ -764,147 +823,107 @@ const groundCanvas = document.getElementById('groundCanvas');
764
  const groundPlaceholder = document.getElementById('groundPlaceholder');
765
  const gCtx = groundCanvas.getContext('2d');
766
 
767
- /*
768
- Handles all common model output formats:
769
- bbox_2d : [x1,y1,x2,y2] — pixel or normalised
770
- bbox : [x1,y1,x2,y2]
771
- point_2d: [x,y] — pixel or normalised
772
- point : [x,y]
773
- raw arrays of 4 numbers → bbox
774
- raw arrays of 2 numbers → point
775
- */
 
 
 
 
 
 
 
 
 
 
 
 
 
776
  function drawGrounding(imgSrc, jsonText) {
777
  const parsed = safeParseJSON(jsonText);
778
- if (!parsed) {
779
- console.warn('Grounding: could not parse JSON:', jsonText);
780
- return;
781
- }
782
 
783
  const img = new Image();
784
  img.onload = () => {
785
- const W = img.naturalWidth;
786
- const H = img.naturalHeight;
787
  groundCanvas.width = W;
788
  groundCanvas.height = H;
789
  gCtx.drawImage(img, 0, 0);
790
  groundPlaceholder.style.display = 'none';
791
 
792
- const lw = Math.max(2, W / 200);
793
- const fs = Math.max(12, W / 40);
794
  gCtx.lineWidth = lw;
795
  gCtx.font = `bold ${fs}px JetBrains Mono, monospace`;
796
 
797
- // Normalise to array of items
798
  const items = Array.isArray(parsed) ? parsed : [parsed];
799
 
800
  items.forEach((item, i) => {
801
- // ── Detect / bbox ──
 
 
802
  let bbox = null;
803
- if (item && item.bbox_2d && Array.isArray(item.bbox_2d) && item.bbox_2d.length === 4) {
804
- bbox = item.bbox_2d;
805
- } else if (item && item.bbox && Array.isArray(item.bbox) && item.bbox.length === 4) {
806
- bbox = item.bbox;
807
- } else if (Array.isArray(item) && item.length === 4 && item.every(n => typeof n === 'number')) {
808
- bbox = item;
809
- }
810
 
811
  if (bbox) {
812
- let [x1, y1, x2, y2] = bbox;
813
- // Auto-detect pixel vs normalised (values > 2 → pixel coords)
814
- const isNorm = x1 <= 1 && y1 <= 1 && x2 <= 1 && y2 <= 1;
815
- if (isNorm) { x1*=W; y1*=H; x2*=W; y2*=H; }
816
- const bw = x2 - x1, bh = y2 - y1;
817
- const label = item.label || `${i+1}`;
818
- const colors = [
819
- '#4ecdc4','#7c6af7','#ff6b6b','#ffd93d',
820
- '#6bcb77','#ff922b','#cc5de8','#339af0'
821
- ];
822
- const col = colors[i % colors.length];
823
-
824
- // Fill
825
- gCtx.fillStyle = col.replace(/^#/,'') === col
826
- ? col + '33'
827
- : hexToRgba(col, 0.18);
828
- gCtx.fillRect(x1, y1, bw, bh);
829
 
830
- // Stroke
 
831
  gCtx.strokeStyle = col;
832
  gCtx.strokeRect(x1, y1, bw, bh);
833
 
834
- // Label pill
835
- const textW = gCtx.measureText(label).width;
836
- const ph = fs * 1.4, pw = textW + 10;
837
- const lx = x1, ly = Math.max(0, y1 - ph);
838
  gCtx.fillStyle = col;
839
- roundRect(gCtx, lx, ly, pw, ph, 4);
840
- gCtx.fill();
841
  gCtx.fillStyle = '#fff';
842
- gCtx.fillText(label, lx + 5, ly + ph * 0.75);
843
  return;
844
  }
845
 
846
  // ── Point ──
847
  let pt = null;
848
- if (item && item.point_2d && Array.isArray(item.point_2d) && item.point_2d.length === 2) {
849
- pt = item.point_2d;
850
- } else if (item && item.point && Array.isArray(item.point) && item.point.length === 2) {
851
- pt = item.point;
852
- } else if (Array.isArray(item) && item.length === 2 && item.every(n => typeof n === 'number')) {
853
- pt = item;
854
- }
855
 
856
  if (pt) {
857
- let [x, y] = pt;
858
- const isNorm = x <= 1 && y <= 1;
859
- if (isNorm) { x *= W; y *= H; }
860
- const r = Math.max(8, W / 60);
861
- const col = '#4ecdc4';
862
 
863
- // Outer ring
864
  gCtx.beginPath();
865
- gCtx.arc(x, y, r * 1.6, 0, Math.PI * 2);
866
- gCtx.fillStyle = 'rgba(78,205,196,0.15)';
867
- gCtx.fill();
868
 
869
- // Main dot
870
  gCtx.beginPath();
871
- gCtx.arc(x, y, r, 0, Math.PI * 2);
872
- gCtx.fillStyle = col;
873
- gCtx.fill();
874
- gCtx.strokeStyle = '#fff';
875
- gCtx.stroke();
876
 
877
- // Label
878
- const label = item.label || `${i+1}`;
879
  gCtx.fillStyle = '#fff';
880
- gCtx.fillText(label, x + r + 4, y + fs * 0.4);
881
  }
882
  });
883
  };
884
  img.src = imgSrc;
885
  }
886
 
887
- function hexToRgba(hex, alpha) {
888
- const r = parseInt(hex.slice(1,3),16);
889
- const g = parseInt(hex.slice(3,5),16);
890
- const b = parseInt(hex.slice(5,7),16);
891
- return `rgba(${r},${g},${b},${alpha})`;
892
- }
893
-
894
- function roundRect(ctx, x, y, w, h, r) {
895
- ctx.beginPath();
896
- ctx.moveTo(x + r, y);
897
- ctx.lineTo(x + w - r, y);
898
- ctx.quadraticCurveTo(x + w, y, x + w, y + r);
899
- ctx.lineTo(x + w, y + h - r);
900
- ctx.quadraticCurveTo(x + w, y + h, x + w - r, y + h);
901
- ctx.lineTo(x + r, y + h);
902
- ctx.quadraticCurveTo(x, y + h, x, y + h - r);
903
- ctx.lineTo(x, y + r);
904
- ctx.quadraticCurveTo(x, y, x + r, y);
905
- ctx.closePath();
906
- }
907
-
908
  // ══════════════════════════════════════════════
909
  // RUN INFERENCE
910
  // ══════════════════════════════════════════════
@@ -921,7 +940,6 @@ runBtn.onclick = async () => {
921
  const promptStr = promptInput.value.trim();
922
  if (!promptStr) { alert('Please enter a prompt directive.'); return; }
923
 
924
- // UI: running state
925
  runBtn.disabled = true;
926
  btnLoader.style.display = 'inline-block';
927
  outputBox.innerText = '';
@@ -974,8 +992,6 @@ runBtn.onclick = async () => {
974
  }
975
 
976
  dotOut.classList.add('active');
977
-
978
- // Visualise grounding
979
  const cat = categorySelect.value;
980
  if ((cat === 'Point' || cat === 'Detect') && fullText.trim()) {
981
  dotGnd.classList.add('active');
 
17
  from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse
18
  from transformers import (
19
  Qwen3_5ForConditionalGeneration,
20
+ Qwen3VLForConditionalGeneration,
21
  AutoProcessor,
22
  AutoModelForImageTextToText,
23
  TextIteratorStreamer,
 
33
  else torch.float16
34
  )
35
 
36
+ QWEN_MODEL_NAME = "Qwen/Qwen3.5-2B"
37
+ QWEN_VL_MODEL_NAME = "Qwen/Qwen3-VL-2B-Instruct"
38
+ LFM_MODEL_NAME = "LiquidAI/LFM2.5-VL-450M"
39
 
40
+ # ── Qwen3.5-2B ──────────────────────────────────────────
41
+ print(f"Loading Qwen3.5 model: {QWEN_MODEL_NAME} on {DEVICE}...")
42
  try:
43
  qwen_model = Qwen3_5ForConditionalGeneration.from_pretrained(
44
  QWEN_MODEL_NAME, torch_dtype=DTYPE, device_map=DEVICE,
45
  ).eval()
46
  qwen_processor = AutoProcessor.from_pretrained(QWEN_MODEL_NAME)
47
+ print("Qwen3.5 model loaded successfully.")
48
  except Exception as e:
49
+ print(f"Warning: Qwen3.5 model loading failed. Error: {e}")
50
  qwen_model = None
51
  qwen_processor = None
52
 
53
+ # ── Qwen3-VL-2B-Instruct ────────────────────────────────
54
+ print(f"Loading Qwen3-VL model: {QWEN_VL_MODEL_NAME} on {DEVICE}...")
55
+ try:
56
+ qwen_vl_model = Qwen3VLForConditionalGeneration.from_pretrained(
57
+ QWEN_VL_MODEL_NAME,
58
+ trust_remote_code=True,
59
+ torch_dtype=torch.bfloat16,
60
+ ).to(DEVICE).eval()
61
+ qwen_vl_processor = AutoProcessor.from_pretrained(
62
+ QWEN_VL_MODEL_NAME, trust_remote_code=True
63
+ )
64
+ print("Qwen3-VL model loaded successfully.")
65
+ except Exception as e:
66
+ print(f"Warning: Qwen3-VL model loading failed. Error: {e}")
67
+ qwen_vl_model = None
68
+ qwen_vl_processor = None
69
+
70
+ # ── LFM2.5-VL-450M ──────────────────────────────────────
71
  print(f"Loading LFM model: {LFM_MODEL_NAME} on {DEVICE}...")
72
  try:
73
  lfm_model = AutoModelForImageTextToText.from_pretrained(
 
101
 
102
  # --- Inference Generator (Streaming) ---
103
  @spaces.GPU(duration=120)
104
+ def generate_inference_stream(
105
+ image: Image.Image, category: str, prompt: str, model_id: str = "qwen"
106
+ ):
107
  if category == "Query":
108
  full_prompt = prompt
109
  elif category == "Caption":
 
115
  else:
116
  full_prompt = prompt
117
 
118
+ # ── Qwen3-VL ────────────────────────────────────────
119
+ if model_id == "qwen_vl":
120
+ if qwen_vl_model is None or qwen_vl_processor is None:
121
+ yield f"data: {json.dumps({'chunk': '[Error] Qwen3-VL model not loaded.'})}\n\n"
122
+ yield "data: [DONE]\n\n"
123
+ return
124
+
125
+ messages = [
126
+ {
127
+ "role": "user",
128
+ "content": [
129
+ {"type": "image", "image": image},
130
+ {"type": "text", "text": full_prompt},
131
+ ],
132
+ }
133
+ ]
134
+
135
+ text_input = qwen_vl_processor.apply_chat_template(
136
+ messages, tokenize=False, add_generation_prompt=True
137
+ )
138
+ inputs = qwen_vl_processor(
139
+ text=[text_input], images=[image], return_tensors="pt", padding=True
140
+ ).to(qwen_vl_model.device)
141
+
142
+ streamer = TextIteratorStreamer(
143
+ qwen_vl_processor.tokenizer,
144
+ skip_prompt=True,
145
+ skip_special_tokens=True,
146
+ timeout=120,
147
+ )
148
+ thread = threading.Thread(
149
+ target=qwen_vl_model.generate,
150
+ kwargs=dict(
151
+ **inputs,
152
+ streamer=streamer,
153
+ max_new_tokens=1024,
154
+ use_cache=True,
155
+ temperature=1.0,
156
+ do_sample=True,
157
+ ),
158
+ )
159
+ thread.start()
160
+ for tok in streamer:
161
+ if tok:
162
+ yield f"data: {json.dumps({'chunk': tok})}\n\n"
163
+ thread.join()
164
+
165
+ # ── LFM ─────────────────────────────────────────────
166
+ elif model_id == "lfm":
167
  if lfm_model is None or lfm_processor is None:
168
  yield f"data: {json.dumps({'chunk': '[Error] LFM model not loaded.'})}\n\n"
169
  yield "data: [DONE]\n\n"
 
174
  "role": "user",
175
  "content": [
176
  {"type": "image", "image": image},
177
+ {"type": "text", "text": full_prompt},
178
  ],
179
  }
180
  ]
 
181
  inputs = lfm_processor.apply_chat_template(
182
  conversation,
183
  add_generation_prompt=True,
 
192
  skip_special_tokens=True,
193
  timeout=120,
194
  )
 
195
  thread = threading.Thread(
196
  target=lfm_model.generate,
197
  kwargs=dict(
 
207
  yield f"data: {json.dumps({'chunk': tok})}\n\n"
208
  thread.join()
209
 
210
+ # ── Qwen3.5-2B (default) ────────────────────────────
211
  else:
212
  if qwen_model is None or qwen_processor is None:
213
+ yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5 model not loaded.'})}\n\n"
214
  yield "data: [DONE]\n\n"
215
  return
216
 
 
219
  "role": "user",
220
  "content": [
221
  {"type": "image", "image": image},
222
+ {"type": "text", "text": full_prompt},
223
  ],
224
  }
225
  ]
 
226
  text_input = qwen_processor.apply_chat_template(
227
  messages, tokenize=False, add_generation_prompt=True
228
  )
 
229
  inputs = qwen_processor(
230
  text=[text_input], images=[image], return_tensors="pt", padding=True
231
  ).to(qwen_model.device)
 
236
  skip_special_tokens=True,
237
  timeout=120,
238
  )
 
239
  thread = threading.Thread(
240
  target=qwen_model.generate,
241
  kwargs=dict(
 
259
  # --- FastAPI Endpoints ---
260
  @app.post("/api/run")
261
  async def run_inference(
262
+ image: UploadFile = File(...),
263
+ category: str = Form(...),
264
+ prompt: str = Form(...),
265
+ model_id: str = Form("qwen"),
266
  ):
267
  try:
268
  img_bytes = await image.read()
 
271
 
272
  return StreamingResponse(
273
  generate_inference_stream(img, category, prompt, model_id),
274
+ media_type="text/event-stream",
275
  )
276
  except Exception as e:
277
  return JSONResponse({"error": str(e)}, status_code=500)
 
290
  <link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;600;700&family=Space+Mono:wght@400;700&display=swap" rel="stylesheet">
291
  <style>
292
  :root {
293
+ --bg: #0d0d0f;
294
+ --grid: #1a1a1f;
295
+ --node-bg: #13131a;
296
  --node-header: #1c1c26;
297
  --node-border: #2a2a3a;
298
+ --accent: #7c6af7;
299
+ --accent2: #4ecdc4;
300
+ --accent3: #ff6b6b;
301
+ --text: #e8e8f0;
302
+ --muted: #6b6b8a;
303
+ --port: #4ecdc4;
304
+ --wire: #2a2a4a;
305
  --wire-active: #7c6af7;
306
  }
307
 
 
325
  overflow-y: auto;
326
  }
327
 
328
+ /* ── Top Bar ── */
329
  .top-bar {
330
+ position: sticky; top: 0; left: 0; right: 0;
331
+ height: 44px;
 
332
  background: rgba(13,13,15,0.95);
333
  border-bottom: 1px solid var(--node-border);
334
  display: flex; align-items: center; padding: 0 20px;
 
336
  backdrop-filter: blur(12px);
337
  }
338
  .top-bar .logo { font-size: 13px; font-weight: 700; color: var(--accent); letter-spacing: 0.05em; }
339
+ .top-bar .sep { color: var(--node-border); }
340
+ .top-bar .sub { font-size: 11px; color: var(--muted); }
341
  .top-bar .badge {
342
  margin-left: auto;
343
  background: rgba(124,106,247,0.15);
344
  border: 1px solid rgba(124,106,247,0.3);
345
+ padding: 3px 10px; border-radius: 20px;
346
+ font-size: 10px; color: var(--accent);
347
  }
348
 
349
+ /* ── Canvas ── */
350
  #canvas {
351
  position: relative;
352
  width: 1340px;
353
+ min-height: calc(100vh - 44px);
354
+ height: 920px;
355
  margin: 0 auto;
356
  }
357
 
 
361
  pointer-events: none; z-index: 2;
362
  overflow: visible;
363
  }
 
364
  path.wire {
365
  fill: none; stroke: var(--wire); stroke-width: 2.5;
366
  stroke-linecap: round;
 
372
  }
373
  @keyframes flow { to { stroke-dashoffset: -24; } }
374
 
375
+ /* ── Nodes ── */
376
  .node {
377
  position: absolute;
378
  width: 300px;
379
  background: var(--node-bg);
380
  border: 1px solid var(--node-border);
381
  border-radius: 10px;
382
+ box-shadow: 0 10px 32px rgba(0,0,0,0.55), 0 0 0 0px rgba(124,106,247,0);
383
  z-index: 10;
384
+ display: flex; flex-direction: column;
 
385
  transition: box-shadow 0.2s;
386
  }
387
  .node:hover {
388
+ box-shadow: 0 10px 32px rgba(0,0,0,0.55), 0 0 0 1px rgba(124,106,247,0.3);
389
  }
390
+ /* ↓ reduced from 420px → 370px */
391
+ .node.fixed-height { height: 370px; }
392
 
393
  .node-header {
394
  background: var(--node-header);
395
+ padding: 8px 13px;
396
  border-bottom: 1px solid var(--node-border);
397
  border-radius: 10px 10px 0 0;
398
  font-size: 11px; font-weight: 700;
 
409
  }
410
 
411
  .node-body {
412
+ padding: 12px;
413
+ display: flex; flex-direction: column; gap: 10px;
414
  flex: 1; overflow: hidden;
415
  }
416
 
417
+ /* ── Ports ── */
418
  .port {
419
  position: absolute;
420
  width: 12px; height: 12px;
421
  background: var(--node-bg);
422
  border: 2px solid var(--port);
423
+ border-radius: 50%; z-index: 30;
 
424
  }
425
  .port.out { right: -7px; }
426
+ .port.in { left: -7px; }
427
 
428
+ /* ── Labels ── */
429
  label {
430
  font-size: 10px; color: var(--muted);
431
+ font-weight: 600; display: block; margin-bottom: 3px;
432
  letter-spacing: 0.08em; text-transform: uppercase;
433
  }
434
 
 
436
 
437
  .file-upload {
438
  border: 1.5px dashed var(--node-border);
439
+ border-radius: 8px; padding: 14px 10px;
440
  text-align: center; cursor: pointer;
441
  font-size: 11px; color: var(--muted);
442
  transition: border-color 0.2s, background 0.2s;
443
  background: rgba(255,255,255,0.01);
444
+ display: flex; flex-direction: column; align-items: center; gap: 6px;
445
  }
446
  .file-upload:hover { border-color: var(--accent); background: rgba(124,106,247,0.04); }
447
  .file-upload svg { opacity: 0.5; transition: opacity 0.2s; }
448
  .file-upload:hover svg { opacity: 0.9; }
449
 
450
  .img-preview {
451
+ width: 100%; height: 190px;
452
+ object-fit: contain; border-radius: 6px;
453
+ display: none; background: #000;
 
 
454
  border: 1px solid var(--node-border);
455
  }
456
 
 
458
  width: 100%;
459
  background: rgba(0,0,0,0.3);
460
  border: 1px solid var(--node-border);
461
+ color: var(--text); padding: 8px 10px;
462
  border-radius: 6px; outline: none;
463
  font-size: 12px; font-family: 'JetBrains Mono', monospace;
464
  resize: none; transition: border-color 0.2s;
 
469
  button.run-btn {
470
  background: linear-gradient(135deg, var(--accent), #9b59b6);
471
  color: #fff; border: none;
472
+ padding: 9px; border-radius: 7px;
473
  font-weight: 700; font-size: 12px;
474
  font-family: 'JetBrains Mono', monospace;
475
  cursor: pointer;
 
477
  display: flex; justify-content: center; align-items: center; gap: 8px;
478
  letter-spacing: 0.04em;
479
  }
480
+ button.run-btn:hover { opacity: 0.9; }
481
+ button.run-btn:active { transform: scale(0.98); }
482
  button.run-btn:disabled { background: var(--node-border); cursor: not-allowed; color: #555; }
483
 
484
  .output-box {
485
  background: rgba(0,0,0,0.4);
486
  border: 1px solid var(--node-border);
487
+ border-radius: 6px; padding: 11px;
488
+ flex: 1; overflow-y: auto;
 
489
  font-size: 12px; line-height: 1.6;
490
  color: #c8c8e0; white-space: pre-wrap;
491
  user-select: text;
492
  font-family: 'JetBrains Mono', monospace;
493
  }
494
 
495
+ /* Grounding */
496
  .ground-canvas-wrap {
497
  position: relative; flex: 1;
498
  border: 1px solid var(--node-border);
499
  border-radius: 6px; overflow: hidden;
500
+ background: #000; min-height: 0;
 
 
 
 
 
 
501
  }
502
+ .ground-canvas-wrap canvas { width: 100%; height: 100%; object-fit: contain; display: block; }
503
  .ground-placeholder {
504
  position: absolute; inset: 0;
505
  display: flex; align-items: center; justify-content: center;
506
+ font-size: 11px; color: var(--muted); text-align: center; padding: 12px;
 
507
  }
508
 
509
  .loader {
510
  width: 12px; height: 12px;
511
  border: 2px solid rgba(255,255,255,0.3);
512
+ border-top-color: #fff; border-radius: 50%;
 
513
  animation: spin 0.7s linear infinite;
514
  display: none;
515
  }
 
517
 
518
  .status-dot {
519
  width: 7px; height: 7px; border-radius: 50%;
520
+ background: var(--muted); display: inline-block; margin-right: 6px;
 
521
  }
522
  .status-dot.active { background: var(--accent2); box-shadow: 0 0 6px var(--accent2); }
523
 
 
526
  border-radius: 4px; font-size: 9px; font-weight: 700;
527
  letter-spacing: 0.06em; text-transform: uppercase;
528
  }
529
+ .model-badge.qwen { background: rgba(124,106,247,0.2); color: var(--accent); border: 1px solid rgba(124,106,247,0.3); }
530
+ .model-badge.qwen-vl { background: rgba(255,150,50,0.15); color: #ff9632; border: 1px solid rgba(255,150,50,0.35); }
531
+ .model-badge.lfm { background: rgba(78,205,196,0.15); color: var(--accent2); border: 1px solid rgba(78,205,196,0.3); }
532
 
533
+ .canvas-footer { height: 40px; }
 
 
 
534
  </style>
535
  </head>
536
  <body>
 
539
  <span class="logo">MULTIMODAL EDGE</span>
540
  <span class="sep">|</span>
541
  <span class="sub">Node-Based Inference Canvas</span>
542
+ <span class="badge">v2.1 — TRI MODEL</span>
543
  </div>
544
 
545
  <div id="canvas">
 
551
  </svg>
552
 
553
  <!-- ─── ID 01 : Image Input ─── -->
554
+ <div class="node fixed-height" id="node-img" style="left:40px; top:56px;">
555
  <div class="node-header">
556
  <span><span class="status-dot" id="dot-img"></span>Input Image</span>
557
  <span class="id">ID: 01</span>
 
560
  <div>
561
  <label>Upload Image</label>
562
  <div class="file-upload" id="dropZone">
563
+ <svg width="34" height="34" viewBox="0 0 24 24" fill="none"
564
+ stroke="#7c6af7" stroke-width="1.5"
565
+ stroke-linecap="round" stroke-linejoin="round">
566
  <rect x="3" y="3" width="18" height="18" rx="2" ry="2"/>
567
  <circle cx="8.5" cy="8.5" r="1.5"/>
568
  <polyline points="21 15 16 10 5 21"/>
 
573
  <img id="imgPreview" class="img-preview" />
574
  </div>
575
  </div>
576
+ <div class="port out" id="port-img-out" style="top:50%;transform:translateY(-50%);"></div>
577
  </div>
578
 
579
  <!-- ─── ID 02 : Model Selector ─── -->
580
+ <div class="node fixed-height" id="node-model" style="left:40px; top:446px;">
581
  <div class="node-header">
582
  <span><span class="status-dot" id="dot-model"></span>Model Selector</span>
583
  <span class="id">ID: 02</span>
 
587
  <label>Active Model</label>
588
  <select id="modelSelect">
589
  <option value="qwen">Qwen3.5-2B (Vision-Language)</option>
590
+ <option value="qwen_vl">Qwen3-VL-2B-Instruct</option>
591
  <option value="lfm">LFM2.5-VL-450M (LiquidAI)</option>
592
  </select>
593
  </div>
594
+ <div id="modelInfoBox"
595
+ style="background:rgba(124,106,247,0.07);border:1px solid rgba(124,106,247,0.2);
596
+ border-radius:6px;padding:10px;font-size:10px;color:var(--muted);line-height:1.6;">
597
+ <span class="model-badge qwen">QWEN 3.5</span><br><br>
598
+ Qwen3.5 2B parameter multimodal model by Alibaba Cloud.
599
+ Supports Query, Caption, Point &amp; Detect with streaming output.
600
  </div>
601
  <div style="flex:1;"></div>
602
  </div>
603
+ <div class="port out" id="port-model-out" style="top:50%;transform:translateY(-50%);"></div>
604
  </div>
605
 
606
+ <!-- ─── ID 03 : Task Config ─── -->
607
+ <div class="node fixed-height" id="node-task" style="left:430px; top:56px;">
608
+ <div class="port in" id="port-task-in" style="top:50%;transform:translateY(-50%);"></div>
609
  <div class="node-header">
610
  <span><span class="status-dot" id="dot-task"></span>Task Config</span>
611
  <span class="id">ID: 03</span>
 
622
  </div>
623
  <div>
624
  <label>Prompt Directive</label>
625
+ <textarea id="promptInput" rows="4"
626
+ placeholder="e.g., Count the total number of boats and describe the environment."></textarea>
627
  </div>
628
  <button class="run-btn" id="runBtn">
629
  <span>Execute</span>
630
  <span class="loader" id="btnLoader"></span>
631
  </button>
632
  </div>
633
+ <div class="port out" id="port-task-out" style="top:50%;transform:translateY(-50%);"></div>
634
  </div>
635
 
636
+ <!-- ─── ID 04 : Output Stream ─── -->
637
+ <div class="node fixed-height" id="node-out" style="left:820px; top:56px;">
638
+ <div class="port in" id="port-out-in" style="top:50%;transform:translateY(-50%);"></div>
639
  <div class="node-header">
640
  <span><span class="status-dot" id="dot-out"></span>Output Stream</span>
641
  <span class="id">ID: 04</span>
 
647
  </div>
648
 
649
  <!-- ─── ID 05 : Grounding Visualiser ─── -->
650
+ <div class="node fixed-height" id="node-gnd" style="left:820px; top:446px;">
651
+ <div class="port in" id="port-gnd-in" style="top:50%;transform:translateY(-50%);"></div>
652
  <div class="node-header">
653
  <span><span class="status-dot" id="dot-gnd"></span>View Grounding</span>
654
  <span class="id">ID: 05</span>
 
669
 
670
  <script>
671
  // ══════════════════════════════════════════════
672
+ // WIRE DRAWING
673
  // ══════════════════════════════════════════════
674
  const canvasEl = document.getElementById('canvas');
675
 
 
708
  document.querySelectorAll('.node').forEach(node => {
709
  const header = node.querySelector('.node-header');
710
  let drag = false, sx, sy, il, it;
 
711
  header.addEventListener('mousedown', e => {
712
  drag = true; sx = e.clientX; sy = e.clientY;
713
  il = parseInt(node.style.left) || 0;
714
  it = parseInt(node.style.top) || 0;
715
+ node.style.zIndex = 100; e.preventDefault();
 
716
  });
717
  document.addEventListener('mousemove', e => {
718
  if (!drag) return;
 
728
  window.addEventListener('resize', updateWires);
729
  window.addEventListener('scroll', updateWires);
730
  document.addEventListener('scroll', updateWires, true);
731
+ requestAnimationFrame(updateWires);
732
 
733
  // ══════════════════════════════════════════════
734
  // FILE UPLOAD
 
759
  };
760
 
761
  // ══════════════════════════════════════════════
762
+ // MODEL SELECTOR
763
  // ══════════════════════════════════════════════
764
  const modelSelect = document.getElementById('modelSelect');
765
  const modelInfoBox = document.getElementById('modelInfoBox');
 
767
  dotModel.classList.add('active');
768
 
769
  const MODEL_INFO = {
770
+ qwen: `
771
+ <span class="model-badge qwen">QWEN 3.5</span><br><br>
772
+ Qwen3.5 2B parameter multimodal model by Alibaba Cloud.
773
+ Supports Query, Caption, Point &amp; Detect with streaming output.`,
774
+ qwen_vl: `
775
+ <span class="model-badge qwen-vl">QWEN3-VL</span><br><br>
776
+ Qwen3-VL-2B-Instruct — dedicated vision-language model by Alibaba Cloud.
777
+ Strong spatial grounding, OCR, and instruction-following capabilities.`,
778
+ lfm: `
779
+ <span class="model-badge lfm">LFM</span><br><br>
780
+ LFM2.5-VL 450M parameter vision-language model by LiquidAI.
781
+ Ultra-lightweight edge model with strong grounding capabilities.`,
782
  };
783
 
784
  modelSelect.onchange = () => {
 
786
  };
787
 
788
  // ══════════════════════════════════════════════
789
+ // CATEGORY PLACEHOLDER
790
  // ══════════════════════════════════════════════
791
  const categorySelect = document.getElementById('categorySelect');
792
  const promptInput = document.getElementById('promptInput');
 
801
  };
802
 
803
  // ══════════════════════════════════════════════
804
+ // JSON PARSER
805
  // ══════════════════════════════════════════════
806
  function safeParseJSON(text) {
 
807
  text = text.trim()
808
  .replace(/^```(json)?\\s*/i, '')
809
+ .replace(/\\s*```$/, '')
810
  .trim();
811
  try { return JSON.parse(text); } catch(_) {}
812
+ const arrMatch = text.match(/\\[[\\s\\S]*?\\]/);
813
+ if (arrMatch) { try { return JSON.parse(arrMatch[0]); } catch(_) {} }
 
 
 
814
  const objMatch = text.match(/\\{[\\s\\S]*?\\}/);
815
+ if (objMatch) { try { return JSON.parse(objMatch[0]); } catch(_) {} }
 
 
816
  return null;
817
  }
818
 
 
823
  const groundPlaceholder = document.getElementById('groundPlaceholder');
824
  const gCtx = groundCanvas.getContext('2d');
825
 
826
+ const PALETTE = [
827
+ '#4ecdc4','#7c6af7','#ff6b6b','#ffd93d',
828
+ '#6bcb77','#ff922b','#cc5de8','#339af0'
829
+ ];
830
+
831
+ function hexToRgba(hex, alpha) {
832
+ const r = parseInt(hex.slice(1,3),16);
833
+ const g = parseInt(hex.slice(3,5),16);
834
+ const b = parseInt(hex.slice(5,7),16);
835
+ return `rgba(${r},${g},${b},${alpha})`;
836
+ }
837
+
838
+ function roundRect(ctx, x, y, w, h, r) {
839
+ ctx.beginPath();
840
+ ctx.moveTo(x+r, y);
841
+ ctx.lineTo(x+w-r, y); ctx.quadraticCurveTo(x+w, y, x+w, y+r);
842
+ ctx.lineTo(x+w, y+h-r); ctx.quadraticCurveTo(x+w, y+h, x+w-r, y+h);
843
+ ctx.lineTo(x+r, y+h); ctx.quadraticCurveTo(x, y+h, x, y+h-r);
844
+ ctx.lineTo(x, y+r); ctx.quadraticCurveTo(x, y, x+r, y);
845
+ ctx.closePath();
846
+ }
847
+
848
  function drawGrounding(imgSrc, jsonText) {
849
  const parsed = safeParseJSON(jsonText);
850
+ if (!parsed) { console.warn('Grounding: could not parse JSON:', jsonText); return; }
 
 
 
851
 
852
  const img = new Image();
853
  img.onload = () => {
854
+ const W = img.naturalWidth, H = img.naturalHeight;
 
855
  groundCanvas.width = W;
856
  groundCanvas.height = H;
857
  gCtx.drawImage(img, 0, 0);
858
  groundPlaceholder.style.display = 'none';
859
 
860
+ const lw = Math.max(2, W/200);
861
+ const fs = Math.max(12, W/40);
862
  gCtx.lineWidth = lw;
863
  gCtx.font = `bold ${fs}px JetBrains Mono, monospace`;
864
 
 
865
  const items = Array.isArray(parsed) ? parsed : [parsed];
866
 
867
  items.forEach((item, i) => {
868
+ const col = PALETTE[i % PALETTE.length];
869
+
870
+ // ── Bounding box ──
871
  let bbox = null;
872
+ if (item?.bbox_2d?.length === 4) bbox = item.bbox_2d;
873
+ else if (item?.bbox?.length === 4) bbox = item.bbox;
874
+ else if (Array.isArray(item) && item.length === 4 && item.every(n => typeof n === 'number')) bbox = item;
 
 
 
 
875
 
876
  if (bbox) {
877
+ let [x1,y1,x2,y2] = bbox;
878
+ // Auto-detect normalised vs pixel
879
+ if (x1 <= 1 && y1 <= 1 && x2 <= 1 && y2 <= 1) { x1*=W; y1*=H; x2*=W; y2*=H; }
880
+ const bw = x2-x1, bh = y2-y1;
881
+ const label = item?.label || `${i+1}`;
 
 
 
 
 
 
 
 
 
 
 
 
882
 
883
+ gCtx.fillStyle = hexToRgba(col, 0.18);
884
+ gCtx.fillRect(x1, y1, bw, bh);
885
  gCtx.strokeStyle = col;
886
  gCtx.strokeRect(x1, y1, bw, bh);
887
 
888
+ const tw = gCtx.measureText(label).width;
889
+ const ph = fs * 1.4, pw = tw + 10;
890
+ const lx = x1, ly = Math.max(0, y1 - ph);
 
891
  gCtx.fillStyle = col;
892
+ roundRect(gCtx, lx, ly, pw, ph, 4); gCtx.fill();
 
893
  gCtx.fillStyle = '#fff';
894
+ gCtx.fillText(label, lx+5, ly + ph*0.76);
895
  return;
896
  }
897
 
898
  // ── Point ──
899
  let pt = null;
900
+ if (item?.point_2d?.length === 2) pt = item.point_2d;
901
+ else if (item?.point?.length === 2) pt = item.point;
902
+ else if (Array.isArray(item) && item.length === 2 && item.every(n => typeof n === 'number')) pt = item;
 
 
 
 
903
 
904
  if (pt) {
905
+ let [x,y] = pt;
906
+ if (x <= 1 && y <= 1) { x*=W; y*=H; }
907
+ const r = Math.max(8, W/60);
908
+ const label = item?.label || `${i+1}`;
 
909
 
 
910
  gCtx.beginPath();
911
+ gCtx.arc(x, y, r*1.6, 0, Math.PI*2);
912
+ gCtx.fillStyle = hexToRgba(col, 0.15); gCtx.fill();
 
913
 
 
914
  gCtx.beginPath();
915
+ gCtx.arc(x, y, r, 0, Math.PI*2);
916
+ gCtx.fillStyle = col; gCtx.fill();
917
+ gCtx.strokeStyle = '#fff'; gCtx.stroke();
 
 
918
 
 
 
919
  gCtx.fillStyle = '#fff';
920
+ gCtx.fillText(label, x + r + 4, y + fs*0.4);
921
  }
922
  });
923
  };
924
  img.src = imgSrc;
925
  }
926
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
927
  // ══════════════════════════════════════════════
928
  // RUN INFERENCE
929
  // ══════════════════════════════════════════════
 
940
  const promptStr = promptInput.value.trim();
941
  if (!promptStr) { alert('Please enter a prompt directive.'); return; }
942
 
 
943
  runBtn.disabled = true;
944
  btnLoader.style.display = 'inline-block';
945
  outputBox.innerText = '';
 
992
  }
993
 
994
  dotOut.classList.add('active');
 
 
995
  const cat = categorySelect.value;
996
  if ((cat === 'Point' || cat === 'Detect') && fullText.trim()) {
997
  dotGnd.classList.add('active');