prithivMLmods committed on
Commit
3ce64c9
·
verified ·
1 Parent(s): d750f5a

update app

Browse files
Files changed (1) hide show
  1. app.py +142 -82
app.py CHANGED
@@ -33,23 +33,37 @@ DTYPE = (
33
  else torch.float16
34
  )
35
 
36
- QWEN_MODEL_NAME = "Qwen/Qwen3.5-2B"
 
37
  QWEN_VL_MODEL_NAME = "Qwen/Qwen3-VL-2B-Instruct"
38
  LFM_450_MODEL_NAME = "LiquidAI/LFM2.5-VL-450M"
39
  LFM_16_MODEL_NAME = "LiquidAI/LFM2.5-VL-1.6B"
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  # ── Qwen3.5-2B ──────────────────────────────────────────
42
- print(f"Loading Qwen3.5 model: {QWEN_MODEL_NAME} on {DEVICE}...")
43
  try:
44
- qwen_model = Qwen3_5ForConditionalGeneration.from_pretrained(
45
- QWEN_MODEL_NAME, torch_dtype=DTYPE, device_map=DEVICE,
46
  ).eval()
47
- qwen_processor = AutoProcessor.from_pretrained(QWEN_MODEL_NAME)
48
- print("Qwen3.5 model loaded successfully.")
49
  except Exception as e:
50
- print(f"Warning: Qwen3.5 model loading failed. Error: {e}")
51
- qwen_model = None
52
- qwen_processor = None
53
 
54
  # ── Qwen3-VL-2B-Instruct ────────────────────────────────
55
  print(f"Loading Qwen3-VL model: {QWEN_VL_MODEL_NAME} on {DEVICE}...")
@@ -118,7 +132,7 @@ def safe_parse_json(text: str):
118
  # --- Inference Generator (Streaming) ---
119
  @spaces.GPU(duration=120)
120
  def generate_inference_stream(
121
- image: Image.Image, category: str, prompt: str, model_id: str = "qwen"
122
  ):
123
  if category == "Query":
124
  full_prompt = prompt
@@ -131,8 +145,72 @@ def generate_inference_stream(
131
  else:
132
  full_prompt = prompt
133
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  # ── Qwen3-VL ────────────────────────────────────────
135
- if model_id == "qwen_vl":
136
  if qwen_vl_model is None or qwen_vl_processor is None:
137
  yield f"data: {json.dumps({'chunk': '[Error] Qwen3-VL model not loaded.'})}\n\n"
138
  yield "data: [DONE]\n\n"
@@ -221,38 +299,6 @@ def generate_inference_stream(
221
  yield f"data: {json.dumps({'chunk': tok})}\n\n"
222
  thread.join()
223
 
224
- # ── Qwen3.5-2B (default) ────────────────────────────
225
- else:
226
- if qwen_model is None or qwen_processor is None:
227
- yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5 model not loaded.'})}\n\n"
228
- yield "data: [DONE]\n\n"
229
- return
230
-
231
- messages = [{"role": "user", "content": [
232
- {"type": "image", "image": image},
233
- {"type": "text", "text": full_prompt},
234
- ]}]
235
- text_input = qwen_processor.apply_chat_template(
236
- messages, tokenize=False, add_generation_prompt=True
237
- )
238
- inputs = qwen_processor(
239
- text=[text_input], images=[image], return_tensors="pt", padding=True
240
- ).to(qwen_model.device)
241
- streamer = TextIteratorStreamer(
242
- qwen_processor.tokenizer,
243
- skip_prompt=True, skip_special_tokens=True, timeout=120,
244
- )
245
- thread = threading.Thread(
246
- target=qwen_model.generate,
247
- kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
248
- use_cache=True, temperature=1.5, min_p=0.1),
249
- )
250
- thread.start()
251
- for tok in streamer:
252
- if tok:
253
- yield f"data: {json.dumps({'chunk': tok})}\n\n"
254
- thread.join()
255
-
256
  yield "data: [DONE]\n\n"
257
 
258
 
@@ -262,7 +308,7 @@ async def run_inference(
262
  image: UploadFile = File(...),
263
  category: str = Form(...),
264
  prompt: str = Form(...),
265
- model_id: str = Form("qwen"),
266
  ):
267
  try:
268
  img_bytes = await image.read()
@@ -348,9 +394,9 @@ async def homepage(request: Request):
348
  /* ── Canvas ── */
349
  #canvas {
350
  position: relative;
351
- width: 1340px;
352
  min-height: calc(100vh - 42px);
353
- height: 880px;
354
  margin: 0 auto;
355
  }
356
 
@@ -378,7 +424,7 @@ async def homepage(request: Request):
378
  background: var(--node-bg);
379
  border: 1px solid var(--node-border);
380
  border-radius: 9px;
381
- box-shadow: 0 8px 28px rgba(0,0,0,0.5), 0 0 0 0px rgba(124,106,247,0);
382
  z-index: 10;
383
  display: flex; flex-direction: column;
384
  transition: box-shadow 0.2s;
@@ -386,7 +432,6 @@ async def homepage(request: Request):
386
  .node:hover {
387
  box-shadow: 0 8px 28px rgba(0,0,0,0.5), 0 0 0 1px rgba(124,106,247,0.3);
388
  }
389
- /* ── reduced to 330px ── */
390
  .node.fixed-height { height: 330px; }
391
 
392
  .node-header {
@@ -474,7 +519,7 @@ async def homepage(request: Request):
474
  cursor: pointer;
475
  transition: opacity 0.2s, transform 0.1s;
476
  display: flex; justify-content: center; align-items: center; gap: 8px;
477
- letter-spacing: 0.04em;
478
  }
479
  button.run-btn:hover { opacity: 0.9; }
480
  button.run-btn:active { transform: scale(0.98); }
@@ -526,15 +571,17 @@ async def homepage(request: Request):
526
  box-shadow: 0 0 5px var(--accent2);
527
  }
528
 
 
529
  .model-badge {
530
  display: inline-block; padding: 2px 7px;
531
  border-radius: 4px; font-size: 9px; font-weight: 700;
532
  letter-spacing: 0.06em; text-transform: uppercase;
533
  }
534
- .model-badge.qwen { background: rgba(124,106,247,0.2); color: var(--accent); border: 1px solid rgba(124,106,247,0.3); }
535
- .model-badge.qwen-vl { background: rgba(255,150,50,0.15); color: #ff9632; border: 1px solid rgba(255,150,50,0.35); }
536
- .model-badge.lfm450 { background: rgba(78,205,196,0.15); color: var(--accent2); border: 1px solid rgba(78,205,196,0.3); }
537
- .model-badge.lfm16 { background: rgba(107,203,119,0.15); color: #6bcb77; border: 1px solid rgba(107,203,119,0.35); }
 
538
 
539
  .model-info-box {
540
  border-radius: 6px; padding: 9px;
@@ -551,7 +598,7 @@ async def homepage(request: Request):
551
  <span class="logo">MULTIMODAL EDGE</span>
552
  <span class="sep">|</span>
553
  <span class="sub">Node-Based Inference Canvas</span>
554
- <span class="badge">v2.2 — QUAD MODEL</span>
555
  </div>
556
 
557
  <div id="canvas">
@@ -598,17 +645,18 @@ async def homepage(request: Request):
598
  <div>
599
  <label>Active Model</label>
600
  <select id="modelSelect">
601
- <option value="qwen">Qwen3.5-2B</option>
 
602
  <option value="qwen_vl">Qwen3-VL-2B-Instruct</option>
603
  <option value="lfm_450">LFM2.5-VL-450M (LiquidAI)</option>
604
  <option value="lfm_16">LFM2.5-VL-1.6B (LiquidAI)</option>
605
  </select>
606
  </div>
607
  <div id="modelInfoBox" class="model-info-box"
608
- style="background:rgba(124,106,247,0.07);border:1px solid rgba(124,106,247,0.2);">
609
- <span class="model-badge qwen">QWEN 3.5</span><br><br>
610
- Qwen3.5 2B multimodal model by Alibaba Cloud.
611
- Supports Query, Caption, Point &amp; Detect with streaming output.
612
  </div>
613
  <div style="flex:1;"></div>
614
  </div>
@@ -779,38 +827,50 @@ const dotModel = document.getElementById('dot-model');
779
  dotModel.classList.add('active');
780
 
781
  const MODEL_INFO = {
782
- qwen: {
783
- html: `<span class="model-badge qwen">QWEN 3.5</span><br><br>
784
- Qwen3.5 2B multimodal model by Alibaba Cloud.
785
- Supports Query, Caption, Point &amp; Detect with streaming output.`,
786
- bg: 'rgba(124,106,247,0.07)', border: 'rgba(124,106,247,0.2)'
 
 
 
 
 
 
 
 
787
  },
788
  qwen_vl: {
789
- html: `<span class="model-badge qwen-vl">QWEN3-VL</span><br><br>
790
- Qwen3-VL-2B-Instruct — dedicated vision-language model by Alibaba Cloud.
791
- Strong spatial grounding, OCR &amp; instruction-following.`,
792
- bg: 'rgba(255,150,50,0.07)', border: 'rgba(255,150,50,0.25)'
 
793
  },
794
  lfm_450: {
795
- html: `<span class="model-badge lfm450">LFM 450M</span><br><br>
796
- LFM2.5-VL 450M by LiquidAI. Ultra-lightweight edge model
797
- with strong grounding capabilities.`,
798
- bg: 'rgba(78,205,196,0.07)', border: 'rgba(78,205,196,0.2)'
 
799
  },
800
  lfm_16: {
801
- html: `<span class="model-badge lfm16">LFM 1.6B</span><br><br>
802
- LFM2.5-VL 1.6B by LiquidAI. Larger liquid-state model offering
803
- enhanced reasoning and richer visual understanding.`,
804
- bg: 'rgba(107,203,119,0.07)', border: 'rgba(107,203,119,0.25)'
 
805
  },
806
  };
807
 
808
  modelSelect.onchange = () => {
809
  const info = MODEL_INFO[modelSelect.value];
810
  if (!info) return;
811
- modelInfoBox.innerHTML = info.html;
812
- modelInfoBox.style.background = info.bg;
813
- modelInfoBox.style.borderColor = info.border;
 
814
  };
815
 
816
  // ══════════════════════════════════════════════
@@ -897,7 +957,7 @@ function drawGrounding(imgSrc, jsonText) {
897
 
898
  // ── Bounding box ──
899
  let bbox = null;
900
- if (item?.bbox_2d?.length === 4) bbox = item.bbox_2d;
901
  else if (item?.bbox?.length === 4) bbox = item.bbox;
902
  else if (Array.isArray(item) && item.length === 4 &&
903
  item.every(n => typeof n === 'number')) bbox = item;
@@ -907,7 +967,7 @@ function drawGrounding(imgSrc, jsonText) {
907
  if (x1 <= 1 && y1 <= 1 && x2 <= 1 && y2 <= 1) {
908
  x1*=W; y1*=H; x2*=W; y2*=H;
909
  }
910
- const bw = x2-x1, bh = y2-y1;
911
  const lbl = item?.label || `${i+1}`;
912
 
913
  gCtx.fillStyle = hexToRgba(col, 0.18);
@@ -927,7 +987,7 @@ function drawGrounding(imgSrc, jsonText) {
927
 
928
  // ── Point ──
929
  let pt = null;
930
- if (item?.point_2d?.length === 2) pt = item.point_2d;
931
  else if (item?.point?.length === 2) pt = item.point;
932
  else if (Array.isArray(item) && item.length === 2 &&
933
  item.every(n => typeof n === 'number')) pt = item;
 
33
  else torch.float16
34
  )
35
 
36
+ QWEN_4B_MODEL_NAME = "Qwen/Qwen3.5-4B"
37
+ QWEN_2B_MODEL_NAME = "Qwen/Qwen3.5-2B"
38
  QWEN_VL_MODEL_NAME = "Qwen/Qwen3-VL-2B-Instruct"
39
  LFM_450_MODEL_NAME = "LiquidAI/LFM2.5-VL-450M"
40
  LFM_16_MODEL_NAME = "LiquidAI/LFM2.5-VL-1.6B"
41
 
42
+ # ── Qwen3.5-4B ──────────────────────────────────────────
43
+ print(f"Loading Qwen3.5-4B model: {QWEN_4B_MODEL_NAME} on {DEVICE}...")
44
+ try:
45
+ qwen_4b_model = Qwen3_5ForConditionalGeneration.from_pretrained(
46
+ QWEN_4B_MODEL_NAME, torch_dtype=DTYPE, device_map=DEVICE,
47
+ ).eval()
48
+ qwen_4b_processor = AutoProcessor.from_pretrained(QWEN_4B_MODEL_NAME)
49
+ print("Qwen3.5-4B model loaded successfully.")
50
+ except Exception as e:
51
+ print(f"Warning: Qwen3.5-4B model loading failed. Error: {e}")
52
+ qwen_4b_model = None
53
+ qwen_4b_processor = None
54
+
55
  # ── Qwen3.5-2B ──────────────────────────────────────────
56
+ print(f"Loading Qwen3.5-2B model: {QWEN_2B_MODEL_NAME} on {DEVICE}...")
57
  try:
58
+ qwen_2b_model = Qwen3_5ForConditionalGeneration.from_pretrained(
59
+ QWEN_2B_MODEL_NAME, torch_dtype=DTYPE, device_map=DEVICE,
60
  ).eval()
61
+ qwen_2b_processor = AutoProcessor.from_pretrained(QWEN_2B_MODEL_NAME)
62
+ print("Qwen3.5-2B model loaded successfully.")
63
  except Exception as e:
64
+ print(f"Warning: Qwen3.5-2B model loading failed. Error: {e}")
65
+ qwen_2b_model = None
66
+ qwen_2b_processor = None
67
 
68
  # ── Qwen3-VL-2B-Instruct ────────────────────────────────
69
  print(f"Loading Qwen3-VL model: {QWEN_VL_MODEL_NAME} on {DEVICE}...")
 
132
  # --- Inference Generator (Streaming) ---
133
  @spaces.GPU(duration=120)
134
  def generate_inference_stream(
135
+ image: Image.Image, category: str, prompt: str, model_id: str = "qwen_4b"
136
  ):
137
  if category == "Query":
138
  full_prompt = prompt
 
145
  else:
146
  full_prompt = prompt
147
 
148
+ # ── Qwen3.5-4B ──────────────────────────────────────
149
+ if model_id == "qwen_4b":
150
+ if qwen_4b_model is None or qwen_4b_processor is None:
151
+ yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-4B model not loaded.'})}\n\n"
152
+ yield "data: [DONE]\n\n"
153
+ return
154
+
155
+ messages = [{"role": "user", "content": [
156
+ {"type": "image", "image": image},
157
+ {"type": "text", "text": full_prompt},
158
+ ]}]
159
+ text_input = qwen_4b_processor.apply_chat_template(
160
+ messages, tokenize=False, add_generation_prompt=True
161
+ )
162
+ inputs = qwen_4b_processor(
163
+ text=[text_input], images=[image], return_tensors="pt", padding=True
164
+ ).to(qwen_4b_model.device)
165
+ streamer = TextIteratorStreamer(
166
+ qwen_4b_processor.tokenizer,
167
+ skip_prompt=True, skip_special_tokens=True, timeout=120,
168
+ )
169
+ thread = threading.Thread(
170
+ target=qwen_4b_model.generate,
171
+ kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
172
+ use_cache=True, temperature=1.5, min_p=0.1),
173
+ )
174
+ thread.start()
175
+ for tok in streamer:
176
+ if tok:
177
+ yield f"data: {json.dumps({'chunk': tok})}\n\n"
178
+ thread.join()
179
+
180
+ # ── Qwen3.5-2B ──────────────────────────────────────
181
+ elif model_id == "qwen_2b":
182
+ if qwen_2b_model is None or qwen_2b_processor is None:
183
+ yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-2B model not loaded.'})}\n\n"
184
+ yield "data: [DONE]\n\n"
185
+ return
186
+
187
+ messages = [{"role": "user", "content": [
188
+ {"type": "image", "image": image},
189
+ {"type": "text", "text": full_prompt},
190
+ ]}]
191
+ text_input = qwen_2b_processor.apply_chat_template(
192
+ messages, tokenize=False, add_generation_prompt=True
193
+ )
194
+ inputs = qwen_2b_processor(
195
+ text=[text_input], images=[image], return_tensors="pt", padding=True
196
+ ).to(qwen_2b_model.device)
197
+ streamer = TextIteratorStreamer(
198
+ qwen_2b_processor.tokenizer,
199
+ skip_prompt=True, skip_special_tokens=True, timeout=120,
200
+ )
201
+ thread = threading.Thread(
202
+ target=qwen_2b_model.generate,
203
+ kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
204
+ use_cache=True, temperature=1.5, min_p=0.1),
205
+ )
206
+ thread.start()
207
+ for tok in streamer:
208
+ if tok:
209
+ yield f"data: {json.dumps({'chunk': tok})}\n\n"
210
+ thread.join()
211
+
212
  # ── Qwen3-VL ────────────────────────────────────────
213
+ elif model_id == "qwen_vl":
214
  if qwen_vl_model is None or qwen_vl_processor is None:
215
  yield f"data: {json.dumps({'chunk': '[Error] Qwen3-VL model not loaded.'})}\n\n"
216
  yield "data: [DONE]\n\n"
 
299
  yield f"data: {json.dumps({'chunk': tok})}\n\n"
300
  thread.join()
301
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
302
  yield "data: [DONE]\n\n"
303
 
304
 
 
308
  image: UploadFile = File(...),
309
  category: str = Form(...),
310
  prompt: str = Form(...),
311
+ model_id: str = Form("qwen_4b"),
312
  ):
313
  try:
314
  img_bytes = await image.read()
 
394
  /* ── Canvas ── */
395
  #canvas {
396
  position: relative;
397
+ width: 1360px;
398
  min-height: calc(100vh - 42px);
399
+ height: 900px;
400
  margin: 0 auto;
401
  }
402
 
 
424
  background: var(--node-bg);
425
  border: 1px solid var(--node-border);
426
  border-radius: 9px;
427
+ box-shadow: 0 8px 28px rgba(0,0,0,0.5);
428
  z-index: 10;
429
  display: flex; flex-direction: column;
430
  transition: box-shadow 0.2s;
 
432
  .node:hover {
433
  box-shadow: 0 8px 28px rgba(0,0,0,0.5), 0 0 0 1px rgba(124,106,247,0.3);
434
  }
 
435
  .node.fixed-height { height: 330px; }
436
 
437
  .node-header {
 
519
  cursor: pointer;
520
  transition: opacity 0.2s, transform 0.1s;
521
  display: flex; justify-content: center; align-items: center; gap: 8px;
522
+ letter-spacing: 0.04em; flex-shrink: 0;
523
  }
524
  button.run-btn:hover { opacity: 0.9; }
525
  button.run-btn:active { transform: scale(0.98); }
 
571
  box-shadow: 0 0 5px var(--accent2);
572
  }
573
 
574
+ /* ── Model badges ── */
575
  .model-badge {
576
  display: inline-block; padding: 2px 7px;
577
  border-radius: 4px; font-size: 9px; font-weight: 700;
578
  letter-spacing: 0.06em; text-transform: uppercase;
579
  }
580
+ .model-badge.q4b { background: rgba(255,200,80,0.15); color: #ffc850; border: 1px solid rgba(255,200,80,0.35); }
581
+ .model-badge.q2b { background: rgba(124,106,247,0.2); color: var(--accent); border: 1px solid rgba(124,106,247,0.3); }
582
+ .model-badge.qvl { background: rgba(255,150,50,0.15); color: #ff9632; border: 1px solid rgba(255,150,50,0.35); }
583
+ .model-badge.lfm450 { background: rgba(78,205,196,0.15); color: var(--accent2); border: 1px solid rgba(78,205,196,0.3); }
584
+ .model-badge.lfm16 { background: rgba(107,203,119,0.15); color: #6bcb77; border: 1px solid rgba(107,203,119,0.35); }
585
 
586
  .model-info-box {
587
  border-radius: 6px; padding: 9px;
 
598
  <span class="logo">MULTIMODAL EDGE</span>
599
  <span class="sep">|</span>
600
  <span class="sub">Node-Based Inference Canvas</span>
601
+ <span class="badge">v2.3 — PENTA MODEL</span>
602
  </div>
603
 
604
  <div id="canvas">
 
645
  <div>
646
  <label>Active Model</label>
647
  <select id="modelSelect">
648
+ <option value="qwen_4b">Qwen3.5-4B</option>
649
+ <option value="qwen_2b">Qwen3.5-2B</option>
650
  <option value="qwen_vl">Qwen3-VL-2B-Instruct</option>
651
  <option value="lfm_450">LFM2.5-VL-450M (LiquidAI)</option>
652
  <option value="lfm_16">LFM2.5-VL-1.6B (LiquidAI)</option>
653
  </select>
654
  </div>
655
  <div id="modelInfoBox" class="model-info-box"
656
+ style="background:rgba(255,200,80,0.07);border:1px solid rgba(255,200,80,0.3);">
657
+ <span class="model-badge q4b">QWEN 3.5 · 4B</span><br><br>
658
+ Qwen3.5 4B multimodal model by Alibaba Cloud.
659
+ Enhanced capacity over 2B — richer reasoning, better instruction following.
660
  </div>
661
  <div style="flex:1;"></div>
662
  </div>
 
827
  dotModel.classList.add('active');
828
 
829
  const MODEL_INFO = {
830
+ qwen_4b: {
831
+ html: `<span class="model-badge q4b">QWEN 3.5 · 4B</span><br><br>
832
+ Qwen3.5 4B multimodal model by Alibaba Cloud.
833
+ Enhanced capacity over 2B — richer reasoning &amp; better instruction following.`,
834
+ bg: 'rgba(255,200,80,0.07)',
835
+ border: 'rgba(255,200,80,0.30)',
836
+ },
837
+ qwen_2b: {
838
+ html: `<span class="model-badge q2b">QWEN 3.5 · 2B</span><br><br>
839
+ Qwen3.5 2B multimodal model by Alibaba Cloud.
840
+ Lightweight &amp; fast — ideal for quick Query, Caption, Point &amp; Detect tasks.`,
841
+ bg: 'rgba(124,106,247,0.07)',
842
+ border: 'rgba(124,106,247,0.25)',
843
  },
844
  qwen_vl: {
845
+ html: `<span class="model-badge qvl">QWEN3-VL · 2B</span><br><br>
846
+ Qwen3-VL-2B-Instruct — dedicated vision-language model by Alibaba Cloud.
847
+ Strong spatial grounding, OCR &amp; instruction-following.`,
848
+ bg: 'rgba(255,150,50,0.07)',
849
+ border: 'rgba(255,150,50,0.25)',
850
  },
851
  lfm_450: {
852
+ html: `<span class="model-badge lfm450">LFM · 450M</span><br><br>
853
+ LFM2.5-VL 450M by LiquidAI. Ultra-lightweight edge model
854
+ with solid grounding capabilities.`,
855
+ bg: 'rgba(78,205,196,0.07)',
856
+ border: 'rgba(78,205,196,0.25)',
857
  },
858
  lfm_16: {
859
+ html: `<span class="model-badge lfm16">LFM · 1.6B</span><br><br>
860
+ LFM2.5-VL 1.6B by LiquidAI. Larger liquid-state model offering
861
+ enhanced reasoning &amp; richer visual understanding.`,
862
+ bg: 'rgba(107,203,119,0.07)',
863
+ border: 'rgba(107,203,119,0.25)',
864
  },
865
  };
866
 
867
  modelSelect.onchange = () => {
868
  const info = MODEL_INFO[modelSelect.value];
869
  if (!info) return;
870
+ modelInfoBox.innerHTML = info.html;
871
+ modelInfoBox.style.background = info.bg;
872
+ modelInfoBox.style.borderColor = info.border;
873
+ modelInfoBox.style.border = `1px solid ${info.border}`;
874
  };
875
 
876
  // ══════════════════════════════════════════════
 
957
 
958
  // ── Bounding box ──
959
  let bbox = null;
960
+ if (item?.bbox_2d?.length === 4) bbox = item.bbox_2d;
961
  else if (item?.bbox?.length === 4) bbox = item.bbox;
962
  else if (Array.isArray(item) && item.length === 4 &&
963
  item.every(n => typeof n === 'number')) bbox = item;
 
967
  if (x1 <= 1 && y1 <= 1 && x2 <= 1 && y2 <= 1) {
968
  x1*=W; y1*=H; x2*=W; y2*=H;
969
  }
970
+ const bw = x2-x1, bh = y2-y1;
971
  const lbl = item?.label || `${i+1}`;
972
 
973
  gCtx.fillStyle = hexToRgba(col, 0.18);
 
987
 
988
  // ── Point ──
989
  let pt = null;
990
+ if (item?.point_2d?.length === 2) pt = item.point_2d;
991
  else if (item?.point?.length === 2) pt = item.point;
992
  else if (Array.isArray(item) && item.length === 2 &&
993
  item.every(n => typeof n === 'number')) pt = item;