prithivMLmods committed on
Commit
9013c20
·
verified ·
1 Parent(s): 237d494

update app

Browse files
Files changed (1) hide show
  1. app.py +210 -288
app.py CHANGED
@@ -425,10 +425,7 @@ def generate_inference_stream(
425
  messages, tokenize=False, add_generation_prompt=True
426
  )
427
  inputs = gemma4_e2b_processor(
428
- text=[text_input],
429
- images=[image],
430
- return_tensors="pt",
431
- padding=True,
432
  ).to(gemma4_e2b_model.device)
433
  streamer = TextIteratorStreamer(
434
  gemma4_e2b_processor.tokenizer,
@@ -590,16 +587,13 @@ async def homepage(request: Request):
590
  --wire: #2a2a4a;
591
  --wire-active: #7c6af7;
592
  }
593
-
594
  * { box-sizing: border-box; margin: 0; padding: 0; }
595
-
596
  html, body {
597
  min-height: 100%;
598
  background: var(--bg);
599
  color: var(--text);
600
  font-family: 'JetBrains Mono', monospace;
601
  }
602
-
603
  body {
604
  background-image:
605
  radial-gradient(circle at 20% 50%, rgba(124,106,247,0.04) 0%, transparent 50%),
@@ -607,10 +601,8 @@ async def homepage(request: Request):
607
  linear-gradient(var(--grid) 1px, transparent 1px),
608
  linear-gradient(90deg, var(--grid) 1px, transparent 1px);
609
  background-size: 100% 100%, 100% 100%, 24px 24px, 24px 24px;
610
- overflow-x: auto;
611
- overflow-y: auto;
612
  }
613
-
614
  /* ── Top Bar ── */
615
  .top-bar {
616
  position: sticky; top: 0; left: 0; right: 0;
@@ -621,9 +613,9 @@ async def homepage(request: Request):
621
  gap: 12px; z-index: 1000;
622
  backdrop-filter: blur(12px);
623
  }
624
- .top-bar .logo { font-size: 13px; font-weight: 700; color: var(--accent); letter-spacing: 0.05em; }
625
- .top-bar .sep { color: var(--node-border); }
626
- .top-bar .sub { font-size: 11px; color: var(--muted); }
627
  .top-bar .badge {
628
  margin-left: auto;
629
  background: rgba(124,106,247,0.15);
@@ -631,7 +623,6 @@ async def homepage(request: Request):
631
  padding: 3px 10px; border-radius: 20px;
632
  font-size: 10px; color: var(--accent);
633
  }
634
-
635
  /* ── Canvas ── */
636
  #canvas {
637
  position: relative;
@@ -640,7 +631,6 @@ async def homepage(request: Request):
640
  height: 900px;
641
  margin: 0 auto;
642
  }
643
-
644
  svg.wires {
645
  position: absolute; top: 0; left: 0;
646
  width: 100%; height: 100%;
@@ -657,17 +647,14 @@ async def homepage(request: Request):
657
  animation: flow 0.6s linear infinite;
658
  }
659
  @keyframes flow { to { stroke-dashoffset: -24; } }
660
-
661
  /* ── Nodes ── */
662
  .node {
663
- position: absolute;
664
- width: 295px;
665
  background: var(--node-bg);
666
  border: 1px solid var(--node-border);
667
  border-radius: 9px;
668
  box-shadow: 0 8px 28px rgba(0,0,0,0.5);
669
- z-index: 10;
670
- display: flex; flex-direction: column;
671
  transition: box-shadow 0.2s;
672
  }
673
  .node:hover {
@@ -675,7 +662,6 @@ async def homepage(request: Request):
675
  0 0 0 1px rgba(124,106,247,0.3);
676
  }
677
  .node.fixed-height { height: 340px; }
678
-
679
  .node-header {
680
  background: var(--node-header);
681
  padding: 7px 12px;
@@ -684,8 +670,7 @@ async def homepage(request: Request):
684
  font-size: 11px; font-weight: 700;
685
  cursor: grab;
686
  display: flex; justify-content: space-between; align-items: center;
687
- flex-shrink: 0;
688
- user-select: none;
689
  }
690
  .node-header:active { cursor: grabbing; }
691
  .node-header .id {
@@ -693,33 +678,27 @@ async def homepage(request: Request):
693
  background: rgba(255,255,255,0.04);
694
  padding: 2px 7px; border-radius: 4px;
695
  }
696
-
697
  .node-body {
698
  padding: 10px;
699
  display: flex; flex-direction: column; gap: 8px;
700
  flex: 1; overflow: hidden;
701
  }
702
-
703
  /* ── Ports ── */
704
  .port {
705
- position: absolute;
706
- width: 11px; height: 11px;
707
  background: var(--node-bg);
708
  border: 2px solid var(--port);
709
  border-radius: 50%; z-index: 30;
710
  }
711
  .port.out { right: -6px; }
712
  .port.in { left: -6px; }
713
-
714
  /* ── Labels ── */
715
  label {
716
  font-size: 10px; color: var(--muted);
717
  font-weight: 600; display: block; margin-bottom: 3px;
718
  letter-spacing: 0.07em; text-transform: uppercase;
719
  }
720
-
721
  input[type="file"] { display: none; }
722
-
723
  /* ── Upload Zone ── */
724
  .file-upload {
725
  border: 1.5px dashed var(--node-border);
@@ -736,58 +715,38 @@ async def homepage(request: Request):
736
  }
737
  .file-upload svg { opacity: 0.5; transition: opacity 0.2s; }
738
  .file-upload:hover svg { opacity: 0.9; }
739
-
740
  /* ── Preview wrapper ── */
741
  .preview-wrap {
742
- display: none;
743
- position: relative;
744
- border-radius: 7px;
745
- overflow: hidden;
746
- border: 1px solid var(--node-border);
747
- background: #000;
748
  }
749
  .preview-wrap.visible { display: block; }
750
-
751
- .img-preview {
752
- width: 100%;
753
- height: 170px;
754
- object-fit: contain;
755
- display: block;
756
- }
757
-
758
  /* ── Clear button ── */
759
  .clear-btn {
760
- position: absolute;
761
- top: 6px; right: 6px;
762
- width: 24px; height: 24px;
763
- border-radius: 50%;
764
  background: rgba(13,13,15,0.80);
765
  border: 1px solid var(--node-border);
766
- color: var(--accent3);
767
- cursor: pointer;
768
  display: flex; align-items: center; justify-content: center;
769
  transition: background 0.18s, border-color 0.18s, transform 0.12s;
770
- z-index: 20;
771
- backdrop-filter: blur(6px);
772
  }
773
  .clear-btn:hover {
774
  background: rgba(255,107,107,0.18);
775
- border-color: var(--accent3);
776
- transform: scale(1.08);
777
  }
778
  .clear-btn:active { transform: scale(0.95); }
779
  .clear-btn svg { pointer-events: none; }
780
-
781
  /* ── Filename chip ── */
782
  .img-chip {
783
- display: none;
784
- align-items: center; gap: 6px;
785
  background: rgba(124,106,247,0.08);
786
  border: 1px solid rgba(124,106,247,0.22);
787
- border-radius: 5px;
788
- padding: 4px 8px;
789
- font-size: 9px; color: var(--muted);
790
- overflow: hidden;
791
  }
792
  .img-chip.visible { display: flex; }
793
  .img-chip .chip-dot {
@@ -800,13 +759,9 @@ async def homepage(request: Request):
800
  white-space: nowrap; flex: 1;
801
  color: var(--text); font-size: 9px;
802
  }
803
- .img-chip .chip-size {
804
- color: var(--muted); flex-shrink: 0; font-size: 9px;
805
- }
806
-
807
  select, textarea {
808
- width: 100%;
809
- background: rgba(0,0,0,0.3);
810
  border: 1px solid var(--node-border);
811
  color: var(--text); padding: 7px 9px;
812
  border-radius: 5px; outline: none;
@@ -815,65 +770,47 @@ async def homepage(request: Request):
815
  }
816
  select:focus, textarea:focus { border-color: var(--accent); }
817
  select option { background: #1c1c26; }
818
-
819
  button.run-btn {
820
  background: linear-gradient(135deg, var(--accent), #9b59b6);
821
- color: #fff; border: none;
822
- padding: 8px; border-radius: 6px;
823
  font-weight: 700; font-size: 11px;
824
- font-family: 'JetBrains Mono', monospace;
825
- cursor: pointer;
826
  transition: opacity 0.2s, transform 0.1s;
827
  display: flex; justify-content: center; align-items: center; gap: 8px;
828
  letter-spacing: 0.04em; flex-shrink: 0;
829
  }
830
- button.run-btn:hover { opacity: 0.9; }
831
- button.run-btn:active { transform: scale(0.98); }
832
- button.run-btn:disabled {
833
- background: var(--node-border); cursor: not-allowed; color: #555;
834
- }
835
-
836
- /* ── Output node body layout ── */
837
  .output-node-body {
838
- padding: 10px;
839
- display: flex; flex-direction: column; gap: 6px;
840
- flex: 1; overflow: hidden;
841
  }
842
-
843
- /* ── Output header row ── */
844
  .output-header-row {
845
  display: flex; align-items: center;
846
- justify-content: space-between;
847
- flex-shrink: 0;
848
  }
849
-
850
  /* ── Copy button ── */
851
  .copy-btn {
852
  display: flex; align-items: center; gap: 5px;
853
  background: rgba(124,106,247,0.10);
854
  border: 1px solid rgba(124,106,247,0.25);
855
- border-radius: 5px;
856
- padding: 3px 8px;
857
  font-size: 9px; font-weight: 700;
858
  font-family: 'JetBrains Mono', monospace;
859
- color: var(--accent);
860
- cursor: pointer;
861
  letter-spacing: 0.05em;
862
  transition: background 0.18s, border-color 0.18s, transform 0.1s;
863
  flex-shrink: 0;
864
  }
865
- .copy-btn:hover {
866
- background: rgba(124,106,247,0.22);
867
- border-color: var(--accent);
868
- }
869
  .copy-btn:active { transform: scale(0.95); }
870
  .copy-btn.copied {
871
  background: rgba(78,205,196,0.15);
872
- border-color: var(--accent2);
873
- color: var(--accent2);
874
  }
875
  .copy-btn svg { pointer-events: none; flex-shrink: 0; }
876
-
877
  .output-box {
878
  background: rgba(0,0,0,0.4);
879
  border: 1px solid var(--node-border);
@@ -882,10 +819,8 @@ async def homepage(request: Request):
882
  font-size: 11px; line-height: 1.6;
883
  color: #c8c8e0; white-space: pre-wrap;
884
  user-select: text;
885
- font-family: 'JetBrains Mono', monospace;
886
- min-height: 0;
887
  }
888
-
889
  /* ── Grounding ── */
890
  .ground-canvas-wrap {
891
  position: relative; flex: 1;
@@ -893,34 +828,24 @@ async def homepage(request: Request):
893
  border-radius: 5px; overflow: hidden;
894
  background: #000; min-height: 0;
895
  }
896
- .ground-canvas-wrap canvas {
897
- width: 100%; height: 100%;
898
- object-fit: contain; display: block;
899
- }
900
  .ground-placeholder {
901
  position: absolute; inset: 0;
902
  display: flex; align-items: center; justify-content: center;
903
  font-size: 11px; color: var(--muted); text-align: center; padding: 10px;
904
  }
905
-
906
  .loader {
907
  width: 11px; height: 11px;
908
  border: 2px solid rgba(255,255,255,0.3);
909
  border-top-color: #fff; border-radius: 50%;
910
- animation: spin 0.7s linear infinite;
911
- display: none;
912
  }
913
  @keyframes spin { to { transform: rotate(360deg); } }
914
-
915
  .status-dot {
916
  width: 6px; height: 6px; border-radius: 50%;
917
  background: var(--muted); display: inline-block; margin-right: 6px;
918
  }
919
- .status-dot.active {
920
- background: var(--accent2);
921
- box-shadow: 0 0 5px var(--accent2);
922
- }
923
-
924
  /* ── Model badges ── */
925
  .model-badge {
926
  display: inline-block; padding: 2px 7px;
@@ -937,13 +862,10 @@ async def homepage(request: Request):
937
  .model-badge.lfm16 { background: rgba(107,203,119,0.15); color: #6bcb77; border: 1px solid rgba(107,203,119,0.35); }
938
  .model-badge.qunred { background: rgba(255,80,160,0.15); color: #ff50a0; border: 1px solid rgba(255,80,160,0.35); }
939
  .model-badge.q25vl3b { background: rgba(80,180,255,0.15); color: #50b4ff; border: 1px solid rgba(80,180,255,0.35); }
940
-
941
  .model-info-box {
942
  border-radius: 6px; padding: 9px;
943
- font-size: 10px; color: var(--muted); line-height: 1.55;
944
- flex-shrink: 0;
945
  }
946
-
947
  .canvas-footer { height: 36px; }
948
  </style>
949
  </head>
@@ -953,7 +875,7 @@ async def homepage(request: Request):
953
  <span class="logo">MULTIMODAL EDGE</span>
954
  <span class="sep">|</span>
955
  <span class="sub">Node-Based Inference Canvas</span>
956
- <span class="badge">v2.8 — DECA MODEL</span>
957
  </div>
958
 
959
  <div id="canvas">
@@ -1118,29 +1040,23 @@ async def homepage(request: Request):
1118
  // WIRE DRAWING
1119
  // ══════════════════════════════════════════════
1120
  const canvasEl = document.getElementById('canvas');
1121
-
1122
  function portCenter(id) {
1123
  const el = document.getElementById(id);
1124
- if (!el) return { x: 0, y: 0 };
1125
  const er = el.getBoundingClientRect();
1126
  const cr = canvasEl.getBoundingClientRect();
1127
- return {
1128
- x: er.left + er.width / 2 - cr.left,
1129
- y: er.top + er.height / 2 - cr.top
1130
- };
1131
  }
1132
-
1133
  function bezier(p1, p2) {
1134
  const dx = Math.abs(p2.x - p1.x) * 0.55;
1135
  return `M ${p1.x} ${p1.y} C ${p1.x+dx} ${p1.y}, ${p2.x-dx} ${p2.y}, ${p2.x} ${p2.y}`;
1136
  }
1137
-
1138
  function updateWires() {
1139
  const wires = [
1140
  ['wire-img-task', 'port-img-out', 'port-task-in'],
1141
- ['wire-model-task', 'port-model-out', 'port-task-in'],
1142
- ['wire-task-out', 'port-task-out', 'port-out-in'],
1143
- ['wire-task-gnd', 'port-task-out', 'port-gnd-in'],
1144
  ];
1145
  for (const [id, from, to] of wires) {
1146
  const el = document.getElementById(id);
@@ -1156,8 +1072,7 @@ document.querySelectorAll('.node').forEach(node => {
1156
  let drag = false, sx, sy, il, it;
1157
  header.addEventListener('mousedown', e => {
1158
  drag = true; sx = e.clientX; sy = e.clientY;
1159
- il = parseInt(node.style.left) || 0;
1160
- it = parseInt(node.style.top) || 0;
1161
  node.style.zIndex = 100; e.preventDefault();
1162
  });
1163
  document.addEventListener('mousemove', e => {
@@ -1166,11 +1081,8 @@ document.querySelectorAll('.node').forEach(node => {
1166
  node.style.top = `${it + e.clientY - sy}px`;
1167
  updateWires();
1168
  });
1169
- document.addEventListener('mouseup', () => {
1170
- if (drag) { drag = false; node.style.zIndex = 10; }
1171
- });
1172
  });
1173
-
1174
  window.addEventListener('resize', updateWires);
1175
  window.addEventListener('scroll', updateWires);
1176
  document.addEventListener('scroll', updateWires, true);
@@ -1180,7 +1092,6 @@ requestAnimationFrame(updateWires);
1180
  // FILE UPLOAD + CLEAR
1181
  // ══════════════════════════════════════════════
1182
  let currentFile = null;
1183
-
1184
  const dropZone = document.getElementById('dropZone');
1185
  const fileInput = document.getElementById('fileInput');
1186
  const previewWrap = document.getElementById('previewWrap');
@@ -1191,12 +1102,11 @@ const chipName = document.getElementById('chipName');
1191
  const chipSize = document.getElementById('chipSize');
1192
  const dotImg = document.getElementById('dot-img');
1193
 
1194
- function formatBytes(bytes) {
1195
- if (bytes < 1024) return bytes + ' B';
1196
- if (bytes < 1048576) return (bytes / 1024).toFixed(1) + ' KB';
1197
- return (bytes / 1048576).toFixed(1) + ' MB';
1198
  }
1199
-
1200
  function handleFile(file) {
1201
  if (!file || !file.type.startsWith('image/')) return;
1202
  currentFile = file;
@@ -1209,27 +1119,22 @@ function handleFile(file) {
1209
  dotImg.classList.add('active');
1210
  requestAnimationFrame(updateWires);
1211
  }
1212
-
1213
  function clearImage() {
1214
- currentFile = null;
1215
- imgPreview.src = '';
1216
  previewWrap.classList.remove('visible');
1217
  dropZone.style.display = '';
1218
  imgChip.classList.remove('visible');
1219
- chipName.textContent = '—';
1220
- chipSize.textContent = '';
1221
- fileInput.value = '';
1222
- dotImg.classList.remove('active');
1223
  requestAnimationFrame(updateWires);
1224
  }
1225
-
1226
  dropZone.onclick = () => fileInput.click();
1227
  fileInput.onchange = e => handleFile(e.target.files[0]);
1228
  clearBtn.onclick = e => { e.stopPropagation(); clearImage(); };
1229
- dropZone.ondragover = e => { e.preventDefault(); dropZone.style.borderColor = 'var(--accent)'; };
1230
- dropZone.ondragleave = () => { dropZone.style.borderColor = ''; };
1231
  dropZone.ondrop = e => {
1232
- e.preventDefault(); dropZone.style.borderColor = '';
1233
  if (e.dataTransfer.files.length) handleFile(e.dataTransfer.files[0]);
1234
  };
1235
 
@@ -1246,80 +1151,69 @@ const MODEL_INFO = {
1246
  html: `<span class="model-badge q4bunred">QWEN 3.5 · 4B UNREDACTED MAX</span><br><br>
1247
  Qwen3.5-4B-Unredacted-MAX by prithivMLmods. Uncensored fine-tune of Qwen3.5-4B
1248
  with extended instruction-following &amp; unrestricted reasoning.`,
1249
- bg: 'rgba(255,80,80,0.07)',
1250
- border: 'rgba(255,80,80,0.30)',
1251
  },
1252
  qwen_4b: {
1253
  html: `<span class="model-badge q4b">QWEN 3.5 · 4B</span><br><br>
1254
  Qwen3.5 4B multimodal model by Alibaba Cloud.
1255
- Enhanced capacity over 2B — richer reasoning &amp; better instruction following.`,
1256
- bg: 'rgba(255,200,80,0.07)',
1257
- border: 'rgba(255,200,80,0.30)',
1258
  },
1259
  qwen_2b: {
1260
  html: `<span class="model-badge q2b">QWEN 3.5 · 2B</span><br><br>
1261
  Qwen3.5 2B multimodal model by Alibaba Cloud.
1262
  Lightweight &amp; fast — ideal for quick Query, Caption, Point &amp; Detect tasks.`,
1263
- bg: 'rgba(124,106,247,0.07)',
1264
- border: 'rgba(124,106,247,0.25)',
1265
  },
1266
  qwen_vl_2b: {
1267
  html: `<span class="model-badge qvl2b">QWEN3-VL · 2B</span><br><br>
1268
  Qwen3-VL-2B-Instruct — dedicated vision-language model by Alibaba Cloud.
1269
  Strong spatial grounding, OCR &amp; instruction-following.`,
1270
- bg: 'rgba(255,150,50,0.07)',
1271
- border: 'rgba(255,150,50,0.25)',
1272
  },
1273
  qwen_vl_4b: {
1274
  html: `<span class="model-badge qvl4b">QWEN3-VL · 4B</span><br><br>
1275
  Qwen3-VL-4B-Instruct — enhanced vision-language model by Alibaba Cloud.
1276
  Superior spatial grounding, richer OCR &amp; stronger multi-step reasoning.`,
1277
- bg: 'rgba(255,100,80,0.07)',
1278
- border: 'rgba(255,100,80,0.25)',
1279
  },
1280
  lfm_450: {
1281
  html: `<span class="model-badge lfm450">LFM · 450M</span><br><br>
1282
  LFM2.5-VL 450M by LiquidAI. Ultra-lightweight edge model
1283
  with solid grounding capabilities.`,
1284
- bg: 'rgba(78,205,196,0.07)',
1285
- border: 'rgba(78,205,196,0.25)',
1286
  },
1287
  gemma4_e2b: {
1288
  html: `<span class="model-badge g4e2b">GEMMA 4 · E2B</span><br><br>
1289
  Gemma4-E2B-it by Google DeepMind. Efficient 2B multimodal model
1290
  with strong vision-language understanding &amp; instruction-following.`,
1291
- bg: 'rgba(66,197,107,0.07)',
1292
- border: 'rgba(66,197,107,0.25)',
1293
  },
1294
  lfm_16: {
1295
  html: `<span class="model-badge lfm16">LFM · 1.6B</span><br><br>
1296
  LFM2.5-VL 1.6B by LiquidAI. Larger liquid-state model offering
1297
  enhanced reasoning &amp; richer visual understanding.`,
1298
- bg: 'rgba(107,203,119,0.07)',
1299
- border: 'rgba(107,203,119,0.25)',
1300
  },
1301
  qwen_unredacted: {
1302
  html: `<span class="model-badge qunred">QWEN 3.5 · 2B UNREDACTED MAX</span><br><br>
1303
  Qwen3.5-2B-Unredacted-MAX by prithivMLmods. Fine-tuned variant of Qwen3.5-2B
1304
  with uncensored &amp; extended instruction-following capabilities.`,
1305
- bg: 'rgba(255,80,160,0.07)',
1306
- border: 'rgba(255,80,160,0.25)',
1307
  },
1308
  qwen25_vl_3b: {
1309
  html: `<span class="model-badge q25vl3b">QWEN 2.5-VL · 3B</span><br><br>
1310
  Qwen2.5-VL-3B-Instruct by Alibaba Cloud. Powerful 3B vision-language model
1311
  with strong grounding, OCR &amp; multi-task visual reasoning.`,
1312
- bg: 'rgba(80,180,255,0.07)',
1313
- border: 'rgba(80,180,255,0.25)',
1314
  },
1315
  };
1316
-
1317
  modelSelect.onchange = () => {
1318
  const info = MODEL_INFO[modelSelect.value];
1319
  if (!info) return;
1320
  modelInfoBox.innerHTML = info.html;
1321
  modelInfoBox.style.background = info.bg;
1322
- modelInfoBox.style.border = `1px solid ${info.border}`;
1323
  };
1324
 
1325
  // ══════════════════════════════════════════════
@@ -1327,71 +1221,105 @@ modelSelect.onchange = () => {
1327
  // ══════════════════════════════════════════════
1328
  const categorySelect = document.getElementById('categorySelect');
1329
  const promptInput = document.getElementById('promptInput');
1330
- const PLACEHOLDERS = {
1331
  Query: 'e.g., Count the total number of boats and describe the environment.',
1332
  Caption: 'e.g., short | normal | detailed',
1333
  Point: 'e.g., The gun held by the person.',
1334
  Detect: 'e.g., The headlight of the car.',
1335
  };
1336
- categorySelect.onchange = e => {
1337
- promptInput.placeholder = PLACEHOLDERS[e.target.value] || '';
1338
- };
1339
 
1340
  // ══════════════════════════════════════════════
1341
  // ROBUST JSON EXTRACTOR
1342
- // Strips <think>…</think> blocks then pulls
1343
- // the first JSON array or object from the text.
 
 
 
 
 
 
 
1344
  // ══════════════════════════════════════════════
1345
  function extractGroundingJSON(raw) {
1346
- // 1. Remove <think>…</think> blocks
1347
- let text = raw.replace(/<think>[\s\S]*?<\/think>/gi, '');
 
 
 
 
 
 
 
1348
 
1349
- // 2. Strip markdown code fences
1350
- text = text.replace(/```(?:json)?\\s*/gi, '').replace(/```/g, '');
 
 
 
1351
 
 
 
1352
  text = text.trim();
1353
 
1354
- // 3. Try JSON array first [ … ]
1355
- const arrIdx = text.indexOf('[');
1356
- if (arrIdx !== -1) {
1357
  let depth = 0, inStr = false, esc = false;
1358
- for (let i = arrIdx; i < text.length; i++) {
1359
- const c = text[i];
1360
- if (esc) { esc = false; continue; }
1361
  if (c === '\\\\') { esc = true; continue; }
1362
- if (c === '"') { inStr = !inStr; continue; }
1363
- if (inStr) continue;
1364
- if (c === '[') depth++;
1365
- if (c === ']') {
1366
  depth--;
1367
  if (depth === 0) {
1368
- try { return JSON.parse(text.slice(arrIdx, i + 1)); } catch(_) { break; }
 
1369
  }
1370
  }
1371
  }
 
1372
  }
1373
 
1374
- // 4. Try JSON object { … }
1375
- const objIdx = text.indexOf('{');
1376
- if (objIdx !== -1) {
1377
- let depth = 0, inStr = false, esc = false;
1378
- for (let i = objIdx; i < text.length; i++) {
1379
- const c = text[i];
1380
- if (esc) { esc = false; continue; }
1381
- if (c === '\\\\') { esc = true; continue; }
1382
- if (c === '"') { inStr = !inStr; continue; }
1383
- if (inStr) continue;
1384
- if (c === '{') depth++;
1385
- if (c === '}') {
1386
- depth--;
1387
- if (depth === 0) {
1388
- try { return JSON.parse(text.slice(objIdx, i + 1)); } catch(_) { break; }
1389
- }
1390
- }
1391
- }
 
1392
  }
1393
 
1394
- // 5. Last resort
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1395
  try { return JSON.parse(text); } catch(_) {}
1396
  return null;
1397
  }
@@ -1403,10 +1331,7 @@ const groundCanvas = document.getElementById('groundCanvas');
1403
  const groundPlaceholder = document.getElementById('groundPlaceholder');
1404
  const gCtx = groundCanvas.getContext('2d');
1405
 
1406
- const PALETTE = [
1407
- '#4ecdc4','#7c6af7','#ff6b6b','#ffd93d',
1408
- '#6bcb77','#ff922b','#cc5de8','#339af0'
1409
- ];
1410
 
1411
  function hexToRgba(hex, alpha) {
1412
  const r = parseInt(hex.slice(1,3),16);
@@ -1414,21 +1339,20 @@ function hexToRgba(hex, alpha) {
1414
  const b = parseInt(hex.slice(5,7),16);
1415
  return `rgba(${r},${g},${b},${alpha})`;
1416
  }
1417
-
1418
  function roundRect(ctx, x, y, w, h, r) {
1419
  ctx.beginPath();
1420
- ctx.moveTo(x+r, y);
1421
- ctx.lineTo(x+w-r, y); ctx.quadraticCurveTo(x+w, y, x+w, y+r);
1422
- ctx.lineTo(x+w, y+h-r); ctx.quadraticCurveTo(x+w, y+h, x+w-r, y+h);
1423
- ctx.lineTo(x+r, y+h); ctx.quadraticCurveTo(x, y+h, x, y+h-r);
1424
- ctx.lineTo(x, y+r); ctx.quadraticCurveTo(x, y, x+r, y);
1425
  ctx.closePath();
1426
  }
1427
 
1428
  function drawGrounding(imgSrc, rawText) {
1429
  const parsed = extractGroundingJSON(rawText);
1430
  if (!parsed) {
1431
- console.warn('Grounding: could not extract JSON from output:', rawText);
1432
  return;
1433
  }
1434
 
@@ -1440,17 +1364,17 @@ function drawGrounding(imgSrc, rawText) {
1440
  gCtx.drawImage(img, 0, 0);
1441
  groundPlaceholder.style.display = 'none';
1442
 
1443
- const lw = Math.max(2, W / 200);
1444
- const fs = Math.max(12, W / 40);
1445
  gCtx.lineWidth = lw;
1446
- gCtx.font = `bold ${fs}px JetBrains Mono, monospace`;
1447
 
1448
  const items = Array.isArray(parsed) ? parsed : [parsed];
1449
 
1450
  items.forEach((item, i) => {
1451
  const col = PALETTE[i % PALETTE.length];
1452
 
1453
- // ── Bounding box ─────────────────────────────
1454
  let bbox = null;
1455
  if (Array.isArray(item?.bbox_2d) && item.bbox_2d.length === 4)
1456
  bbox = item.bbox_2d;
@@ -1461,30 +1385,35 @@ function drawGrounding(imgSrc, rawText) {
1461
  bbox = item;
1462
 
1463
  if (bbox) {
1464
- let [x1, y1, x2, y2] = bbox.map(Number);
 
1465
  if (x1 <= 1 && y1 <= 1 && x2 <= 1 && y2 <= 1) {
1466
- x1 *= W; y1 *= H; x2 *= W; y2 *= H;
1467
  }
1468
- const bw = x2 - x1, bh = y2 - y1;
1469
- const lbl = item?.label ?? `obj ${i + 1}`;
 
 
 
 
1470
 
1471
- gCtx.fillStyle = hexToRgba(col, 0.18);
1472
- gCtx.fillRect(x1, y1, bw, bh);
1473
  gCtx.strokeStyle = col;
1474
- gCtx.strokeRect(x1, y1, bw, bh);
 
1475
 
1476
  const tw = gCtx.measureText(lbl).width;
1477
- const ph = fs * 1.4, pw = tw + 10;
1478
- const lx = x1, ly = Math.max(0, y1 - ph);
1479
  gCtx.fillStyle = col;
1480
- roundRect(gCtx, lx, ly, pw, ph, 4);
1481
- gCtx.fill();
1482
  gCtx.fillStyle = '#fff';
1483
- gCtx.fillText(lbl, lx + 5, ly + ph * 0.76);
1484
  return;
1485
  }
1486
 
1487
- // ── Point ────────────────────────────────────
1488
  let pt = null;
1489
  if (Array.isArray(item?.point_2d) && item.point_2d.length === 2)
1490
  pt = item.point_2d;
@@ -1495,28 +1424,28 @@ function drawGrounding(imgSrc, rawText) {
1495
  pt = item;
1496
 
1497
  if (pt) {
1498
- let [x, y] = pt.map(Number);
1499
- if (x <= 1 && y <= 1) { x *= W; y *= H; }
1500
- const r = Math.max(8, W / 60);
1501
- const lbl = item?.label ?? `pt ${i + 1}`;
1502
 
1503
  gCtx.beginPath();
1504
- gCtx.arc(x, y, r * 1.7, 0, Math.PI * 2);
1505
- gCtx.fillStyle = hexToRgba(col, 0.15);
1506
- gCtx.fill();
1507
 
1508
  gCtx.beginPath();
1509
- gCtx.arc(x, y, r, 0, Math.PI * 2);
1510
- gCtx.fillStyle = col;
1511
- gCtx.fill();
1512
  gCtx.strokeStyle = '#fff';
 
1513
  gCtx.stroke();
1514
 
1515
  gCtx.fillStyle = '#fff';
1516
- gCtx.fillText(lbl, x + r + 4, y + fs * 0.4);
1517
  }
1518
  });
1519
  };
 
1520
  img.src = imgSrc;
1521
  }
1522
 
@@ -1527,6 +1456,17 @@ const copyBtn = document.getElementById('copyBtn');
1527
  const outputBox = document.getElementById('outputBox');
1528
  let copyTimer = null;
1529
 
 
 
 
 
 
 
 
 
 
 
 
1530
  copyBtn.onclick = () => {
1531
  const txt = outputBox.innerText || '';
1532
  if (!txt || txt === 'Results will stream here...') return;
@@ -1539,24 +1479,12 @@ copyBtn.onclick = () => {
1539
  <polyline points="20 6 9 17 4 12"/>
1540
  </svg> COPIED`;
1541
  clearTimeout(copyTimer);
1542
- copyTimer = setTimeout(() => {
1543
- copyBtn.classList.remove('copied');
1544
- copyBtn.innerHTML = `
1545
- <svg width="11" height="11" viewBox="0 0 24 24" fill="none"
1546
- stroke="currentColor" stroke-width="2.2"
1547
- stroke-linecap="round" stroke-linejoin="round">
1548
- <rect x="9" y="9" width="13" height="13" rx="2" ry="2"/>
1549
- <path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/>
1550
- </svg> COPY`;
1551
- }, 2000);
1552
  }).catch(() => {
1553
  const ta = document.createElement('textarea');
1554
- ta.value = txt;
1555
- ta.style.position = 'fixed'; ta.style.opacity = '0';
1556
- document.body.appendChild(ta);
1557
- ta.select();
1558
- document.execCommand('copy');
1559
- document.body.removeChild(ta);
1560
  });
1561
  };
1562
 
@@ -1585,16 +1513,7 @@ runBtn.onclick = async () => {
1585
  dotOut.classList.remove('active');
1586
  dotGnd.classList.remove('active');
1587
  allWires.forEach(id => document.getElementById(id)?.classList.add('active'));
1588
-
1589
- // Reset copy button
1590
- copyBtn.classList.remove('copied');
1591
- copyBtn.innerHTML = `
1592
- <svg width="11" height="11" viewBox="0 0 24 24" fill="none"
1593
- stroke="currentColor" stroke-width="2.2"
1594
- stroke-linecap="round" stroke-linejoin="round">
1595
- <rect x="9" y="9" width="13" height="13" rx="2" ry="2"/>
1596
- <path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/>
1597
- </svg> COPY`;
1598
 
1599
  const formData = new FormData();
1600
  formData.append('image', currentFile);
@@ -1603,9 +1522,10 @@ runBtn.onclick = async () => {
1603
  formData.append('model_id', modelSelect.value);
1604
 
1605
  let fullText = '';
 
1606
 
1607
  try {
1608
- const response = await fetch('/api/run', { method: 'POST', body: formData });
1609
  if (!response.ok) {
1610
  const err = await response.json();
1611
  throw new Error(err.error || 'Execution failed.');
@@ -1618,12 +1538,12 @@ runBtn.onclick = async () => {
1618
  while (true) {
1619
  const { value, done } = await reader.read();
1620
  if (done) break;
1621
- buffer += decoder.decode(value, { stream: true });
1622
  const lines = buffer.split('\\n\\n');
1623
  buffer = lines.pop();
1624
  for (const line of lines) {
1625
  if (!line.startsWith('data: ')) continue;
1626
- const payload = line.replace('data: ', '');
1627
  if (payload === '[DONE]') break;
1628
  try {
1629
  const data = JSON.parse(payload);
@@ -1638,13 +1558,15 @@ runBtn.onclick = async () => {
1638
 
1639
  dotOut.classList.add('active');
1640
 
1641
- // Attempt grounding overlay for Point / Detect
1642
  const cat = categorySelect.value;
1643
  if ((cat === 'Point' || cat === 'Detect') && fullText.trim()) {
1644
  const parsed = extractGroundingJSON(fullText);
1645
- if (parsed) {
1646
  dotGnd.classList.add('active');
1647
- drawGrounding(URL.createObjectURL(currentFile), fullText);
 
 
1648
  }
1649
  }
1650
 
 
425
  messages, tokenize=False, add_generation_prompt=True
426
  )
427
  inputs = gemma4_e2b_processor(
428
+ text=[text_input], images=[image], return_tensors="pt", padding=True,
 
 
 
429
  ).to(gemma4_e2b_model.device)
430
  streamer = TextIteratorStreamer(
431
  gemma4_e2b_processor.tokenizer,
 
587
  --wire: #2a2a4a;
588
  --wire-active: #7c6af7;
589
  }
 
590
  * { box-sizing: border-box; margin: 0; padding: 0; }
 
591
  html, body {
592
  min-height: 100%;
593
  background: var(--bg);
594
  color: var(--text);
595
  font-family: 'JetBrains Mono', monospace;
596
  }
 
597
  body {
598
  background-image:
599
  radial-gradient(circle at 20% 50%, rgba(124,106,247,0.04) 0%, transparent 50%),
 
601
  linear-gradient(var(--grid) 1px, transparent 1px),
602
  linear-gradient(90deg, var(--grid) 1px, transparent 1px);
603
  background-size: 100% 100%, 100% 100%, 24px 24px, 24px 24px;
604
+ overflow-x: auto; overflow-y: auto;
 
605
  }
 
606
  /* ── Top Bar ── */
607
  .top-bar {
608
  position: sticky; top: 0; left: 0; right: 0;
 
613
  gap: 12px; z-index: 1000;
614
  backdrop-filter: blur(12px);
615
  }
616
+ .top-bar .logo { font-size: 13px; font-weight: 700; color: var(--accent); letter-spacing: 0.05em; }
617
+ .top-bar .sep { color: var(--node-border); }
618
+ .top-bar .sub { font-size: 11px; color: var(--muted); }
619
  .top-bar .badge {
620
  margin-left: auto;
621
  background: rgba(124,106,247,0.15);
 
623
  padding: 3px 10px; border-radius: 20px;
624
  font-size: 10px; color: var(--accent);
625
  }
 
626
  /* ── Canvas ── */
627
  #canvas {
628
  position: relative;
 
631
  height: 900px;
632
  margin: 0 auto;
633
  }
 
634
  svg.wires {
635
  position: absolute; top: 0; left: 0;
636
  width: 100%; height: 100%;
 
647
  animation: flow 0.6s linear infinite;
648
  }
649
  @keyframes flow { to { stroke-dashoffset: -24; } }
 
650
  /* ── Nodes ── */
651
  .node {
652
+ position: absolute; width: 295px;
 
653
  background: var(--node-bg);
654
  border: 1px solid var(--node-border);
655
  border-radius: 9px;
656
  box-shadow: 0 8px 28px rgba(0,0,0,0.5);
657
+ z-index: 10; display: flex; flex-direction: column;
 
658
  transition: box-shadow 0.2s;
659
  }
660
  .node:hover {
 
662
  0 0 0 1px rgba(124,106,247,0.3);
663
  }
664
  .node.fixed-height { height: 340px; }
 
665
  .node-header {
666
  background: var(--node-header);
667
  padding: 7px 12px;
 
670
  font-size: 11px; font-weight: 700;
671
  cursor: grab;
672
  display: flex; justify-content: space-between; align-items: center;
673
+ flex-shrink: 0; user-select: none;
 
674
  }
675
  .node-header:active { cursor: grabbing; }
676
  .node-header .id {
 
678
  background: rgba(255,255,255,0.04);
679
  padding: 2px 7px; border-radius: 4px;
680
  }
 
681
  .node-body {
682
  padding: 10px;
683
  display: flex; flex-direction: column; gap: 8px;
684
  flex: 1; overflow: hidden;
685
  }
 
686
  /* ── Ports ── */
687
  .port {
688
+ position: absolute; width: 11px; height: 11px;
 
689
  background: var(--node-bg);
690
  border: 2px solid var(--port);
691
  border-radius: 50%; z-index: 30;
692
  }
693
  .port.out { right: -6px; }
694
  .port.in { left: -6px; }
 
695
  /* ── Labels ── */
696
  label {
697
  font-size: 10px; color: var(--muted);
698
  font-weight: 600; display: block; margin-bottom: 3px;
699
  letter-spacing: 0.07em; text-transform: uppercase;
700
  }
 
701
  input[type="file"] { display: none; }
 
702
  /* ── Upload Zone ── */
703
  .file-upload {
704
  border: 1.5px dashed var(--node-border);
 
715
  }
716
  .file-upload svg { opacity: 0.5; transition: opacity 0.2s; }
717
  .file-upload:hover svg { opacity: 0.9; }
 
718
  /* ── Preview wrapper ── */
719
  .preview-wrap {
720
+ display: none; position: relative;
721
+ border-radius: 7px; overflow: hidden;
722
+ border: 1px solid var(--node-border); background: #000;
 
 
 
723
  }
724
  .preview-wrap.visible { display: block; }
725
+ .img-preview { width: 100%; height: 170px; object-fit: contain; display: block; }
 
 
 
 
 
 
 
726
  /* ── Clear button ── */
727
  .clear-btn {
728
+ position: absolute; top: 6px; right: 6px;
729
+ width: 24px; height: 24px; border-radius: 50%;
 
 
730
  background: rgba(13,13,15,0.80);
731
  border: 1px solid var(--node-border);
732
+ color: var(--accent3); cursor: pointer;
 
733
  display: flex; align-items: center; justify-content: center;
734
  transition: background 0.18s, border-color 0.18s, transform 0.12s;
735
+ z-index: 20; backdrop-filter: blur(6px);
 
736
  }
737
  .clear-btn:hover {
738
  background: rgba(255,107,107,0.18);
739
+ border-color: var(--accent3); transform: scale(1.08);
 
740
  }
741
  .clear-btn:active { transform: scale(0.95); }
742
  .clear-btn svg { pointer-events: none; }
 
743
  /* ── Filename chip ── */
744
  .img-chip {
745
+ display: none; align-items: center; gap: 6px;
 
746
  background: rgba(124,106,247,0.08);
747
  border: 1px solid rgba(124,106,247,0.22);
748
+ border-radius: 5px; padding: 4px 8px;
749
+ font-size: 9px; color: var(--muted); overflow: hidden;
 
 
750
  }
751
  .img-chip.visible { display: flex; }
752
  .img-chip .chip-dot {
 
759
  white-space: nowrap; flex: 1;
760
  color: var(--text); font-size: 9px;
761
  }
762
+ .img-chip .chip-size { color: var(--muted); flex-shrink: 0; font-size: 9px; }
 
 
 
763
  select, textarea {
764
+ width: 100%; background: rgba(0,0,0,0.3);
 
765
  border: 1px solid var(--node-border);
766
  color: var(--text); padding: 7px 9px;
767
  border-radius: 5px; outline: none;
 
770
  }
771
  select:focus, textarea:focus { border-color: var(--accent); }
772
  select option { background: #1c1c26; }
 
773
  button.run-btn {
774
  background: linear-gradient(135deg, var(--accent), #9b59b6);
775
+ color: #fff; border: none; padding: 8px; border-radius: 6px;
 
776
  font-weight: 700; font-size: 11px;
777
+ font-family: 'JetBrains Mono', monospace; cursor: pointer;
 
778
  transition: opacity 0.2s, transform 0.1s;
779
  display: flex; justify-content: center; align-items: center; gap: 8px;
780
  letter-spacing: 0.04em; flex-shrink: 0;
781
  }
782
+ button.run-btn:hover { opacity: 0.9; }
783
+ button.run-btn:active { transform: scale(0.98); }
784
+ button.run-btn:disabled { background: var(--node-border); cursor: not-allowed; color: #555; }
785
+ /* ── Output node ── */
 
 
 
786
  .output-node-body {
787
+ padding: 10px; display: flex; flex-direction: column;
788
+ gap: 6px; flex: 1; overflow: hidden;
 
789
  }
 
 
790
  .output-header-row {
791
  display: flex; align-items: center;
792
+ justify-content: space-between; flex-shrink: 0;
 
793
  }
 
794
  /* ── Copy button ── */
795
  .copy-btn {
796
  display: flex; align-items: center; gap: 5px;
797
  background: rgba(124,106,247,0.10);
798
  border: 1px solid rgba(124,106,247,0.25);
799
+ border-radius: 5px; padding: 3px 8px;
 
800
  font-size: 9px; font-weight: 700;
801
  font-family: 'JetBrains Mono', monospace;
802
+ color: var(--accent); cursor: pointer;
 
803
  letter-spacing: 0.05em;
804
  transition: background 0.18s, border-color 0.18s, transform 0.1s;
805
  flex-shrink: 0;
806
  }
807
+ .copy-btn:hover { background: rgba(124,106,247,0.22); border-color: var(--accent); }
 
 
 
808
  .copy-btn:active { transform: scale(0.95); }
809
  .copy-btn.copied {
810
  background: rgba(78,205,196,0.15);
811
+ border-color: var(--accent2); color: var(--accent2);
 
812
  }
813
  .copy-btn svg { pointer-events: none; flex-shrink: 0; }
 
814
  .output-box {
815
  background: rgba(0,0,0,0.4);
816
  border: 1px solid var(--node-border);
 
819
  font-size: 11px; line-height: 1.6;
820
  color: #c8c8e0; white-space: pre-wrap;
821
  user-select: text;
822
+ font-family: 'JetBrains Mono', monospace; min-height: 0;
 
823
  }
 
824
  /* ── Grounding ── */
825
  .ground-canvas-wrap {
826
  position: relative; flex: 1;
 
828
  border-radius: 5px; overflow: hidden;
829
  background: #000; min-height: 0;
830
  }
831
+ .ground-canvas-wrap canvas { width: 100%; height: 100%; object-fit: contain; display: block; }
 
 
 
832
  .ground-placeholder {
833
  position: absolute; inset: 0;
834
  display: flex; align-items: center; justify-content: center;
835
  font-size: 11px; color: var(--muted); text-align: center; padding: 10px;
836
  }
 
837
  .loader {
838
  width: 11px; height: 11px;
839
  border: 2px solid rgba(255,255,255,0.3);
840
  border-top-color: #fff; border-radius: 50%;
841
+ animation: spin 0.7s linear infinite; display: none;
 
842
  }
843
  @keyframes spin { to { transform: rotate(360deg); } }
 
844
  .status-dot {
845
  width: 6px; height: 6px; border-radius: 50%;
846
  background: var(--muted); display: inline-block; margin-right: 6px;
847
  }
848
+ .status-dot.active { background: var(--accent2); box-shadow: 0 0 5px var(--accent2); }
 
 
 
 
849
  /* ── Model badges ── */
850
  .model-badge {
851
  display: inline-block; padding: 2px 7px;
 
862
  .model-badge.lfm16 { background: rgba(107,203,119,0.15); color: #6bcb77; border: 1px solid rgba(107,203,119,0.35); }
863
  .model-badge.qunred { background: rgba(255,80,160,0.15); color: #ff50a0; border: 1px solid rgba(255,80,160,0.35); }
864
  .model-badge.q25vl3b { background: rgba(80,180,255,0.15); color: #50b4ff; border: 1px solid rgba(80,180,255,0.35); }
 
865
  .model-info-box {
866
  border-radius: 6px; padding: 9px;
867
+ font-size: 10px; color: var(--muted); line-height: 1.55; flex-shrink: 0;
 
868
  }
 
869
  .canvas-footer { height: 36px; }
870
  </style>
871
  </head>
 
875
  <span class="logo">MULTIMODAL EDGE</span>
876
  <span class="sep">|</span>
877
  <span class="sub">Node-Based Inference Canvas</span>
878
+ <span class="badge">10x Vision Models</span>
879
  </div>
880
 
881
  <div id="canvas">
 
1040
  // WIRE DRAWING
1041
  // ══════════════════════════════════════════════
1042
  const canvasEl = document.getElementById('canvas');
 
1043
  function portCenter(id) {
1044
  const el = document.getElementById(id);
1045
+ if (!el) return { x:0, y:0 };
1046
  const er = el.getBoundingClientRect();
1047
  const cr = canvasEl.getBoundingClientRect();
1048
+ return { x: er.left + er.width/2 - cr.left, y: er.top + er.height/2 - cr.top };
 
 
 
1049
  }
 
1050
  function bezier(p1, p2) {
1051
  const dx = Math.abs(p2.x - p1.x) * 0.55;
1052
  return `M ${p1.x} ${p1.y} C ${p1.x+dx} ${p1.y}, ${p2.x-dx} ${p2.y}, ${p2.x} ${p2.y}`;
1053
  }
 
1054
  function updateWires() {
1055
  const wires = [
1056
  ['wire-img-task', 'port-img-out', 'port-task-in'],
1057
+ ['wire-model-task', 'port-model-out','port-task-in'],
1058
+ ['wire-task-out', 'port-task-out', 'port-out-in'],
1059
+ ['wire-task-gnd', 'port-task-out', 'port-gnd-in'],
1060
  ];
1061
  for (const [id, from, to] of wires) {
1062
  const el = document.getElementById(id);
 
1072
  let drag = false, sx, sy, il, it;
1073
  header.addEventListener('mousedown', e => {
1074
  drag = true; sx = e.clientX; sy = e.clientY;
1075
+ il = parseInt(node.style.left)||0; it = parseInt(node.style.top)||0;
 
1076
  node.style.zIndex = 100; e.preventDefault();
1077
  });
1078
  document.addEventListener('mousemove', e => {
 
1081
  node.style.top = `${it + e.clientY - sy}px`;
1082
  updateWires();
1083
  });
1084
+ document.addEventListener('mouseup', () => { if (drag) { drag=false; node.style.zIndex=10; } });
 
 
1085
  });
 
1086
  window.addEventListener('resize', updateWires);
1087
  window.addEventListener('scroll', updateWires);
1088
  document.addEventListener('scroll', updateWires, true);
 
1092
  // FILE UPLOAD + CLEAR
1093
  // ══════════════════════════════════════════════
1094
  let currentFile = null;
 
1095
  const dropZone = document.getElementById('dropZone');
1096
  const fileInput = document.getElementById('fileInput');
1097
  const previewWrap = document.getElementById('previewWrap');
 
1102
  const chipSize = document.getElementById('chipSize');
1103
  const dotImg = document.getElementById('dot-img');
1104
 
1105
+ function formatBytes(b) {
1106
+ if (b < 1024) return b + ' B';
1107
+ if (b < 1048576) return (b/1024).toFixed(1) + ' KB';
1108
+ return (b/1048576).toFixed(1) + ' MB';
1109
  }
 
1110
  function handleFile(file) {
1111
  if (!file || !file.type.startsWith('image/')) return;
1112
  currentFile = file;
 
1119
  dotImg.classList.add('active');
1120
  requestAnimationFrame(updateWires);
1121
  }
 
1122
  function clearImage() {
1123
+ currentFile = null; imgPreview.src = '';
 
1124
  previewWrap.classList.remove('visible');
1125
  dropZone.style.display = '';
1126
  imgChip.classList.remove('visible');
1127
+ chipName.textContent = '—'; chipSize.textContent = '';
1128
+ fileInput.value = ''; dotImg.classList.remove('active');
 
 
1129
  requestAnimationFrame(updateWires);
1130
  }
 
1131
  dropZone.onclick = () => fileInput.click();
1132
  fileInput.onchange = e => handleFile(e.target.files[0]);
1133
  clearBtn.onclick = e => { e.stopPropagation(); clearImage(); };
1134
+ dropZone.ondragover = e => { e.preventDefault(); dropZone.style.borderColor='var(--accent)'; };
1135
+ dropZone.ondragleave = () => { dropZone.style.borderColor=''; };
1136
  dropZone.ondrop = e => {
1137
+ e.preventDefault(); dropZone.style.borderColor='';
1138
  if (e.dataTransfer.files.length) handleFile(e.dataTransfer.files[0]);
1139
  };
1140
 
 
1151
  html: `<span class="model-badge q4bunred">QWEN 3.5 · 4B UNREDACTED MAX</span><br><br>
1152
  Qwen3.5-4B-Unredacted-MAX by prithivMLmods. Uncensored fine-tune of Qwen3.5-4B
1153
  with extended instruction-following &amp; unrestricted reasoning.`,
1154
+ bg: 'rgba(255,80,80,0.07)', border: 'rgba(255,80,80,0.30)',
 
1155
  },
1156
  qwen_4b: {
1157
  html: `<span class="model-badge q4b">QWEN 3.5 · 4B</span><br><br>
1158
  Qwen3.5 4B multimodal model by Alibaba Cloud.
1159
+ Enhanced capacity — richer reasoning &amp; better instruction following.`,
1160
+ bg: 'rgba(255,200,80,0.07)', border: 'rgba(255,200,80,0.30)',
 
1161
  },
1162
  qwen_2b: {
1163
  html: `<span class="model-badge q2b">QWEN 3.5 · 2B</span><br><br>
1164
  Qwen3.5 2B multimodal model by Alibaba Cloud.
1165
  Lightweight &amp; fast — ideal for quick Query, Caption, Point &amp; Detect tasks.`,
1166
+ bg: 'rgba(124,106,247,0.07)', border: 'rgba(124,106,247,0.25)',
 
1167
  },
1168
  qwen_vl_2b: {
1169
  html: `<span class="model-badge qvl2b">QWEN3-VL · 2B</span><br><br>
1170
  Qwen3-VL-2B-Instruct — dedicated vision-language model by Alibaba Cloud.
1171
  Strong spatial grounding, OCR &amp; instruction-following.`,
1172
+ bg: 'rgba(255,150,50,0.07)', border: 'rgba(255,150,50,0.25)',
 
1173
  },
1174
  qwen_vl_4b: {
1175
  html: `<span class="model-badge qvl4b">QWEN3-VL · 4B</span><br><br>
1176
  Qwen3-VL-4B-Instruct — enhanced vision-language model by Alibaba Cloud.
1177
  Superior spatial grounding, richer OCR &amp; stronger multi-step reasoning.`,
1178
+ bg: 'rgba(255,100,80,0.07)', border: 'rgba(255,100,80,0.25)',
 
1179
  },
1180
  lfm_450: {
1181
  html: `<span class="model-badge lfm450">LFM · 450M</span><br><br>
1182
  LFM2.5-VL 450M by LiquidAI. Ultra-lightweight edge model
1183
  with solid grounding capabilities.`,
1184
+ bg: 'rgba(78,205,196,0.07)', border: 'rgba(78,205,196,0.25)',
 
1185
  },
1186
  gemma4_e2b: {
1187
  html: `<span class="model-badge g4e2b">GEMMA 4 · E2B</span><br><br>
1188
  Gemma4-E2B-it by Google DeepMind. Efficient 2B multimodal model
1189
  with strong vision-language understanding &amp; instruction-following.`,
1190
+ bg: 'rgba(66,197,107,0.07)', border: 'rgba(66,197,107,0.25)',
 
1191
  },
1192
  lfm_16: {
1193
  html: `<span class="model-badge lfm16">LFM · 1.6B</span><br><br>
1194
  LFM2.5-VL 1.6B by LiquidAI. Larger liquid-state model offering
1195
  enhanced reasoning &amp; richer visual understanding.`,
1196
+ bg: 'rgba(107,203,119,0.07)', border: 'rgba(107,203,119,0.25)',
 
1197
  },
1198
  qwen_unredacted: {
1199
  html: `<span class="model-badge qunred">QWEN 3.5 · 2B UNREDACTED MAX</span><br><br>
1200
  Qwen3.5-2B-Unredacted-MAX by prithivMLmods. Fine-tuned variant of Qwen3.5-2B
1201
  with uncensored &amp; extended instruction-following capabilities.`,
1202
+ bg: 'rgba(255,80,160,0.07)', border: 'rgba(255,80,160,0.25)',
 
1203
  },
1204
  qwen25_vl_3b: {
1205
  html: `<span class="model-badge q25vl3b">QWEN 2.5-VL · 3B</span><br><br>
1206
  Qwen2.5-VL-3B-Instruct by Alibaba Cloud. Powerful 3B vision-language model
1207
  with strong grounding, OCR &amp; multi-task visual reasoning.`,
1208
+ bg: 'rgba(80,180,255,0.07)', border: 'rgba(80,180,255,0.25)',
 
1209
  },
1210
  };
 
1211
  modelSelect.onchange = () => {
1212
  const info = MODEL_INFO[modelSelect.value];
1213
  if (!info) return;
1214
  modelInfoBox.innerHTML = info.html;
1215
  modelInfoBox.style.background = info.bg;
1216
+ modelInfoBox.style.border = `1px solid ${info.border}`;
1217
  };
1218
 
1219
  // ══════════════════════════════════════════════
 
1221
  // ══════════════════════════════════════════════
1222
  const categorySelect = document.getElementById('categorySelect');
1223
  const promptInput = document.getElementById('promptInput');
1224
+ const PLACEHOLDERS = {
1225
  Query: 'e.g., Count the total number of boats and describe the environment.',
1226
  Caption: 'e.g., short | normal | detailed',
1227
  Point: 'e.g., The gun held by the person.',
1228
  Detect: 'e.g., The headlight of the car.',
1229
  };
1230
+ categorySelect.onchange = e => { promptInput.placeholder = PLACEHOLDERS[e.target.value] || ''; };
 
 
1231
 
1232
  // ══════════════════════════════════════════════
1233
  // ROBUST JSON EXTRACTOR
1234
+ // Strategy:
1235
+ // 1. Strip ALL <think>…</think> blocks (greedy,
1236
+ // handles the tag appearing after the JSON too)
1237
+ // 2. Strip markdown fences
1238
+ // 3. Find the LAST occurrence of a JSON array [ ]
1239
+ // or object { } — models typically emit the
1240
+ // clean JSON block after their reasoning prose
1241
+ // 4. Use a bracket-depth walker to extract it
1242
+ // precisely without cutting off nested objects
1243
  // ══════════════════════════════════════════════
1244
  function extractGroundingJSON(raw) {
1245
+ // Step 1 — kill ALL <think> … </think> sections
1246
+ // Use greedy .* with DOTALL flag emulation via [\s\S]
1247
+ // Run multiple passes in case of nested/malformed tags
1248
+ let text = raw;
1249
+ let prev = null;
1250
+ while (prev !== text) {
1251
+ prev = text;
1252
+ text = text.replace(/<think>[\s\S]*?<\/think>/gi, '');
1253
+ }
1254
 
1255
+ // Step 2 — strip markdown code fences ```json … ```
1256
+ text = text.replace(/```(?:json)?[\\s\\S]*?```/gi, function(m) {
1257
+ // Keep the inner content, just remove the fences
1258
+ return m.replace(/```(?:json)?/gi, '').replace(/```/g, '');
1259
+ });
1260
 
1261
+ // Step 3 — strip any remaining lone fence markers
1262
+ text = text.replace(/```/g, '');
1263
  text = text.trim();
1264
 
1265
+ // Helper: walk from startIdx and extract a balanced
1266
+ // bracket expression (open/close must match).
1267
+ function extractBalanced(str, startIdx, openCh, closeCh) {
1268
  let depth = 0, inStr = false, esc = false;
1269
+ for (let i = startIdx; i < str.length; i++) {
1270
+ const c = str[i];
1271
+ if (esc) { esc = false; continue; }
1272
  if (c === '\\\\') { esc = true; continue; }
1273
+ if (c === '"') { inStr = !inStr; continue; }
1274
+ if (inStr) continue;
1275
+ if (c === openCh) depth++;
1276
+ if (c === closeCh) {
1277
  depth--;
1278
  if (depth === 0) {
1279
+ try { return JSON.parse(str.slice(startIdx, i + 1)); }
1280
+ catch(_) { return null; }
1281
  }
1282
  }
1283
  }
1284
+ return null;
1285
  }
1286
 
1287
+ // Step 4 — find the LAST JSON array in the text
1288
+ // (models often emit prose first, JSON last)
1289
+ let lastArrIdx = -1;
1290
+ for (let i = text.length - 1; i >= 0; i--) {
1291
+ if (text[i] === '[') { lastArrIdx = i; break; }
1292
+ }
1293
+ if (lastArrIdx !== -1) {
1294
+ const result = extractBalanced(text, lastArrIdx, '[', ']');
1295
+ if (result !== null) return result;
1296
+ }
1297
+
1298
+ // Step 5 — find the LAST JSON object in the text
1299
+ let lastObjIdx = -1;
1300
+ for (let i = text.length - 1; i >= 0; i--) {
1301
+ if (text[i] === '{') { lastObjIdx = i; break; }
1302
+ }
1303
+ if (lastObjIdx !== -1) {
1304
+ const result = extractBalanced(text, lastObjIdx, '{', '}');
1305
+ if (result !== null) return result;
1306
  }
1307
 
1308
+ // Step 6 — try FIRST array (fallback)
1309
+ const firstArr = text.indexOf('[');
1310
+ if (firstArr !== -1) {
1311
+ const result = extractBalanced(text, firstArr, '[', ']');
1312
+ if (result !== null) return result;
1313
+ }
1314
+
1315
+ // Step 7 — try FIRST object (fallback)
1316
+ const firstObj = text.indexOf('{');
1317
+ if (firstObj !== -1) {
1318
+ const result = extractBalanced(text, firstObj, '{', '}');
1319
+ if (result !== null) return result;
1320
+ }
1321
+
1322
+ // Step 8 — last resort full parse
1323
  try { return JSON.parse(text); } catch(_) {}
1324
  return null;
1325
  }
 
1331
  const groundPlaceholder = document.getElementById('groundPlaceholder');
1332
  const gCtx = groundCanvas.getContext('2d');
1333
 
1334
+ const PALETTE = ['#4ecdc4','#7c6af7','#ff6b6b','#ffd93d','#6bcb77','#ff922b','#cc5de8','#339af0'];
 
 
 
1335
 
1336
  function hexToRgba(hex, alpha) {
1337
  const r = parseInt(hex.slice(1,3),16);
 
1339
  const b = parseInt(hex.slice(5,7),16);
1340
  return `rgba(${r},${g},${b},${alpha})`;
1341
  }
 
1342
  function roundRect(ctx, x, y, w, h, r) {
1343
  ctx.beginPath();
1344
+ ctx.moveTo(x+r,y);
1345
+ ctx.lineTo(x+w-r,y); ctx.quadraticCurveTo(x+w,y,x+w,y+r);
1346
+ ctx.lineTo(x+w,y+h-r); ctx.quadraticCurveTo(x+w,y+h,x+w-r,y+h);
1347
+ ctx.lineTo(x+r,y+h); ctx.quadraticCurveTo(x,y+h,x,y+h-r);
1348
+ ctx.lineTo(x,y+r); ctx.quadraticCurveTo(x,y,x+r,y);
1349
  ctx.closePath();
1350
  }
1351
 
1352
  function drawGrounding(imgSrc, rawText) {
1353
  const parsed = extractGroundingJSON(rawText);
1354
  if (!parsed) {
1355
+ console.warn('Grounding: could not extract JSON:', rawText.slice(0, 200));
1356
  return;
1357
  }
1358
 
 
1364
  gCtx.drawImage(img, 0, 0);
1365
  groundPlaceholder.style.display = 'none';
1366
 
1367
+ const lw = Math.max(2, W/200);
1368
+ const fs = Math.max(12, W/40);
1369
  gCtx.lineWidth = lw;
1370
+ gCtx.font = `bold ${fs}px JetBrains Mono, monospace`;
1371
 
1372
  const items = Array.isArray(parsed) ? parsed : [parsed];
1373
 
1374
  items.forEach((item, i) => {
1375
  const col = PALETTE[i % PALETTE.length];
1376
 
1377
+ // ── Detect: bounding box ─────────────────────
1378
  let bbox = null;
1379
  if (Array.isArray(item?.bbox_2d) && item.bbox_2d.length === 4)
1380
  bbox = item.bbox_2d;
 
1385
  bbox = item;
1386
 
1387
  if (bbox) {
1388
+ let [x1,y1,x2,y2] = bbox.map(Number);
1389
+ // Handle normalised 0-1 coords
1390
  if (x1 <= 1 && y1 <= 1 && x2 <= 1 && y2 <= 1) {
1391
+ x1*=W; y1*=H; x2*=W; y2*=H;
1392
  }
1393
+ // Swap if inverted
1394
+ if (x2 < x1) [x1,x2] = [x2,x1];
1395
+ if (y2 < y1) [y1,y2] = [y2,y1];
1396
+
1397
+ const bw = x2-x1, bh = y2-y1;
1398
+ const lbl = (item?.label ?? `obj ${i+1}`).toString();
1399
 
1400
+ gCtx.fillStyle = hexToRgba(col, 0.20);
1401
+ gCtx.fillRect(x1,y1,bw,bh);
1402
  gCtx.strokeStyle = col;
1403
+ gCtx.lineWidth = lw;
1404
+ gCtx.strokeRect(x1,y1,bw,bh);
1405
 
1406
  const tw = gCtx.measureText(lbl).width;
1407
+ const ph = fs*1.45, pw = tw+12;
1408
+ const lx = x1, ly = Math.max(0, y1-ph);
1409
  gCtx.fillStyle = col;
1410
+ roundRect(gCtx,lx,ly,pw,ph,4); gCtx.fill();
 
1411
  gCtx.fillStyle = '#fff';
1412
+ gCtx.fillText(lbl, lx+6, ly+ph*0.76);
1413
  return;
1414
  }
1415
 
1416
+ // ── Point: 2-D coordinate ─────────────────────
1417
  let pt = null;
1418
  if (Array.isArray(item?.point_2d) && item.point_2d.length === 2)
1419
  pt = item.point_2d;
 
1424
  pt = item;
1425
 
1426
  if (pt) {
1427
+ let [x,y] = pt.map(Number);
1428
+ if (x <= 1 && y <= 1) { x*=W; y*=H; }
1429
+ const r = Math.max(8, W/60);
1430
+ const lbl = (item?.label ?? `pt ${i+1}`).toString();
1431
 
1432
  gCtx.beginPath();
1433
+ gCtx.arc(x, y, r*1.8, 0, Math.PI*2);
1434
+ gCtx.fillStyle = hexToRgba(col, 0.18); gCtx.fill();
 
1435
 
1436
  gCtx.beginPath();
1437
+ gCtx.arc(x, y, r, 0, Math.PI*2);
1438
+ gCtx.fillStyle = col; gCtx.fill();
 
1439
  gCtx.strokeStyle = '#fff';
1440
+ gCtx.lineWidth = Math.max(1.5, lw);
1441
  gCtx.stroke();
1442
 
1443
  gCtx.fillStyle = '#fff';
1444
+ gCtx.fillText(lbl, x+r+5, y+fs*0.4);
1445
  }
1446
  });
1447
  };
1448
+ img.onerror = () => console.error('Grounding: failed to load image for overlay.');
1449
  img.src = imgSrc;
1450
  }
1451
 
 
1456
  const outputBox = document.getElementById('outputBox');
1457
  let copyTimer = null;
1458
 
1459
+ function resetCopyBtn() {
1460
+ copyBtn.classList.remove('copied');
1461
+ copyBtn.innerHTML = `
1462
+ <svg width="11" height="11" viewBox="0 0 24 24" fill="none"
1463
+ stroke="currentColor" stroke-width="2.2"
1464
+ stroke-linecap="round" stroke-linejoin="round">
1465
+ <rect x="9" y="9" width="13" height="13" rx="2" ry="2"/>
1466
+ <path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/>
1467
+ </svg> COPY`;
1468
+ }
1469
+
1470
  copyBtn.onclick = () => {
1471
  const txt = outputBox.innerText || '';
1472
  if (!txt || txt === 'Results will stream here...') return;
 
1479
  <polyline points="20 6 9 17 4 12"/>
1480
  </svg> COPIED`;
1481
  clearTimeout(copyTimer);
1482
+ copyTimer = setTimeout(resetCopyBtn, 2000);
 
 
 
 
 
 
 
 
 
1483
  }).catch(() => {
1484
  const ta = document.createElement('textarea');
1485
+ ta.value = txt; ta.style.position='fixed'; ta.style.opacity='0';
1486
+ document.body.appendChild(ta); ta.select();
1487
+ document.execCommand('copy'); document.body.removeChild(ta);
 
 
 
1488
  });
1489
  };
1490
 
 
1513
  dotOut.classList.remove('active');
1514
  dotGnd.classList.remove('active');
1515
  allWires.forEach(id => document.getElementById(id)?.classList.add('active'));
1516
+ resetCopyBtn();
 
 
 
 
 
 
 
 
 
1517
 
1518
  const formData = new FormData();
1519
  formData.append('image', currentFile);
 
1522
  formData.append('model_id', modelSelect.value);
1523
 
1524
  let fullText = '';
1525
+ let imgObjectURL = URL.createObjectURL(currentFile);
1526
 
1527
  try {
1528
+ const response = await fetch('/api/run', { method:'POST', body:formData });
1529
  if (!response.ok) {
1530
  const err = await response.json();
1531
  throw new Error(err.error || 'Execution failed.');
 
1538
  while (true) {
1539
  const { value, done } = await reader.read();
1540
  if (done) break;
1541
+ buffer += decoder.decode(value, { stream:true });
1542
  const lines = buffer.split('\\n\\n');
1543
  buffer = lines.pop();
1544
  for (const line of lines) {
1545
  if (!line.startsWith('data: ')) continue;
1546
+ const payload = line.replace('data: ','');
1547
  if (payload === '[DONE]') break;
1548
  try {
1549
  const data = JSON.parse(payload);
 
1558
 
1559
  dotOut.classList.add('active');
1560
 
1561
+ // ── Grounding overlay (Point / Detect) ──────────
1562
  const cat = categorySelect.value;
1563
  if ((cat === 'Point' || cat === 'Detect') && fullText.trim()) {
1564
  const parsed = extractGroundingJSON(fullText);
1565
+ if (parsed !== null) {
1566
  dotGnd.classList.add('active');
1567
+ drawGrounding(imgObjectURL, fullText);
1568
+ } else {
1569
+ console.warn('No grounding JSON found in output.');
1570
  }
1571
  }
1572