prithivMLmods committed on
Commit
2969977
·
verified ·
1 Parent(s): c04b4d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +305 -252
app.py CHANGED
@@ -761,16 +761,18 @@ async def homepage(request: Request):
761
  .ground-header-row { display: flex; align-items: center; justify-content: space-between; flex-shrink: 0; }
762
  .ground-canvas-wrap {
763
  position: relative; flex: 1; border: 1px solid var(--node-border);
764
- border-radius: 5px; overflow: hidden; background: #000; min-height: 0;
765
  }
766
  .ground-canvas-wrap canvas {
 
767
  width: 100%; height: 100%;
768
  object-fit: contain; display: block;
769
- image-rendering: pixelated;
770
  }
771
  .ground-placeholder {
772
  position: absolute; inset: 0; display: flex; align-items: center;
773
  justify-content: center; font-size: 11px; color: var(--muted); text-align: center; padding: 10px;
 
774
  }
775
  .loader {
776
  width: 11px; height: 11px; border: 2px solid rgba(255,255,255,0.3);
@@ -797,6 +799,17 @@ async def homepage(request: Request):
797
  .model-badge.q25vl3b { background: rgba(80,180,255,0.15); color: #50b4ff; border: 1px solid rgba(80,180,255,0.35); }
798
  .model-info-box { border-radius: 6px; padding: 9px; font-size: 10px; color: var(--muted); line-height: 1.55; flex-shrink: 0; }
799
  .canvas-footer { height: 36px; }
 
 
 
 
 
 
 
 
 
 
 
800
  </style>
801
  </head>
802
  <body>
@@ -808,6 +821,12 @@ async def homepage(request: Request):
808
  <span class="badge">10x Vision Models</span>
809
  </div>
810
 
 
 
 
 
 
 
811
  <div id="canvas">
812
  <svg class="wires">
813
  <path id="wire-img-task" class="wire" />
@@ -961,7 +980,7 @@ async def homepage(request: Request):
961
  SAVE
962
  </a>
963
  </div>
964
- <div class="ground-canvas-wrap">
965
  <canvas id="groundCanvas"></canvas>
966
  <div class="ground-placeholder" id="groundPlaceholder">
967
  Active for Point / Detect tasks.<br>Run inference to visualise.
@@ -1158,172 +1177,131 @@ const PLACEHOLDERS = {
1158
  categorySelect.onchange = e => { promptInput.placeholder = PLACEHOLDERS[e.target.value]||''; };
1159
 
1160
  // ══════════════════════════════════════════════
1161
- // ROBUST JSON EXTRACTOR (FIXED)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1162
  // ══════════════════════════════════════════════
1163
  function extractGroundingJSON(raw) {
1164
- // 1. Strip <think>…</think> blocks
1165
  let text = raw;
1166
- let prev = null;
1167
- while (prev !== text) {
1168
- prev = text;
1169
- text = text.replace(/<think>[\s\S]*?<\/think>/gi, '');
1170
  }
1171
 
1172
- // 2. Strip markdown fences
1173
- text = text.replace(/```(?:json)?([\s\S]*?)```/gi, '$1');
1174
- text = text.replace(/```/g, '').trim();
 
1175
 
1176
- // 3. Balanced bracket extractor (searches forward from a given index)
1177
  function extractBalanced(str, startIdx, openCh, closeCh) {
1178
  let depth = 0, inStr = false, esc = false;
1179
  for (let i = startIdx; i < str.length; i++) {
1180
  const c = str[i];
1181
  if (esc) { esc = false; continue; }
1182
- if (c === '\\') { esc = true; continue; }
1183
  if (c === '"') { inStr = !inStr; continue; }
1184
  if (inStr) continue;
1185
  if (c === openCh) depth++;
1186
  if (c === closeCh) {
1187
  depth--;
1188
  if (depth === 0) {
1189
- try { return JSON.parse(str.slice(startIdx, i + 1)); } catch(_) { return null; }
 
1190
  }
1191
  }
1192
  }
1193
  return null;
1194
  }
1195
 
1196
- // 4. Collect ALL array and object candidates from the text, prefer arrays
1197
- const arrayCandidates = [];
1198
- const objectCandidates = [];
1199
  for (let i = 0; i < text.length; i++) {
1200
- if (text[i] === '[') {
1201
- const r = extractBalanced(text, i, '[', ']');
1202
- if (r !== null) arrayCandidates.push(r);
1203
- }
1204
- if (text[i] === '{') {
1205
- const r = extractBalanced(text, i, '{', '}');
1206
- if (r !== null) objectCandidates.push(r);
1207
- }
1208
  }
1209
 
1210
- // 5. Pick the best grounding candidate:
1211
- // - Must contain at least one item with recognisable bbox or point keys
1212
- const BBOX_KEYS = ['bbox_2d','bbox','box','bounding_box','coordinates','xyxy','xywh'];
1213
- const POINT_KEYS = ['point_2d','point','center','centroid','coordinates','xy'];
1214
-
1215
- function looksLikeGrounding(obj) {
1216
- if (typeof obj !== 'object' || obj === null) return false;
1217
- const keys = Object.keys(obj).map(k => k.toLowerCase());
1218
- return (
1219
- BBOX_KEYS.some(k => keys.includes(k)) ||
1220
- POINT_KEYS.some(k => keys.includes(k)) ||
1221
- // plain [x1,y1,x2,y2] or [x,y] array of numbers
1222
- (Array.isArray(obj) && obj.length >= 2 && obj.every(n => typeof n === 'number'))
1223
- );
1224
- }
1225
-
1226
- // Check arrays of objects first
1227
- for (const arr of arrayCandidates) {
1228
- if (Array.isArray(arr)) {
1229
- // Array of numbers → could be direct coords
1230
- if (arr.every(n => typeof n === 'number') && (arr.length === 2 || arr.length === 4)) return arr;
1231
- // Array of grounding objects
1232
- if (arr.length > 0 && arr.every(o => looksLikeGrounding(o))) return arr;
1233
  }
1234
  }
1235
- // Single object
1236
- for (const obj of objectCandidates) {
1237
- if (looksLikeGrounding(obj)) return obj;
 
 
 
 
1238
  }
1239
- // Fallback: return any array or object found
1240
- if (arrayCandidates.length) return arrayCandidates[0];
1241
- if (objectCandidates.length) return objectCandidates[0];
1242
-
1243
- // Last resort: direct parse
1244
- try { return JSON.parse(text); } catch(_) {}
1245
- return null;
1246
- }
1247
-
1248
- // ══════════════════════════════════════════════
1249
- // NORMALISE COORDS (FIXED)
1250
- // ══════════════════════════════════════════════
1251
- /**
1252
- * Decide whether coords are normalised [0..1] or absolute pixels.
1253
- * Rule: treat as normalised only when ALL values are strictly in (0,1).
1254
- * A value like 150 is clearly pixels; 0.45 is clearly normalised.
1255
- */
1256
- function isNormalised(coords, W, H) {
1257
- // If any value exceeds 1.0 it must be pixel coords
1258
- return coords.every(v => v >= 0 && v <= 1.0) && Math.max(...coords) <= 1.0;
1259
- }
1260
 
1261
- function toPixelBbox(bbox, W, H) {
1262
- let [x1, y1, x2, y2] = bbox.map(Number);
1263
- if (isNormalised([x1, y1, x2, y2], W, H)) { x1*=W; y1*=H; x2*=W; y2*=H; }
1264
- if (x2 < x1) [x1, x2] = [x2, x1];
1265
- if (y2 < y1) [y1, y2] = [y2, y1];
1266
- return [x1, y1, x2, y2];
1267
- }
1268
 
1269
- function toPixelPoint(pt, W, H) {
1270
- let [x, y] = pt.map(Number);
1271
- if (isNormalised([x, y], W, H)) { x*=W; y*=H; }
1272
- return [x, y];
1273
  }
1274
 
1275
  // ══════════════════════════════════════════════
1276
- // EXTRACT BBOX / POINT FROM OBJECT (FIXED)
 
1277
  // ══════════════════════════════════════════════
1278
- const BBOX_KEY_LIST = ['bbox_2d','bbox','box','bounding_box','coordinates','xyxy','xywh'];
1279
- const POINT_KEY_LIST = ['point_2d','point','center','centroid','xy'];
1280
-
1281
- function findBbox(item) {
1282
- if (!item || typeof item !== 'object') return null;
1283
- // Direct array of 4 numbers
1284
- if (Array.isArray(item) && item.length === 4 && item.every(n => typeof n === 'number')) return item;
1285
- const lc = {};
1286
- for (const [k, v] of Object.entries(item)) lc[k.toLowerCase()] = v;
1287
- for (const key of BBOX_KEY_LIST) {
1288
- if (lc[key] && Array.isArray(lc[key]) && lc[key].length === 4) return lc[key];
1289
- }
1290
- // xmin/ymin/xmax/ymax style
1291
- if ('xmin' in lc && 'ymin' in lc && 'xmax' in lc && 'ymax' in lc) {
1292
- return [lc.xmin, lc.ymin, lc.xmax, lc.ymax];
1293
- }
1294
- // x/y/width/height style
1295
- if ('x' in lc && 'y' in lc && 'width' in lc && 'height' in lc) {
1296
- return [lc.x, lc.y, lc.x + lc.width, lc.y + lc.height];
1297
  }
1298
- return null;
1299
- }
1300
 
1301
- function findPoint(item) {
1302
- if (!item || typeof item !== 'object') return null;
1303
- // Direct array of 2 numbers
1304
- if (Array.isArray(item) && item.length === 2 && item.every(n => typeof n === 'number')) return item;
1305
- const lc = {};
1306
- for (const [k, v] of Object.entries(item)) lc[k.toLowerCase()] = v;
1307
- for (const key of POINT_KEY_LIST) {
1308
- if (lc[key] && Array.isArray(lc[key]) && lc[key].length === 2) return lc[key];
1309
  }
1310
- if ('x' in lc && 'y' in lc) return [lc.x, lc.y];
1311
- return null;
1312
- }
1313
 
1314
- function getLabel(item, idx, taskCat) {
1315
- if (!item || typeof item !== 'object' || Array.isArray(item)) {
1316
- return taskCat === 'Point' ? `pt ${idx+1}` : `obj ${idx+1}`;
1317
- }
1318
- const lc = {};
1319
- for (const [k, v] of Object.entries(item)) lc[k.toLowerCase()] = v;
1320
- return (lc.label ?? lc.name ?? lc.class ?? lc.category ?? (taskCat==='Point' ? `pt ${idx+1}` : `obj ${idx+1}`)).toString();
1321
  }
1322
 
1323
  // ══════════════════════════════════════════════
1324
- // GROUNDING VISUALIZER (FIXED)
1325
  // ══════════════════════════════════════════════
1326
  const groundCanvas = document.getElementById('groundCanvas');
 
1327
  const groundPlaceholder = document.getElementById('groundPlaceholder');
1328
  const gCtx = groundCanvas.getContext('2d');
1329
  const downloadBtn = document.getElementById('downloadBtn');
@@ -1334,12 +1312,15 @@ function hexToRgba(hex, alpha) {
1334
  const r=parseInt(hex.slice(1,3),16), g=parseInt(hex.slice(3,5),16), b=parseInt(hex.slice(5,7),16);
1335
  return `rgba(${r},${g},${b},${alpha})`;
1336
  }
1337
- function roundRect(ctx, x, y, w, h, r) {
1338
- ctx.beginPath(); ctx.moveTo(x+r,y);
1339
- ctx.lineTo(x+w-r,y); ctx.quadraticCurveTo(x+w,y,x+w,y+r);
1340
- ctx.lineTo(x+w,y+h-r); ctx.quadraticCurveTo(x+w,y+h,x+w-r,y+h);
1341
- ctx.lineTo(x+r,y+h); ctx.quadraticCurveTo(x,y+h,x,y+h-r);
1342
- ctx.lineTo(x,y+r); ctx.quadraticCurveTo(x,y,x+r,y); ctx.closePath();
 
 
 
1343
  }
1344
 
1345
  function updateDownloadBtn() {
@@ -1350,118 +1331,149 @@ function updateDownloadBtn() {
1350
  downloadBtn.style.display = 'flex';
1351
  }
1352
 
1353
- function drawGrounding(imgSrc, rawText, taskCat) {
1354
  const parsed = extractGroundingJSON(rawText);
 
1355
  if (!parsed) {
1356
- console.warn('Grounding: no JSON found. Raw text snippet:', rawText.slice(0, 300));
1357
- groundPlaceholder.textContent = 'No grounding data found in output.';
1358
  groundPlaceholder.style.display = 'flex';
1359
  return;
1360
  }
1361
 
1362
- console.log('Grounding parsed:', JSON.stringify(parsed).slice(0, 500));
1363
-
1364
  const img = new Image();
 
 
1365
  img.onload = () => {
1366
- const W = img.naturalWidth, H = img.naturalHeight;
 
 
 
1367
  groundCanvas.width = W;
1368
  groundCanvas.height = H;
1369
- gCtx.drawImage(img, 0, 0);
 
 
 
 
1370
  groundPlaceholder.style.display = 'none';
1371
 
1372
- const lw = Math.max(2, W / 200);
1373
- const fs = Math.max(12, W / 40);
1374
  gCtx.lineWidth = lw;
1375
- gCtx.font = `bold ${fs}px JetBrains Mono, monospace`;
1376
 
1377
- // Normalise to always be an array
1378
  const items = Array.isArray(parsed) ? parsed : [parsed];
1379
-
1380
- // Check whether ALL items are plain numbers (flat coords array)
1381
- const allNumbers = items.every(v => typeof v === 'number');
1382
-
1383
- if (allNumbers) {
1384
- // Could be [x1,y1,x2,y2] or [x,y]
1385
- const col = PALETTE[0];
1386
- if (items.length === 4) {
1387
- const [x1,y1,x2,y2] = toPixelBbox(items, W, H);
1388
- drawBox(x1,y1,x2-x1,y2-y1,col,'obj 1',lw,fs);
1389
- } else if (items.length === 2) {
1390
- const [x,y] = toPixelPoint(items, W, H);
1391
- drawDot(x,y,col,'pt 1',lw,fs,W);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1392
  }
1393
- } else {
1394
- items.forEach((item, i) => {
1395
- const col = PALETTE[i % PALETTE.length];
1396
- const lbl = getLabel(item, i, taskCat);
1397
-
1398
- // Try bbox first
1399
- const bboxRaw = findBbox(item);
1400
- if (bboxRaw) {
1401
- const [x1,y1,x2,y2] = toPixelBbox(bboxRaw, W, H);
1402
- drawBox(x1, y1, x2-x1, y2-y1, col, lbl, lw, fs);
1403
- return;
1404
- }
1405
-
1406
- // Try point
1407
- const ptRaw = findPoint(item);
1408
- if (ptRaw) {
1409
- const [x,y] = toPixelPoint(ptRaw, W, H);
1410
- drawDot(x, y, col, lbl, lw, fs, W);
1411
- return;
1412
- }
1413
 
1414
- console.warn('Grounding item unrecognised:', item);
1415
- });
1416
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1417
 
1418
  updateDownloadBtn();
1419
  };
1420
- img.onerror = () => {
1421
- console.error('Grounding: failed to load image.');
 
1422
  groundPlaceholder.textContent = 'Failed to load image for overlay.';
1423
  groundPlaceholder.style.display = 'flex';
1424
  };
1425
- img.src = imgSrc;
1426
- }
1427
-
1428
- function drawBox(x1, y1, bw, bh, col, lbl, lw, fs) {
1429
- // Fill
1430
- gCtx.fillStyle = hexToRgba(col, 0.20);
1431
- gCtx.fillRect(x1, y1, bw, bh);
1432
- // Border
1433
- gCtx.strokeStyle = col;
1434
- gCtx.lineWidth = lw;
1435
- gCtx.strokeRect(x1, y1, bw, bh);
1436
- // Label pill
1437
- const tw = gCtx.measureText(lbl).width;
1438
- const ph = fs * 1.45, pw = tw + 12;
1439
- const lx = x1, ly = Math.max(0, y1 - ph);
1440
- gCtx.fillStyle = col;
1441
- roundRect(gCtx, lx, ly, pw, ph, 4);
1442
- gCtx.fill();
1443
- gCtx.fillStyle = '#fff';
1444
- gCtx.fillText(lbl, lx + 6, ly + ph * 0.76);
1445
- }
1446
 
1447
- function drawDot(x, y, col, lbl, lw, fs, W) {
1448
- const r = Math.max(8, W / 60);
1449
- // Outer glow ring
1450
- gCtx.beginPath();
1451
- gCtx.arc(x, y, r * 1.8, 0, Math.PI * 2);
1452
- gCtx.fillStyle = hexToRgba(col, 0.18);
1453
- gCtx.fill();
1454
- // Dot
1455
- gCtx.beginPath();
1456
- gCtx.arc(x, y, r, 0, Math.PI * 2);
1457
- gCtx.fillStyle = col;
1458
- gCtx.fill();
1459
- gCtx.strokeStyle = '#fff';
1460
- gCtx.lineWidth = Math.max(1.5, lw);
1461
- gCtx.stroke();
1462
- // Label
1463
- gCtx.fillStyle = '#fff';
1464
- gCtx.fillText(lbl, x + r + 5, y + fs * 0.4);
1465
  }
1466
 
1467
  // ══════════════════════════════════════════════
@@ -1513,64 +1525,105 @@ runBtn.onclick = async () => {
1513
  const promptStr = promptInput.value.trim();
1514
  if (!promptStr) { alert('Please enter a prompt directive.'); return; }
1515
 
1516
- runBtn.disabled=true; btnLoader.style.display='inline-block';
1517
- outputBox.innerText=''; outputBox.style.color='';
1518
- groundPlaceholder.style.display='flex';
1519
- groundPlaceholder.textContent='Active for Point / Detect tasks.\\nRun inference to visualise.';
1520
- gCtx.clearRect(0,0,groundCanvas.width,groundCanvas.height);
1521
- downloadBtn.style.display='none';
 
 
 
 
 
1522
  dotTask.classList.add('active');
1523
- dotOut.classList.remove('active'); dotGnd.classList.remove('active');
1524
- allWires.forEach(id=>document.getElementById(id)?.classList.add('active'));
 
1525
  resetCopyBtn();
1526
 
1527
- const formData=new FormData();
1528
  formData.append('image', currentFile);
1529
  formData.append('category', categorySelect.value);
1530
  formData.append('prompt', promptStr);
1531
  formData.append('model_id', modelSelect.value);
1532
 
1533
- let fullText='';
1534
- // Keep a stable object URL for the image
1535
  const imgObjectURL = URL.createObjectURL(currentFile);
1536
- const taskCat = categorySelect.value;
1537
 
1538
  try {
1539
- const response=await fetch('/api/run',{method:'POST',body:formData});
1540
- if (!response.ok) { const err=await response.json(); throw new Error(err.error||'Execution failed.'); }
 
 
 
 
 
 
 
1541
 
1542
- const reader=response.body.getReader(), decoder=new TextDecoder('utf-8');
1543
- let buffer='';
1544
  while (true) {
1545
- const {value,done}=await reader.read(); if(done)break;
1546
- buffer+=decoder.decode(value,{stream:true});
1547
- const lines=buffer.split('\\n\\n'); buffer=lines.pop();
 
 
 
1548
  for (const line of lines) {
1549
  if (!line.startsWith('data: ')) continue;
1550
- const payload=line.replace('data: ','');
1551
- if (payload==='[DONE]') break;
1552
  try {
1553
- const data=JSON.parse(payload);
1554
- if (data.chunk) { fullText+=data.chunk; outputBox.innerText=fullText; outputBox.scrollTop=outputBox.scrollHeight; }
1555
- } catch(_) {}
 
 
 
 
1556
  }
1557
  }
1558
 
1559
  dotOut.classList.add('active');
1560
 
1561
- // Grounding overlay for Point / Detect
1562
- if ((taskCat==='Point' || taskCat==='Detect') && fullText.trim()) {
1563
- dotGnd.classList.add('active');
1564
- // Small delay so the canvas wrapper has finished any layout reflow
1565
- setTimeout(() => drawGrounding(imgObjectURL, fullText, taskCat), 80);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1566
  }
1567
 
1568
- } catch(err) {
1569
- outputBox.innerText=`[Error] ${err.message}`; outputBox.style.color='#ff6b6b';
 
 
 
1570
  } finally {
1571
- runBtn.disabled=false; btnLoader.style.display='none';
 
1572
  dotTask.classList.remove('active');
1573
- allWires.forEach(id=>document.getElementById(id)?.classList.remove('active'));
 
 
1574
  }
1575
  };
1576
  </script>
 
761
  .ground-header-row { display: flex; align-items: center; justify-content: space-between; flex-shrink: 0; }
762
  .ground-canvas-wrap {
763
  position: relative; flex: 1; border: 1px solid var(--node-border);
764
+ border-radius: 5px; overflow: hidden; background: #111; min-height: 0;
765
  }
766
  .ground-canvas-wrap canvas {
767
+ position: absolute; top: 0; left: 0;
768
  width: 100%; height: 100%;
769
  object-fit: contain; display: block;
770
+ image-rendering: auto;
771
  }
772
  .ground-placeholder {
773
  position: absolute; inset: 0; display: flex; align-items: center;
774
  justify-content: center; font-size: 11px; color: var(--muted); text-align: center; padding: 10px;
775
+ pointer-events: none; z-index: 5;
776
  }
777
  .loader {
778
  width: 11px; height: 11px; border: 2px solid rgba(255,255,255,0.3);
 
799
  .model-badge.q25vl3b { background: rgba(80,180,255,0.15); color: #50b4ff; border: 1px solid rgba(80,180,255,0.35); }
800
  .model-info-box { border-radius: 6px; padding: 9px; font-size: 10px; color: var(--muted); line-height: 1.55; flex-shrink: 0; }
801
  .canvas-footer { height: 36px; }
802
+
803
+ /* ── Debug panel ── */
804
+ #debugPanel {
805
+ position: fixed; bottom: 12px; right: 12px; z-index: 9999;
806
+ background: rgba(13,13,15,0.95); border: 1px solid var(--node-border);
807
+ border-radius: 7px; padding: 8px 12px; font-size: 10px; color: var(--muted);
808
+ max-width: 340px; display: none; backdrop-filter: blur(8px);
809
+ }
810
+ #debugPanel.visible { display: block; }
811
+ #debugPanel .dbg-title { color: var(--accent2); font-weight: 700; margin-bottom: 4px; }
812
+ #debugPanel pre { white-space: pre-wrap; word-break: break-all; max-height: 120px; overflow-y: auto; color: #a0a0c0; }
813
  </style>
814
  </head>
815
  <body>
 
821
  <span class="badge">10x Vision Models</span>
822
  </div>
823
 
824
+ <!-- Debug panel (toggle with D key) -->
825
+ <div id="debugPanel">
826
+ <div class="dbg-title">⬑ GROUNDING DEBUG</div>
827
+ <pre id="debugPre"></pre>
828
+ </div>
829
+
830
  <div id="canvas">
831
  <svg class="wires">
832
  <path id="wire-img-task" class="wire" />
 
980
  SAVE
981
  </a>
982
  </div>
983
+ <div class="ground-canvas-wrap" id="groundWrap">
984
  <canvas id="groundCanvas"></canvas>
985
  <div class="ground-placeholder" id="groundPlaceholder">
986
  Active for Point / Detect tasks.<br>Run inference to visualise.
 
1177
  categorySelect.onchange = e => { promptInput.placeholder = PLACEHOLDERS[e.target.value]||''; };
1178
 
1179
  // ══════════════════════════════════════════════
1180
+ // DEBUG PANEL (press D to toggle)
1181
+ // ══════════════════════════════════════════════
1182
+ const debugPanel = document.getElementById('debugPanel');
1183
+ const debugPre = document.getElementById('debugPre');
1184
+ let debugVisible = false;
1185
+ document.addEventListener('keydown', e => {
1186
+ if (e.key === 'd' || e.key === 'D') {
1187
+ debugVisible = !debugVisible;
1188
+ debugPanel.classList.toggle('visible', debugVisible);
1189
+ }
1190
+ });
1191
+ function dbg(msg) {
1192
+ debugPre.textContent = msg;
1193
+ console.log('[GROUNDING]', msg);
1194
+ }
1195
+
1196
+ // ══════════════════════════════════════════════
1197
+ // ROBUST JSON EXTRACTOR (handles all model output styles)
1198
  // ══════════════════════════════════════════════
1199
  function extractGroundingJSON(raw) {
1200
+ // Step 1: strip <think>…</think> blocks completely
1201
  let text = raw;
1202
+ for (let i = 0; i < 10; i++) {
1203
+ const next = text.replace(/<think>[\s\S]*?<\/think>/gi, '');
1204
+ if (next === text) break;
1205
+ text = next;
1206
  }
1207
 
1208
+ // Step 2: strip markdown fences
1209
+ text = text.replace(/```(?:json)?\\s*/gi, '').replace(/```/g, '').trim();
1210
+
1211
+ dbg('Cleaned text (first 400):' + text.slice(0, 400));
1212
 
1213
+ // Step 3: Balanced bracket extractor
1214
  function extractBalanced(str, startIdx, openCh, closeCh) {
1215
  let depth = 0, inStr = false, esc = false;
1216
  for (let i = startIdx; i < str.length; i++) {
1217
  const c = str[i];
1218
  if (esc) { esc = false; continue; }
1219
+ if (c === '\\\\') { esc = true; continue; }
1220
  if (c === '"') { inStr = !inStr; continue; }
1221
  if (inStr) continue;
1222
  if (c === openCh) depth++;
1223
  if (c === closeCh) {
1224
  depth--;
1225
  if (depth === 0) {
1226
+ try { return JSON.parse(str.slice(startIdx, i + 1)); }
1227
+ catch (_) { return null; }
1228
  }
1229
  }
1230
  }
1231
  return null;
1232
  }
1233
 
1234
+ // Step 4: scan for ALL '[' positions, try each from last to first
1235
+ const bracketPositions = [];
1236
+ const bracePositions = [];
1237
  for (let i = 0; i < text.length; i++) {
1238
+ if (text[i] === '[') bracketPositions.push(i);
1239
+ if (text[i] === '{') bracePositions.push(i);
 
 
 
 
 
 
1240
  }
1241
 
1242
+ // Prefer arrays (most models return [{...}, {...}])
1243
+ for (let i = bracketPositions.length - 1; i >= 0; i--) {
1244
+ const r = extractBalanced(text, bracketPositions[i], '[', ']');
1245
+ if (r !== null && Array.isArray(r) && r.length > 0) {
1246
+ dbg('Found array at pos ' + bracketPositions[i] + ': ' + JSON.stringify(r).slice(0, 200));
1247
+ return r;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1248
  }
1249
  }
1250
+ // Try objects
1251
+ for (let i = bracePositions.length - 1; i >= 0; i--) {
1252
+ const r = extractBalanced(text, bracePositions[i], '{', '}');
1253
+ if (r !== null) {
1254
+ dbg('Found object at pos ' + bracePositions[i] + ': ' + JSON.stringify(r).slice(0, 200));
1255
+ return r;
1256
+ }
1257
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1258
 
1259
+ // Step 5: try whole-text parse
1260
+ try { return JSON.parse(text); } catch (_) {}
 
 
 
 
 
1261
 
1262
+ dbg('No JSON found. Raw tail: ' + text.slice(-300));
1263
+ return null;
 
 
1264
  }
1265
 
1266
  // ══════════════════════════════════════════════
1267
+ // COORDINATE NORMALISER
1268
+ // Handles: absolute pixels, 0-1 fractions, 0-1000 Qwen scale
1269
  // ══════════════════════════════════════════════
1270
+ function normaliseCoords(arr, W, H) {
1271
+ // arr is [x1,y1,x2,y2] or [x,y]
1272
+ const nums = arr.map(Number);
1273
+
1274
+ if (arr.length === 4) {
1275
+ let [x1,y1,x2,y2] = nums;
1276
+ // Qwen VL often uses 0-1000 normalised coords
1277
+ const maxVal = Math.max(x1, y1, x2, y2);
1278
+ if (maxVal <= 1.0 && maxVal > 0) {
1279
+ // 0-1 fraction
1280
+ return [x1*W, y1*H, x2*W, y2*H];
1281
+ } else if (maxVal <= 1000 && maxVal > 1) {
1282
+ // 0-1000 scale (Qwen VL convention)
1283
+ return [x1/1000*W, y1/1000*H, x2/1000*W, y2/1000*H];
1284
+ }
1285
+ // Already in pixels
1286
+ return [x1, y1, x2, y2];
 
 
1287
  }
 
 
1288
 
1289
+ if (arr.length === 2) {
1290
+ let [x, y] = nums;
1291
+ const maxVal = Math.max(x, y);
1292
+ if (maxVal <= 1.0 && maxVal > 0) return [x*W, y*H];
1293
+ if (maxVal <= 1000 && maxVal > 1) return [x/1000*W, y/1000*H];
1294
+ return [x, y];
 
 
1295
  }
 
 
 
1296
 
1297
+ return nums;
 
 
 
 
 
 
1298
  }
1299
 
1300
  // ══════════════════════════════════════════════
1301
+ // GROUNDING VISUALIZER
1302
  // ══════════════════════════════════════════════
1303
  const groundCanvas = document.getElementById('groundCanvas');
1304
+ const groundWrap = document.getElementById('groundWrap');
1305
  const groundPlaceholder = document.getElementById('groundPlaceholder');
1306
  const gCtx = groundCanvas.getContext('2d');
1307
  const downloadBtn = document.getElementById('downloadBtn');
 
1312
  const r=parseInt(hex.slice(1,3),16), g=parseInt(hex.slice(3,5),16), b=parseInt(hex.slice(5,7),16);
1313
  return `rgba(${r},${g},${b},${alpha})`;
1314
  }
1315
+ function drawRoundRect(ctx, x, y, w, h, r) {
1316
+ r = Math.min(r, w/2, h/2);
1317
+ ctx.beginPath();
1318
+ ctx.moveTo(x+r, y);
1319
+ ctx.lineTo(x+w-r, y); ctx.quadraticCurveTo(x+w, y, x+w, y+r);
1320
+ ctx.lineTo(x+w, y+h-r); ctx.quadraticCurveTo(x+w, y+h, x+w-r, y+h);
1321
+ ctx.lineTo(x+r, y+h); ctx.quadraticCurveTo(x, y+h, x, y+h-r);
1322
+ ctx.lineTo(x, y+r); ctx.quadraticCurveTo(x, y, x+r, y);
1323
+ ctx.closePath();
1324
  }
1325
 
1326
  function updateDownloadBtn() {
 
1331
  downloadBtn.style.display = 'flex';
1332
  }
1333
 
1334
+ function drawGrounding(imgSrc, rawText) {
1335
  const parsed = extractGroundingJSON(rawText);
1336
+
1337
  if (!parsed) {
1338
+ dbg('drawGrounding: no JSON parsed from output.');
1339
+ groundPlaceholder.textContent = 'No grounding coordinates found in model output.';
1340
  groundPlaceholder.style.display = 'flex';
1341
  return;
1342
  }
1343
 
 
 
1344
  const img = new Image();
1345
+ img.crossOrigin = 'anonymous';
1346
+
1347
  img.onload = () => {
1348
+ const W = img.naturalWidth || img.width || 512;
1349
+ const H = img.naturalHeight || img.height || 512;
1350
+
1351
+ // Set canvas to image natural size for crisp drawing
1352
  groundCanvas.width = W;
1353
  groundCanvas.height = H;
1354
+
1355
+ // Draw base image
1356
+ gCtx.drawImage(img, 0, 0, W, H);
1357
+
1358
+ // Hide placeholder — canvas is now populated
1359
  groundPlaceholder.style.display = 'none';
1360
 
1361
+ const lw = Math.max(2, W / 180);
1362
+ const fs = Math.max(11, Math.min(W / 35, 22));
1363
  gCtx.lineWidth = lw;
 
1364
 
 
1365
  const items = Array.isArray(parsed) ? parsed : [parsed];
1366
+ dbg('Drawing ' + items.length + ' item(s) on ' + W + 'x' + H);
1367
+
1368
+ items.forEach((item, i) => {
1369
+ const col = PALETTE[i % PALETTE.length];
1370
+
1371
+ // ── Try to extract bbox ───────────────────────
1372
+ let rawBbox = null;
1373
+ if (Array.isArray(item?.bbox_2d) && item.bbox_2d.length === 4) rawBbox = item.bbox_2d;
1374
+ else if (Array.isArray(item?.bbox) && item.bbox.length === 4) rawBbox = item.bbox;
1375
+ else if (Array.isArray(item?.box) && item.box.length === 4) rawBbox = item.box;
1376
+ // flat array of 4 numbers
1377
+ else if (Array.isArray(item) && item.length === 4 && item.every(v => typeof v === 'number'))
1378
+ rawBbox = item;
1379
+
1380
+ if (rawBbox) {
1381
+ let [x1, y1, x2, y2] = normaliseCoords(rawBbox, W, H);
1382
+ // Ensure x1<x2, y1<y2
1383
+ if (x2 < x1) [x1, x2] = [x2, x1];
1384
+ if (y2 < y1) [y1, y2] = [y2, y1];
1385
+ const bw = x2 - x1, bh = y2 - y1;
1386
+
1387
+ // Fill
1388
+ gCtx.fillStyle = hexToRgba(col, 0.18);
1389
+ gCtx.fillRect(x1, y1, bw, bh);
1390
+
1391
+ // Border
1392
+ gCtx.strokeStyle = col;
1393
+ gCtx.lineWidth = lw;
1394
+ gCtx.strokeRect(x1, y1, bw, bh);
1395
+
1396
+ // Corner accent marks
1397
+ const cLen = Math.min(bw, bh, 18);
1398
+ gCtx.lineWidth = lw * 1.8;
1399
+ [[x1,y1],[x2,y1],[x2,y2],[x1,y2]].forEach(([cx,cy]) => {
1400
+ const sx = cx === x1 ? 1 : -1, sy = cy === y1 ? 1 : -1;
1401
+ gCtx.beginPath();
1402
+ gCtx.moveTo(cx + sx*cLen, cy);
1403
+ gCtx.lineTo(cx, cy);
1404
+ gCtx.lineTo(cx, cy + sy*cLen);
1405
+ gCtx.strokeStyle = col;
1406
+ gCtx.stroke();
1407
+ });
1408
+ gCtx.lineWidth = lw;
1409
+
1410
+ // Label
1411
+ const lbl = (item?.label ?? item?.class_name ?? item?.name ?? `obj ${i+1}`).toString();
1412
+ gCtx.font = `bold ${fs}px JetBrains Mono, monospace`;
1413
+ const tw = gCtx.measureText(lbl).width;
1414
+ const ph = fs * 1.5, pw = tw + 14;
1415
+ const lx = Math.max(0, Math.min(x1, W - pw));
1416
+ const ly = y1 - ph > 0 ? y1 - ph : y1 + 2;
1417
+ drawRoundRect(gCtx, lx, ly, pw, ph, 4);
1418
+ gCtx.fillStyle = col; gCtx.fill();
1419
+ gCtx.fillStyle = '#fff';
1420
+ gCtx.fillText(lbl, lx + 7, ly + ph * 0.74);
1421
+ return;
1422
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1423
 
1424
+ // ── Try to extract point ──────────────────────
1425
+ let rawPt = null;
1426
+ if (Array.isArray(item?.point_2d) && item.point_2d.length === 2) rawPt = item.point_2d;
1427
+ else if (Array.isArray(item?.point) && item.point.length === 2) rawPt = item.point;
1428
+ else if (Array.isArray(item?.coord) && item.coord.length === 2) rawPt = item.coord;
1429
+ else if (Array.isArray(item) && item.length === 2 && item.every(v => typeof v === 'number'))
1430
+ rawPt = item;
1431
+
1432
+ if (rawPt) {
1433
+ let [x, y] = normaliseCoords(rawPt, W, H);
1434
+ const r = Math.max(7, Math.min(W / 55, 18));
1435
+ const lbl = (item?.label ?? item?.name ?? `pt ${i+1}`).toString();
1436
+
1437
+ // Outer glow ring
1438
+ gCtx.beginPath(); gCtx.arc(x, y, r * 2.2, 0, Math.PI*2);
1439
+ gCtx.fillStyle = hexToRgba(col, 0.15); gCtx.fill();
1440
+
1441
+ // Middle ring
1442
+ gCtx.beginPath(); gCtx.arc(x, y, r * 1.4, 0, Math.PI*2);
1443
+ gCtx.fillStyle = hexToRgba(col, 0.25); gCtx.fill();
1444
+
1445
+ // Core dot
1446
+ gCtx.beginPath(); gCtx.arc(x, y, r, 0, Math.PI*2);
1447
+ gCtx.fillStyle = col; gCtx.fill();
1448
+ gCtx.strokeStyle = '#fff'; gCtx.lineWidth = Math.max(1.5, lw); gCtx.stroke();
1449
+
1450
+ // Centre dot
1451
+ gCtx.beginPath(); gCtx.arc(x, y, r * 0.3, 0, Math.PI*2);
1452
+ gCtx.fillStyle = '#fff'; gCtx.fill();
1453
+
1454
+ // Label
1455
+ gCtx.font = `bold ${fs}px JetBrains Mono, monospace`;
1456
+ const tw = gCtx.measureText(lbl).width;
1457
+ const ph = fs * 1.45, pw = tw + 12;
1458
+ const lx = Math.min(x + r + 6, W - pw);
1459
+ const ly = Math.max(0, y - ph/2);
1460
+ drawRoundRect(gCtx, lx, ly, pw, ph, 4);
1461
+ gCtx.fillStyle = col; gCtx.fill();
1462
+ gCtx.fillStyle = '#fff';
1463
+ gCtx.fillText(lbl, lx + 6, ly + ph * 0.74);
1464
+ }
1465
+ });
1466
 
1467
  updateDownloadBtn();
1468
  };
1469
+
1470
+ img.onerror = (e) => {
1471
+ dbg('Image load error: ' + e);
1472
  groundPlaceholder.textContent = 'Failed to load image for overlay.';
1473
  groundPlaceholder.style.display = 'flex';
1474
  };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1475
 
1476
+ img.src = imgSrc;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1477
  }
1478
 
1479
  // ══════════════════════════════════════════════
 
1525
  const promptStr = promptInput.value.trim();
1526
  if (!promptStr) { alert('Please enter a prompt directive.'); return; }
1527
 
1528
+ // Reset UI
1529
+ runBtn.disabled = true;
1530
+ btnLoader.style.display = 'inline-block';
1531
+ outputBox.innerText = '';
1532
+ outputBox.style.color = '';
1533
+ groundPlaceholder.style.display = 'flex';
1534
+ groundPlaceholder.textContent = 'Running inference…';
1535
+ gCtx.clearRect(0, 0, groundCanvas.width, groundCanvas.height);
1536
+ groundCanvas.width = 1; // reset canvas
1537
+ groundCanvas.height = 1;
1538
+ downloadBtn.style.display = 'none';
1539
  dotTask.classList.add('active');
1540
+ dotOut.classList.remove('active');
1541
+ dotGnd.classList.remove('active');
1542
+ allWires.forEach(id => document.getElementById(id)?.classList.add('active'));
1543
  resetCopyBtn();
1544
 
1545
+ const formData = new FormData();
1546
  formData.append('image', currentFile);
1547
  formData.append('category', categorySelect.value);
1548
  formData.append('prompt', promptStr);
1549
  formData.append('model_id', modelSelect.value);
1550
 
1551
+ let fullText = '';
1552
+ // Create a stable object URL for this run
1553
  const imgObjectURL = URL.createObjectURL(currentFile);
 
1554
 
1555
  try {
1556
+ const response = await fetch('/api/run', { method: 'POST', body: formData });
1557
+ if (!response.ok) {
1558
+ const err = await response.json();
1559
+ throw new Error(err.error || 'Execution failed.');
1560
+ }
1561
+
1562
+ const reader = response.body.getReader();
1563
+ const decoder = new TextDecoder('utf-8');
1564
+ let buffer = '';
1565
 
 
 
1566
  while (true) {
1567
+ const { value, done } = await reader.read();
1568
+ if (done) break;
1569
+ buffer += decoder.decode(value, { stream: true });
1570
+ const lines = buffer.split('\\n\\n');
1571
+ buffer = lines.pop(); // keep incomplete chunk
1572
+
1573
  for (const line of lines) {
1574
  if (!line.startsWith('data: ')) continue;
1575
+ const payload = line.slice(6); // remove 'data: '
1576
+ if (payload === '[DONE]') break;
1577
  try {
1578
+ const data = JSON.parse(payload);
1579
+ if (data.chunk) {
1580
+ fullText += data.chunk;
1581
+ outputBox.innerText = fullText;
1582
+ outputBox.scrollTop = outputBox.scrollHeight;
1583
+ }
1584
+ } catch (_) {}
1585
  }
1586
  }
1587
 
1588
  dotOut.classList.add('active');
1589
 
1590
+ // ── Grounding overlay ─────────────────────────
1591
+ const cat = categorySelect.value;
1592
+ if ((cat === 'Point' || cat === 'Detect') && fullText.trim()) {
1593
+ groundPlaceholder.textContent = 'Parsing coordinates…';
1594
+ groundPlaceholder.style.display = 'flex';
1595
+
1596
+ // Small delay so the UI updates before heavy canvas work
1597
+ setTimeout(() => {
1598
+ const parsed = extractGroundingJSON(fullText);
1599
+ if (parsed !== null) {
1600
+ dotGnd.classList.add('active');
1601
+ drawGrounding(imgObjectURL, fullText);
1602
+ } else {
1603
+ groundPlaceholder.textContent =
1604
+ 'No grounding JSON detected in model output. ' +
1605
+ 'Try rephrasing your prompt or use a VL model.';
1606
+ groundPlaceholder.style.display = 'flex';
1607
+ dbg('No JSON found. Full output: ' + fullText.slice(0, 500));
1608
+ }
1609
+ }, 50);
1610
+ } else if (cat !== 'Point' && cat !== 'Detect') {
1611
+ groundPlaceholder.textContent = 'Active for Point / Detect tasks. Run inference to visualise.';
1612
+ groundPlaceholder.style.display = 'flex';
1613
  }
1614
 
1615
+ } catch (err) {
1616
+ outputBox.innerText = `[Error] ${err.message}`;
1617
+ outputBox.style.color = '#ff6b6b';
1618
+ groundPlaceholder.textContent = 'Inference error β€” see Output Stream node.';
1619
+ groundPlaceholder.style.display = 'flex';
1620
  } finally {
1621
+ runBtn.disabled = false;
1622
+ btnLoader.style.display = 'none';
1623
  dotTask.classList.remove('active');
1624
+ allWires.forEach(id => document.getElementById(id)?.classList.remove('active'));
1625
+ // Revoke object URL after a delay to allow canvas drawing
1626
+ setTimeout(() => URL.revokeObjectURL(imgObjectURL), 10000);
1627
  }
1628
  };
1629
  </script>