Spaces:
Running on Zero
Running on Zero
update app
Browse files
app.py
CHANGED
|
@@ -36,15 +36,29 @@ DTYPE = (
|
|
| 36 |
else torch.float16
|
| 37 |
)
|
| 38 |
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
# ── Qwen3.5-4B ──────────────────────────────────────────
|
| 50 |
print(f"Loading Qwen3.5-4B model: {QWEN_4B_MODEL_NAME} on {DEVICE}...")
|
|
@@ -201,7 +215,7 @@ def safe_parse_json(text: str):
|
|
| 201 |
# --- Inference Generator (Streaming) ---
|
| 202 |
@spaces.GPU(duration=120)
|
| 203 |
def generate_inference_stream(
|
| 204 |
-
image: Image.Image, category: str, prompt: str, model_id: str = "
|
| 205 |
):
|
| 206 |
if category == "Query":
|
| 207 |
full_prompt = prompt
|
|
@@ -214,8 +228,39 @@ def generate_inference_stream(
|
|
| 214 |
else:
|
| 215 |
full_prompt = prompt
|
| 216 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
# ── Qwen3.5-4B ──────────────────────────────────────
|
| 218 |
-
|
| 219 |
if qwen_4b_model is None or qwen_4b_processor is None:
|
| 220 |
yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-4B model not loaded.'})}\n\n"
|
| 221 |
yield "data: [DONE]\n\n"
|
|
@@ -504,7 +549,7 @@ async def run_inference(
|
|
| 504 |
image: UploadFile = File(...),
|
| 505 |
category: str = Form(...),
|
| 506 |
prompt: str = Form(...),
|
| 507 |
-
model_id: str = Form("
|
| 508 |
):
|
| 509 |
try:
|
| 510 |
img_bytes = await image.read()
|
|
@@ -882,15 +927,16 @@ async def homepage(request: Request):
|
|
| 882 |
border-radius: 4px; font-size: 9px; font-weight: 700;
|
| 883 |
letter-spacing: 0.06em; text-transform: uppercase;
|
| 884 |
}
|
| 885 |
-
.model-badge.
|
| 886 |
-
.model-badge.
|
| 887 |
-
.model-badge.
|
| 888 |
-
.model-badge.
|
| 889 |
-
.model-badge.
|
| 890 |
-
.model-badge.
|
| 891 |
-
.model-badge.
|
| 892 |
-
.model-badge.
|
| 893 |
-
.model-badge.
|
|
|
|
| 894 |
|
| 895 |
.model-info-box {
|
| 896 |
border-radius: 6px; padding: 9px;
|
|
@@ -907,7 +953,7 @@ async def homepage(request: Request):
|
|
| 907 |
<span class="logo">MULTIMODAL EDGE</span>
|
| 908 |
<span class="sep">|</span>
|
| 909 |
<span class="sub">Node-Based Inference Canvas</span>
|
| 910 |
-
<span class="badge">v2.
|
| 911 |
</div>
|
| 912 |
|
| 913 |
<div id="canvas">
|
|
@@ -969,6 +1015,7 @@ async def homepage(request: Request):
|
|
| 969 |
<div>
|
| 970 |
<label>Active Model</label>
|
| 971 |
<select id="modelSelect">
|
|
|
|
| 972 |
<option value="qwen_4b">Qwen3.5-4B</option>
|
| 973 |
<option value="qwen_2b">Qwen3.5-2B</option>
|
| 974 |
<option value="qwen_vl_2b">Qwen3-VL-2B-Instruct</option>
|
|
@@ -981,10 +1028,10 @@ async def homepage(request: Request):
|
|
| 981 |
</select>
|
| 982 |
</div>
|
| 983 |
<div id="modelInfoBox" class="model-info-box"
|
| 984 |
-
style="background:rgba(255,
|
| 985 |
-
<span class="model-badge
|
| 986 |
-
Qwen3.5
|
| 987 |
-
|
| 988 |
</div>
|
| 989 |
<div style="flex:1;"></div>
|
| 990 |
</div>
|
|
@@ -1195,6 +1242,13 @@ const dotModel = document.getElementById('dot-model');
|
|
| 1195 |
dotModel.classList.add('active');
|
| 1196 |
|
| 1197 |
const MODEL_INFO = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1198 |
qwen_4b: {
|
| 1199 |
html: `<span class="model-badge q4b">QWEN 3.5 · 4B</span><br><br>
|
| 1200 |
Qwen3.5 4B multimodal model by Alibaba Cloud.
|
|
@@ -1285,22 +1339,21 @@ categorySelect.onchange = e => {
|
|
| 1285 |
|
| 1286 |
// ──────────────────────────────────────────────
|
| 1287 |
// ROBUST JSON EXTRACTOR
|
| 1288 |
-
// Strips <think>…</think> blocks
|
| 1289 |
// the first JSON array or object from the text.
|
| 1290 |
// ──────────────────────────────────────────────
|
| 1291 |
function extractGroundingJSON(raw) {
|
| 1292 |
-
// 1. Remove <think>…</think> blocks
|
| 1293 |
let text = raw.replace(/<think>[\s\S]*?<\/think>/gi, '');
|
| 1294 |
|
| 1295 |
-
// 2. Strip markdown code fences
|
| 1296 |
text = text.replace(/```(?:json)?\\s*/gi, '').replace(/```/g, '');
|
| 1297 |
|
| 1298 |
text = text.trim();
|
| 1299 |
|
| 1300 |
-
// 3. Try
|
| 1301 |
const arrIdx = text.indexOf('[');
|
| 1302 |
if (arrIdx !== -1) {
|
| 1303 |
-
// Walk forward to find the matching closing bracket
|
| 1304 |
let depth = 0, inStr = false, esc = false;
|
| 1305 |
for (let i = arrIdx; i < text.length; i++) {
|
| 1306 |
const c = text[i];
|
|
@@ -1318,7 +1371,7 @@ function extractGroundingJSON(raw) {
|
|
| 1318 |
}
|
| 1319 |
}
|
| 1320 |
|
| 1321 |
-
// 4. Try
|
| 1322 |
const objIdx = text.indexOf('{');
|
| 1323 |
if (objIdx !== -1) {
|
| 1324 |
let depth = 0, inStr = false, esc = false;
|
|
@@ -1338,9 +1391,8 @@ function extractGroundingJSON(raw) {
|
|
| 1338 |
}
|
| 1339 |
}
|
| 1340 |
|
| 1341 |
-
// 5. Last resort
|
| 1342 |
try { return JSON.parse(text); } catch(_) {}
|
| 1343 |
-
|
| 1344 |
return null;
|
| 1345 |
}
|
| 1346 |
|
|
@@ -1374,7 +1426,6 @@ function roundRect(ctx, x, y, w, h, r) {
|
|
| 1374 |
}
|
| 1375 |
|
| 1376 |
function drawGrounding(imgSrc, rawText) {
|
| 1377 |
-
// ── Extract JSON from raw model output (handles <think> blocks etc.) ──
|
| 1378 |
const parsed = extractGroundingJSON(rawText);
|
| 1379 |
if (!parsed) {
|
| 1380 |
console.warn('Grounding: could not extract JSON from output:', rawText);
|
|
@@ -1394,14 +1445,12 @@ function drawGrounding(imgSrc, rawText) {
|
|
| 1394 |
gCtx.lineWidth = lw;
|
| 1395 |
gCtx.font = `bold ${fs}px JetBrains Mono, monospace`;
|
| 1396 |
|
| 1397 |
-
// Normalise to array
|
| 1398 |
const items = Array.isArray(parsed) ? parsed : [parsed];
|
| 1399 |
|
| 1400 |
items.forEach((item, i) => {
|
| 1401 |
const col = PALETTE[i % PALETTE.length];
|
| 1402 |
|
| 1403 |
-
// ──
|
| 1404 |
-
// Accept bbox_2d, bbox, or a raw 4-number array
|
| 1405 |
let bbox = null;
|
| 1406 |
if (Array.isArray(item?.bbox_2d) && item.bbox_2d.length === 4)
|
| 1407 |
bbox = item.bbox_2d;
|
|
@@ -1413,23 +1462,17 @@ function drawGrounding(imgSrc, rawText) {
|
|
| 1413 |
|
| 1414 |
if (bbox) {
|
| 1415 |
let [x1, y1, x2, y2] = bbox.map(Number);
|
| 1416 |
-
|
| 1417 |
-
// Normalised 0-1 coords → pixel coords
|
| 1418 |
if (x1 <= 1 && y1 <= 1 && x2 <= 1 && y2 <= 1) {
|
| 1419 |
x1 *= W; y1 *= H; x2 *= W; y2 *= H;
|
| 1420 |
}
|
| 1421 |
-
|
| 1422 |
-
const bw = x2 - x1;
|
| 1423 |
-
const bh = y2 - y1;
|
| 1424 |
const lbl = item?.label ?? `obj ${i + 1}`;
|
| 1425 |
|
| 1426 |
-
// Filled rect + stroke
|
| 1427 |
gCtx.fillStyle = hexToRgba(col, 0.18);
|
| 1428 |
gCtx.fillRect(x1, y1, bw, bh);
|
| 1429 |
gCtx.strokeStyle = col;
|
| 1430 |
gCtx.strokeRect(x1, y1, bw, bh);
|
| 1431 |
|
| 1432 |
-
// Label pill above the box
|
| 1433 |
const tw = gCtx.measureText(lbl).width;
|
| 1434 |
const ph = fs * 1.4, pw = tw + 10;
|
| 1435 |
const lx = x1, ly = Math.max(0, y1 - ph);
|
|
@@ -1441,8 +1484,7 @@ function drawGrounding(imgSrc, rawText) {
|
|
| 1441 |
return;
|
| 1442 |
}
|
| 1443 |
|
| 1444 |
-
// ── Point
|
| 1445 |
-
// Accept point_2d, point, or a raw 2-number array
|
| 1446 |
let pt = null;
|
| 1447 |
if (Array.isArray(item?.point_2d) && item.point_2d.length === 2)
|
| 1448 |
pt = item.point_2d;
|
|
@@ -1454,20 +1496,15 @@ function drawGrounding(imgSrc, rawText) {
|
|
| 1454 |
|
| 1455 |
if (pt) {
|
| 1456 |
let [x, y] = pt.map(Number);
|
| 1457 |
-
|
| 1458 |
-
// Normalised 0-1 coords → pixel coords
|
| 1459 |
if (x <= 1 && y <= 1) { x *= W; y *= H; }
|
| 1460 |
-
|
| 1461 |
const r = Math.max(8, W / 60);
|
| 1462 |
const lbl = item?.label ?? `pt ${i + 1}`;
|
| 1463 |
|
| 1464 |
-
// Outer glow ring
|
| 1465 |
gCtx.beginPath();
|
| 1466 |
gCtx.arc(x, y, r * 1.7, 0, Math.PI * 2);
|
| 1467 |
gCtx.fillStyle = hexToRgba(col, 0.15);
|
| 1468 |
gCtx.fill();
|
| 1469 |
|
| 1470 |
-
// Solid dot
|
| 1471 |
gCtx.beginPath();
|
| 1472 |
gCtx.arc(x, y, r, 0, Math.PI * 2);
|
| 1473 |
gCtx.fillStyle = col;
|
|
@@ -1475,7 +1512,6 @@ function drawGrounding(imgSrc, rawText) {
|
|
| 1475 |
gCtx.strokeStyle = '#fff';
|
| 1476 |
gCtx.stroke();
|
| 1477 |
|
| 1478 |
-
// Label to the right of the dot
|
| 1479 |
gCtx.fillStyle = '#fff';
|
| 1480 |
gCtx.fillText(lbl, x + r + 4, y + fs * 0.4);
|
| 1481 |
}
|
|
@@ -1514,7 +1550,6 @@ copyBtn.onclick = () => {
|
|
| 1514 |
</svg> COPY`;
|
| 1515 |
}, 2000);
|
| 1516 |
}).catch(() => {
|
| 1517 |
-
// Fallback for older browsers
|
| 1518 |
const ta = document.createElement('textarea');
|
| 1519 |
ta.value = txt;
|
| 1520 |
ta.style.position = 'fixed'; ta.style.opacity = '0';
|
|
@@ -1603,7 +1638,7 @@ runBtn.onclick = async () => {
|
|
| 1603 |
|
| 1604 |
dotOut.classList.add('active');
|
| 1605 |
|
| 1606 |
-
//
|
| 1607 |
const cat = categorySelect.value;
|
| 1608 |
if ((cat === 'Point' || cat === 'Detect') && fullText.trim()) {
|
| 1609 |
const parsed = extractGroundingJSON(fullText);
|
|
|
|
| 36 |
else torch.float16
|
| 37 |
)
|
| 38 |
|
| 39 |
+
QWEN_4B_UNREDACTED_NAME = "prithivMLmods/Qwen3.5-4B-Unredacted-MAX"
|
| 40 |
+
QWEN_4B_MODEL_NAME = "Qwen/Qwen3.5-4B"
|
| 41 |
+
QWEN_2B_MODEL_NAME = "Qwen/Qwen3.5-2B"
|
| 42 |
+
QWEN_VL_2B_MODEL_NAME = "Qwen/Qwen3-VL-2B-Instruct"
|
| 43 |
+
QWEN_VL_4B_MODEL_NAME = "Qwen/Qwen3-VL-4B-Instruct"
|
| 44 |
+
LFM_450_MODEL_NAME = "LiquidAI/LFM2.5-VL-450M"
|
| 45 |
+
GEMMA4_E2B_NAME = "google/gemma-4-E2B-it"
|
| 46 |
+
LFM_16_MODEL_NAME = "LiquidAI/LFM2.5-VL-1.6B"
|
| 47 |
+
QWEN_UNREDACTED_NAME = "prithivMLmods/Qwen3.5-2B-Unredacted-MAX"
|
| 48 |
+
QWEN25_VL_3B_NAME = "Qwen/Qwen2.5-VL-3B-Instruct"
|
| 49 |
+
|
| 50 |
+
# ── Qwen3.5-4B-Unredacted-MAX ───────────────────────────
|
| 51 |
+
print(f"Loading Qwen3.5-4B-Unredacted-MAX: {QWEN_4B_UNREDACTED_NAME} on {DEVICE}...")
|
| 52 |
+
try:
|
| 53 |
+
qwen_4b_unredacted_model = Qwen3_5ForConditionalGeneration.from_pretrained(
|
| 54 |
+
QWEN_4B_UNREDACTED_NAME, torch_dtype=DTYPE, device_map=DEVICE,
|
| 55 |
+
).eval()
|
| 56 |
+
qwen_4b_unredacted_processor = AutoProcessor.from_pretrained(QWEN_4B_UNREDACTED_NAME)
|
| 57 |
+
print("Qwen3.5-4B-Unredacted-MAX model loaded successfully.")
|
| 58 |
+
except Exception as e:
|
| 59 |
+
print(f"Warning: Qwen3.5-4B-Unredacted-MAX model loading failed. Error: {e}")
|
| 60 |
+
qwen_4b_unredacted_model = None
|
| 61 |
+
qwen_4b_unredacted_processor = None
|
| 62 |
|
| 63 |
# ── Qwen3.5-4B ──────────────────────────────────────────
|
| 64 |
print(f"Loading Qwen3.5-4B model: {QWEN_4B_MODEL_NAME} on {DEVICE}...")
|
|
|
|
| 215 |
# --- Inference Generator (Streaming) ---
|
| 216 |
@spaces.GPU(duration=120)
|
| 217 |
def generate_inference_stream(
|
| 218 |
+
image: Image.Image, category: str, prompt: str, model_id: str = "qwen_4b_unredacted"
|
| 219 |
):
|
| 220 |
if category == "Query":
|
| 221 |
full_prompt = prompt
|
|
|
|
| 228 |
else:
|
| 229 |
full_prompt = prompt
|
| 230 |
|
| 231 |
+
# ── Qwen3.5-4B-Unredacted-MAX ───────────────────────
|
| 232 |
+
if model_id == "qwen_4b_unredacted":
|
| 233 |
+
if qwen_4b_unredacted_model is None or qwen_4b_unredacted_processor is None:
|
| 234 |
+
yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-4B-Unredacted-MAX model not loaded.'})}\n\n"
|
| 235 |
+
yield "data: [DONE]\n\n"
|
| 236 |
+
return
|
| 237 |
+
messages = [{"role": "user", "content": [
|
| 238 |
+
{"type": "image", "image": image},
|
| 239 |
+
{"type": "text", "text": full_prompt},
|
| 240 |
+
]}]
|
| 241 |
+
text_input = qwen_4b_unredacted_processor.apply_chat_template(
|
| 242 |
+
messages, tokenize=False, add_generation_prompt=True
|
| 243 |
+
)
|
| 244 |
+
inputs = qwen_4b_unredacted_processor(
|
| 245 |
+
text=[text_input], images=[image], return_tensors="pt", padding=True
|
| 246 |
+
).to(qwen_4b_unredacted_model.device)
|
| 247 |
+
streamer = TextIteratorStreamer(
|
| 248 |
+
qwen_4b_unredacted_processor.tokenizer,
|
| 249 |
+
skip_prompt=True, skip_special_tokens=True, timeout=120,
|
| 250 |
+
)
|
| 251 |
+
thread = threading.Thread(
|
| 252 |
+
target=qwen_4b_unredacted_model.generate,
|
| 253 |
+
kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
|
| 254 |
+
use_cache=True, temperature=1.5, min_p=0.1),
|
| 255 |
+
)
|
| 256 |
+
thread.start()
|
| 257 |
+
for tok in streamer:
|
| 258 |
+
if tok:
|
| 259 |
+
yield f"data: {json.dumps({'chunk': tok})}\n\n"
|
| 260 |
+
thread.join()
|
| 261 |
+
|
| 262 |
# ── Qwen3.5-4B ──────────────────────────────────────
|
| 263 |
+
elif model_id == "qwen_4b":
|
| 264 |
if qwen_4b_model is None or qwen_4b_processor is None:
|
| 265 |
yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-4B model not loaded.'})}\n\n"
|
| 266 |
yield "data: [DONE]\n\n"
|
|
|
|
| 549 |
image: UploadFile = File(...),
|
| 550 |
category: str = Form(...),
|
| 551 |
prompt: str = Form(...),
|
| 552 |
+
model_id: str = Form("qwen_4b_unredacted"),
|
| 553 |
):
|
| 554 |
try:
|
| 555 |
img_bytes = await image.read()
|
|
|
|
| 927 |
border-radius: 4px; font-size: 9px; font-weight: 700;
|
| 928 |
letter-spacing: 0.06em; text-transform: uppercase;
|
| 929 |
}
|
| 930 |
+
.model-badge.q4bunred { background: rgba(255,80,80,0.18); color: #ff5050; border: 1px solid rgba(255,80,80,0.40); }
|
| 931 |
+
.model-badge.q4b { background: rgba(255,200,80,0.15); color: #ffc850; border: 1px solid rgba(255,200,80,0.35); }
|
| 932 |
+
.model-badge.q2b { background: rgba(124,106,247,0.2); color: var(--accent); border: 1px solid rgba(124,106,247,0.3); }
|
| 933 |
+
.model-badge.qvl2b { background: rgba(255,150,50,0.15); color: #ff9632; border: 1px solid rgba(255,150,50,0.35); }
|
| 934 |
+
.model-badge.qvl4b { background: rgba(255,100,80,0.15); color: #ff6450; border: 1px solid rgba(255,100,80,0.35); }
|
| 935 |
+
.model-badge.lfm450 { background: rgba(78,205,196,0.15); color: var(--accent2); border: 1px solid rgba(78,205,196,0.3); }
|
| 936 |
+
.model-badge.g4e2b { background: rgba(66,197,107,0.15); color: #42c56b; border: 1px solid rgba(66,197,107,0.35); }
|
| 937 |
+
.model-badge.lfm16 { background: rgba(107,203,119,0.15); color: #6bcb77; border: 1px solid rgba(107,203,119,0.35); }
|
| 938 |
+
.model-badge.qunred { background: rgba(255,80,160,0.15); color: #ff50a0; border: 1px solid rgba(255,80,160,0.35); }
|
| 939 |
+
.model-badge.q25vl3b { background: rgba(80,180,255,0.15); color: #50b4ff; border: 1px solid rgba(80,180,255,0.35); }
|
| 940 |
|
| 941 |
.model-info-box {
|
| 942 |
border-radius: 6px; padding: 9px;
|
|
|
|
| 953 |
<span class="logo">MULTIMODAL EDGE</span>
|
| 954 |
<span class="sep">|</span>
|
| 955 |
<span class="sub">Node-Based Inference Canvas</span>
|
| 956 |
+
<span class="badge">v2.8 β DECA MODEL</span>
|
| 957 |
</div>
|
| 958 |
|
| 959 |
<div id="canvas">
|
|
|
|
| 1015 |
<div>
|
| 1016 |
<label>Active Model</label>
|
| 1017 |
<select id="modelSelect">
|
| 1018 |
+
<option value="qwen_4b_unredacted">Qwen3.5-4B-Unredacted-MAX</option>
|
| 1019 |
<option value="qwen_4b">Qwen3.5-4B</option>
|
| 1020 |
<option value="qwen_2b">Qwen3.5-2B</option>
|
| 1021 |
<option value="qwen_vl_2b">Qwen3-VL-2B-Instruct</option>
|
|
|
|
| 1028 |
</select>
|
| 1029 |
</div>
|
| 1030 |
<div id="modelInfoBox" class="model-info-box"
|
| 1031 |
+
style="background:rgba(255,80,80,0.07);border:1px solid rgba(255,80,80,0.3);">
|
| 1032 |
+
<span class="model-badge q4bunred">QWEN 3.5 · 4B UNREDACTED MAX</span><br><br>
|
| 1033 |
+
Qwen3.5-4B-Unredacted-MAX by prithivMLmods. Uncensored fine-tune of Qwen3.5-4B
|
| 1034 |
+
with extended instruction-following & unrestricted reasoning.
|
| 1035 |
</div>
|
| 1036 |
<div style="flex:1;"></div>
|
| 1037 |
</div>
|
|
|
|
| 1242 |
dotModel.classList.add('active');
|
| 1243 |
|
| 1244 |
const MODEL_INFO = {
|
| 1245 |
+
qwen_4b_unredacted: {
|
| 1246 |
+
html: `<span class="model-badge q4bunred">QWEN 3.5 · 4B UNREDACTED MAX</span><br><br>
|
| 1247 |
+
Qwen3.5-4B-Unredacted-MAX by prithivMLmods. Uncensored fine-tune of Qwen3.5-4B
|
| 1248 |
+
with extended instruction-following & unrestricted reasoning.`,
|
| 1249 |
+
bg: 'rgba(255,80,80,0.07)',
|
| 1250 |
+
border: 'rgba(255,80,80,0.30)',
|
| 1251 |
+
},
|
| 1252 |
qwen_4b: {
|
| 1253 |
html: `<span class="model-badge q4b">QWEN 3.5 · 4B</span><br><br>
|
| 1254 |
Qwen3.5 4B multimodal model by Alibaba Cloud.
|
|
|
|
| 1339 |
|
| 1340 |
// ──────────────────────────────────────────────
|
| 1341 |
// ROBUST JSON EXTRACTOR
|
| 1342 |
+
// Strips <think>…</think> blocks then pulls
|
| 1343 |
// the first JSON array or object from the text.
|
| 1344 |
// ──────────────────────────────────────────────
|
| 1345 |
function extractGroundingJSON(raw) {
|
| 1346 |
+
// 1. Remove <think>…</think> blocks
|
| 1347 |
let text = raw.replace(/<think>[\s\S]*?<\/think>/gi, '');
|
| 1348 |
|
| 1349 |
+
// 2. Strip markdown code fences
|
| 1350 |
text = text.replace(/```(?:json)?\\s*/gi, '').replace(/```/g, '');
|
| 1351 |
|
| 1352 |
text = text.trim();
|
| 1353 |
|
| 1354 |
+
// 3. Try JSON array first [ … ]
|
| 1355 |
const arrIdx = text.indexOf('[');
|
| 1356 |
if (arrIdx !== -1) {
|
|
|
|
| 1357 |
let depth = 0, inStr = false, esc = false;
|
| 1358 |
for (let i = arrIdx; i < text.length; i++) {
|
| 1359 |
const c = text[i];
|
|
|
|
| 1371 |
}
|
| 1372 |
}
|
| 1373 |
|
| 1374 |
+
// 4. Try JSON object { … }
|
| 1375 |
const objIdx = text.indexOf('{');
|
| 1376 |
if (objIdx !== -1) {
|
| 1377 |
let depth = 0, inStr = false, esc = false;
|
|
|
|
| 1391 |
}
|
| 1392 |
}
|
| 1393 |
|
| 1394 |
+
// 5. Last resort
|
| 1395 |
try { return JSON.parse(text); } catch(_) {}
|
|
|
|
| 1396 |
return null;
|
| 1397 |
}
|
| 1398 |
|
|
|
|
| 1426 |
}
|
| 1427 |
|
| 1428 |
function drawGrounding(imgSrc, rawText) {
|
|
|
|
| 1429 |
const parsed = extractGroundingJSON(rawText);
|
| 1430 |
if (!parsed) {
|
| 1431 |
console.warn('Grounding: could not extract JSON from output:', rawText);
|
|
|
|
| 1445 |
gCtx.lineWidth = lw;
|
| 1446 |
gCtx.font = `bold ${fs}px JetBrains Mono, monospace`;
|
| 1447 |
|
|
|
|
| 1448 |
const items = Array.isArray(parsed) ? parsed : [parsed];
|
| 1449 |
|
| 1450 |
items.forEach((item, i) => {
|
| 1451 |
const col = PALETTE[i % PALETTE.length];
|
| 1452 |
|
| 1453 |
+
// ── Bounding box ─────────────────────────────
|
|
|
|
| 1454 |
let bbox = null;
|
| 1455 |
if (Array.isArray(item?.bbox_2d) && item.bbox_2d.length === 4)
|
| 1456 |
bbox = item.bbox_2d;
|
|
|
|
| 1462 |
|
| 1463 |
if (bbox) {
|
| 1464 |
let [x1, y1, x2, y2] = bbox.map(Number);
|
|
|
|
|
|
|
| 1465 |
if (x1 <= 1 && y1 <= 1 && x2 <= 1 && y2 <= 1) {
|
| 1466 |
x1 *= W; y1 *= H; x2 *= W; y2 *= H;
|
| 1467 |
}
|
| 1468 |
+
const bw = x2 - x1, bh = y2 - y1;
|
|
|
|
|
|
|
| 1469 |
const lbl = item?.label ?? `obj ${i + 1}`;
|
| 1470 |
|
|
|
|
| 1471 |
gCtx.fillStyle = hexToRgba(col, 0.18);
|
| 1472 |
gCtx.fillRect(x1, y1, bw, bh);
|
| 1473 |
gCtx.strokeStyle = col;
|
| 1474 |
gCtx.strokeRect(x1, y1, bw, bh);
|
| 1475 |
|
|
|
|
| 1476 |
const tw = gCtx.measureText(lbl).width;
|
| 1477 |
const ph = fs * 1.4, pw = tw + 10;
|
| 1478 |
const lx = x1, ly = Math.max(0, y1 - ph);
|
|
|
|
| 1484 |
return;
|
| 1485 |
}
|
| 1486 |
|
| 1487 |
+
// ── Point ────────────────────────────────────
|
|
|
|
| 1488 |
let pt = null;
|
| 1489 |
if (Array.isArray(item?.point_2d) && item.point_2d.length === 2)
|
| 1490 |
pt = item.point_2d;
|
|
|
|
| 1496 |
|
| 1497 |
if (pt) {
|
| 1498 |
let [x, y] = pt.map(Number);
|
|
|
|
|
|
|
| 1499 |
if (x <= 1 && y <= 1) { x *= W; y *= H; }
|
|
|
|
| 1500 |
const r = Math.max(8, W / 60);
|
| 1501 |
const lbl = item?.label ?? `pt ${i + 1}`;
|
| 1502 |
|
|
|
|
| 1503 |
gCtx.beginPath();
|
| 1504 |
gCtx.arc(x, y, r * 1.7, 0, Math.PI * 2);
|
| 1505 |
gCtx.fillStyle = hexToRgba(col, 0.15);
|
| 1506 |
gCtx.fill();
|
| 1507 |
|
|
|
|
| 1508 |
gCtx.beginPath();
|
| 1509 |
gCtx.arc(x, y, r, 0, Math.PI * 2);
|
| 1510 |
gCtx.fillStyle = col;
|
|
|
|
| 1512 |
gCtx.strokeStyle = '#fff';
|
| 1513 |
gCtx.stroke();
|
| 1514 |
|
|
|
|
| 1515 |
gCtx.fillStyle = '#fff';
|
| 1516 |
gCtx.fillText(lbl, x + r + 4, y + fs * 0.4);
|
| 1517 |
}
|
|
|
|
| 1550 |
</svg> COPY`;
|
| 1551 |
}, 2000);
|
| 1552 |
}).catch(() => {
|
|
|
|
| 1553 |
const ta = document.createElement('textarea');
|
| 1554 |
ta.value = txt;
|
| 1555 |
ta.style.position = 'fixed'; ta.style.opacity = '0';
|
|
|
|
| 1638 |
|
| 1639 |
dotOut.classList.add('active');
|
| 1640 |
|
| 1641 |
+
// Attempt grounding overlay for Point / Detect
|
| 1642 |
const cat = categorySelect.value;
|
| 1643 |
if ((cat === 'Point' || cat === 'Detect') && fullText.trim()) {
|
| 1644 |
const parsed = extractGroundingJSON(fullText);
|