prithivMLmods committed on
Commit
94a2a1e
·
verified ·
1 Parent(s): ee59a4d

update app

Browse files
Files changed (1) hide show
  1. app.py +174 -124
app.py CHANGED
@@ -16,12 +16,14 @@ from gradio import Server
16
  from fastapi import Request, UploadFile, File, Form
17
  from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse
18
  from transformers import (
 
19
  Qwen3_5ForConditionalGeneration,
20
  Qwen3VLForConditionalGeneration,
21
  AutoProcessor,
22
  AutoModelForImageTextToText,
23
  TextIteratorStreamer,
24
  )
 
25
 
26
  # --- App Configuration & Initialization ---
27
  app = Server()
@@ -39,6 +41,7 @@ QWEN_VL_MODEL_NAME = "Qwen/Qwen3-VL-2B-Instruct"
39
  LFM_450_MODEL_NAME = "LiquidAI/LFM2.5-VL-450M"
40
  LFM_16_MODEL_NAME = "LiquidAI/LFM2.5-VL-1.6B"
41
  QWEN_UNREDACTED_NAME = "prithivMLmods/Qwen3.5-2B-Unredacted-MAX"
 
42
 
43
  # ── Qwen3.5-4B ──────────────────────────────────────────
44
  print(f"Loading Qwen3.5-4B model: {QWEN_4B_MODEL_NAME} on {DEVICE}...")
@@ -126,6 +129,21 @@ except Exception as e:
126
  qwen_unredacted_model = None
127
  qwen_unredacted_processor = None
128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
  # --- Utility Functions ---
131
  def safe_parse_json(text: str):
@@ -339,6 +357,42 @@ def generate_inference_stream(
339
  yield f"data: {json.dumps({'chunk': tok})}\n\n"
340
  thread.join()
341
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
  yield "data: [DONE]\n\n"
343
 
344
 
@@ -405,7 +459,7 @@ async def homepage(request: Request):
405
  radial-gradient(circle at 80% 20%, rgba(78,205,196,0.04) 0%, transparent 50%),
406
  linear-gradient(var(--grid) 1px, transparent 1px),
407
  linear-gradient(90deg, var(--grid) 1px, transparent 1px);
408
- background-size: 100% 100%, 100% 100%, 28px 28px, 28px 28px;
409
  overflow-x: auto;
410
  overflow-y: auto;
411
  }
@@ -413,30 +467,30 @@ async def homepage(request: Request):
413
  /* ── Top Bar ── */
414
  .top-bar {
415
  position: sticky; top: 0; left: 0; right: 0;
416
- height: 50px;
417
  background: rgba(13,13,15,0.95);
418
  border-bottom: 1px solid var(--node-border);
419
- display: flex; align-items: center; padding: 0 28px;
420
- gap: 16px; z-index: 1000;
421
  backdrop-filter: blur(12px);
422
  }
423
- .top-bar .logo { font-size: 15px; font-weight: 700; color: var(--accent); letter-spacing: 0.05em; }
424
- .top-bar .sep { color: var(--node-border); font-size: 15px; }
425
- .top-bar .sub { font-size: 13px; color: var(--muted); }
426
  .top-bar .badge {
427
  margin-left: auto;
428
  background: rgba(124,106,247,0.15);
429
  border: 1px solid rgba(124,106,247,0.3);
430
- padding: 4px 14px; border-radius: 20px;
431
- font-size: 11px; color: var(--accent);
432
  }
433
 
434
  /* ── Canvas ── */
435
  #canvas {
436
  position: relative;
437
- width: 1560px;
438
- min-height: calc(100vh - 50px);
439
- height: 1080px;
440
  margin: 0 auto;
441
  }
442
 
@@ -460,26 +514,27 @@ async def homepage(request: Request):
460
  /* ── Nodes ── */
461
  .node {
462
  position: absolute;
463
- width: 340px;
464
  background: var(--node-bg);
465
  border: 1px solid var(--node-border);
466
- border-radius: 11px;
467
- box-shadow: 0 10px 34px rgba(0,0,0,0.5);
468
  z-index: 10;
469
  display: flex; flex-direction: column;
470
  transition: box-shadow 0.2s;
471
  }
472
  .node:hover {
473
- box-shadow: 0 10px 34px rgba(0,0,0,0.5), 0 0 0 1px rgba(124,106,247,0.3);
 
474
  }
475
- .node.fixed-height { height: 420px; }
476
 
477
  .node-header {
478
  background: var(--node-header);
479
- padding: 9px 14px;
480
  border-bottom: 1px solid var(--node-border);
481
- border-radius: 11px 11px 0 0;
482
- font-size: 13px; font-weight: 700;
483
  cursor: grab;
484
  display: flex; justify-content: space-between; align-items: center;
485
  flex-shrink: 0;
@@ -487,32 +542,32 @@ async def homepage(request: Request):
487
  }
488
  .node-header:active { cursor: grabbing; }
489
  .node-header .id {
490
- font-size: 11px; color: var(--muted);
491
  background: rgba(255,255,255,0.04);
492
- padding: 3px 9px; border-radius: 5px;
493
  }
494
 
495
  .node-body {
496
- padding: 13px;
497
- display: flex; flex-direction: column; gap: 10px;
498
  flex: 1; overflow: hidden;
499
  }
500
 
501
  /* ── Ports ── */
502
  .port {
503
  position: absolute;
504
- width: 13px; height: 13px;
505
  background: var(--node-bg);
506
  border: 2px solid var(--port);
507
  border-radius: 50%; z-index: 30;
508
  }
509
- .port.out { right: -7px; }
510
- .port.in { left: -7px; }
511
 
512
  /* ── Labels ── */
513
  label {
514
- font-size: 11px; color: var(--muted);
515
- font-weight: 600; display: block; margin-bottom: 4px;
516
  letter-spacing: 0.07em; text-transform: uppercase;
517
  }
518
 
@@ -526,12 +581,12 @@ async def homepage(request: Request):
526
 
527
  .file-upload {
528
  border: 1.5px dashed var(--node-border);
529
- border-radius: 8px; padding: 18px 12px;
530
  text-align: center; cursor: pointer;
531
- font-size: 12px; color: var(--muted);
532
  transition: border-color 0.2s, background 0.2s;
533
  background: rgba(255,255,255,0.01);
534
- display: flex; flex-direction: column; align-items: center; gap: 6px;
535
  }
536
  .file-upload:hover {
537
  border-color: var(--accent);
@@ -544,7 +599,7 @@ async def homepage(request: Request):
544
  .preview-wrap {
545
  display: none;
546
  position: relative;
547
- border-radius: 8px;
548
  overflow: hidden;
549
  border: 1px solid var(--node-border);
550
  background: #000;
@@ -553,16 +608,16 @@ async def homepage(request: Request):
553
 
554
  .img-preview {
555
  width: 100%;
556
- height: 210px;
557
  object-fit: contain;
558
  display: block;
559
  }
560
 
561
- /* ── Clear button overlaid on preview ── */
562
  .clear-btn {
563
  position: absolute;
564
- top: 7px; right: 7px;
565
- width: 28px; height: 28px;
566
  border-radius: 50%;
567
  background: rgba(13,13,15,0.82);
568
  border: 1px solid var(--node-border);
@@ -574,44 +629,44 @@ async def homepage(request: Request):
574
  backdrop-filter: blur(6px);
575
  }
576
  .clear-btn:hover {
577
- background: rgba(255,107,107,0.18);
578
  border-color: var(--accent3);
579
  transform: scale(1.08);
580
  }
581
- .clear-btn:active { transform: scale(0.96); }
582
  .clear-btn svg { pointer-events: none; }
583
 
584
- /* ── Image filename chip ── */
585
  .img-chip {
586
  display: none;
587
- align-items: center; gap: 7px;
588
  background: rgba(124,106,247,0.08);
589
  border: 1px solid rgba(124,106,247,0.22);
590
- border-radius: 6px;
591
- padding: 5px 10px;
592
- font-size: 10px; color: var(--muted);
593
  overflow: hidden;
594
  }
595
  .img-chip.visible { display: flex; }
596
  .img-chip .chip-dot {
597
- width: 6px; height: 6px; border-radius: 50%;
598
  background: var(--accent2); flex-shrink: 0;
599
- box-shadow: 0 0 5px var(--accent2);
600
  }
601
  .img-chip .chip-name {
602
  overflow: hidden; text-overflow: ellipsis;
603
  white-space: nowrap; flex: 1;
604
- color: var(--text);
605
  }
606
- .img-chip .chip-size { color: var(--muted); flex-shrink: 0; }
607
 
608
  select, textarea {
609
  width: 100%;
610
  background: rgba(0,0,0,0.3);
611
  border: 1px solid var(--node-border);
612
- color: var(--text); padding: 8px 11px;
613
- border-radius: 6px; outline: none;
614
- font-size: 12px; font-family: 'JetBrains Mono', monospace;
615
  resize: none; transition: border-color 0.2s;
616
  }
617
  select:focus, textarea:focus { border-color: var(--accent); }
@@ -620,13 +675,13 @@ async def homepage(request: Request):
620
  button.run-btn {
621
  background: linear-gradient(135deg, var(--accent), #9b59b6);
622
  color: #fff; border: none;
623
- padding: 10px; border-radius: 7px;
624
- font-weight: 700; font-size: 13px;
625
  font-family: 'JetBrains Mono', monospace;
626
  cursor: pointer;
627
  transition: opacity 0.2s, transform 0.1s;
628
  display: flex; justify-content: center; align-items: center; gap: 10px;
629
- letter-spacing: 0.04em; flex-shrink: 0;
630
  }
631
  button.run-btn:hover { opacity: 0.9; }
632
  button.run-btn:active { transform: scale(0.98); }
@@ -637,9 +692,9 @@ async def homepage(request: Request):
637
  .output-box {
638
  background: rgba(0,0,0,0.4);
639
  border: 1px solid var(--node-border);
640
- border-radius: 6px; padding: 12px;
641
  flex: 1; overflow-y: auto;
642
- font-size: 12px; line-height: 1.65;
643
  color: #c8c8e0; white-space: pre-wrap;
644
  user-select: text;
645
  font-family: 'JetBrains Mono', monospace;
@@ -649,7 +704,7 @@ async def homepage(request: Request):
649
  .ground-canvas-wrap {
650
  position: relative; flex: 1;
651
  border: 1px solid var(--node-border);
652
- border-radius: 6px; overflow: hidden;
653
  background: #000; min-height: 0;
654
  }
655
  .ground-canvas-wrap canvas {
@@ -659,11 +714,11 @@ async def homepage(request: Request):
659
  .ground-placeholder {
660
  position: absolute; inset: 0;
661
  display: flex; align-items: center; justify-content: center;
662
- font-size: 12px; color: var(--muted); text-align: center; padding: 12px;
663
  }
664
 
665
  .loader {
666
- width: 13px; height: 13px;
667
  border: 2px solid rgba(255,255,255,0.3);
668
  border-top-color: #fff; border-radius: 50%;
669
  animation: spin 0.7s linear infinite;
@@ -672,34 +727,35 @@ async def homepage(request: Request):
672
  @keyframes spin { to { transform: rotate(360deg); } }
673
 
674
  .status-dot {
675
- width: 7px; height: 7px; border-radius: 50%;
676
- background: var(--muted); display: inline-block; margin-right: 7px;
677
  }
678
  .status-dot.active {
679
  background: var(--accent2);
680
- box-shadow: 0 0 6px var(--accent2);
681
  }
682
 
683
  /* ── Model badges ── */
684
  .model-badge {
685
- display: inline-block; padding: 3px 9px;
686
- border-radius: 5px; font-size: 10px; font-weight: 700;
687
- letter-spacing: 0.06em; text-transform: uppercase;
688
  }
689
- .model-badge.q4b { background: rgba(255,200,80,0.15); color: #ffc850; border: 1px solid rgba(255,200,80,0.35); }
690
- .model-badge.q2b { background: rgba(124,106,247,0.2); color: var(--accent); border: 1px solid rgba(124,106,247,0.3); }
691
- .model-badge.qvl { background: rgba(255,150,50,0.15); color: #ff9632; border: 1px solid rgba(255,150,50,0.35); }
692
- .model-badge.lfm450 { background: rgba(78,205,196,0.15); color: var(--accent2); border: 1px solid rgba(78,205,196,0.3); }
693
- .model-badge.lfm16 { background: rgba(107,203,119,0.15); color: #6bcb77; border: 1px solid rgba(107,203,119,0.35); }
694
- .model-badge.qunred { background: rgba(255,80,160,0.15); color: #ff50a0; border: 1px solid rgba(255,80,160,0.35); }
 
695
 
696
  .model-info-box {
697
- border-radius: 7px; padding: 11px;
698
- font-size: 11px; color: var(--muted); line-height: 1.6;
699
  flex-shrink: 0;
700
  }
701
 
702
- .canvas-footer { height: 44px; }
703
  </style>
704
  </head>
705
  <body>
@@ -708,7 +764,7 @@ async def homepage(request: Request):
708
  <span class="logo">MULTIMODAL EDGE</span>
709
  <span class="sep">|</span>
710
  <span class="sub">Node-Based Inference Canvas</span>
711
- <span class="badge">v2.4 — HEXA MODEL</span>
712
  </div>
713
 
714
  <div id="canvas">
@@ -720,7 +776,7 @@ async def homepage(request: Request):
720
  </svg>
721
 
722
  <!-- ─── ID 01 : Image Input ─── -->
723
- <div class="node fixed-height" id="node-img" style="left:48px; top:60px;">
724
  <div class="node-header">
725
  <span><span class="status-dot" id="dot-img"></span>Input Image</span>
726
  <span class="id">ID: 01</span>
@@ -729,9 +785,9 @@ async def homepage(request: Request):
729
  <div>
730
  <label>Upload Image</label>
731
 
732
- <!-- Drop zone (shown when no image) -->
733
  <div class="file-upload" id="dropZone">
734
- <svg width="34" height="34" viewBox="0 0 24 24" fill="none"
735
  stroke="#7c6af7" stroke-width="1.5"
736
  stroke-linecap="round" stroke-linejoin="round">
737
  <rect x="3" y="3" width="18" height="18" rx="2" ry="2"/>
@@ -742,12 +798,11 @@ async def homepage(request: Request):
742
  <input type="file" id="fileInput" accept="image/*">
743
  </div>
744
 
745
- <!-- Preview (shown when image loaded) -->
746
  <div class="preview-wrap" id="previewWrap">
747
  <img id="imgPreview" class="img-preview" />
748
- <!-- Clear button -->
749
  <button class="clear-btn" id="clearBtn" title="Remove image">
750
- <svg width="14" height="14" viewBox="0 0 24 24" fill="none"
751
  stroke="currentColor" stroke-width="2.5"
752
  stroke-linecap="round" stroke-linejoin="round">
753
  <line x1="18" y1="6" x2="6" y2="18"/>
@@ -757,7 +812,7 @@ async def homepage(request: Request):
757
  </div>
758
 
759
  <!-- Filename chip -->
760
- <div class="img-chip" id="imgChip" style="margin-top:7px;">
761
  <span class="chip-dot"></span>
762
  <span class="chip-name" id="chipName">—</span>
763
  <span class="chip-size" id="chipSize"></span>
@@ -768,7 +823,7 @@ async def homepage(request: Request):
768
  </div>
769
 
770
  <!-- ─── ID 02 : Model Selector ─── -->
771
- <div class="node fixed-height" id="node-model" style="left:48px; top:502px;">
772
  <div class="node-header">
773
  <span><span class="status-dot" id="dot-model"></span>Model Selector</span>
774
  <span class="id">ID: 02</span>
@@ -783,6 +838,7 @@ async def homepage(request: Request):
783
  <option value="lfm_450">LFM2.5-VL-450M (LiquidAI)</option>
784
  <option value="lfm_16">LFM2.5-VL-1.6B (LiquidAI)</option>
785
  <option value="qwen_unredacted">Qwen3.5-2B-Unredacted-MAX</option>
 
786
  </select>
787
  </div>
788
  <div id="modelInfoBox" class="model-info-box"
@@ -797,7 +853,7 @@ async def homepage(request: Request):
797
  </div>
798
 
799
  <!-- ─── ID 03 : Task Config ─── -->
800
- <div class="node fixed-height" id="node-task" style="left:490px; top:60px;">
801
  <div class="port in" id="port-task-in" style="top:50%;transform:translateY(-50%);"></div>
802
  <div class="node-header">
803
  <span><span class="status-dot" id="dot-task"></span>Task Config</span>
@@ -815,7 +871,7 @@ async def homepage(request: Request):
815
  </div>
816
  <div>
817
  <label>Prompt Directive</label>
818
- <textarea id="promptInput" rows="5"
819
  placeholder="e.g., Count the total number of boats and describe the environment."></textarea>
820
  </div>
821
  <button class="run-btn" id="runBtn">
@@ -827,7 +883,7 @@ async def homepage(request: Request):
827
  </div>
828
 
829
  <!-- ─── ID 04 : Output Stream ─── -->
830
- <div class="node fixed-height" id="node-out" style="left:932px; top:60px;">
831
  <div class="port in" id="port-out-in" style="top:50%;transform:translateY(-50%);"></div>
832
  <div class="node-header">
833
  <span><span class="status-dot" id="dot-out"></span>Output Stream</span>
@@ -840,7 +896,7 @@ async def homepage(request: Request):
840
  </div>
841
 
842
  <!-- ─── ID 05 : Grounding Visualiser ─── -->
843
- <div class="node fixed-height" id="node-gnd" style="left:932px; top:502px;">
844
  <div class="port in" id="port-gnd-in" style="top:50%;transform:translateY(-50%);"></div>
845
  <div class="node-header">
846
  <span><span class="status-dot" id="dot-gnd"></span>View Grounding</span>
@@ -884,10 +940,10 @@ function bezier(p1, p2) {
884
 
885
  function updateWires() {
886
  const wires = [
887
- ['wire-img-task', 'port-img-out', 'port-task-in'],
888
- ['wire-model-task', 'port-model-out', 'port-task-in'],
889
- ['wire-task-out', 'port-task-out', 'port-out-in'],
890
- ['wire-task-gnd', 'port-task-out', 'port-gnd-in'],
891
  ];
892
  for (const [id, from, to] of wires) {
893
  const el = document.getElementById(id);
@@ -928,64 +984,51 @@ requestAnimationFrame(updateWires);
928
  // ══════════════════════════════════════════════
929
  let currentFile = null;
930
 
931
- const dropZone = document.getElementById('dropZone');
932
- const fileInput = document.getElementById('fileInput');
933
- const previewWrap= document.getElementById('previewWrap');
934
- const imgPreview = document.getElementById('imgPreview');
935
- const clearBtn = document.getElementById('clearBtn');
936
- const imgChip = document.getElementById('imgChip');
937
- const chipName = document.getElementById('chipName');
938
- const chipSize = document.getElementById('chipSize');
939
- const dotImg = document.getElementById('dot-img');
940
 
941
  function formatBytes(bytes) {
942
  if (bytes < 1024) return bytes + ' B';
943
- if (bytes < 1024*1024) return (bytes/1024).toFixed(1) + ' KB';
944
- return (bytes/(1024*1024)).toFixed(1) + ' MB';
945
  }
946
 
947
  function handleFile(file) {
948
  if (!file || !file.type.startsWith('image/')) return;
949
  currentFile = file;
950
-
951
- // preview
952
  imgPreview.src = URL.createObjectURL(file);
953
  previewWrap.classList.add('visible');
954
  dropZone.style.display = 'none';
955
-
956
- // chip
957
  chipName.textContent = file.name;
958
  chipSize.textContent = formatBytes(file.size);
959
  imgChip.classList.add('visible');
960
-
961
  dotImg.classList.add('active');
962
  requestAnimationFrame(updateWires);
963
  }
964
 
965
  function clearImage() {
966
  currentFile = null;
967
-
968
- // reset preview
969
  imgPreview.src = '';
970
  previewWrap.classList.remove('visible');
971
  dropZone.style.display = '';
972
-
973
- // reset chip
974
  imgChip.classList.remove('visible');
975
  chipName.textContent = '—';
976
  chipSize.textContent = '';
977
-
978
- // reset file input so same file can be re-selected
979
  fileInput.value = '';
980
-
981
  dotImg.classList.remove('active');
982
  requestAnimationFrame(updateWires);
983
  }
984
 
985
  dropZone.onclick = () => fileInput.click();
986
  fileInput.onchange = e => handleFile(e.target.files[0]);
987
- clearBtn.onclick = (e) => { e.stopPropagation(); clearImage(); };
988
-
989
  dropZone.ondragover = e => { e.preventDefault(); dropZone.style.borderColor = 'var(--accent)'; };
990
  dropZone.ondragleave = () => { dropZone.style.borderColor = ''; };
991
  dropZone.ondrop = e => {
@@ -1044,14 +1087,21 @@ const MODEL_INFO = {
1044
  bg: 'rgba(255,80,160,0.07)',
1045
  border: 'rgba(255,80,160,0.25)',
1046
  },
 
 
 
 
 
 
 
1047
  };
1048
 
1049
  modelSelect.onchange = () => {
1050
  const info = MODEL_INFO[modelSelect.value];
1051
  if (!info) return;
1052
  modelInfoBox.innerHTML = info.html;
1053
- modelInfoBox.style.background = info.bg;
1054
- modelInfoBox.style.border = `1px solid ${info.border}`;
1055
  };
1056
 
1057
  // ══════════════════════════════════════════════
@@ -1127,7 +1177,7 @@ function drawGrounding(imgSrc, jsonText) {
1127
  groundPlaceholder.style.display = 'none';
1128
 
1129
  const lw = Math.max(2, W/200);
1130
- const fs = Math.max(12, W/40);
1131
  gCtx.lineWidth = lw;
1132
  gCtx.font = `bold ${fs}px JetBrains Mono, monospace`;
1133
 
@@ -1157,12 +1207,12 @@ function drawGrounding(imgSrc, jsonText) {
1157
  gCtx.strokeRect(x1, y1, bw, bh);
1158
 
1159
  const tw = gCtx.measureText(lbl).width;
1160
- const ph = fs*1.4, pw = tw+10;
1161
  const lx = x1, ly = Math.max(0, y1-ph);
1162
  gCtx.fillStyle = col;
1163
- roundRect(gCtx, lx, ly, pw, ph, 4); gCtx.fill();
1164
  gCtx.fillStyle = '#fff';
1165
- gCtx.fillText(lbl, lx+5, ly+ph*0.76);
1166
  return;
1167
  }
1168
 
@@ -1176,7 +1226,7 @@ function drawGrounding(imgSrc, jsonText) {
1176
  if (pt) {
1177
  let [x,y] = pt;
1178
  if (x <= 1 && y <= 1) { x*=W; y*=H; }
1179
- const r = Math.max(8, W/60);
1180
  const lbl = item?.label || `${i+1}`;
1181
 
1182
  gCtx.beginPath();
@@ -1189,7 +1239,7 @@ function drawGrounding(imgSrc, jsonText) {
1189
  gCtx.strokeStyle = '#fff'; gCtx.stroke();
1190
 
1191
  gCtx.fillStyle = '#fff';
1192
- gCtx.fillText(lbl, x+r+4, y+fs*0.4);
1193
  }
1194
  });
1195
  };
 
16
  from fastapi import Request, UploadFile, File, Form
17
  from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse
18
  from transformers import (
19
+ Qwen2_5_VLForConditionalGeneration,
20
  Qwen3_5ForConditionalGeneration,
21
  Qwen3VLForConditionalGeneration,
22
  AutoProcessor,
23
  AutoModelForImageTextToText,
24
  TextIteratorStreamer,
25
  )
26
+ from qwen_vl_utils import process_vision_info
27
 
28
  # --- App Configuration & Initialization ---
29
  app = Server()
 
41
  LFM_450_MODEL_NAME = "LiquidAI/LFM2.5-VL-450M"
42
  LFM_16_MODEL_NAME = "LiquidAI/LFM2.5-VL-1.6B"
43
  QWEN_UNREDACTED_NAME = "prithivMLmods/Qwen3.5-2B-Unredacted-MAX"
44
+ QWEN25_VL_3B_NAME = "Qwen/Qwen2.5-VL-3B-Instruct"
45
 
46
  # ── Qwen3.5-4B ──────────────────────────────────────────
47
  print(f"Loading Qwen3.5-4B model: {QWEN_4B_MODEL_NAME} on {DEVICE}...")
 
129
  qwen_unredacted_model = None
130
  qwen_unredacted_processor = None
131
 
132
+ # ── Qwen2.5-VL-3B-Instruct ──────────────────────────────
133
+ print(f"Loading Qwen2.5-VL-3B-Instruct model: {QWEN25_VL_3B_NAME} on {DEVICE}...")
134
+ try:
135
+ qwen25_vl_3b_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
136
+ QWEN25_VL_3B_NAME,
137
+ torch_dtype="auto",
138
+ device_map="auto",
139
+ ).eval()
140
+ qwen25_vl_3b_processor = AutoProcessor.from_pretrained(QWEN25_VL_3B_NAME)
141
+ print("Qwen2.5-VL-3B-Instruct model loaded successfully.")
142
+ except Exception as e:
143
+ print(f"Warning: Qwen2.5-VL-3B-Instruct model loading failed. Error: {e}")
144
+ qwen25_vl_3b_model = None
145
+ qwen25_vl_3b_processor = None
146
+
147
 
148
  # --- Utility Functions ---
149
  def safe_parse_json(text: str):
 
357
  yield f"data: {json.dumps({'chunk': tok})}\n\n"
358
  thread.join()
359
 
360
+ # ── Qwen2.5-VL-3B-Instruct ──────────────────────────
361
+ elif model_id == "qwen25_vl_3b":
362
+ if qwen25_vl_3b_model is None or qwen25_vl_3b_processor is None:
363
+ yield f"data: {json.dumps({'chunk': '[Error] Qwen2.5-VL-3B-Instruct model not loaded.'})}\n\n"
364
+ yield "data: [DONE]\n\n"
365
+ return
366
+ messages = [{"role": "user", "content": [
367
+ {"type": "image", "image": image},
368
+ {"type": "text", "text": full_prompt},
369
+ ]}]
370
+ text_input = qwen25_vl_3b_processor.apply_chat_template(
371
+ messages, tokenize=False, add_generation_prompt=True
372
+ )
373
+ image_inputs, video_inputs = process_vision_info(messages)
374
+ inputs = qwen25_vl_3b_processor(
375
+ text=[text_input],
376
+ images=image_inputs,
377
+ videos=video_inputs,
378
+ return_tensors="pt",
379
+ padding=True,
380
+ ).to(qwen25_vl_3b_model.device)
381
+ streamer = TextIteratorStreamer(
382
+ qwen25_vl_3b_processor.tokenizer,
383
+ skip_prompt=True, skip_special_tokens=True, timeout=120,
384
+ )
385
+ thread = threading.Thread(
386
+ target=qwen25_vl_3b_model.generate,
387
+ kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
388
+ use_cache=True, temperature=1.0, do_sample=True),
389
+ )
390
+ thread.start()
391
+ for tok in streamer:
392
+ if tok:
393
+ yield f"data: {json.dumps({'chunk': tok})}\n\n"
394
+ thread.join()
395
+
396
  yield "data: [DONE]\n\n"
397
 
398
 
 
459
  radial-gradient(circle at 80% 20%, rgba(78,205,196,0.04) 0%, transparent 50%),
460
  linear-gradient(var(--grid) 1px, transparent 1px),
461
  linear-gradient(90deg, var(--grid) 1px, transparent 1px);
462
+ background-size: 100% 100%, 100% 100%, 32px 32px, 32px 32px;
463
  overflow-x: auto;
464
  overflow-y: auto;
465
  }
 
467
  /* ── Top Bar ── */
468
  .top-bar {
469
  position: sticky; top: 0; left: 0; right: 0;
470
+ height: 56px;
471
  background: rgba(13,13,15,0.95);
472
  border-bottom: 1px solid var(--node-border);
473
+ display: flex; align-items: center; padding: 0 36px;
474
+ gap: 18px; z-index: 1000;
475
  backdrop-filter: blur(12px);
476
  }
477
+ .top-bar .logo { font-size: 17px; font-weight: 700; color: var(--accent); letter-spacing: 0.06em; }
478
+ .top-bar .sep { color: var(--node-border); font-size: 17px; }
479
+ .top-bar .sub { font-size: 14px; color: var(--muted); }
480
  .top-bar .badge {
481
  margin-left: auto;
482
  background: rgba(124,106,247,0.15);
483
  border: 1px solid rgba(124,106,247,0.3);
484
+ padding: 5px 16px; border-radius: 20px;
485
+ font-size: 12px; color: var(--accent);
486
  }
487
 
488
  /* ── Canvas ── */
489
  #canvas {
490
  position: relative;
491
+ width: 1760px;
492
+ min-height: calc(100vh - 56px);
493
+ height: 1180px;
494
  margin: 0 auto;
495
  }
496
 
 
514
  /* ── Nodes ── */
515
  .node {
516
  position: absolute;
517
+ width: 380px;
518
  background: var(--node-bg);
519
  border: 1px solid var(--node-border);
520
+ border-radius: 13px;
521
+ box-shadow: 0 12px 38px rgba(0,0,0,0.55);
522
  z-index: 10;
523
  display: flex; flex-direction: column;
524
  transition: box-shadow 0.2s;
525
  }
526
  .node:hover {
527
+ box-shadow: 0 12px 38px rgba(0,0,0,0.55),
528
+ 0 0 0 1px rgba(124,106,247,0.35);
529
  }
530
+ .node.fixed-height { height: 470px; }
531
 
532
  .node-header {
533
  background: var(--node-header);
534
+ padding: 11px 16px;
535
  border-bottom: 1px solid var(--node-border);
536
+ border-radius: 13px 13px 0 0;
537
+ font-size: 14px; font-weight: 700;
538
  cursor: grab;
539
  display: flex; justify-content: space-between; align-items: center;
540
  flex-shrink: 0;
 
542
  }
543
  .node-header:active { cursor: grabbing; }
544
  .node-header .id {
545
+ font-size: 12px; color: var(--muted);
546
  background: rgba(255,255,255,0.04);
547
+ padding: 3px 10px; border-radius: 6px;
548
  }
549
 
550
  .node-body {
551
+ padding: 15px;
552
+ display: flex; flex-direction: column; gap: 12px;
553
  flex: 1; overflow: hidden;
554
  }
555
 
556
  /* ── Ports ── */
557
  .port {
558
  position: absolute;
559
+ width: 15px; height: 15px;
560
  background: var(--node-bg);
561
  border: 2px solid var(--port);
562
  border-radius: 50%; z-index: 30;
563
  }
564
+ .port.out { right: -8px; }
565
+ .port.in { left: -8px; }
566
 
567
  /* ── Labels ── */
568
  label {
569
+ font-size: 12px; color: var(--muted);
570
+ font-weight: 600; display: block; margin-bottom: 5px;
571
  letter-spacing: 0.07em; text-transform: uppercase;
572
  }
573
 
 
581
 
582
  .file-upload {
583
  border: 1.5px dashed var(--node-border);
584
+ border-radius: 10px; padding: 22px 14px;
585
  text-align: center; cursor: pointer;
586
+ font-size: 13px; color: var(--muted);
587
  transition: border-color 0.2s, background 0.2s;
588
  background: rgba(255,255,255,0.01);
589
+ display: flex; flex-direction: column; align-items: center; gap: 8px;
590
  }
591
  .file-upload:hover {
592
  border-color: var(--accent);
 
599
  .preview-wrap {
600
  display: none;
601
  position: relative;
602
+ border-radius: 10px;
603
  overflow: hidden;
604
  border: 1px solid var(--node-border);
605
  background: #000;
 
608
 
609
  .img-preview {
610
  width: 100%;
611
+ height: 235px;
612
  object-fit: contain;
613
  display: block;
614
  }
615
 
616
+ /* ── Clear button ── */
617
  .clear-btn {
618
  position: absolute;
619
+ top: 8px; right: 8px;
620
+ width: 32px; height: 32px;
621
  border-radius: 50%;
622
  background: rgba(13,13,15,0.82);
623
  border: 1px solid var(--node-border);
 
629
  backdrop-filter: blur(6px);
630
  }
631
  .clear-btn:hover {
632
+ background: rgba(255,107,107,0.20);
633
  border-color: var(--accent3);
634
  transform: scale(1.08);
635
  }
636
+ .clear-btn:active { transform: scale(0.95); }
637
  .clear-btn svg { pointer-events: none; }
638
 
639
+ /* ── Filename chip ── */
640
  .img-chip {
641
  display: none;
642
+ align-items: center; gap: 8px;
643
  background: rgba(124,106,247,0.08);
644
  border: 1px solid rgba(124,106,247,0.22);
645
+ border-radius: 7px;
646
+ padding: 6px 12px;
647
+ font-size: 11px; color: var(--muted);
648
  overflow: hidden;
649
  }
650
  .img-chip.visible { display: flex; }
651
  .img-chip .chip-dot {
652
+ width: 7px; height: 7px; border-radius: 50%;
653
  background: var(--accent2); flex-shrink: 0;
654
+ box-shadow: 0 0 6px var(--accent2);
655
  }
656
  .img-chip .chip-name {
657
  overflow: hidden; text-overflow: ellipsis;
658
  white-space: nowrap; flex: 1;
659
+ color: var(--text); font-size: 11px;
660
  }
661
+ .img-chip .chip-size { color: var(--muted); flex-shrink: 0; font-size: 11px; }
662
 
663
  select, textarea {
664
  width: 100%;
665
  background: rgba(0,0,0,0.3);
666
  border: 1px solid var(--node-border);
667
+ color: var(--text); padding: 9px 13px;
668
+ border-radius: 7px; outline: none;
669
+ font-size: 13px; font-family: 'JetBrains Mono', monospace;
670
  resize: none; transition: border-color 0.2s;
671
  }
672
  select:focus, textarea:focus { border-color: var(--accent); }
 
675
  button.run-btn {
676
  background: linear-gradient(135deg, var(--accent), #9b59b6);
677
  color: #fff; border: none;
678
+ padding: 12px; border-radius: 8px;
679
+ font-weight: 700; font-size: 14px;
680
  font-family: 'JetBrains Mono', monospace;
681
  cursor: pointer;
682
  transition: opacity 0.2s, transform 0.1s;
683
  display: flex; justify-content: center; align-items: center; gap: 10px;
684
+ letter-spacing: 0.05em; flex-shrink: 0;
685
  }
686
  button.run-btn:hover { opacity: 0.9; }
687
  button.run-btn:active { transform: scale(0.98); }
 
692
  .output-box {
693
  background: rgba(0,0,0,0.4);
694
  border: 1px solid var(--node-border);
695
+ border-radius: 7px; padding: 14px;
696
  flex: 1; overflow-y: auto;
697
+ font-size: 13px; line-height: 1.7;
698
  color: #c8c8e0; white-space: pre-wrap;
699
  user-select: text;
700
  font-family: 'JetBrains Mono', monospace;
 
704
  .ground-canvas-wrap {
705
  position: relative; flex: 1;
706
  border: 1px solid var(--node-border);
707
+ border-radius: 7px; overflow: hidden;
708
  background: #000; min-height: 0;
709
  }
710
  .ground-canvas-wrap canvas {
 
714
  .ground-placeholder {
715
  position: absolute; inset: 0;
716
  display: flex; align-items: center; justify-content: center;
717
+ font-size: 13px; color: var(--muted); text-align: center; padding: 14px;
718
  }
719
 
720
  .loader {
721
+ width: 15px; height: 15px;
722
  border: 2px solid rgba(255,255,255,0.3);
723
  border-top-color: #fff; border-radius: 50%;
724
  animation: spin 0.7s linear infinite;
 
727
  @keyframes spin { to { transform: rotate(360deg); } }
728
 
729
  .status-dot {
730
+ width: 8px; height: 8px; border-radius: 50%;
731
+ background: var(--muted); display: inline-block; margin-right: 8px;
732
  }
733
  .status-dot.active {
734
  background: var(--accent2);
735
+ box-shadow: 0 0 7px var(--accent2);
736
  }
737
 
738
  /* ── Model badges ── */
739
  .model-badge {
740
+ display: inline-block; padding: 3px 10px;
741
+ border-radius: 6px; font-size: 10px; font-weight: 700;
742
+ letter-spacing: 0.07em; text-transform: uppercase;
743
  }
744
+ .model-badge.q4b { background: rgba(255,200,80,0.15); color: #ffc850; border: 1px solid rgba(255,200,80,0.35); }
745
+ .model-badge.q2b { background: rgba(124,106,247,0.2); color: var(--accent); border: 1px solid rgba(124,106,247,0.3); }
746
+ .model-badge.qvl { background: rgba(255,150,50,0.15); color: #ff9632; border: 1px solid rgba(255,150,50,0.35); }
747
+ .model-badge.lfm450 { background: rgba(78,205,196,0.15); color: var(--accent2); border: 1px solid rgba(78,205,196,0.3); }
748
+ .model-badge.lfm16 { background: rgba(107,203,119,0.15); color: #6bcb77; border: 1px solid rgba(107,203,119,0.35); }
749
+ .model-badge.qunred { background: rgba(255,80,160,0.15); color: #ff50a0; border: 1px solid rgba(255,80,160,0.35); }
750
+ .model-badge.q25vl3b { background: rgba(80,180,255,0.15); color: #50b4ff; border: 1px solid rgba(80,180,255,0.35); }
751
 
752
  .model-info-box {
753
+ border-radius: 8px; padding: 13px;
754
+ font-size: 12px; color: var(--muted); line-height: 1.65;
755
  flex-shrink: 0;
756
  }
757
 
758
+ .canvas-footer { height: 50px; }
759
  </style>
760
  </head>
761
  <body>
 
764
  <span class="logo">MULTIMODAL EDGE</span>
765
  <span class="sep">|</span>
766
  <span class="sub">Node-Based Inference Canvas</span>
767
+ <span class="badge">v2.5 — HEPTA MODEL</span>
768
  </div>
769
 
770
  <div id="canvas">
 
776
  </svg>
777
 
778
  <!-- ─── ID 01 : Image Input ─── -->
779
+ <div class="node fixed-height" id="node-img" style="left:55px; top:70px;">
780
  <div class="node-header">
781
  <span><span class="status-dot" id="dot-img"></span>Input Image</span>
782
  <span class="id">ID: 01</span>
 
785
  <div>
786
  <label>Upload Image</label>
787
 
788
+ <!-- Drop zone -->
789
  <div class="file-upload" id="dropZone">
790
+ <svg width="38" height="38" viewBox="0 0 24 24" fill="none"
791
  stroke="#7c6af7" stroke-width="1.5"
792
  stroke-linecap="round" stroke-linejoin="round">
793
  <rect x="3" y="3" width="18" height="18" rx="2" ry="2"/>
 
798
  <input type="file" id="fileInput" accept="image/*">
799
  </div>
800
 
801
+ <!-- Preview -->
802
  <div class="preview-wrap" id="previewWrap">
803
  <img id="imgPreview" class="img-preview" />
 
804
  <button class="clear-btn" id="clearBtn" title="Remove image">
805
+ <svg width="15" height="15" viewBox="0 0 24 24" fill="none"
806
  stroke="currentColor" stroke-width="2.5"
807
  stroke-linecap="round" stroke-linejoin="round">
808
  <line x1="18" y1="6" x2="6" y2="18"/>
 
812
  </div>
813
 
814
  <!-- Filename chip -->
815
+ <div class="img-chip" id="imgChip" style="margin-top:8px;">
816
  <span class="chip-dot"></span>
817
  <span class="chip-name" id="chipName">—</span>
818
  <span class="chip-size" id="chipSize"></span>
 
823
  </div>
824
 
825
  <!-- ─── ID 02 : Model Selector ─── -->
826
+ <div class="node fixed-height" id="node-model" style="left:55px; top:562px;">
827
  <div class="node-header">
828
  <span><span class="status-dot" id="dot-model"></span>Model Selector</span>
829
  <span class="id">ID: 02</span>
 
838
  <option value="lfm_450">LFM2.5-VL-450M (LiquidAI)</option>
839
  <option value="lfm_16">LFM2.5-VL-1.6B (LiquidAI)</option>
840
  <option value="qwen_unredacted">Qwen3.5-2B-Unredacted-MAX</option>
841
+ <option value="qwen25_vl_3b">Qwen2.5-VL-3B-Instruct</option>
842
  </select>
843
  </div>
844
  <div id="modelInfoBox" class="model-info-box"
 
853
  </div>
854
 
855
  <!-- ─── ID 03 : Task Config ─── -->
856
+ <div class="node fixed-height" id="node-task" style="left:548px; top:70px;">
857
  <div class="port in" id="port-task-in" style="top:50%;transform:translateY(-50%);"></div>
858
  <div class="node-header">
859
  <span><span class="status-dot" id="dot-task"></span>Task Config</span>
 
871
  </div>
872
  <div>
873
  <label>Prompt Directive</label>
874
+ <textarea id="promptInput" rows="6"
875
  placeholder="e.g., Count the total number of boats and describe the environment."></textarea>
876
  </div>
877
  <button class="run-btn" id="runBtn">
 
883
  </div>
884
 
885
  <!-- ─── ID 04 : Output Stream ─── -->
886
+ <div class="node fixed-height" id="node-out" style="left:1042px; top:70px;">
887
  <div class="port in" id="port-out-in" style="top:50%;transform:translateY(-50%);"></div>
888
  <div class="node-header">
889
  <span><span class="status-dot" id="dot-out"></span>Output Stream</span>
 
896
  </div>
897
 
898
  <!-- ─── ID 05 : Grounding Visualiser ─── -->
899
+ <div class="node fixed-height" id="node-gnd" style="left:1042px; top:562px;">
900
  <div class="port in" id="port-gnd-in" style="top:50%;transform:translateY(-50%);"></div>
901
  <div class="node-header">
902
  <span><span class="status-dot" id="dot-gnd"></span>View Grounding</span>
 
940
 
941
  function updateWires() {
942
  const wires = [
943
+ ['wire-img-task', 'port-img-out', 'port-task-in'],
944
+ ['wire-model-task', 'port-model-out', 'port-task-in'],
945
+ ['wire-task-out', 'port-task-out', 'port-out-in'],
946
+ ['wire-task-gnd', 'port-task-out', 'port-gnd-in'],
947
  ];
948
  for (const [id, from, to] of wires) {
949
  const el = document.getElementById(id);
 
984
  // ══════════════════════════════════════════════
985
  let currentFile = null;
986
 
987
+ const dropZone = document.getElementById('dropZone');
988
+ const fileInput = document.getElementById('fileInput');
989
+ const previewWrap = document.getElementById('previewWrap');
990
+ const imgPreview = document.getElementById('imgPreview');
991
+ const clearBtn = document.getElementById('clearBtn');
992
+ const imgChip = document.getElementById('imgChip');
993
+ const chipName = document.getElementById('chipName');
994
+ const chipSize = document.getElementById('chipSize');
995
+ const dotImg = document.getElementById('dot-img');
996
 
997
  function formatBytes(bytes) {
998
  if (bytes < 1024) return bytes + ' B';
999
+ if (bytes < 1048576) return (bytes / 1024).toFixed(1) + ' KB';
1000
+ return (bytes / 1048576).toFixed(1) + ' MB';
1001
  }
1002
 
1003
  function handleFile(file) {
1004
  if (!file || !file.type.startsWith('image/')) return;
1005
  currentFile = file;
 
 
1006
  imgPreview.src = URL.createObjectURL(file);
1007
  previewWrap.classList.add('visible');
1008
  dropZone.style.display = 'none';
 
 
1009
  chipName.textContent = file.name;
1010
  chipSize.textContent = formatBytes(file.size);
1011
  imgChip.classList.add('visible');
 
1012
  dotImg.classList.add('active');
1013
  requestAnimationFrame(updateWires);
1014
  }
1015
 
1016
  function clearImage() {
1017
  currentFile = null;
 
 
1018
  imgPreview.src = '';
1019
  previewWrap.classList.remove('visible');
1020
  dropZone.style.display = '';
 
 
1021
  imgChip.classList.remove('visible');
1022
  chipName.textContent = '—';
1023
  chipSize.textContent = '';
 
 
1024
  fileInput.value = '';
 
1025
  dotImg.classList.remove('active');
1026
  requestAnimationFrame(updateWires);
1027
  }
1028
 
1029
  dropZone.onclick = () => fileInput.click();
1030
  fileInput.onchange = e => handleFile(e.target.files[0]);
1031
+ clearBtn.onclick = e => { e.stopPropagation(); clearImage(); };
 
1032
  dropZone.ondragover = e => { e.preventDefault(); dropZone.style.borderColor = 'var(--accent)'; };
1033
  dropZone.ondragleave = () => { dropZone.style.borderColor = ''; };
1034
  dropZone.ondrop = e => {
 
1087
  bg: 'rgba(255,80,160,0.07)',
1088
  border: 'rgba(255,80,160,0.25)',
1089
  },
1090
+ qwen25_vl_3b: {
1091
+ html: `<span class="model-badge q25vl3b">QWEN 2.5-VL · 3B</span><br><br>
1092
+ Qwen2.5-VL-3B-Instruct by Alibaba Cloud. Powerful 3B vision-language model
1093
+ with strong grounding, OCR &amp; multi-task visual reasoning via qwen_vl_utils.`,
1094
+ bg: 'rgba(80,180,255,0.07)',
1095
+ border: 'rgba(80,180,255,0.25)',
1096
+ },
1097
  };
1098
 
1099
  modelSelect.onchange = () => {
1100
  const info = MODEL_INFO[modelSelect.value];
1101
  if (!info) return;
1102
  modelInfoBox.innerHTML = info.html;
1103
+ modelInfoBox.style.background = info.bg;
1104
+ modelInfoBox.style.border = `1px solid ${info.border}`;
1105
  };
1106
 
1107
  // ══════════════════════════════════════════════
 
1177
  groundPlaceholder.style.display = 'none';
1178
 
1179
  const lw = Math.max(2, W/200);
1180
+ const fs = Math.max(13, W/40);
1181
  gCtx.lineWidth = lw;
1182
  gCtx.font = `bold ${fs}px JetBrains Mono, monospace`;
1183
 
 
1207
  gCtx.strokeRect(x1, y1, bw, bh);
1208
 
1209
  const tw = gCtx.measureText(lbl).width;
1210
+ const ph = fs*1.4, pw = tw+12;
1211
  const lx = x1, ly = Math.max(0, y1-ph);
1212
  gCtx.fillStyle = col;
1213
+ roundRect(gCtx, lx, ly, pw, ph, 5); gCtx.fill();
1214
  gCtx.fillStyle = '#fff';
1215
+ gCtx.fillText(lbl, lx+6, ly+ph*0.76);
1216
  return;
1217
  }
1218
 
 
1226
  if (pt) {
1227
  let [x,y] = pt;
1228
  if (x <= 1 && y <= 1) { x*=W; y*=H; }
1229
+ const r = Math.max(9, W/60);
1230
  const lbl = item?.label || `${i+1}`;
1231
 
1232
  gCtx.beginPath();
 
1239
  gCtx.strokeStyle = '#fff'; gCtx.stroke();
1240
 
1241
  gCtx.fillStyle = '#fff';
1242
+ gCtx.fillText(lbl, x+r+5, y+fs*0.4);
1243
  }
1244
  });
1245
  };