Spaces:
Running on Zero
Running on Zero
update app
Browse files
app.py
CHANGED
|
@@ -36,14 +36,15 @@ DTYPE = (
|
|
| 36 |
else torch.float16
|
| 37 |
)
|
| 38 |
|
| 39 |
-
QWEN_4B_MODEL_NAME
|
| 40 |
-
QWEN_2B_MODEL_NAME
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
|
|
|
| 47 |
|
| 48 |
# ββ Qwen3.5-4B ββββββββββββββββββββββββββββββββββββββββββ
|
| 49 |
print(f"Loading Qwen3.5-4B model: {QWEN_4B_MODEL_NAME} on {DEVICE}...")
|
|
@@ -72,21 +73,38 @@ except Exception as e:
|
|
| 72 |
qwen_2b_processor = None
|
| 73 |
|
| 74 |
# ββ Qwen3-VL-2B-Instruct ββββββββββββββββββββββββββββββββ
|
| 75 |
-
print(f"Loading Qwen3-VL model: {
|
| 76 |
try:
|
| 77 |
-
|
| 78 |
-
|
| 79 |
trust_remote_code=True,
|
| 80 |
torch_dtype=torch.bfloat16,
|
| 81 |
).to(DEVICE).eval()
|
| 82 |
-
|
| 83 |
-
|
| 84 |
)
|
| 85 |
-
print("Qwen3-VL model loaded successfully.")
|
| 86 |
except Exception as e:
|
| 87 |
-
print(f"Warning: Qwen3-VL model loading failed. Error: {e}")
|
| 88 |
-
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
# ββ LFM2.5-VL-450M ββββββββββββββββββββββββββββββββββββββ
|
| 92 |
print(f"Loading LFM-450M model: {LFM_450_MODEL_NAME} on {DEVICE}...")
|
|
@@ -103,6 +121,23 @@ except Exception as e:
|
|
| 103 |
lfm_450_model = None
|
| 104 |
lfm_450_processor = None
|
| 105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
# ββ LFM2.5-VL-1.6B ββββββββββββββββββββββββββββββββββββββ
|
| 107 |
print(f"Loading LFM-1.6B model: {LFM_16_MODEL_NAME} on {DEVICE}...")
|
| 108 |
try:
|
|
@@ -146,23 +181,6 @@ except Exception as e:
|
|
| 146 |
qwen25_vl_3b_model = None
|
| 147 |
qwen25_vl_3b_processor = None
|
| 148 |
|
| 149 |
-
# ββ Gemma4-E2B-it βββββββββββββββββββββββββββββββββββββββ
|
| 150 |
-
print(f"Loading Gemma4-E2B-it: {GEMMA4_E2B_NAME} on {DEVICE}...")
|
| 151 |
-
try:
|
| 152 |
-
gemma4_e2b_model = Gemma4ForConditionalGeneration.from_pretrained(
|
| 153 |
-
GEMMA4_E2B_NAME,
|
| 154 |
-
torch_dtype=torch.bfloat16,
|
| 155 |
-
device_map="auto" if torch.cuda.is_available() else None,
|
| 156 |
-
).eval()
|
| 157 |
-
if not torch.cuda.is_available():
|
| 158 |
-
gemma4_e2b_model = gemma4_e2b_model.to(DEVICE)
|
| 159 |
-
gemma4_e2b_processor = AutoProcessor.from_pretrained(GEMMA4_E2B_NAME)
|
| 160 |
-
print("Gemma4-E2B-it model loaded successfully.")
|
| 161 |
-
except Exception as e:
|
| 162 |
-
print(f"Warning: Gemma4-E2B-it model loading failed. Error: {e}")
|
| 163 |
-
gemma4_e2b_model = None
|
| 164 |
-
gemma4_e2b_processor = None
|
| 165 |
-
|
| 166 |
|
| 167 |
# --- Utility Functions ---
|
| 168 |
def safe_parse_json(text: str):
|
|
@@ -258,28 +276,59 @@ def generate_inference_stream(
|
|
| 258 |
yield f"data: {json.dumps({'chunk': tok})}\n\n"
|
| 259 |
thread.join()
|
| 260 |
|
| 261 |
-
# ββ Qwen3-VL βββββββββββββββββββββββββββββββββββββ
|
| 262 |
-
elif model_id == "
|
| 263 |
-
if
|
| 264 |
-
yield f"data: {json.dumps({'chunk': '[Error] Qwen3-VL model not loaded.'})}\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
yield "data: [DONE]\n\n"
|
| 266 |
return
|
| 267 |
messages = [{"role": "user", "content": [
|
| 268 |
{"type": "image", "image": image},
|
| 269 |
{"type": "text", "text": full_prompt},
|
| 270 |
]}]
|
| 271 |
-
text_input =
|
| 272 |
messages, tokenize=False, add_generation_prompt=True
|
| 273 |
)
|
| 274 |
-
inputs =
|
| 275 |
text=[text_input], images=[image], return_tensors="pt", padding=True
|
| 276 |
-
).to(
|
| 277 |
streamer = TextIteratorStreamer(
|
| 278 |
-
|
| 279 |
skip_prompt=True, skip_special_tokens=True, timeout=120,
|
| 280 |
)
|
| 281 |
thread = threading.Thread(
|
| 282 |
-
target=
|
| 283 |
kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
|
| 284 |
use_cache=True, temperature=1.0, do_sample=True),
|
| 285 |
)
|
|
@@ -317,6 +366,40 @@ def generate_inference_stream(
|
|
| 317 |
yield f"data: {json.dumps({'chunk': tok})}\n\n"
|
| 318 |
thread.join()
|
| 319 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 320 |
# ββ LFM-1.6B ββββββββββββββββββββββββββββββββββββββββ
|
| 321 |
elif model_id == "lfm_16":
|
| 322 |
if lfm_16_model is None or lfm_16_processor is None:
|
|
@@ -412,40 +495,6 @@ def generate_inference_stream(
|
|
| 412 |
yield f"data: {json.dumps({'chunk': tok})}\n\n"
|
| 413 |
thread.join()
|
| 414 |
|
| 415 |
-
# ββ Gemma4-E2B-it βββββββββββββββββββββββββββββββββββ
|
| 416 |
-
elif model_id == "gemma4_e2b":
|
| 417 |
-
if gemma4_e2b_model is None or gemma4_e2b_processor is None:
|
| 418 |
-
yield f"data: {json.dumps({'chunk': '[Error] Gemma4-E2B-it model not loaded.'})}\n\n"
|
| 419 |
-
yield "data: [DONE]\n\n"
|
| 420 |
-
return
|
| 421 |
-
messages = [{"role": "user", "content": [
|
| 422 |
-
{"type": "image", "image": image},
|
| 423 |
-
{"type": "text", "text": full_prompt},
|
| 424 |
-
]}]
|
| 425 |
-
text_input = gemma4_e2b_processor.apply_chat_template(
|
| 426 |
-
messages, tokenize=False, add_generation_prompt=True
|
| 427 |
-
)
|
| 428 |
-
inputs = gemma4_e2b_processor(
|
| 429 |
-
text=[text_input],
|
| 430 |
-
images=[image],
|
| 431 |
-
return_tensors="pt",
|
| 432 |
-
padding=True,
|
| 433 |
-
).to(gemma4_e2b_model.device)
|
| 434 |
-
streamer = TextIteratorStreamer(
|
| 435 |
-
gemma4_e2b_processor.tokenizer,
|
| 436 |
-
skip_prompt=True, skip_special_tokens=True, timeout=120,
|
| 437 |
-
)
|
| 438 |
-
thread = threading.Thread(
|
| 439 |
-
target=gemma4_e2b_model.generate,
|
| 440 |
-
kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
|
| 441 |
-
use_cache=True, temperature=1.0, do_sample=True),
|
| 442 |
-
)
|
| 443 |
-
thread.start()
|
| 444 |
-
for tok in streamer:
|
| 445 |
-
if tok:
|
| 446 |
-
yield f"data: {json.dumps({'chunk': tok})}\n\n"
|
| 447 |
-
thread.join()
|
| 448 |
-
|
| 449 |
yield "data: [DONE]\n\n"
|
| 450 |
|
| 451 |
|
|
@@ -793,12 +842,13 @@ async def homepage(request: Request):
|
|
| 793 |
}
|
| 794 |
.model-badge.q4b { background: rgba(255,200,80,0.15); color: #ffc850; border: 1px solid rgba(255,200,80,0.35); }
|
| 795 |
.model-badge.q2b { background: rgba(124,106,247,0.2); color: var(--accent); border: 1px solid rgba(124,106,247,0.3); }
|
| 796 |
-
.model-badge.
|
|
|
|
| 797 |
.model-badge.lfm450 { background: rgba(78,205,196,0.15); color: var(--accent2); border: 1px solid rgba(78,205,196,0.3); }
|
|
|
|
| 798 |
.model-badge.lfm16 { background: rgba(107,203,119,0.15); color: #6bcb77; border: 1px solid rgba(107,203,119,0.35); }
|
| 799 |
.model-badge.qunred { background: rgba(255,80,160,0.15); color: #ff50a0; border: 1px solid rgba(255,80,160,0.35); }
|
| 800 |
.model-badge.q25vl3b { background: rgba(80,180,255,0.15); color: #50b4ff; border: 1px solid rgba(80,180,255,0.35); }
|
| 801 |
-
.model-badge.g4e2b { background: rgba(66,197,107,0.15); color: #42c56b; border: 1px solid rgba(66,197,107,0.35); }
|
| 802 |
|
| 803 |
.model-info-box {
|
| 804 |
border-radius: 6px; padding: 9px;
|
|
@@ -815,7 +865,7 @@ async def homepage(request: Request):
|
|
| 815 |
<span class="logo">MULTIMODAL EDGE</span>
|
| 816 |
<span class="sep">|</span>
|
| 817 |
<span class="sub">Node-Based Inference Canvas</span>
|
| 818 |
-
<span class="badge">v2.
|
| 819 |
</div>
|
| 820 |
|
| 821 |
<div id="canvas">
|
|
@@ -835,8 +885,6 @@ async def homepage(request: Request):
|
|
| 835 |
<div class="node-body">
|
| 836 |
<div>
|
| 837 |
<label>Upload Image</label>
|
| 838 |
-
|
| 839 |
-
<!-- Drop zone -->
|
| 840 |
<div class="file-upload" id="dropZone">
|
| 841 |
<svg width="30" height="30" viewBox="0 0 24 24" fill="none"
|
| 842 |
stroke="#7c6af7" stroke-width="1.5"
|
|
@@ -848,8 +896,6 @@ async def homepage(request: Request):
|
|
| 848 |
<span>Click or drop image here</span>
|
| 849 |
<input type="file" id="fileInput" accept="image/*">
|
| 850 |
</div>
|
| 851 |
-
|
| 852 |
-
<!-- Preview -->
|
| 853 |
<div class="preview-wrap" id="previewWrap">
|
| 854 |
<img id="imgPreview" class="img-preview" />
|
| 855 |
<button class="clear-btn" id="clearBtn" title="Remove image">
|
|
@@ -861,8 +907,6 @@ async def homepage(request: Request):
|
|
| 861 |
</svg>
|
| 862 |
</button>
|
| 863 |
</div>
|
| 864 |
-
|
| 865 |
-
<!-- Filename chip -->
|
| 866 |
<div class="img-chip" id="imgChip" style="margin-top:6px;">
|
| 867 |
<span class="chip-dot"></span>
|
| 868 |
<span class="chip-name" id="chipName">β</span>
|
|
@@ -885,12 +929,13 @@ async def homepage(request: Request):
|
|
| 885 |
<select id="modelSelect">
|
| 886 |
<option value="qwen_4b">Qwen3.5-4B</option>
|
| 887 |
<option value="qwen_2b">Qwen3.5-2B</option>
|
| 888 |
-
<option value="
|
|
|
|
| 889 |
<option value="lfm_450">LFM2.5-VL-450M (LiquidAI)</option>
|
|
|
|
| 890 |
<option value="lfm_16">LFM2.5-VL-1.6B (LiquidAI)</option>
|
| 891 |
<option value="qwen_unredacted">Qwen3.5-2B-Unredacted-MAX</option>
|
| 892 |
<option value="qwen25_vl_3b">Qwen2.5-VL-3B-Instruct</option>
|
| 893 |
-
<option value="gemma4_e2b">Gemma4-E2B-it (Google)</option>
|
| 894 |
</select>
|
| 895 |
</div>
|
| 896 |
<div id="modelInfoBox" class="model-info-box"
|
|
@@ -1111,13 +1156,20 @@ const MODEL_INFO = {
|
|
| 1111 |
bg: 'rgba(124,106,247,0.07)',
|
| 1112 |
border: 'rgba(124,106,247,0.25)',
|
| 1113 |
},
|
| 1114 |
-
|
| 1115 |
-
html: `<span class="model-badge
|
| 1116 |
Qwen3-VL-2B-Instruct β dedicated vision-language model by Alibaba Cloud.
|
| 1117 |
Strong spatial grounding, OCR & instruction-following.`,
|
| 1118 |
bg: 'rgba(255,150,50,0.07)',
|
| 1119 |
border: 'rgba(255,150,50,0.25)',
|
| 1120 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1121 |
lfm_450: {
|
| 1122 |
html: `<span class="model-badge lfm450">LFM Β· 450M</span><br><br>
|
| 1123 |
LFM2.5-VL 450M by LiquidAI. Ultra-lightweight edge model
|
|
@@ -1125,6 +1177,13 @@ const MODEL_INFO = {
|
|
| 1125 |
bg: 'rgba(78,205,196,0.07)',
|
| 1126 |
border: 'rgba(78,205,196,0.25)',
|
| 1127 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1128 |
lfm_16: {
|
| 1129 |
html: `<span class="model-badge lfm16">LFM Β· 1.6B</span><br><br>
|
| 1130 |
LFM2.5-VL 1.6B by LiquidAI. Larger liquid-state model offering
|
|
@@ -1146,13 +1205,6 @@ const MODEL_INFO = {
|
|
| 1146 |
bg: 'rgba(80,180,255,0.07)',
|
| 1147 |
border: 'rgba(80,180,255,0.25)',
|
| 1148 |
},
|
| 1149 |
-
gemma4_e2b: {
|
| 1150 |
-
html: `<span class="model-badge g4e2b">GEMMA 4 Β· E2B</span><br><br>
|
| 1151 |
-
Gemma4-E2B-it by Google DeepMind. Efficient 2B multimodal model
|
| 1152 |
-
with strong vision-language understanding & instruction-following.`,
|
| 1153 |
-
bg: 'rgba(66,197,107,0.07)',
|
| 1154 |
-
border: 'rgba(66,197,107,0.25)',
|
| 1155 |
-
},
|
| 1156 |
};
|
| 1157 |
|
| 1158 |
modelSelect.onchange = () => {
|
|
|
|
| 36 |
else torch.float16
|
| 37 |
)
|
| 38 |
|
| 39 |
+
QWEN_4B_MODEL_NAME = "Qwen/Qwen3.5-4B"
|
| 40 |
+
QWEN_2B_MODEL_NAME = "Qwen/Qwen3.5-2B"
|
| 41 |
+
QWEN_VL_2B_MODEL_NAME = "Qwen/Qwen3-VL-2B-Instruct"
|
| 42 |
+
QWEN_VL_4B_MODEL_NAME = "Qwen/Qwen3-VL-4B-Instruct"
|
| 43 |
+
LFM_450_MODEL_NAME = "LiquidAI/LFM2.5-VL-450M"
|
| 44 |
+
GEMMA4_E2B_NAME = "google/gemma-4-E2B-it"
|
| 45 |
+
LFM_16_MODEL_NAME = "LiquidAI/LFM2.5-VL-1.6B"
|
| 46 |
+
QWEN_UNREDACTED_NAME = "prithivMLmods/Qwen3.5-2B-Unredacted-MAX"
|
| 47 |
+
QWEN25_VL_3B_NAME = "Qwen/Qwen2.5-VL-3B-Instruct"
|
| 48 |
|
| 49 |
# ββ Qwen3.5-4B ββββββββββββββββββββββββββββββββββββββββββ
|
| 50 |
print(f"Loading Qwen3.5-4B model: {QWEN_4B_MODEL_NAME} on {DEVICE}...")
|
|
|
|
| 73 |
qwen_2b_processor = None
|
| 74 |
|
| 75 |
# ββ Qwen3-VL-2B-Instruct ββββββββββββββββββββββββββββββββ
|
| 76 |
+
print(f"Loading Qwen3-VL-2B model: {QWEN_VL_2B_MODEL_NAME} on {DEVICE}...")
|
| 77 |
try:
|
| 78 |
+
qwen_vl_2b_model = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 79 |
+
QWEN_VL_2B_MODEL_NAME,
|
| 80 |
trust_remote_code=True,
|
| 81 |
torch_dtype=torch.bfloat16,
|
| 82 |
).to(DEVICE).eval()
|
| 83 |
+
qwen_vl_2b_processor = AutoProcessor.from_pretrained(
|
| 84 |
+
QWEN_VL_2B_MODEL_NAME, trust_remote_code=True
|
| 85 |
)
|
| 86 |
+
print("Qwen3-VL-2B model loaded successfully.")
|
| 87 |
except Exception as e:
|
| 88 |
+
print(f"Warning: Qwen3-VL-2B model loading failed. Error: {e}")
|
| 89 |
+
qwen_vl_2b_model = None
|
| 90 |
+
qwen_vl_2b_processor = None
|
| 91 |
+
|
| 92 |
+
# ββ Qwen3-VL-4B-Instruct ββββββββββββββββββββββββββββββββ
|
| 93 |
+
print(f"Loading Qwen3-VL-4B model: {QWEN_VL_4B_MODEL_NAME} on {DEVICE}...")
|
| 94 |
+
try:
|
| 95 |
+
qwen_vl_4b_model = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 96 |
+
QWEN_VL_4B_MODEL_NAME,
|
| 97 |
+
trust_remote_code=True,
|
| 98 |
+
torch_dtype=torch.bfloat16,
|
| 99 |
+
).to(DEVICE).eval()
|
| 100 |
+
qwen_vl_4b_processor = AutoProcessor.from_pretrained(
|
| 101 |
+
QWEN_VL_4B_MODEL_NAME, trust_remote_code=True
|
| 102 |
+
)
|
| 103 |
+
print("Qwen3-VL-4B model loaded successfully.")
|
| 104 |
+
except Exception as e:
|
| 105 |
+
print(f"Warning: Qwen3-VL-4B model loading failed. Error: {e}")
|
| 106 |
+
qwen_vl_4b_model = None
|
| 107 |
+
qwen_vl_4b_processor = None
|
| 108 |
|
| 109 |
# ββ LFM2.5-VL-450M ββββββββββββββββββββββββββββββββββββββ
|
| 110 |
print(f"Loading LFM-450M model: {LFM_450_MODEL_NAME} on {DEVICE}...")
|
|
|
|
| 121 |
lfm_450_model = None
|
| 122 |
lfm_450_processor = None
|
| 123 |
|
| 124 |
+
# ββ Gemma4-E2B-it βββββββββββββββββββββββββββββββββββββββ
|
| 125 |
+
print(f"Loading Gemma4-E2B-it: {GEMMA4_E2B_NAME} on {DEVICE}...")
|
| 126 |
+
try:
|
| 127 |
+
gemma4_e2b_model = Gemma4ForConditionalGeneration.from_pretrained(
|
| 128 |
+
GEMMA4_E2B_NAME,
|
| 129 |
+
torch_dtype=torch.bfloat16,
|
| 130 |
+
device_map="auto" if torch.cuda.is_available() else None,
|
| 131 |
+
).eval()
|
| 132 |
+
if not torch.cuda.is_available():
|
| 133 |
+
gemma4_e2b_model = gemma4_e2b_model.to(DEVICE)
|
| 134 |
+
gemma4_e2b_processor = AutoProcessor.from_pretrained(GEMMA4_E2B_NAME)
|
| 135 |
+
print("Gemma4-E2B-it model loaded successfully.")
|
| 136 |
+
except Exception as e:
|
| 137 |
+
print(f"Warning: Gemma4-E2B-it model loading failed. Error: {e}")
|
| 138 |
+
gemma4_e2b_model = None
|
| 139 |
+
gemma4_e2b_processor = None
|
| 140 |
+
|
| 141 |
# ββ LFM2.5-VL-1.6B ββββββββββββββββββββββββββββββββββββββ
|
| 142 |
print(f"Loading LFM-1.6B model: {LFM_16_MODEL_NAME} on {DEVICE}...")
|
| 143 |
try:
|
|
|
|
| 181 |
qwen25_vl_3b_model = None
|
| 182 |
qwen25_vl_3b_processor = None
|
| 183 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
|
| 185 |
# --- Utility Functions ---
|
| 186 |
def safe_parse_json(text: str):
|
|
|
|
| 276 |
yield f"data: {json.dumps({'chunk': tok})}\n\n"
|
| 277 |
thread.join()
|
| 278 |
|
| 279 |
+
# ββ Qwen3-VL-2B βββββββββββββββββββββββββββββββββββββ
|
| 280 |
+
elif model_id == "qwen_vl_2b":
|
| 281 |
+
if qwen_vl_2b_model is None or qwen_vl_2b_processor is None:
|
| 282 |
+
yield f"data: {json.dumps({'chunk': '[Error] Qwen3-VL-2B model not loaded.'})}\n\n"
|
| 283 |
+
yield "data: [DONE]\n\n"
|
| 284 |
+
return
|
| 285 |
+
messages = [{"role": "user", "content": [
|
| 286 |
+
{"type": "image", "image": image},
|
| 287 |
+
{"type": "text", "text": full_prompt},
|
| 288 |
+
]}]
|
| 289 |
+
text_input = qwen_vl_2b_processor.apply_chat_template(
|
| 290 |
+
messages, tokenize=False, add_generation_prompt=True
|
| 291 |
+
)
|
| 292 |
+
inputs = qwen_vl_2b_processor(
|
| 293 |
+
text=[text_input], images=[image], return_tensors="pt", padding=True
|
| 294 |
+
).to(qwen_vl_2b_model.device)
|
| 295 |
+
streamer = TextIteratorStreamer(
|
| 296 |
+
qwen_vl_2b_processor.tokenizer,
|
| 297 |
+
skip_prompt=True, skip_special_tokens=True, timeout=120,
|
| 298 |
+
)
|
| 299 |
+
thread = threading.Thread(
|
| 300 |
+
target=qwen_vl_2b_model.generate,
|
| 301 |
+
kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
|
| 302 |
+
use_cache=True, temperature=1.0, do_sample=True),
|
| 303 |
+
)
|
| 304 |
+
thread.start()
|
| 305 |
+
for tok in streamer:
|
| 306 |
+
if tok:
|
| 307 |
+
yield f"data: {json.dumps({'chunk': tok})}\n\n"
|
| 308 |
+
thread.join()
|
| 309 |
+
|
| 310 |
+
# ββ Qwen3-VL-4B βββββββββββββββββββββββββββββββββββββ
|
| 311 |
+
elif model_id == "qwen_vl_4b":
|
| 312 |
+
if qwen_vl_4b_model is None or qwen_vl_4b_processor is None:
|
| 313 |
+
yield f"data: {json.dumps({'chunk': '[Error] Qwen3-VL-4B model not loaded.'})}\n\n"
|
| 314 |
yield "data: [DONE]\n\n"
|
| 315 |
return
|
| 316 |
messages = [{"role": "user", "content": [
|
| 317 |
{"type": "image", "image": image},
|
| 318 |
{"type": "text", "text": full_prompt},
|
| 319 |
]}]
|
| 320 |
+
text_input = qwen_vl_4b_processor.apply_chat_template(
|
| 321 |
messages, tokenize=False, add_generation_prompt=True
|
| 322 |
)
|
| 323 |
+
inputs = qwen_vl_4b_processor(
|
| 324 |
text=[text_input], images=[image], return_tensors="pt", padding=True
|
| 325 |
+
).to(qwen_vl_4b_model.device)
|
| 326 |
streamer = TextIteratorStreamer(
|
| 327 |
+
qwen_vl_4b_processor.tokenizer,
|
| 328 |
skip_prompt=True, skip_special_tokens=True, timeout=120,
|
| 329 |
)
|
| 330 |
thread = threading.Thread(
|
| 331 |
+
target=qwen_vl_4b_model.generate,
|
| 332 |
kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
|
| 333 |
use_cache=True, temperature=1.0, do_sample=True),
|
| 334 |
)
|
|
|
|
| 366 |
yield f"data: {json.dumps({'chunk': tok})}\n\n"
|
| 367 |
thread.join()
|
| 368 |
|
| 369 |
+
# ββ Gemma4-E2B-it βββββββββββββββββββββββββββββββββββ
|
| 370 |
+
elif model_id == "gemma4_e2b":
|
| 371 |
+
if gemma4_e2b_model is None or gemma4_e2b_processor is None:
|
| 372 |
+
yield f"data: {json.dumps({'chunk': '[Error] Gemma4-E2B-it model not loaded.'})}\n\n"
|
| 373 |
+
yield "data: [DONE]\n\n"
|
| 374 |
+
return
|
| 375 |
+
messages = [{"role": "user", "content": [
|
| 376 |
+
{"type": "image", "image": image},
|
| 377 |
+
{"type": "text", "text": full_prompt},
|
| 378 |
+
]}]
|
| 379 |
+
text_input = gemma4_e2b_processor.apply_chat_template(
|
| 380 |
+
messages, tokenize=False, add_generation_prompt=True
|
| 381 |
+
)
|
| 382 |
+
inputs = gemma4_e2b_processor(
|
| 383 |
+
text=[text_input],
|
| 384 |
+
images=[image],
|
| 385 |
+
return_tensors="pt",
|
| 386 |
+
padding=True,
|
| 387 |
+
).to(gemma4_e2b_model.device)
|
| 388 |
+
streamer = TextIteratorStreamer(
|
| 389 |
+
gemma4_e2b_processor.tokenizer,
|
| 390 |
+
skip_prompt=True, skip_special_tokens=True, timeout=120,
|
| 391 |
+
)
|
| 392 |
+
thread = threading.Thread(
|
| 393 |
+
target=gemma4_e2b_model.generate,
|
| 394 |
+
kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
|
| 395 |
+
use_cache=True, temperature=1.0, do_sample=True),
|
| 396 |
+
)
|
| 397 |
+
thread.start()
|
| 398 |
+
for tok in streamer:
|
| 399 |
+
if tok:
|
| 400 |
+
yield f"data: {json.dumps({'chunk': tok})}\n\n"
|
| 401 |
+
thread.join()
|
| 402 |
+
|
| 403 |
# ββ LFM-1.6B ββββββββββββββββββββββββββββββββββββββββ
|
| 404 |
elif model_id == "lfm_16":
|
| 405 |
if lfm_16_model is None or lfm_16_processor is None:
|
|
|
|
| 495 |
yield f"data: {json.dumps({'chunk': tok})}\n\n"
|
| 496 |
thread.join()
|
| 497 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 498 |
yield "data: [DONE]\n\n"
|
| 499 |
|
| 500 |
|
|
|
|
| 842 |
}
|
| 843 |
.model-badge.q4b { background: rgba(255,200,80,0.15); color: #ffc850; border: 1px solid rgba(255,200,80,0.35); }
|
| 844 |
.model-badge.q2b { background: rgba(124,106,247,0.2); color: var(--accent); border: 1px solid rgba(124,106,247,0.3); }
|
| 845 |
+
.model-badge.qvl2b { background: rgba(255,150,50,0.15); color: #ff9632; border: 1px solid rgba(255,150,50,0.35); }
|
| 846 |
+
.model-badge.qvl4b { background: rgba(255,100,80,0.15); color: #ff6450; border: 1px solid rgba(255,100,80,0.35); }
|
| 847 |
.model-badge.lfm450 { background: rgba(78,205,196,0.15); color: var(--accent2); border: 1px solid rgba(78,205,196,0.3); }
|
| 848 |
+
.model-badge.g4e2b { background: rgba(66,197,107,0.15); color: #42c56b; border: 1px solid rgba(66,197,107,0.35); }
|
| 849 |
.model-badge.lfm16 { background: rgba(107,203,119,0.15); color: #6bcb77; border: 1px solid rgba(107,203,119,0.35); }
|
| 850 |
.model-badge.qunred { background: rgba(255,80,160,0.15); color: #ff50a0; border: 1px solid rgba(255,80,160,0.35); }
|
| 851 |
.model-badge.q25vl3b { background: rgba(80,180,255,0.15); color: #50b4ff; border: 1px solid rgba(80,180,255,0.35); }
|
|
|
|
| 852 |
|
| 853 |
.model-info-box {
|
| 854 |
border-radius: 6px; padding: 9px;
|
|
|
|
| 865 |
<span class="logo">MULTIMODAL EDGE</span>
|
| 866 |
<span class="sep">|</span>
|
| 867 |
<span class="sub">Node-Based Inference Canvas</span>
|
| 868 |
+
<span class="badge">v2.7 β NONA MODEL</span>
|
| 869 |
</div>
|
| 870 |
|
| 871 |
<div id="canvas">
|
|
|
|
| 885 |
<div class="node-body">
|
| 886 |
<div>
|
| 887 |
<label>Upload Image</label>
|
|
|
|
|
|
|
| 888 |
<div class="file-upload" id="dropZone">
|
| 889 |
<svg width="30" height="30" viewBox="0 0 24 24" fill="none"
|
| 890 |
stroke="#7c6af7" stroke-width="1.5"
|
|
|
|
| 896 |
<span>Click or drop image here</span>
|
| 897 |
<input type="file" id="fileInput" accept="image/*">
|
| 898 |
</div>
|
|
|
|
|
|
|
| 899 |
<div class="preview-wrap" id="previewWrap">
|
| 900 |
<img id="imgPreview" class="img-preview" />
|
| 901 |
<button class="clear-btn" id="clearBtn" title="Remove image">
|
|
|
|
| 907 |
</svg>
|
| 908 |
</button>
|
| 909 |
</div>
|
|
|
|
|
|
|
| 910 |
<div class="img-chip" id="imgChip" style="margin-top:6px;">
|
| 911 |
<span class="chip-dot"></span>
|
| 912 |
<span class="chip-name" id="chipName">β</span>
|
|
|
|
| 929 |
<select id="modelSelect">
|
| 930 |
<option value="qwen_4b">Qwen3.5-4B</option>
|
| 931 |
<option value="qwen_2b">Qwen3.5-2B</option>
|
| 932 |
+
<option value="qwen_vl_2b">Qwen3-VL-2B-Instruct</option>
|
| 933 |
+
<option value="qwen_vl_4b">Qwen3-VL-4B-Instruct</option>
|
| 934 |
<option value="lfm_450">LFM2.5-VL-450M (LiquidAI)</option>
|
| 935 |
+
<option value="gemma4_e2b">Gemma4-E2B-it (Google)</option>
|
| 936 |
<option value="lfm_16">LFM2.5-VL-1.6B (LiquidAI)</option>
|
| 937 |
<option value="qwen_unredacted">Qwen3.5-2B-Unredacted-MAX</option>
|
| 938 |
<option value="qwen25_vl_3b">Qwen2.5-VL-3B-Instruct</option>
|
|
|
|
| 939 |
</select>
|
| 940 |
</div>
|
| 941 |
<div id="modelInfoBox" class="model-info-box"
|
|
|
|
| 1156 |
bg: 'rgba(124,106,247,0.07)',
|
| 1157 |
border: 'rgba(124,106,247,0.25)',
|
| 1158 |
},
|
| 1159 |
+
qwen_vl_2b: {
|
| 1160 |
+
html: `<span class="model-badge qvl2b">QWEN3-VL Β· 2B</span><br><br>
|
| 1161 |
Qwen3-VL-2B-Instruct β dedicated vision-language model by Alibaba Cloud.
|
| 1162 |
Strong spatial grounding, OCR & instruction-following.`,
|
| 1163 |
bg: 'rgba(255,150,50,0.07)',
|
| 1164 |
border: 'rgba(255,150,50,0.25)',
|
| 1165 |
},
|
| 1166 |
+
qwen_vl_4b: {
|
| 1167 |
+
html: `<span class="model-badge qvl4b">QWEN3-VL Β· 4B</span><br><br>
|
| 1168 |
+
Qwen3-VL-4B-Instruct β enhanced vision-language model by Alibaba Cloud.
|
| 1169 |
+
Superior spatial grounding, richer OCR & stronger multi-step reasoning.`,
|
| 1170 |
+
bg: 'rgba(255,100,80,0.07)',
|
| 1171 |
+
border: 'rgba(255,100,80,0.25)',
|
| 1172 |
+
},
|
| 1173 |
lfm_450: {
|
| 1174 |
html: `<span class="model-badge lfm450">LFM Β· 450M</span><br><br>
|
| 1175 |
LFM2.5-VL 450M by LiquidAI. Ultra-lightweight edge model
|
|
|
|
| 1177 |
bg: 'rgba(78,205,196,0.07)',
|
| 1178 |
border: 'rgba(78,205,196,0.25)',
|
| 1179 |
},
|
| 1180 |
+
gemma4_e2b: {
|
| 1181 |
+
html: `<span class="model-badge g4e2b">GEMMA 4 Β· E2B</span><br><br>
|
| 1182 |
+
Gemma4-E2B-it by Google DeepMind. Efficient 2B multimodal model
|
| 1183 |
+
with strong vision-language understanding & instruction-following.`,
|
| 1184 |
+
bg: 'rgba(66,197,107,0.07)',
|
| 1185 |
+
border: 'rgba(66,197,107,0.25)',
|
| 1186 |
+
},
|
| 1187 |
lfm_16: {
|
| 1188 |
html: `<span class="model-badge lfm16">LFM Β· 1.6B</span><br><br>
|
| 1189 |
LFM2.5-VL 1.6B by LiquidAI. Larger liquid-state model offering
|
|
|
|
| 1205 |
bg: 'rgba(80,180,255,0.07)',
|
| 1206 |
border: 'rgba(80,180,255,0.25)',
|
| 1207 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1208 |
};
|
| 1209 |
|
| 1210 |
modelSelect.onchange = () => {
|