Spaces:

prithivMLmods
/

Multimodal-Edge-Node

Running on Zero

App Files Files Community

prithivMLmods committed on May 1

Commit

237d494

verified ·

1 Parent(s): 8fea3b1

update app

Browse files

Files changed (1) hide show

app.py +90 -55

app.py CHANGED Viewed

@@ -36,15 +36,29 @@ DTYPE = (
     else torch.float16
 )
-QWEN_4B_MODEL_NAME    = "Qwen/Qwen3.5-4B"
-QWEN_2B_MODEL_NAME    = "Qwen/Qwen3.5-2B"
-QWEN_VL_2B_MODEL_NAME = "Qwen/Qwen3-VL-2B-Instruct"
-QWEN_VL_4B_MODEL_NAME = "Qwen/Qwen3-VL-4B-Instruct"
-LFM_450_MODEL_NAME    = "LiquidAI/LFM2.5-VL-450M"
-GEMMA4_E2B_NAME       = "google/gemma-4-E2B-it"
-LFM_16_MODEL_NAME     = "LiquidAI/LFM2.5-VL-1.6B"
-QWEN_UNREDACTED_NAME  = "prithivMLmods/Qwen3.5-2B-Unredacted-MAX"
-QWEN25_VL_3B_NAME     = "Qwen/Qwen2.5-VL-3B-Instruct"
 # ── Qwen3.5-4B ──────────────────────────────────────────
 print(f"Loading Qwen3.5-4B model: {QWEN_4B_MODEL_NAME} on {DEVICE}...")
@@ -201,7 +215,7 @@ def safe_parse_json(text: str):
 # --- Inference Generator (Streaming) ---
 @spaces.GPU(duration=120)
 def generate_inference_stream(
-    image: Image.Image, category: str, prompt: str, model_id: str = "qwen_4b"
 ):
     if category == "Query":
         full_prompt = prompt
@@ -214,8 +228,39 @@ def generate_inference_stream(
     else:
         full_prompt = prompt
     # ── Qwen3.5-4B ──────────────────────────────────────
-    if model_id == "qwen_4b":
         if qwen_4b_model is None or qwen_4b_processor is None:
             yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-4B model not loaded.'})}\n\n"
             yield "data: [DONE]\n\n"
@@ -504,7 +549,7 @@ async def run_inference(
     image:    UploadFile = File(...),
     category: str        = Form(...),
     prompt:   str        = Form(...),
-    model_id: str        = Form("qwen_4b"),
 ):
     try:
         img_bytes = await image.read()
@@ -882,15 +927,16 @@ async def homepage(request: Request):
             border-radius: 4px; font-size: 9px; font-weight: 700;
             letter-spacing: 0.06em; text-transform: uppercase;
         }
-        .model-badge.q4b     { background: rgba(255,200,80,0.15);  color: #ffc850;        border: 1px solid rgba(255,200,80,0.35); }
-        .model-badge.q2b     { background: rgba(124,106,247,0.2);  color: var(--accent);  border: 1px solid rgba(124,106,247,0.3); }
-        .model-badge.qvl2b   { background: rgba(255,150,50,0.15);  color: #ff9632;        border: 1px solid rgba(255,150,50,0.35); }
-        .model-badge.qvl4b   { background: rgba(255,100,80,0.15);  color: #ff6450;        border: 1px solid rgba(255,100,80,0.35); }
-        .model-badge.lfm450  { background: rgba(78,205,196,0.15);  color: var(--accent2); border: 1px solid rgba(78,205,196,0.3); }
-        .model-badge.g4e2b   { background: rgba(66,197,107,0.15);  color: #42c56b;        border: 1px solid rgba(66,197,107,0.35); }
-        .model-badge.lfm16   { background: rgba(107,203,119,0.15); color: #6bcb77;        border: 1px solid rgba(107,203,119,0.35); }
-        .model-badge.qunred  { background: rgba(255,80,160,0.15);  color: #ff50a0;        border: 1px solid rgba(255,80,160,0.35); }
-        .model-badge.q25vl3b { background: rgba(80,180,255,0.15);  color: #50b4ff;        border: 1px solid rgba(80,180,255,0.35); }
         .model-info-box {
             border-radius: 6px; padding: 9px;
@@ -907,7 +953,7 @@ async def homepage(request: Request):
     <span class="logo">MULTIMODAL EDGE</span>
     <span class="sep">|</span>
     <span class="sub">Node-Based Inference Canvas</span>
-    <span class="badge">v2.7 — NONA MODEL</span>
 </div>
 <div id="canvas">
@@ -969,6 +1015,7 @@ async def homepage(request: Request):
             <div>
                 <label>Active Model</label>
                 <select id="modelSelect">
                     <option value="qwen_4b">Qwen3.5-4B</option>
                     <option value="qwen_2b">Qwen3.5-2B</option>
                     <option value="qwen_vl_2b">Qwen3-VL-2B-Instruct</option>
@@ -981,10 +1028,10 @@ async def homepage(request: Request):
                 </select>
             </div>
             <div id="modelInfoBox" class="model-info-box"
-                 style="background:rgba(255,200,80,0.07);border:1px solid rgba(255,200,80,0.3);">
-                <span class="model-badge q4b">QWEN 3.5 · 4B</span><br><br>
-                Qwen3.5 4B multimodal model by Alibaba Cloud.
-                Enhanced capacity over 2B — richer reasoning, better instruction following.
             </div>
             <div style="flex:1;"></div>
         </div>
@@ -1195,6 +1242,13 @@ const dotModel     = document.getElementById('dot-model');
 dotModel.classList.add('active');
 const MODEL_INFO = {
     qwen_4b: {
         html:   `<span class="model-badge q4b">QWEN 3.5 · 4B</span><br><br>
                  Qwen3.5 4B multimodal model by Alibaba Cloud.
@@ -1285,22 +1339,21 @@ categorySelect.onchange = e => {
 // ══════════════════════════════════════════════
 //  ROBUST JSON EXTRACTOR
-//  Strips <think>…</think> blocks, then pulls
 //  the first JSON array or object from the text.
 // ══════════════════════════════════════════════
 function extractGroundingJSON(raw) {
-    // 1. Remove <think>…</think> blocks (including nested content)
     let text = raw.replace(/<think>[\s\S]*?<\/think>/gi, '');
-    // 2. Strip markdown code fences  ```json … ```  or  ``` … ```
     text = text.replace(/```(?:json)?\\s*/gi, '').replace(/```/g, '');
     text = text.trim();
-    // 3. Try to find a JSON array first  [ … ]
     const arrIdx = text.indexOf('[');
     if (arrIdx !== -1) {
-        // Walk forward to find the matching closing bracket
         let depth = 0, inStr = false, esc = false;
         for (let i = arrIdx; i < text.length; i++) {
             const c = text[i];
@@ -1318,7 +1371,7 @@ function extractGroundingJSON(raw) {
         }
     }
-    // 4. Try to find a JSON object  { … }
     const objIdx = text.indexOf('{');
     if (objIdx !== -1) {
         let depth = 0, inStr = false, esc = false;
@@ -1338,9 +1391,8 @@ function extractGroundingJSON(raw) {
         }
     }
-    // 5. Last resort — try parsing the whole cleaned text
     try { return JSON.parse(text); } catch(_) {}
     return null;
 }
@@ -1374,7 +1426,6 @@ function roundRect(ctx, x, y, w, h, r) {
 }
 function drawGrounding(imgSrc, rawText) {
-    // ── Extract JSON from raw model output (handles <think> blocks etc.) ──
     const parsed = extractGroundingJSON(rawText);
     if (!parsed) {
         console.warn('Grounding: could not extract JSON from output:', rawText);
@@ -1394,14 +1445,12 @@ function drawGrounding(imgSrc, rawText) {
         gCtx.lineWidth = lw;
         gCtx.font      = `bold ${fs}px JetBrains Mono, monospace`;
-        // Normalise to array
         const items = Array.isArray(parsed) ? parsed : [parsed];
         items.forEach((item, i) => {
             const col = PALETTE[i % PALETTE.length];
-            // ── Detect: bounding box ─────────────────────────
-            // Accept bbox_2d, bbox, or a raw 4-number array
             let bbox = null;
             if (Array.isArray(item?.bbox_2d) && item.bbox_2d.length === 4)
                 bbox = item.bbox_2d;
@@ -1413,23 +1462,17 @@ function drawGrounding(imgSrc, rawText) {
             if (bbox) {
                 let [x1, y1, x2, y2] = bbox.map(Number);
-                // Normalised 0-1 coords → pixel coords
                 if (x1 <= 1 && y1 <= 1 && x2 <= 1 && y2 <= 1) {
                     x1 *= W; y1 *= H; x2 *= W; y2 *= H;
                 }
-                const bw  = x2 - x1;
-                const bh  = y2 - y1;
                 const lbl = item?.label ?? `obj ${i + 1}`;
-                // Filled rect + stroke
                 gCtx.fillStyle   = hexToRgba(col, 0.18);
                 gCtx.fillRect(x1, y1, bw, bh);
                 gCtx.strokeStyle = col;
                 gCtx.strokeRect(x1, y1, bw, bh);
-                // Label pill above the box
                 const tw = gCtx.measureText(lbl).width;
                 const ph = fs * 1.4, pw = tw + 10;
                 const lx = x1, ly = Math.max(0, y1 - ph);
@@ -1441,8 +1484,7 @@ function drawGrounding(imgSrc, rawText) {
                 return;
             }
-            // ── Point: 2-D coordinate ────────────────────────
-            // Accept point_2d, point, or a raw 2-number array
             let pt = null;
             if (Array.isArray(item?.point_2d) && item.point_2d.length === 2)
                 pt = item.point_2d;
@@ -1454,20 +1496,15 @@ function drawGrounding(imgSrc, rawText) {
             if (pt) {
                 let [x, y] = pt.map(Number);
-                // Normalised 0-1 coords → pixel coords
                 if (x <= 1 && y <= 1) { x *= W; y *= H; }
                 const r   = Math.max(8, W / 60);
                 const lbl = item?.label ?? `pt ${i + 1}`;
-                // Outer glow ring
                 gCtx.beginPath();
                 gCtx.arc(x, y, r * 1.7, 0, Math.PI * 2);
                 gCtx.fillStyle = hexToRgba(col, 0.15);
                 gCtx.fill();
-                // Solid dot
                 gCtx.beginPath();
                 gCtx.arc(x, y, r, 0, Math.PI * 2);
                 gCtx.fillStyle   = col;
@@ -1475,7 +1512,6 @@ function drawGrounding(imgSrc, rawText) {
                 gCtx.strokeStyle = '#fff';
                 gCtx.stroke();
-                // Label to the right of the dot
                 gCtx.fillStyle = '#fff';
                 gCtx.fillText(lbl, x + r + 4, y + fs * 0.4);
             }
@@ -1514,7 +1550,6 @@ copyBtn.onclick = () => {
                 </svg> COPY`;
         }, 2000);
     }).catch(() => {
-        // Fallback for older browsers
         const ta = document.createElement('textarea');
         ta.value = txt;
         ta.style.position = 'fixed'; ta.style.opacity = '0';
@@ -1603,7 +1638,7 @@ runBtn.onclick = async () => {
         dotOut.classList.add('active');
-        // ── Attempt grounding overlay for Point / Detect ──
         const cat = categorySelect.value;
         if ((cat === 'Point' || cat === 'Detect') && fullText.trim()) {
             const parsed = extractGroundingJSON(fullText);

     else torch.float16
 )
+QWEN_4B_UNREDACTED_NAME = "prithivMLmods/Qwen3.5-4B-Unredacted-MAX"
+QWEN_4B_MODEL_NAME      = "Qwen/Qwen3.5-4B"
+QWEN_2B_MODEL_NAME      = "Qwen/Qwen3.5-2B"
+QWEN_VL_2B_MODEL_NAME   = "Qwen/Qwen3-VL-2B-Instruct"
+QWEN_VL_4B_MODEL_NAME   = "Qwen/Qwen3-VL-4B-Instruct"
+LFM_450_MODEL_NAME      = "LiquidAI/LFM2.5-VL-450M"
+GEMMA4_E2B_NAME         = "google/gemma-4-E2B-it"
+LFM_16_MODEL_NAME       = "LiquidAI/LFM2.5-VL-1.6B"
+QWEN_UNREDACTED_NAME    = "prithivMLmods/Qwen3.5-2B-Unredacted-MAX"
+QWEN25_VL_3B_NAME       = "Qwen/Qwen2.5-VL-3B-Instruct"
+# ── Qwen3.5-4B-Unredacted-MAX ───────────────────────────
+print(f"Loading Qwen3.5-4B-Unredacted-MAX: {QWEN_4B_UNREDACTED_NAME} on {DEVICE}...")
+try:  # best-effort load: any failure is caught below so the rest of the module keeps loading
+    qwen_4b_unredacted_model = Qwen3_5ForConditionalGeneration.from_pretrained(
+        QWEN_4B_UNREDACTED_NAME, torch_dtype=DTYPE, device_map=DEVICE,
+    ).eval()  # .eval() is called on the freshly loaded model before it is stored
+    qwen_4b_unredacted_processor = AutoProcessor.from_pretrained(QWEN_4B_UNREDACTED_NAME)
+    print("Qwen3.5-4B-Unredacted-MAX model loaded successfully.")
+except Exception as e:  # the error is only printed; the model is then marked unavailable
+    print(f"Warning: Qwen3.5-4B-Unredacted-MAX model loading failed. Error: {e}")
+    qwen_4b_unredacted_model = None  # generate_inference_stream tests for None and streams a "not loaded" error
+    qwen_4b_unredacted_processor = None  # reset together with the model so the pair is never half-initialised
 # ── Qwen3.5-4B ──────────────────────────────────────────
 print(f"Loading Qwen3.5-4B model: {QWEN_4B_MODEL_NAME} on {DEVICE}...")
 # --- Inference Generator (Streaming) ---
 @spaces.GPU(duration=120)
 def generate_inference_stream(
+    image: Image.Image, category: str, prompt: str, model_id: str = "qwen_4b_unredacted"
 ):
     if category == "Query":
         full_prompt = prompt
     else:
         full_prompt = prompt
+    # ── Qwen3.5-4B-Unredacted-MAX ───────────────────────
+    if model_id == "qwen_4b_unredacted":
+        if qwen_4b_unredacted_model is None or qwen_4b_unredacted_processor is None:
+            yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-4B-Unredacted-MAX model not loaded.'})}\n\n"
+            yield "data: [DONE]\n\n"
+            return
+        messages = [{"role": "user", "content": [
+            {"type": "image", "image": image},
+            {"type": "text",  "text":  full_prompt},
+        ]}]
+        text_input = qwen_4b_unredacted_processor.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True
+        )
+        inputs = qwen_4b_unredacted_processor(
+            text=[text_input], images=[image], return_tensors="pt", padding=True
+        ).to(qwen_4b_unredacted_model.device)
+        streamer = TextIteratorStreamer(
+            qwen_4b_unredacted_processor.tokenizer,
+            skip_prompt=True, skip_special_tokens=True, timeout=120,
+        )
+        thread = threading.Thread(
+            target=qwen_4b_unredacted_model.generate,
+            kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1024,
+                        use_cache=True, temperature=1.5, min_p=0.1),
+        )
+        thread.start()
+        for tok in streamer:
+            if tok:
+                yield f"data: {json.dumps({'chunk': tok})}\n\n"
+        thread.join()
     # ── Qwen3.5-4B ──────────────────────────────────────
+    elif model_id == "qwen_4b":
         if qwen_4b_model is None or qwen_4b_processor is None:
             yield f"data: {json.dumps({'chunk': '[Error] Qwen3.5-4B model not loaded.'})}\n\n"
             yield "data: [DONE]\n\n"
     image:    UploadFile = File(...),
     category: str        = Form(...),
     prompt:   str        = Form(...),
+    model_id: str        = Form("qwen_4b_unredacted"),
 ):
     try:
         img_bytes = await image.read()
             border-radius: 4px; font-size: 9px; font-weight: 700;
             letter-spacing: 0.06em; text-transform: uppercase;
         }
+        .model-badge.q4bunred { background: rgba(255,80,80,0.18);   color: #ff5050;        border: 1px solid rgba(255,80,80,0.40); }
+        .model-badge.q4b      { background: rgba(255,200,80,0.15);  color: #ffc850;        border: 1px solid rgba(255,200,80,0.35); }
+        .model-badge.q2b      { background: rgba(124,106,247,0.2);  color: var(--accent);  border: 1px solid rgba(124,106,247,0.3); }
+        .model-badge.qvl2b    { background: rgba(255,150,50,0.15);  color: #ff9632;        border: 1px solid rgba(255,150,50,0.35); }
+        .model-badge.qvl4b    { background: rgba(255,100,80,0.15);  color: #ff6450;        border: 1px solid rgba(255,100,80,0.35); }
+        .model-badge.lfm450   { background: rgba(78,205,196,0.15);  color: var(--accent2); border: 1px solid rgba(78,205,196,0.3); }
+        .model-badge.g4e2b    { background: rgba(66,197,107,0.15);  color: #42c56b;        border: 1px solid rgba(66,197,107,0.35); }
+        .model-badge.lfm16    { background: rgba(107,203,119,0.15); color: #6bcb77;        border: 1px solid rgba(107,203,119,0.35); }
+        .model-badge.qunred   { background: rgba(255,80,160,0.15);  color: #ff50a0;        border: 1px solid rgba(255,80,160,0.35); }
+        .model-badge.q25vl3b  { background: rgba(80,180,255,0.15);  color: #50b4ff;        border: 1px solid rgba(80,180,255,0.35); }
         .model-info-box {
             border-radius: 6px; padding: 9px;
     <span class="logo">MULTIMODAL EDGE</span>
     <span class="sep">|</span>
     <span class="sub">Node-Based Inference Canvas</span>
+    <span class="badge">v2.8 — DECA MODEL</span>
 </div>
 <div id="canvas">
             <div>
                 <label>Active Model</label>
                 <select id="modelSelect">
+                    <option value="qwen_4b_unredacted">Qwen3.5-4B-Unredacted-MAX</option>
                     <option value="qwen_4b">Qwen3.5-4B</option>
                     <option value="qwen_2b">Qwen3.5-2B</option>
                     <option value="qwen_vl_2b">Qwen3-VL-2B-Instruct</option>
                 </select>
             </div>
             <div id="modelInfoBox" class="model-info-box"
+                 style="background:rgba(255,80,80,0.07);border:1px solid rgba(255,80,80,0.3);">
+                <span class="model-badge q4bunred">QWEN 3.5 · 4B UNREDACTED MAX</span><br><br>
+                Qwen3.5-4B-Unredacted-MAX by prithivMLmods. Uncensored fine-tune of Qwen3.5-4B
+                with extended instruction-following &amp; unrestricted reasoning.
             </div>
             <div style="flex:1;"></div>
         </div>
 dotModel.classList.add('active');
 const MODEL_INFO = {
+    qwen_4b_unredacted: {
+        html:   `<span class="model-badge q4bunred">QWEN 3.5 · 4B UNREDACTED MAX</span><br><br>
+                 Qwen3.5-4B-Unredacted-MAX by prithivMLmods. Uncensored fine-tune of Qwen3.5-4B
+                 with extended instruction-following &amp; unrestricted reasoning.`,
+        bg:     'rgba(255,80,80,0.07)',
+        border: 'rgba(255,80,80,0.30)',
+    },
     qwen_4b: {
         html:   `<span class="model-badge q4b">QWEN 3.5 · 4B</span><br><br>
                  Qwen3.5 4B multimodal model by Alibaba Cloud.
 // ══════════════════════════════════════════════
 //  ROBUST JSON EXTRACTOR
+//  Strips <think>…</think> blocks then pulls
 //  the first JSON array or object from the text.
 // ══════════════════════════════════════════════
 function extractGroundingJSON(raw) {
+    // 1. Remove <think>…</think> blocks
     let text = raw.replace(/<think>[\s\S]*?<\/think>/gi, '');
+    // 2. Strip markdown code fences
     text = text.replace(/```(?:json)?\\s*/gi, '').replace(/```/g, '');
     text = text.trim();
+    // 3. Try JSON array first  [ … ]
     const arrIdx = text.indexOf('[');
     if (arrIdx !== -1) {
         let depth = 0, inStr = false, esc = false;
         for (let i = arrIdx; i < text.length; i++) {
             const c = text[i];
         }
     }
+    // 4. Try JSON object  { … }
     const objIdx = text.indexOf('{');
     if (objIdx !== -1) {
         let depth = 0, inStr = false, esc = false;
         }
     }
+    // 5. Last resort
     try { return JSON.parse(text); } catch(_) {}
     return null;
 }
 }
 function drawGrounding(imgSrc, rawText) {
     const parsed = extractGroundingJSON(rawText);
     if (!parsed) {
         console.warn('Grounding: could not extract JSON from output:', rawText);
         gCtx.lineWidth = lw;
         gCtx.font      = `bold ${fs}px JetBrains Mono, monospace`;
         const items = Array.isArray(parsed) ? parsed : [parsed];
         items.forEach((item, i) => {
             const col = PALETTE[i % PALETTE.length];
+            // ── Bounding box ─────────────────────────────
             let bbox = null;
             if (Array.isArray(item?.bbox_2d) && item.bbox_2d.length === 4)
                 bbox = item.bbox_2d;
             if (bbox) {
                 let [x1, y1, x2, y2] = bbox.map(Number);
                 if (x1 <= 1 && y1 <= 1 && x2 <= 1 && y2 <= 1) {
                     x1 *= W; y1 *= H; x2 *= W; y2 *= H;
                 }
+                const bw  = x2 - x1, bh = y2 - y1;
                 const lbl = item?.label ?? `obj ${i + 1}`;
                 gCtx.fillStyle   = hexToRgba(col, 0.18);
                 gCtx.fillRect(x1, y1, bw, bh);
                 gCtx.strokeStyle = col;
                 gCtx.strokeRect(x1, y1, bw, bh);
                 const tw = gCtx.measureText(lbl).width;
                 const ph = fs * 1.4, pw = tw + 10;
                 const lx = x1, ly = Math.max(0, y1 - ph);
                 return;
             }
+            // ── Point ────────────────────────────────────
             let pt = null;
             if (Array.isArray(item?.point_2d) && item.point_2d.length === 2)
                 pt = item.point_2d;
             if (pt) {
                 let [x, y] = pt.map(Number);
                 if (x <= 1 && y <= 1) { x *= W; y *= H; }
                 const r   = Math.max(8, W / 60);
                 const lbl = item?.label ?? `pt ${i + 1}`;
                 gCtx.beginPath();
                 gCtx.arc(x, y, r * 1.7, 0, Math.PI * 2);
                 gCtx.fillStyle = hexToRgba(col, 0.15);
                 gCtx.fill();
                 gCtx.beginPath();
                 gCtx.arc(x, y, r, 0, Math.PI * 2);
                 gCtx.fillStyle   = col;
                 gCtx.strokeStyle = '#fff';
                 gCtx.stroke();
                 gCtx.fillStyle = '#fff';
                 gCtx.fillText(lbl, x + r + 4, y + fs * 0.4);
             }
                 </svg> COPY`;
         }, 2000);
     }).catch(() => {
         const ta = document.createElement('textarea');
         ta.value = txt;
         ta.style.position = 'fixed'; ta.style.opacity = '0';
         dotOut.classList.add('active');
+        // Attempt grounding overlay for Point / Detect
         const cat = categorySelect.value;
         if ((cat === 'Point' || cat === 'Detect') && fullText.trim()) {
             const parsed = extractGroundingJSON(fullText);