apsora committed
Commit b6c29b8 · verified · 1 Parent(s): c5818d2

Update app.py

Files changed (1)
  1. app.py +85 -98
app.py CHANGED
@@ -1,56 +1,26 @@
 import json
 import re
 from dataclasses import dataclass, field
 from typing import Any, Dict, List, Optional, Tuple

 import numpy as np
-import torch
 from PIL import Image
 import gradio as gr
-from transformers import AutoProcessor, AutoModelForVision2Seq
-import spaces  # <-- needed for Stateless GPU / zeroGPU
-
-# ---------------------------------------------------------------------
-# Minimal GPU-decorated function so Stateless GPU doesn't error out
-# ---------------------------------------------------------------------
-@spaces.GPU
-def gpu_ping() -> str:
-    """
-    Dummy GPU endpoint so Hugging Face Stateless GPU / zeroGPU
-    detects at least one @spaces.GPU function.
-
-    We don't actually use this in the app logic. It just keeps
-    the Space from throwing:
-    'No @spaces.GPU function detected during startup'.
-    """
-    return "gpu_ready"
-

 # ============================================================
-# 0. Model + guidelines setup
 # ============================================================

-# NOTE: we keep everything on CPU here to avoid touching CUDA
-# in the main process (required for Stateless GPU).
-DEVICE = "cpu"
-DTYPE = torch.float32
-
 MODEL_NAME = "maryzhang/qwen3vl-guideline-lora-model"

-print(f"Loading unified vision+text model {MODEL_NAME} on {DEVICE}", flush=True)
-
-model_vlm = AutoModelForVision2Seq.from_pretrained(
-    MODEL_NAME,
-    dtype=DTYPE,
-    trust_remote_code=True,
-)
-model_vlm.to(DEVICE)
-model_vlm.eval()

-processor_vlm = AutoProcessor.from_pretrained(
-    MODEL_NAME,
-    trust_remote_code=True,
-)

 GUIDELINES_PATH = "guidelines_final.json"
@@ -119,41 +89,43 @@ print(f"Loaded {len(ALL_GUIDELINES)} guidelines", flush=True)


 # ============================================================
-# 1. Core LLM helpers (text-only + vision)
 # ============================================================

 def run_text_llm(system_prompt: str, user_prompt: str, max_new_tokens: int = 768) -> str:
     """
-    Use Qwen3-VL (LoRA) in text-only mode.
     """
     messages = [
-        {"role": "system", "content": [{"type": "text", "text": system_prompt}]},
-        {"role": "user", "content": [{"type": "text", "text": user_prompt}]},
     ]
-    prompt_text = processor_vlm.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True,
     )

-    inputs = processor_vlm(
-        text=prompt_text,
-        return_tensors="pt",
-    ).to(DEVICE)
-
-    with torch.no_grad():
-        output_ids = model_vlm.generate(
-            **inputs,
-            max_new_tokens=max_new_tokens,
-            temperature=0.0,
-            do_sample=False,
-        )

-    generated = processor_vlm.decode(
-        output_ids[0],
-        skip_special_tokens=True,
-    ).strip()
-    return generated


 def vlm_generate_json_from_images(
@@ -161,48 +133,65 @@ def vlm_generate_json_from_images(
     images: List[Image.Image],
 ) -> Dict[str, Any]:
     """
-    Call Qwen3-VL with images and ask it to return STRICT JSON.
     """
     if not images:
         images = [Image.new("RGB", (64, 64), "white")]

-    content = [{"type": "image"} for _ in images]
-    content.append({"type": "text", "text": prompt})
-
-    messages = [{"role": "user", "content": content}]

-    prompt_text = processor_vlm.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True,
     )

-    inputs = processor_vlm(
-        text=prompt_text,
-        images=images,
-        return_tensors="pt",
-    ).to(DEVICE)
-
-    with torch.no_grad():
-        output_ids = model_vlm.generate(
-            **inputs,
-            max_new_tokens=512,
-            temperature=0.0,
-            do_sample=False,
-        )

-    generated = processor_vlm.decode(
-        output_ids[0],
-        skip_special_tokens=True,
-    ).strip()

-    m = re.search(r"\{.*\}", generated, re.DOTALL)
     if m:
         try:
             return json.loads(m.group(0))
         except Exception:
             pass
-    return {"parse_error": True, "raw": generated}


 # ============================================================
@@ -287,9 +276,7 @@ def rag_retrieve(query: str, top_k: int = 6) -> List[Dict[str, Any]]:
     scored = []
     for g in ALL_GUIDELINES:
         pfl = g.get("pass_fail_logic") or {}
-        pfl_text = " ".join(
-            f"{k}: {v}" for k, v in pfl.items()
-        )
         blob = " ".join(
             [
                 g.get("topic", ""),
@@ -306,9 +293,7 @@ def rag_retrieve(query: str, top_k: int = 6) -> List[Dict[str, Any]]:
     hits = []
     for score, g in scored[:top_k]:
         pfl = g.get("pass_fail_logic") or {}
-        pfl_text = " ".join(
-            f"{k}: {v}" for k, v in pfl.items()
-        )
         text = (
             " ".join(g.get("evaluation_criteria", []) or [])
             or " ".join(g.get("expected_answers", []) or [])
@@ -1081,7 +1066,9 @@ with gr.Blocks(title="DFM / GD&T Manufacturability Tutor") as demo:
         2. *(Optional)* Add a short description of the part
         3. Click **Start review**
         4. Answer a few focused questions → get a guideline-by-guideline summary
-        This tool is meant to feel like a mini design review with a friendly TA.
         """
     )
 
 import json
 import re
+import base64
+import io
 from dataclasses import dataclass, field
 from typing import Any, Dict, List, Optional, Tuple

 import numpy as np
 from PIL import Image
 import gradio as gr
+from huggingface_hub import InferenceClient

 # ============================================================
+# 0. Model + guidelines setup (Inference API version)
 # ============================================================

 MODEL_NAME = "maryzhang/qwen3vl-guideline-lora-model"

+print(f"Using hosted model via Inference API: {MODEL_NAME}", flush=True)

+# This uses the HF Inference API (no local weights, no GPU in the Space)
+# If the model is private, set HF_TOKEN as an environment variable in the Space.
+hf_client = InferenceClient(MODEL_NAME)
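If the model repo is private, the token has to reach the client either via the environment or explicitly; a minimal sketch (assuming an HF_TOKEN secret is configured in the Space settings) would be:

# Sketch only: explicit token for a private repo; None is fine for public models
import os
from huggingface_hub import InferenceClient

hf_client = InferenceClient(
    model="maryzhang/qwen3vl-guideline-lora-model",
    token=os.environ.get("HF_TOKEN"),
)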
 
 GUIDELINES_PATH = "guidelines_final.json"
 
 
 # ============================================================
+# 1. Core LLM helpers (text-only + vision via Inference API)
 # ============================================================

 def run_text_llm(system_prompt: str, user_prompt: str, max_new_tokens: int = 768) -> str:
     """
+    Use the hosted Qwen3-VL model in text-only mode via chat_completion.
+
+    We build a simple system+user messages list and ask for a deterministic
+    response (temperature=0).
     """
     messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": user_prompt},
     ]
+
+    response = hf_client.chat_completion(
+        messages=messages,
+        max_tokens=max_new_tokens,
+        temperature=0.0,
+        stream=False,
     )
+    # HuggingFace InferenceClient returns a ChatCompletionOutput
+    text = response.choices[0].message.content
+    return (text or "").strip()
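A minimal usage sketch of the updated text-only helper (the prompts here are illustrative, not the app's real prompts):

# Illustrative call: deterministic, text-only chat completion through the hosted model
answer = run_text_llm(
    system_prompt="You are a DFM tutor. Answer concisely.",
    user_prompt="Why does uniform wall thickness matter for injection molding?",
)
print(answer)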

+def _pil_to_data_url(img: Image.Image, fmt: str = "PNG") -> str:
+    """
+    Convert a PIL image to a data URL (base64-encoded), which matches the
+    format expected by chat_completion with vision support:
+    type: "image_url", image_url: {"url": "data:image/png;base64,..."}
+    """
+    buf = io.BytesIO()
+    img.save(buf, format=fmt)
+    b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
+    mime = "image/png" if fmt.upper() == "PNG" else "image/jpeg"
+    return f"data:{mime};base64,{b64}"
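A quick sanity-check sketch for this helper (throwaway image; the assertion only illustrates the expected prefix):

# Sketch: the returned data URL should carry the PNG MIME type and a base64 payload
from PIL import Image

img = Image.new("RGB", (32, 32), "white")
url = _pil_to_data_url(img)
assert url.startswith("data:image/png;base64,")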
 
 def vlm_generate_json_from_images(
     images: List[Image.Image],
 ) -> Dict[str, Any]:
     """
+    Call the hosted Qwen3-VL model with images + text using chat_completion.
+    We ask it to return STRICT JSON and then parse the JSON out of the reply.
+
+    This assumes the model supports OpenAI-style multimodal messages where
+    each content item can be {"type": "image_url", "image_url": {"url": ...}}
+    plus a text chunk.
     """
     if not images:
         images = [Image.new("RGB", (64, 64), "white")]

+    # Build message content with multiple images + prompt text
+    content: List[Dict[str, Any]] = []
+    for img in images:
+        url = _pil_to_data_url(img)
+        content.append(
+            {
+                "type": "image_url",
+                "image_url": {"url": url},
+            }
+        )

+    content.append(
+        {
+            "type": "text",
+            "text": prompt,
+        }
     )

+    messages = [
+        {
+            "role": "system",
+            "content": "You are a vision model that ONLY replies with strict JSON.",
+        },
+        {
+            "role": "user",
+            "content": content,
+        },
+    ]

+    # Ask for a deterministic, non-streaming, JSON-like answer
+    response = hf_client.chat_completion(
+        messages=messages,
+        max_tokens=512,
+        temperature=0.0,
+        stream=False,
+        # If your model supports response_format, you can uncomment:
+        # response_format={"type": "json_object"},
+    )
+    raw = response.choices[0].message.content or ""
+    raw = raw.strip()

+    # Try to extract JSON object from the raw string
+    m = re.search(r"\{.*\}", raw, re.DOTALL)
     if m:
         try:
             return json.loads(m.group(0))
         except Exception:
             pass
+    return {"parse_error": True, "raw": raw}
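A usage sketch for the vision helper; the first positional argument is assumed to be the prompt string (the parameter itself is elided from the hunk but referenced in the body), and the file name and prompt are purely illustrative:

# Sketch: call the vision helper and guard against non-JSON replies
from PIL import Image

part_img = Image.open("bracket.png")  # hypothetical uploaded drawing
result = vlm_generate_json_from_images(
    'Return {"holes": <int>, "thin_walls": <bool>} as strict JSON.',
    [part_img],
)
if result.get("parse_error"):
    print("Model reply was not valid JSON:", result["raw"][:200])
else:
    print(result)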
  # ============================================================
 
     scored = []
     for g in ALL_GUIDELINES:
         pfl = g.get("pass_fail_logic") or {}
+        pfl_text = " ".join(f"{k}: {v}" for k, v in pfl.items())
         blob = " ".join(
             [
                 g.get("topic", ""),
 
     hits = []
     for score, g in scored[:top_k]:
         pfl = g.get("pass_fail_logic") or {}
+        pfl_text = " ".join(f"{k}: {v}" for k, v in pfl.items())
         text = (
             " ".join(g.get("evaluation_criteria", []) or [])
             or " ".join(g.get("expected_answers", []) or [])
 
         2. *(Optional)* Add a short description of the part
         3. Click **Start review**
         4. Answer a few focused questions → get a guideline-by-guideline summary
+
+        This tool is powered by a hosted multimodal model via the Hugging Face Inference API,
+        so it runs on free CPU hardware without loading big weights in this Space.
         """
     )