Spaces:

Chaste20
/

SmolVLM_Handshape_Letter

Runtime error

App Files Files Community

Chaste20 committed on Dec 12, 2025

Commit

5a1b053

verified ·

1 Parent(s): 62a145e

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -15

app.py CHANGED Viewed

@@ -6,14 +6,13 @@ from peft import PeftModel
 import traceback, textwrap, re
 BASE_MODEL_ID = "HuggingFaceTB/SmolVLM2-256M-Video-Instruct"
-FINETUNED_MODEL_ID = "https://huggingface.co/Chaste20/smolvlm2-asl-ql-2"
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 DTYPE = torch.bfloat16 if torch.cuda.is_available() else torch.float32
 DEFAULT_QUESTION = (
-    "Which ASL alphabet letter is shown in this image? "
-    "Answer with exactly one capital letter A–Z and nothing else."
 )
-ALLOWED_LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 processor = None
 model = None
@@ -52,17 +51,16 @@ def load_model():
     return processor, model
 def extract_letter(raw_text: str) -> str:
-    m = re.search(r"\b([A-Z])\b", raw_text.strip())
-    if m and m.group(1) in ALLOWED_LETTERS:
-        return m.group(1)
-    caps = [c for c in raw_text if c in ALLOWED_LETTERS]
-    return caps[-1] if caps else "?"
 @torch.inference_mode()
 def guardio_predict(image, question: str):
     try:
         if image is None:
-            return "⚠️ Please upload an image of an ASL handshape."
         if not question or not question.strip():
             question = DEFAULT_QUESTION
@@ -95,13 +93,14 @@ def guardio_predict(image, question: str):
             images=[image],
             padding=True,
             return_tensors="pt",
-        ).to(DEVICE)
         output_ids = mdl.generate(
             **inputs,
             max_new_tokens=8,
             do_sample=False,
-            num_beams=1,
             temperature=0.1,
             pad_token_id=proc.tokenizer.eos_token_id,
         )
@@ -119,7 +118,7 @@ def guardio_predict(image, question: str):
                 f"Raw model output: `{raw_text}`"
             )
-        return f"🔤 **Predicted letter: {letter}**\n\nRaw model output: `{raw_text}`"
     except Exception as e:
         traceback.print_exc()
@@ -136,7 +135,7 @@ def build_demo():
     with gr.Blocks(title="Guardio – ASL Letter Demo (HF Space)") as demo:
         gr.Markdown(
             """
-            # 🧤 Guardio – ASL Letter Demo
             - Upload an image of a **single ASL alphabet handshape**
             - Ask: *"Which ASL alphabet letter is this image?"*
@@ -151,7 +150,10 @@ def build_demo():
                 btn = gr.Button("Ask Guardio", variant="primary")
             with gr.Column():
-                out = gr.Markdown("Upload an image and click **Ask Guardio**.")
         btn.click(fn=guardio_predict, inputs=[img, q], outputs=[out])

 import traceback, textwrap, re
 BASE_MODEL_ID = "HuggingFaceTB/SmolVLM2-256M-Video-Instruct"
+FINETUNED_MODEL_ID = "Chaste20/smolvlm2-asl-ql-2"
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 DTYPE = torch.bfloat16 if torch.cuda.is_available() else torch.float32
 DEFAULT_QUESTION = (
+    "What sign language letter is this image?"
 )
+ALLOWED_LETTERS = [chr(ord("A") + i) for i in range(26)]
 processor = None
 model = None
     return processor, model
 def extract_letter(raw_text: str) -> str:
     """Return the ASL letter named in the model's raw answer, or "?" if none.

     A capital letter standing on its own (for example the "B" in
     "The letter is B") is preferred, so the initial capital of an ordinary
     word is not mistaken for the answer. When no standalone letter exists,
     the first capital A-Z anywhere in the text is returned, so a bare
     answer such as "B" or "Bravo" still yields "B".

     Args:
         raw_text: Decoded text produced by the model; may be empty.

     Returns:
         A single character in "A".."Z", or "?" when the text contains none.
     """
     if not raw_text:
         return "?"
     # [A-Z] spans exactly the 26 letters held in ALLOWED_LETTERS.
     found = re.search(r"\b[A-Z]\b", raw_text) or re.search(r"[A-Z]", raw_text)
     return found.group(0) if found else "?"
 @torch.inference_mode()
 def guardio_predict(image, question: str):
     try:
         if image is None:
+            return "Please upload an image of an ASL handshape."
         if not question or not question.strip():
             question = DEFAULT_QUESTION
             images=[image],
             padding=True,
             return_tensors="pt",
+        )
+        inputs = {k: v.to(DEVICE, dtype=DTYPE) for k, v in inputs.items()}
         output_ids = mdl.generate(
             **inputs,
             max_new_tokens=8,
             do_sample=False,
+            num_beams=2,
             temperature=0.1,
             pad_token_id=proc.tokenizer.eos_token_id,
         )
                 f"Raw model output: `{raw_text}`"
             )
+        return f"\n\nPredicted letter: {letter}"
     except Exception as e:
         traceback.print_exc()
     with gr.Blocks(title="Guardio – ASL Letter Demo (HF Space)") as demo:
         gr.Markdown(
             """
+            Guardio – ASL Letter Demo
             - Upload an image of a **single ASL alphabet handshape**
             - Ask: *"Which ASL alphabet letter is this image?"*
                 btn = gr.Button("Ask Guardio", variant="primary")
             with gr.Column():
+                out = gr.Markdown(
+                    label="Model answer",
+                    value="Upload an image and click **Ask Guardio**.",
+                )
         btn.click(fn=guardio_predict, inputs=[img, q], outputs=[out])