Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,14 +6,13 @@ from peft import PeftModel
|
|
| 6 |
import traceback, textwrap, re
|
| 7 |
|
| 8 |
BASE_MODEL_ID = "HuggingFaceTB/SmolVLM2-256M-Video-Instruct"
|
| 9 |
-
FINETUNED_MODEL_ID = "
|
| 10 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 11 |
DTYPE = torch.bfloat16 if torch.cuda.is_available() else torch.float32
|
| 12 |
DEFAULT_QUESTION = (
|
| 13 |
-
"
|
| 14 |
-
"Answer with exactly one capital letter A–Z and nothing else."
|
| 15 |
)
|
| 16 |
-
ALLOWED_LETTERS = "
|
| 17 |
|
| 18 |
processor = None
|
| 19 |
model = None
|
|
@@ -52,17 +51,16 @@ def load_model():
|
|
| 52 |
return processor, model
|
| 53 |
|
| 54 |
def extract_letter(raw_text: str) -> str:
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
return caps[-1] if caps else "?"
|
| 60 |
|
| 61 |
@torch.inference_mode()
|
| 62 |
def guardio_predict(image, question: str):
|
| 63 |
try:
|
| 64 |
if image is None:
|
| 65 |
-
return "
|
| 66 |
|
| 67 |
if not question or not question.strip():
|
| 68 |
question = DEFAULT_QUESTION
|
|
@@ -95,13 +93,14 @@ def guardio_predict(image, question: str):
|
|
| 95 |
images=[image],
|
| 96 |
padding=True,
|
| 97 |
return_tensors="pt",
|
| 98 |
-
)
|
|
|
|
| 99 |
|
| 100 |
output_ids = mdl.generate(
|
| 101 |
**inputs,
|
| 102 |
max_new_tokens=8,
|
| 103 |
do_sample=False,
|
| 104 |
-
num_beams=
|
| 105 |
temperature=0.1,
|
| 106 |
pad_token_id=proc.tokenizer.eos_token_id,
|
| 107 |
)
|
|
@@ -119,7 +118,7 @@ def guardio_predict(image, question: str):
|
|
| 119 |
f"Raw model output: `{raw_text}`"
|
| 120 |
)
|
| 121 |
|
| 122 |
-
return f"
|
| 123 |
|
| 124 |
except Exception as e:
|
| 125 |
traceback.print_exc()
|
|
@@ -136,7 +135,7 @@ def build_demo():
|
|
| 136 |
with gr.Blocks(title="Guardio – ASL Letter Demo (HF Space)") as demo:
|
| 137 |
gr.Markdown(
|
| 138 |
"""
|
| 139 |
-
|
| 140 |
|
| 141 |
- Upload an image of a **single ASL alphabet handshape**
|
| 142 |
- Ask: *"Which ASL alphabet letter is this image?"*
|
|
@@ -151,7 +150,10 @@ def build_demo():
|
|
| 151 |
btn = gr.Button("Ask Guardio", variant="primary")
|
| 152 |
|
| 153 |
with gr.Column():
|
| 154 |
-
out = gr.Markdown(
|
|
|
|
|
|
|
|
|
|
| 155 |
|
| 156 |
btn.click(fn=guardio_predict, inputs=[img, q], outputs=[out])
|
| 157 |
|
|
|
|
| 6 |
import traceback, textwrap, re
|
| 7 |
|
| 8 |
BASE_MODEL_ID = "HuggingFaceTB/SmolVLM2-256M-Video-Instruct"
|
| 9 |
+
FINETUNED_MODEL_ID = "Chaste20/smolvlm2-asl-ql-2"
|
| 10 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 11 |
DTYPE = torch.bfloat16 if torch.cuda.is_available() else torch.float32
|
| 12 |
DEFAULT_QUESTION = (
|
| 13 |
+
"What sign language letter is this image?"
|
|
|
|
| 14 |
)
|
| 15 |
+
ALLOWED_LETTERS = [chr(ord("A") + i) for i in range(26)]
|
| 16 |
|
| 17 |
processor = None
|
| 18 |
model = None
|
|
|
|
| 51 |
return processor, model
|
| 52 |
|
| 53 |
def extract_letter(raw_text: str) -> str:
|
| 54 |
+
for ch in raw_text:
|
| 55 |
+
if ch in ALLOWED_LETTERS:
|
| 56 |
+
return ch
|
| 57 |
+
return "?"
|
|
|
|
| 58 |
|
| 59 |
@torch.inference_mode()
|
| 60 |
def guardio_predict(image, question: str):
|
| 61 |
try:
|
| 62 |
if image is None:
|
| 63 |
+
return "Please upload an image of an ASL handshape."
|
| 64 |
|
| 65 |
if not question or not question.strip():
|
| 66 |
question = DEFAULT_QUESTION
|
|
|
|
| 93 |
images=[image],
|
| 94 |
padding=True,
|
| 95 |
return_tensors="pt",
|
| 96 |
+
)
|
| 97 |
+
inputs = {k: v.to(DEVICE, dtype=DTYPE) for k, v in inputs.items()}
|
| 98 |
|
| 99 |
output_ids = mdl.generate(
|
| 100 |
**inputs,
|
| 101 |
max_new_tokens=8,
|
| 102 |
do_sample=False,
|
| 103 |
+
num_beams=2,
|
| 104 |
temperature=0.1,
|
| 105 |
pad_token_id=proc.tokenizer.eos_token_id,
|
| 106 |
)
|
|
|
|
| 118 |
f"Raw model output: `{raw_text}`"
|
| 119 |
)
|
| 120 |
|
| 121 |
+
return f"\n\nPredicted letter: {letter}"
|
| 122 |
|
| 123 |
except Exception as e:
|
| 124 |
traceback.print_exc()
|
|
|
|
| 135 |
with gr.Blocks(title="Guardio – ASL Letter Demo (HF Space)") as demo:
|
| 136 |
gr.Markdown(
|
| 137 |
"""
|
| 138 |
+
Guardio – ASL Letter Demo
|
| 139 |
|
| 140 |
- Upload an image of a **single ASL alphabet handshape**
|
| 141 |
- Ask: *"Which ASL alphabet letter is this image?"*
|
|
|
|
| 150 |
btn = gr.Button("Ask Guardio", variant="primary")
|
| 151 |
|
| 152 |
with gr.Column():
|
| 153 |
+
out = gr.Markdown(
|
| 154 |
+
label="Model answer",
|
| 155 |
+
value="Upload an image and click **Ask Guardio**.",
|
| 156 |
+
)
|
| 157 |
|
| 158 |
btn.click(fn=guardio_predict, inputs=[img, q], outputs=[out])
|
| 159 |
|