Spaces:
Runtime error
Runtime error
File size: 4,641 Bytes
301b5d0 cb45a42 301b5d0 5a1b053 301b5d0 5a1b053 301b5d0 5a1b053 cb45a42 77a3ed6 cb45a42 77a3ed6 cb45a42 301b5d0 cb45a42 77a3ed6 cb45a42 77a3ed6 cb45a42 5a1b053 cb45a42 5a1b053 cb45a42 5a1b053 cb45a42 5a1b053 cb45a42 77a3ed6 cb45a42 5a1b053 cb45a42 301b5d0 cb45a42 77a3ed6 cb45a42 5a1b053 cb45a42 5a1b053 cb45a42 301b5d0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 |
import torch
import gradio as gr
from PIL import Image
from transformers import AutoProcessor, AutoModelForImageTextToText
from peft import PeftModel
import traceback, textwrap, re
# Hub identifiers: the base checkpoint, and the PEFT adapter that load_model()
# attaches on top of it.
BASE_MODEL_ID = "HuggingFaceTB/SmolVLM2-256M-Video-Instruct"
FINETUNED_MODEL_ID = "Chaste20/smolvlm2-asl-ql-2"

# Run on the GPU in bfloat16 when CUDA is present, otherwise on CPU in float32.
if torch.cuda.is_available():
    DEVICE, DTYPE = "cuda", torch.bfloat16
else:
    DEVICE, DTYPE = "cpu", torch.float32

# Question substituted when the user leaves the textbox blank.
DEFAULT_QUESTION = "What sign language letter is this image?"

# The uppercase letters "A" through "Z", in order.
ALLOWED_LETTERS = [chr(code) for code in range(ord("A"), ord("Z") + 1)]

# Filled in lazily by load_model() on first use.
processor = None
model = None
def load_model():
    """Load the processor and the adapter-augmented model once, then reuse them.

    On the first call this downloads/loads the processor and base model named
    by ``BASE_MODEL_ID``, attaches the PEFT adapter named by
    ``FINETUNED_MODEL_ID``, moves the result to ``DEVICE``, switches it to
    eval mode and enables the KV cache. The pair is stored in the module-level
    ``processor`` / ``model`` globals so later calls return immediately.

    Returns:
        tuple: ``(processor, model)``.
    """
    global processor, model

    # Serve the cached pair on every call after the first successful load.
    if processor is not None and model is not None:
        return processor, model

    print("🔄 Loading processor from", BASE_MODEL_ID)
    loaded_processor = AutoProcessor.from_pretrained(
        BASE_MODEL_ID,
        trust_remote_code=True,
    )

    print("🔄 Loading base model from", BASE_MODEL_ID)
    base = AutoModelForImageTextToText.from_pretrained(
        BASE_MODEL_ID,
        torch_dtype=DTYPE,
        # Let accelerate place the weights only when a GPU is available.
        device_map="auto" if torch.cuda.is_available() else None,
        trust_remote_code=True,
    )

    print("🔄 Attaching PEFT adapter from", FINETUNED_MODEL_ID)
    model_peft = PeftModel.from_pretrained(
        base,
        FINETUNED_MODEL_ID,
        torch_dtype=DTYPE,
    )
    model_peft.to(DEVICE)
    model_peft.eval()
    model_peft.config.use_cache = True

    # Publish the globals only after everything succeeded, so a failed load
    # never leaves a half-initialised cache behind.
    processor, model = loaded_processor, model_peft

    print("✅ Guardio model loaded on", DEVICE)
    return processor, model
def extract_letter(raw_text: str) -> str:
    """Pick the predicted A-Z letter out of the model's decoded text.

    A capital letter that stands on its own (not part of a longer word) is
    preferred, so ``"The letter is B"`` yields ``"B"`` rather than ``"T"``.
    When no standalone capital exists, the first capital letter anywhere in
    the text is returned, which is what this function always did before.

    Args:
        raw_text: Decoded model output. ``None`` and ``""`` are accepted.

    Returns:
        A single uppercase ASCII letter (the same A-Z set as
        ``ALLOWED_LETTERS``), or ``"?"`` when the text contains none.
    """
    if not raw_text:
        return "?"

    # First choice: a capital letter with no ASCII letter on either side.
    standalone = re.search(r"(?<![A-Za-z])[A-Z](?![A-Za-z])", raw_text)
    if standalone:
        return standalone.group(0)

    # Fallback: the first capital letter anywhere (legacy behaviour).
    first_capital = re.search(r"[A-Z]", raw_text)
    return first_capital.group(0) if first_capital else "?"
@torch.inference_mode()
def guardio_predict(image, question: str) -> str:
    """Answer an ASL-letter question about an image and format the reply.

    Args:
        image: A PIL image, or an array accepted by ``Image.fromarray``.
            ``None`` yields a prompt asking the user to upload an image.
        question: The user's question. Blank or ``None`` falls back to
            ``DEFAULT_QUESTION``.

    Returns:
        A Markdown string: the predicted letter, a notice containing the raw
        model output when no letter could be extracted, or an error report.
        Exceptions are never propagated to the caller; they are printed to
        stderr and summarised in the returned string.
    """
    try:
        if image is None:
            return "Please upload an image of an ASL handshape."

        if not question or not question.strip():
            question = DEFAULT_QUESTION

        # Normalise whatever the UI handed over to an RGB PIL image.
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)
        if image.mode != "RGB":
            image = image.convert("RGB")

        proc, mdl = load_model()

        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": question},
                    {"type": "image"},
                ],
            }
        ]
        text = proc.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=False,
        )
        inputs = proc(
            text=[text],
            images=[image],
            padding=True,
            return_tensors="pt",
        )

        # Cast only floating-point tensors to the model dtype. Token ids and
        # masks must keep their integer/bool dtype or the embedding lookup
        # fails.
        inputs = {
            key: (
                value.to(DEVICE, dtype=DTYPE)
                if torch.is_floating_point(value)
                else value.to(DEVICE)
            )
            for key, value in inputs.items()
        }

        # Deterministic beam search; no sampling parameters are passed
        # because they have no effect when do_sample=False.
        output_ids = mdl.generate(
            **inputs,
            max_new_tokens=8,
            do_sample=False,
            num_beams=2,
            pad_token_id=proc.tokenizer.eos_token_id,
        )

        # The returned sequence starts with the prompt tokens; decode only
        # the newly generated continuation so the prompt text cannot be
        # mistaken for the answer.
        prompt_length = inputs["input_ids"].shape[1]
        raw_text = proc.batch_decode(
            output_ids[:, prompt_length:],
            skip_special_tokens=True,
        )[0].strip()

        letter = extract_letter(raw_text)
        if letter == "?":
            return (
                "❓ I couldn’t confidently map this to a single A–Z letter.\n\n"
                f"Raw model output: `{raw_text}`"
            )
        return f"\n\nPredicted letter: {letter}"
    except Exception as e:
        # UI boundary: log the full traceback server-side and show the user
        # a short report instead of crashing the request.
        traceback.print_exc()
        # Blank lines between entries so Markdown renders them separately.
        return "\n\n".join(
            [
                "🚨 **Internal error while running the model**",
                f"**Type:** `{type(e).__name__}`",
                f"**Message:** `{e}`",
            ]
        )
def build_demo():
    """Assemble the Gradio Blocks UI for the ASL letter demo.

    The layout is an intro blurb followed by one row with two columns: the
    image upload, question box and submit button on one side, and the
    Markdown answer panel on the other. Clicking the button calls
    ``guardio_predict`` with the image and question and shows its return
    value in the answer panel.

    Returns:
        The constructed ``gr.Blocks`` app. It is not launched here.
    """
    # Built line by line (instead of an indented triple-quoted literal) so
    # source indentation can never leak into the Markdown and turn it into a
    # code block.
    intro_markdown = "\n".join(
        [
            "",
            "Guardio – ASL Letter Demo",
            "- Upload an image of a **single ASL alphabet handshape**",
            '- Ask: *"Which ASL alphabet letter is this image?"*',
            "- The model predicts a single A–Z letter.",
            "",
        ]
    )

    with gr.Blocks(title="Guardio – ASL Letter Demo (HF Space)") as demo:
        gr.Markdown(intro_markdown)

        with gr.Row():
            with gr.Column():
                img = gr.Image(label="ASL handshape image", type="pil", height=320)
                q = gr.Textbox(label="Question", value=DEFAULT_QUESTION, lines=2)
                btn = gr.Button("Ask Guardio", variant="primary")
            with gr.Column():
                out = gr.Markdown(
                    label="Model answer",
                    value="Upload an image and click **Ask Guardio**.",
                )

        # Event wiring must happen inside the Blocks context.
        btn.click(fn=guardio_predict, inputs=[img, q], outputs=[out])

    return demo
# Build the UI at import time so `demo` exists as a module-level name.
demo = build_demo()


# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|