Chaste20 committed on
Commit
5a1b053
·
verified ·
1 Parent(s): 62a145e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -15
app.py CHANGED
@@ -6,14 +6,13 @@ from peft import PeftModel
6
  import traceback, textwrap, re
7
 
8
  BASE_MODEL_ID = "HuggingFaceTB/SmolVLM2-256M-Video-Instruct"
9
- FINETUNED_MODEL_ID = "https://huggingface.co/Chaste20/smolvlm2-asl-ql-2"
10
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
11
  DTYPE = torch.bfloat16 if torch.cuda.is_available() else torch.float32
12
  DEFAULT_QUESTION = (
13
- "Which ASL alphabet letter is shown in this image? "
14
- "Answer with exactly one capital letter A–Z and nothing else."
15
  )
16
- ALLOWED_LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
17
 
18
  processor = None
19
  model = None
@@ -52,17 +51,16 @@ def load_model():
52
  return processor, model
53
 
54
  def extract_letter(raw_text: str) -> str:
55
- m = re.search(r"\b([A-Z])\b", raw_text.strip())
56
- if m and m.group(1) in ALLOWED_LETTERS:
57
- return m.group(1)
58
- caps = [c for c in raw_text if c in ALLOWED_LETTERS]
59
- return caps[-1] if caps else "?"
60
 
61
  @torch.inference_mode()
62
  def guardio_predict(image, question: str):
63
  try:
64
  if image is None:
65
- return "⚠️ Please upload an image of an ASL handshape."
66
 
67
  if not question or not question.strip():
68
  question = DEFAULT_QUESTION
@@ -95,13 +93,14 @@ def guardio_predict(image, question: str):
95
  images=[image],
96
  padding=True,
97
  return_tensors="pt",
98
- ).to(DEVICE)
 
99
 
100
  output_ids = mdl.generate(
101
  **inputs,
102
  max_new_tokens=8,
103
  do_sample=False,
104
- num_beams=1,
105
  temperature=0.1,
106
  pad_token_id=proc.tokenizer.eos_token_id,
107
  )
@@ -119,7 +118,7 @@ def guardio_predict(image, question: str):
119
  f"Raw model output: `{raw_text}`"
120
  )
121
 
122
- return f"🔤 **Predicted letter: {letter}**\n\nRaw model output: `{raw_text}`"
123
 
124
  except Exception as e:
125
  traceback.print_exc()
@@ -136,7 +135,7 @@ def build_demo():
136
  with gr.Blocks(title="Guardio – ASL Letter Demo (HF Space)") as demo:
137
  gr.Markdown(
138
  """
139
- # 🧤 Guardio – ASL Letter Demo
140
 
141
  - Upload an image of a **single ASL alphabet handshape**
142
  - Ask: *"Which ASL alphabet letter is this image?"*
@@ -151,7 +150,10 @@ def build_demo():
151
  btn = gr.Button("Ask Guardio", variant="primary")
152
 
153
  with gr.Column():
154
- out = gr.Markdown("Upload an image and click **Ask Guardio**.")
 
 
 
155
 
156
  btn.click(fn=guardio_predict, inputs=[img, q], outputs=[out])
157
 
 
6
  import traceback, textwrap, re
7
 
8
  BASE_MODEL_ID = "HuggingFaceTB/SmolVLM2-256M-Video-Instruct"
9
+ FINETUNED_MODEL_ID = "Chaste20/smolvlm2-asl-ql-2"
10
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
11
  DTYPE = torch.bfloat16 if torch.cuda.is_available() else torch.float32
12
  DEFAULT_QUESTION = (
13
+ "What sign language letter is this image?"
 
14
  )
15
+ ALLOWED_LETTERS = [chr(ord("A") + i) for i in range(26)]
16
 
17
  processor = None
18
  model = None
 
51
  return processor, model
52
 
53
  def extract_letter(raw_text: str) -> str:
54
+ for ch in raw_text:
55
+ if ch in ALLOWED_LETTERS:
56
+ return ch
57
+ return "?"
 
58
 
59
  @torch.inference_mode()
60
  def guardio_predict(image, question: str):
61
  try:
62
  if image is None:
63
+ return "Please upload an image of an ASL handshape."
64
 
65
  if not question or not question.strip():
66
  question = DEFAULT_QUESTION
 
93
  images=[image],
94
  padding=True,
95
  return_tensors="pt",
96
+ )
97
+ inputs = {k: v.to(DEVICE, dtype=DTYPE) for k, v in inputs.items()}
98
 
99
  output_ids = mdl.generate(
100
  **inputs,
101
  max_new_tokens=8,
102
  do_sample=False,
103
+ num_beams=2,
104
  temperature=0.1,
105
  pad_token_id=proc.tokenizer.eos_token_id,
106
  )
 
118
  f"Raw model output: `{raw_text}`"
119
  )
120
 
121
+ return f"\n\nPredicted letter: {letter}"
122
 
123
  except Exception as e:
124
  traceback.print_exc()
 
135
  with gr.Blocks(title="Guardio – ASL Letter Demo (HF Space)") as demo:
136
  gr.Markdown(
137
  """
138
+ Guardio – ASL Letter Demo
139
 
140
  - Upload an image of a **single ASL alphabet handshape**
141
  - Ask: *"Which ASL alphabet letter is this image?"*
 
150
  btn = gr.Button("Ask Guardio", variant="primary")
151
 
152
  with gr.Column():
153
+ out = gr.Markdown(
154
+ label="Model answer",
155
+ value="Upload an image and click **Ask Guardio**.",
156
+ )
157
 
158
  btn.click(fn=guardio_predict, inputs=[img, q], outputs=[out])
159