Chaste20 committed on
Commit
77a3ed6
·
verified ·
1 Parent(s): cb45a42

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -29
app.py CHANGED
@@ -1,19 +1,14 @@
1
- import torch
 
2
  import gradio as gr
3
  from PIL import Image
4
  from transformers import AutoProcessor, AutoModelForImageTextToText
 
5
  from peft import PeftModel
6
- import traceback, textwrap, re
7
-
8
- BASE_MODEL_ID = "HuggingFaceTB/SmolVLM2-256M-Video-Instruct"
9
- FINETUNED_MODEL_ID = "Chaste20/smolvlm2-asl-ql-2"
10
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
11
- DTYPE = torch.bfloat16 if torch.cuda.is_available() else torch.float32
12
- DEFAULT_QUESTION = (
13
- "Which ASL alphabet letter is shown in this image? "
14
- "Answer with exactly one capital letter A–Z and nothing else."
15
- )
16
- ALLOWED_LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
17
 
18
  processor = None
19
  model = None
@@ -23,21 +18,21 @@ def load_model():
23
  if processor is not None and model is not None:
24
  return processor, model
25
 
26
- print(" Loading processor from", BASE_MODEL_ID)
27
  processor = AutoProcessor.from_pretrained(
28
  BASE_MODEL_ID,
29
  trust_remote_code=True
30
  )
31
 
32
- print(" Loading base model from", BASE_MODEL_ID)
33
  base = AutoModelForImageTextToText.from_pretrained(
34
  BASE_MODEL_ID,
35
  torch_dtype=DTYPE,
36
  device_map="auto" if torch.cuda.is_available() else None,
37
- trust_remote_code=True,
38
  )
39
 
40
- print(" Attaching PEFT adapter from", FINETUNED_MODEL_ID)
41
  model_peft = PeftModel.from_pretrained(
42
  base,
43
  FINETUNED_MODEL_ID,
@@ -48,25 +43,27 @@ def load_model():
48
  model_peft.config.use_cache = True
49
 
50
  model = model_peft
51
- print(" Guardio model loaded on", DEVICE)
52
  return processor, model
53
 
 
54
  def extract_letter(raw_text: str) -> str:
55
- m = re.search(r"\b([A-Z])\b", raw_text.strip())
56
- if m and m.group(1) in ALLOWED_LETTERS:
57
- return m.group(1)
58
- caps = [c for c in raw_text if c in ALLOWED_LETTERS]
59
- return caps[-1] if caps else "?"
60
 
61
  @torch.inference_mode()
62
  def guardio_predict(image, question: str):
63
  try:
64
  if image is None:
65
- return " Please upload an image of an ASL handshape."
66
 
67
  if not question or not question.strip():
68
  question = DEFAULT_QUESTION
69
 
 
70
  if not isinstance(image, Image.Image):
71
  image = Image.fromarray(image)
72
  if image.mode != "RGB":
@@ -84,6 +81,7 @@ def guardio_predict(image, question: str):
84
  }
85
  ]
86
 
 
87
  text = proc.apply_chat_template(
88
  messages,
89
  add_generation_prompt=True,
@@ -95,14 +93,16 @@ def guardio_predict(image, question: str):
95
  images=[image],
96
  padding=True,
97
  return_tensors="pt",
98
- ).to(DEVICE)
 
99
 
100
  output_ids = mdl.generate(
101
  **inputs,
102
  max_new_tokens=8,
103
  do_sample=False,
104
- num_beams=1,
105
  temperature=0.1,
 
106
  pad_token_id=proc.tokenizer.eos_token_id,
107
  )
108
 
@@ -115,23 +115,28 @@ def guardio_predict(image, question: str):
115
 
116
  if letter == "?":
117
  return (
118
- " I couldn’t confidently map this to a single A–Z letter.\n\n"
119
  f"Raw model output: `{raw_text}`"
120
  )
121
 
122
- return f" **Predicted letter: {letter}**\n\nRaw model output: `{raw_text}`"
 
123
 
124
  except Exception as e:
125
- traceback.print_exc()
 
126
  msg = textwrap.dedent(f"""
127
- **Internal error while running the model**
128
 
129
  **Type:** `{type(e).__name__}`
130
  **Message:** `{e}`
131
 
 
132
  """).strip()
 
133
  return msg
134
 
 
135
  def build_demo():
136
  with gr.Blocks(title="Guardio – ASL Letter Demo (HF Space)") as demo:
137
  gr.Markdown(
 
1
+ import traceback
2
+ import textwrap
3
  import gradio as gr
4
  from PIL import Image
5
  from transformers import AutoProcessor, AutoModelForImageTextToText
6
+ from transformers import AutoProcessor, AutoModelForVision2Seq
7
  from peft import PeftModel
8
+ import num2words
9
+
10
+
11
+
 
 
 
 
 
 
 
12
 
13
  processor = None
14
  model = None
 
18
  if processor is not None and model is not None:
19
  return processor, model
20
 
21
+ print("🔄 Loading processor from", BASE_MODEL_ID)
22
  processor = AutoProcessor.from_pretrained(
23
  BASE_MODEL_ID,
24
  trust_remote_code=True
25
  )
26
 
27
+ print("🔄 Loading base model from", BASE_MODEL_ID)
28
  base = AutoModelForImageTextToText.from_pretrained(
29
  BASE_MODEL_ID,
30
  torch_dtype=DTYPE,
31
  device_map="auto" if torch.cuda.is_available() else None,
32
+ trust_remote_code=True
33
  )
34
 
35
+ print("🔄 Attaching PEFT adapter from", FINETUNED_MODEL_ID)
36
  model_peft = PeftModel.from_pretrained(
37
  base,
38
  FINETUNED_MODEL_ID,
 
43
  model_peft.config.use_cache = True
44
 
45
  model = model_peft
46
+ print(" Guardio model loaded on", DEVICE)
47
  return processor, model
48
 
49
+
50
  def extract_letter(raw_text: str) -> str:
51
+ for ch in raw_text:
52
+ if ch in ALLOWED_LETTERS:
53
+ return ch
54
+ return "?"
55
+
56
 
57
  @torch.inference_mode()
58
  def guardio_predict(image, question: str):
59
  try:
60
  if image is None:
61
+ return "⚠️ Please upload an image of an ASL handshape."
62
 
63
  if not question or not question.strip():
64
  question = DEFAULT_QUESTION
65
 
66
+ # Ensure PIL image
67
  if not isinstance(image, Image.Image):
68
  image = Image.fromarray(image)
69
  if image.mode != "RGB":
 
81
  }
82
  ]
83
 
84
+ # chat template with <image> token
85
  text = proc.apply_chat_template(
86
  messages,
87
  add_generation_prompt=True,
 
93
  images=[image],
94
  padding=True,
95
  return_tensors="pt",
96
+ )
97
+ inputs = {k: v.to(DEVICE, dtype=DTYPE) for k, v in inputs.items()}
98
 
99
  output_ids = mdl.generate(
100
  **inputs,
101
  max_new_tokens=8,
102
  do_sample=False,
103
+ num_beams=2,
104
  temperature=0.1,
105
+
106
  pad_token_id=proc.tokenizer.eos_token_id,
107
  )
108
 
 
115
 
116
  if letter == "?":
117
  return (
118
+ " I couldn’t confidently map this to a single A–Z letter.\n\n"
119
  f"Raw model output: `{raw_text}`"
120
  )
121
 
122
+ #return f"🔤 **Predicted letter: {letter}**\n\n`Raw output: {raw_text}`"
123
+ return f"**\n`Raw output: {raw_text} ** "
124
 
125
  except Exception as e:
126
+ traceback.print_exc() # show full error in Colab logs
127
+
128
  msg = textwrap.dedent(f"""
129
+ 🚨 **Internal error while running the model**
130
 
131
  **Type:** `{type(e).__name__}`
132
  **Message:** `{e}`
133
 
134
+ Check the Colab cell output for the full traceback.
135
  """).strip()
136
+
137
  return msg
138
 
139
+
140
  def build_demo():
141
  with gr.Blocks(title="Guardio – ASL Letter Demo (HF Space)") as demo:
142
  gr.Markdown(