Mr-HASSAN committed on
Commit
0e03f83
·
verified ·
1 Parent(s): 1dd48ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +115 -30
app.py CHANGED
@@ -8,7 +8,7 @@ import torch
8
  import spaces # ู…ู‡ู… ู„ู€ @spaces.GPU
9
 
10
  # =============================
11
- # ุฅุนุฏุงุฏ ู…ูุชุงุญ Gemini (ุญุทู‡ ู‡ู†ุง)
12
  # =============================
13
 
14
  GEMINI_API_KEY = "AIzaSyAvm28ZnTMaZ1Jtg9sYM-EO4qlAN2W4BIQ"
@@ -43,15 +43,17 @@ def fix_with_gemini(raw_text: str) -> str:
43
  # =============================
44
 
45
  WEIGHTS_PATH = "best.pt"
46
- IMG_SIZE = 720
47
- CONF_THRESHOLD = 0.60 # ุชุฎููŠุถ ู„ู„ุณู‡ูˆู„ุฉ
48
 
49
- # ุฅุนุฏุงุฏุงุช ุชุฌู…ูŠุน ุงู„ุญุฑูˆู
50
  MIN_STABLE_FRAMES = 1
51
  FRAME_SKIP = 1
52
  MAX_FRAMES = 1000
53
  WORD_GAP_FRAMES = 10
54
 
 
 
 
55
  arabic_map = {
56
  "aleff": "ุง",
57
  "bb": "ุจ",
@@ -87,15 +89,13 @@ arabic_map = {
87
  "la": "ู„ุง",
88
  }
89
 
90
- # ู‡ู†ุณุชุฎุฏู… ู…ูˆุฏูŠู„ ุนุงู„ู…ูŠ ู„ูƒู† ู†ุญู…ู‘ู„ู‡ ุนู†ุฏ ุฃูˆู„ ุงุณุชุฎุฏุงู… ูู‚ุท
91
  yolo_model = None
92
  DEVICE = "cpu"
93
 
94
 
95
  def get_model():
96
  """
97
- ูŠุญู…ู‘ู„ YOLO ู…ุฑุฉ ูˆุงุญุฏุฉุŒ ูˆูŠุญุงูˆู„ ู†ู‚ู„ู‡ ู„ู€ GPU ู„ูˆ ู…ุชูˆูุฑ.
98
- ูŠูุณุชุฏุนู‰ ุฏุงุฎู„ ุฏุงู„ุฉ ุนู„ูŠู‡ุง @spaces.GPU ุจุนุฏ ู…ุง ุงู„ู€ GPU ูŠุดุชุบู„ ูุนู„ูŠู‹ุง.
99
  """
100
  global yolo_model, DEVICE
101
 
@@ -104,7 +104,6 @@ def get_model():
104
  yolo_model = YOLO(WEIGHTS_PATH)
105
  print("๐Ÿ“š Classes:", yolo_model.names)
106
 
107
- # ู†ุนูŠุฏ ุงู„ุชุญู‚ู‚ ู…ู† CUDA ู‡ู†ุง (ุจุนุฏ ู…ุง GPU ูŠุดุชุบู„ ููŠ Spaces)
108
  if torch.cuda.is_available():
109
  if DEVICE != "cuda":
110
  DEVICE = "cuda"
@@ -114,25 +113,66 @@ def get_model():
114
  except Exception as e:
115
  print("โš ๏ธ ุชุนุฐุฑ ู†ู‚ู„ ุงู„ู…ูˆุฏูŠู„ ุฅู„ู‰ cuda:", e)
116
  else:
 
 
117
  DEVICE = "cpu"
118
- print("โš ๏ธ CUDA ุบูŠุฑ ู…ุชูˆูุฑุŒ ุณูŠุชู… ุงุณุชุฎุฏุงู… CPU.")
119
 
120
  return yolo_model
121
 
122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  # =============================
124
  # ุถุบุท ุงู„ููŠุฏูŠูˆ ู‚ุจู„ ุงู„ู…ุนุงู„ุฌุฉ
125
  # =============================
126
 
127
- def preprocess_video(input_path: str, target_width: int = 640, target_fps: int = 8) -> str:
128
  """
129
- ูŠู‚ู„ู„ ุฏู‚ุฉ ุงู„ููŠุฏูŠูˆ ูˆุงู„ู€ FPS ุนุดุงู† ู†ุฎู„ูŠ ุงู„ุจุฑูˆุณูŠุณ ุฃุณุฑุน.
130
- ูŠุฑุฌู‘ุน ู…ุณุงุฑ ููŠุฏูŠูˆ ุฎููŠู ุฌุฏูŠุฏ.
 
131
  """
132
  cap = cv2.VideoCapture(input_path)
133
  if not cap.isOpened():
134
  print("[preprocess] ุชุนุฐุฑ ูุชุญ ุงู„ููŠุฏูŠูˆุŒ ุณู†ุณุชุฎุฏู… ุงู„ู…ู„ู ุงู„ุฃุตู„ูŠ ูƒู…ุง ู‡ูˆ.")
135
- return input_path # fallback
136
 
137
  orig_fps = cap.get(cv2.CAP_PROP_FPS)
138
  w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
@@ -145,13 +185,26 @@ def preprocess_video(input_path: str, target_width: int = 640, target_fps: int =
145
  frame_step = max(1, int(round(orig_fps / target_fps)))
146
  out_fps = orig_fps / frame_step
147
 
148
- target_height = int(target_width * h / w)
 
 
 
 
 
 
 
149
 
150
  fd, tmp_path = tempfile.mkstemp(suffix=".mp4")
151
  os.close(fd)
152
 
 
 
 
 
 
 
153
  fourcc = cv2.VideoWriter_fourcc(*"mp4v")
154
- out = cv2.VideoWriter(tmp_path, fourcc, out_fps, (target_width, target_height))
155
 
156
  frame_idx = 0
157
  while True:
@@ -160,14 +213,18 @@ def preprocess_video(input_path: str, target_width: int = 640, target_fps: int =
160
  break
161
 
162
  if frame_idx % frame_step == 0:
163
- resized = cv2.resize(frame, (target_width, target_height), interpolation=cv2.INTER_AREA)
164
- out.write(resized)
 
 
 
 
165
 
166
  frame_idx += 1
167
 
168
  cap.release()
169
  out.release()
170
- print(f"[preprocess] original_fps={orig_fps:.2f}, new_fps={out_fps:.2f}, saved={tmp_path}")
171
  return tmp_path
172
 
173
 
@@ -176,7 +233,7 @@ def preprocess_video(input_path: str, target_width: int = 640, target_fps: int =
176
  # =============================
177
 
178
  def detect_frame(frame_bgr):
179
- model = get_model() # ู†ุชุฃูƒุฏ ุงู„ู…ูˆุฏูŠู„ ุฌุงู‡ุฒ ูˆุนู„ู‰ ุงู„ุฌู‡ุงุฒ ุงู„ุตุญูŠุญ
180
 
181
  frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
182
  result = model.predict(
@@ -184,11 +241,14 @@ def detect_frame(frame_bgr):
184
  conf=CONF_THRESHOLD,
185
  imgsz=IMG_SIZE,
186
  verbose=False,
187
- device=DEVICE, # cuda ุฃูˆ cpu ุญุณุจ ุงู„ู…ุชุงุญ
188
  )[0]
189
 
190
  boxes = result.boxes
191
 
 
 
 
192
  if boxes is None or len(boxes) == 0:
193
  return [], frame_bgr
194
 
@@ -220,13 +280,13 @@ def detect_frame(frame_bgr):
220
 
221
 
222
  # =============================
223
- # VIDEO โ†’ RAW TEXT + OUTPUT VIDEO
224
  # =============================
225
 
226
  def extract_and_render(video_path: str):
227
  cap = cv2.VideoCapture(video_path)
228
  if not cap.isOpened():
229
- return "", None
230
 
231
  fourcc = cv2.VideoWriter_fourcc(*"mp4v")
232
  out_path = "processed_output.mp4"
@@ -248,6 +308,9 @@ def extract_and_render(video_path: str):
248
  last_seen = None
249
  frame_index = 0
250
 
 
 
 
251
  while True:
252
  ret, frame = cap.read()
253
  if not ret:
@@ -265,6 +328,9 @@ def extract_and_render(video_path: str):
265
  out.write(rendered)
266
 
267
  if labels:
 
 
 
268
  label = labels[0]
269
  last_seen = frame_index
270
 
@@ -295,42 +361,61 @@ def extract_and_render(video_path: str):
295
  words.append(word)
296
 
297
  raw_text = " ".join(words).strip()
298
- return raw_text, out_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
 
300
 
301
  # =============================
302
  # Gradio + @spaces.GPU
303
  # =============================
304
 
305
- @spaces.GPU # ู‡ุฐุง ุงู„ู„ูŠ ูŠุฑุถูŠ Hugging Face ูˆูŠุดุบู‘ู„ GPU on demand
306
  def run(file):
307
  if file is None:
308
- return "ู„ู… ูŠุชู… ุฑูุน ููŠุฏูŠูˆ", "", None
309
 
310
  video_path = file.name
311
 
312
- light_path = preprocess_video(video_path, target_width=640, target_fps=8)
 
313
 
314
- raw, processed_path = extract_and_render(light_path)
315
  pretty = fix_with_gemini(raw) if raw else ""
316
 
317
  if not raw:
318
  raw = "ู„ู… ูŠุชู… ุงู„ุชุนุฑู ุนู„ู‰ ุฃูŠ ู†ุต ู…ู† ุงู„ุฅุดุงุฑุงุช."
319
 
320
- return raw, pretty, processed_path
321
 
322
 
323
  with gr.Blocks() as demo:
324
- gr.Markdown("## ๐ŸคŸ ASL โ†’ Arabic (YOLO + Gemini) โ€” ู†ุณุฎุฉ GPU ุนู„ู‰ Hugging Face Spaces")
325
 
326
  inp = gr.File(label="ุงุฑูุน ููŠุฏูŠูˆ ุงู„ุฅุดุงุฑุฉ")
327
  raw = gr.Textbox(label="ุงู„ู†ุต ุงู„ุฎุงู…", lines=3)
328
  pretty = gr.Textbox(label="ุงู„ู†ุต ุงู„ู…ุญุณู† (Gemini)", lines=3)
329
  video_out = gr.Video(label="ุงู„ููŠุฏูŠูˆ ุจุนุฏ ุงู„ุจุฑูˆุณูŠุณ")
 
330
 
331
  btn = gr.Button("ุงุจุฏุฃ ุงู„ู…ุนุงู„ุฌุฉ")
332
 
333
- btn.click(run, inputs=[inp], outputs=[raw, pretty, video_out])
334
 
335
  if __name__ == "__main__":
336
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
8
  import spaces # ู…ู‡ู… ู„ู€ @spaces.GPU
9
 
10
  # =============================
11
+ # ุฅุนุฏุงุฏ ู…ูุชุงุญ Gemini (ุญุท ุงู„ู…ูุชุงุญ ู‡ู†ุง)
12
  # =============================
13
 
14
  GEMINI_API_KEY = "AIzaSyAvm28ZnTMaZ1Jtg9sYM-EO4qlAN2W4BIQ"
 
43
  # =============================
44
 
45
  WEIGHTS_PATH = "best.pt"
46
+ IMG_SIZE = 640 # ุญุฌู… ุงู„ุฅุฏุฎุงู„ ู„ู€ YOLO
47
+ CONF_THRESHOLD = 0.15 # ู…ุฎูุถ ุนุดุงู† ู†ู„ุชู‚ุท ุฃูƒุซุฑ
48
 
 
49
  MIN_STABLE_FRAMES = 1
50
  FRAME_SKIP = 1
51
  MAX_FRAMES = 1000
52
  WORD_GAP_FRAMES = 10
53
 
54
+ # ู„ูˆ ุญุงุจ ุชู„ุบูŠ ุงู„ู‚ุต ุงู„ู…ุฑูƒุฒูŠ (ุฒูˆู…)ุŒ ุฎู„ูŠู‡ False
55
+ CENTER_CROP = True
56
+
57
  arabic_map = {
58
  "aleff": "ุง",
59
  "bb": "ุจ",
 
89
  "la": "ู„ุง",
90
  }
91
 
 
92
  yolo_model = None
93
  DEVICE = "cpu"
94
 
95
 
96
  def get_model():
97
  """
98
+ ูŠุญู…ู‘ู„ YOLO ู…ุฑุฉ ูˆุงุญุฏุฉุŒ ูˆูŠุญุงูˆู„ ูŠุณุชุฎุฏู… CUDA ู„ูˆ ู…ุชูˆูุฑ.
 
99
  """
100
  global yolo_model, DEVICE
101
 
 
104
  yolo_model = YOLO(WEIGHTS_PATH)
105
  print("๐Ÿ“š Classes:", yolo_model.names)
106
 
 
107
  if torch.cuda.is_available():
108
  if DEVICE != "cuda":
109
  DEVICE = "cuda"
 
113
  except Exception as e:
114
  print("โš ๏ธ ุชุนุฐุฑ ู†ู‚ู„ ุงู„ู…ูˆุฏูŠู„ ุฅู„ู‰ cuda:", e)
115
  else:
116
+ if DEVICE != "cpu":
117
+ print("โš ๏ธ CUDA ุบูŠุฑ ู…ุชูˆูุฑุŒ ุณูŠุชู… ุงุณุชุฎุฏุงู… CPU.")
118
  DEVICE = "cpu"
 
119
 
120
  return yolo_model
121
 
122
 
123
+ # =============================
124
+ # ุฏุงู„ุฉ ู…ุณุงุนุฏุฉ: ุชูƒุจูŠุฑ + ู‚ุต ู…ู† ุงู„ูˆุณุท 640x640
125
+ # =============================
126
+
127
+ def resize_and_center_crop(frame, target: int = 640):
128
+ """
129
+ - ู†ูƒุจุฑ/ู†ุตุบุฑ ุจุญูŠุซ ุฃู‚ุตุฑ ุถู„ุน = target
130
+ - ุซู… ู†ู‚ุต ู…ุฑุจุน 640x640 ู…ู† ุงู„ูˆุณุท (Zoom ู„ุทูŠู ุนู„ู‰ ุงู„ู…ุฑูƒุฒ)
131
+ """
132
+ h, w = frame.shape[:2]
133
+ short_side = min(w, h)
134
+ if short_side <= 0:
135
+ return frame
136
+
137
+ scale = target / short_side
138
+ new_w = int(w * scale)
139
+ new_h = int(h * scale)
140
+
141
+ frame = cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_AREA)
142
+
143
+ h, w = frame.shape[:2]
144
+ x1 = max(0, (w - target) // 2)
145
+ y1 = max(0, (h - target) // 2)
146
+ x2 = x1 + target
147
+ y2 = y1 + target
148
+
149
+ x2 = min(x2, w)
150
+ y2 = min(y2, h)
151
+
152
+ crop = frame[y1:y2, x1:x2]
153
+
154
+ # ู„ูˆ ู„ุณุจุจ ู…ุง ุงู„ู‚ุต ุฃุตุบุฑ ู…ู† 640x640ุŒ ู†ุฑุฌุน ู†ุถุจุทู‡
155
+ ch, cw = crop.shape[:2]
156
+ if ch != target or cw != target:
157
+ crop = cv2.resize(crop, (target, target), interpolation=cv2.INTER_AREA)
158
+
159
+ return crop
160
+
161
+
162
  # =============================
163
  # ุถุบุท ุงู„ููŠุฏูŠูˆ ู‚ุจู„ ุงู„ู…ุนุงู„ุฌุฉ
164
  # =============================
165
 
166
+ def preprocess_video(input_path: str, target_short_side: int = 640, target_fps: int = 8) -> str:
167
  """
168
+ ู†ุถุจุท ุงู„ููŠุฏูŠูˆ ุจุญูŠุซ:
169
+ - ุฃู‚ุตุฑ ุถู„ุน โ‰ˆ target_short_side
170
+ - ู…ุน ุฎูŠุงุฑ ู‚ุต ู…ุฑูƒุฒูŠ 640x640 (Zoom) ู„ูˆ CENTER_CROP = True
171
  """
172
  cap = cv2.VideoCapture(input_path)
173
  if not cap.isOpened():
174
  print("[preprocess] ุชุนุฐุฑ ูุชุญ ุงู„ููŠุฏูŠูˆุŒ ุณู†ุณุชุฎุฏู… ุงู„ู…ู„ู ุงู„ุฃุตู„ูŠ ูƒู…ุง ู‡ูˆ.")
175
+ return input_path
176
 
177
  orig_fps = cap.get(cv2.CAP_PROP_FPS)
178
  w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
 
185
  frame_step = max(1, int(round(orig_fps / target_fps)))
186
  out_fps = orig_fps / frame_step
187
 
188
+ short_side = min(w, h)
189
+ if short_side <= 0:
190
+ scale = 1.0
191
+ else:
192
+ scale = target_short_side / short_side
193
+
194
+ new_w = int(w * scale)
195
+ new_h = int(h * scale)
196
 
197
  fd, tmp_path = tempfile.mkstemp(suffix=".mp4")
198
  os.close(fd)
199
 
200
+ # ู„ูˆ ุจู†ู‚ุต 640x640 ู†ุฎู„ูŠ ุงู„ู€ writer ุจุฑุถูˆ 640x640
201
+ if CENTER_CROP:
202
+ out_w, out_h = IMG_SIZE, IMG_SIZE
203
+ else:
204
+ out_w, out_h = new_w, new_h
205
+
206
  fourcc = cv2.VideoWriter_fourcc(*"mp4v")
207
+ out = cv2.VideoWriter(tmp_path, fourcc, out_fps, (out_w, out_h))
208
 
209
  frame_idx = 0
210
  while True:
 
213
  break
214
 
215
  if frame_idx % frame_step == 0:
216
+ if CENTER_CROP:
217
+ processed = resize_and_center_crop(frame, target=IMG_SIZE)
218
+ else:
219
+ processed = cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_AREA)
220
+
221
+ out.write(processed)
222
 
223
  frame_idx += 1
224
 
225
  cap.release()
226
  out.release()
227
+ print(f"[preprocess] orig=({w}x{h}), new=({out_w}x{out_h}), saved={tmp_path}")
228
  return tmp_path
229
 
230
 
 
233
  # =============================
234
 
235
  def detect_frame(frame_bgr):
236
+ model = get_model()
237
 
238
  frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
239
  result = model.predict(
 
241
  conf=CONF_THRESHOLD,
242
  imgsz=IMG_SIZE,
243
  verbose=False,
244
+ device=DEVICE,
245
  )[0]
246
 
247
  boxes = result.boxes
248
 
249
+ num_boxes = 0 if boxes is None else len(boxes)
250
+ print(f"[detect_frame] boxes={num_boxes}")
251
+
252
  if boxes is None or len(boxes) == 0:
253
  return [], frame_bgr
254
 
 
280
 
281
 
282
  # =============================
283
+ # VIDEO โ†’ RAW TEXT + OUTPUT VIDEO + DEBUG
284
  # =============================
285
 
286
  def extract_and_render(video_path: str):
287
  cap = cv2.VideoCapture(video_path)
288
  if not cap.isOpened():
289
+ return "", None, "ุชุนุฐุฑ ูุชุญ ุงู„ููŠุฏูŠูˆ ููŠ extract_and_render"
290
 
291
  fourcc = cv2.VideoWriter_fourcc(*"mp4v")
292
  out_path = "processed_output.mp4"
 
308
  last_seen = None
309
  frame_index = 0
310
 
311
+ frames_with_dets = 0
312
+ debug_lines = []
313
+
314
  while True:
315
  ret, frame = cap.read()
316
  if not ret:
 
328
  out.write(rendered)
329
 
330
  if labels:
331
+ frames_with_dets += 1
332
+ debug_lines.append(f"frame {frame_index}: {labels}")
333
+
334
  label = labels[0]
335
  last_seen = frame_index
336
 
 
361
  words.append(word)
362
 
363
  raw_text = " ".join(words).strip()
364
+
365
+ if not debug_lines:
366
+ debug_info = (
367
+ f"total_frames={frame_index}, frames_with_detections=0\n"
368
+ "ู„ู… ูŠุชู… ุฑุตุฏ ุฃูŠ ุตู†ุงุฏูŠู‚ (boxes) ู…ู† YOLO ููŠ ุฃูŠ ูุฑูŠู….\n"
369
+ "ุชุญู‚ู‚ ู…ู†:\n"
370
+ "- ุฃู† best.pt ู‡ูˆ ู…ูˆุฏูŠู„ detection ูˆุชุฏุฑูŠุจู‡ ุณู„ูŠู….\n"
371
+ "- ุฃู† ุงู„ููŠุฏูŠูˆ ู…ุดุงุจู‡ ู„ุชุฏุฑูŠุจ ุงู„ู…ูˆุฏูŠู„ ู…ู† ู†ุงุญูŠุฉ ูˆุถุนูŠุฉ ุงู„ูŠุฏ ูˆุงู„ูƒุงู…ูŠุฑุง."
372
+ )
373
+ else:
374
+ sample = "\n".join(debug_lines[:30])
375
+ debug_info = (
376
+ f"total_frames={frame_index}, frames_with_detections={frames_with_dets}\n"
377
+ "ุฃู…ุซู„ุฉ ู…ู† ุงู„ูุฑูŠู…ุงุช ุงู„ู„ูŠ ููŠู‡ุง ุญุฑูˆู:\n"
378
+ f"{sample}"
379
+ )
380
+
381
+ return raw_text, out_path, debug_info
382
 
383
 
384
  # =============================
385
  # Gradio + @spaces.GPU
386
  # =============================
387
 
388
+ @spaces.GPU
389
  def run(file):
390
  if file is None:
391
+ return "ู„ู… ูŠุชู… ุฑูุน ููŠุฏูŠูˆ", "", None, "ู„ู… ูŠุชู… ุฑูุน ููŠุฏูŠูˆ"
392
 
393
  video_path = file.name
394
 
395
+ # ุงู„ุชูƒุจูŠุฑ + center crop 640x640
396
+ light_path = preprocess_video(video_path, target_short_side=640, target_fps=8)
397
 
398
+ raw, processed_path, debug_info = extract_and_render(light_path)
399
  pretty = fix_with_gemini(raw) if raw else ""
400
 
401
  if not raw:
402
  raw = "ู„ู… ูŠุชู… ุงู„ุชุนุฑู ุนู„ู‰ ุฃูŠ ู†ุต ู…ู† ุงู„ุฅุดุงุฑุงุช."
403
 
404
+ return raw, pretty, processed_path, debug_info
405
 
406
 
407
  with gr.Blocks() as demo:
408
+ gr.Markdown("## ๐ŸคŸ ASL โ†’ Arabic (YOLO + Gemini) โ€” ู…ุน ุชูƒุจูŠุฑ ุฃูุถู„ ู„ู„ููŠุฏูŠูˆู‡ุงุช ุงู„ุนุฑุถูŠุฉ")
409
 
410
  inp = gr.File(label="ุงุฑูุน ููŠุฏูŠูˆ ุงู„ุฅุดุงุฑุฉ")
411
  raw = gr.Textbox(label="ุงู„ู†ุต ุงู„ุฎุงู…", lines=3)
412
  pretty = gr.Textbox(label="ุงู„ู†ุต ุงู„ู…ุญุณู† (Gemini)", lines=3)
413
  video_out = gr.Video(label="ุงู„ููŠุฏูŠูˆ ุจุนุฏ ุงู„ุจุฑูˆุณูŠุณ")
414
+ debug_box = gr.Textbox(label="Debug info", lines=10)
415
 
416
  btn = gr.Button("ุงุจุฏุฃ ุงู„ู…ุนุงู„ุฌุฉ")
417
 
418
+ btn.click(run, inputs=[inp], outputs=[raw, pretty, video_out, debug_box])
419
 
420
  if __name__ == "__main__":
421
  demo.launch(server_name="0.0.0.0", server_port=7860)