Mr-HASSAN committed on
Commit
649533c
·
verified ·
1 Parent(s): 6482c9c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -60
app.py CHANGED
@@ -5,14 +5,22 @@ import google.generativeai as genai
5
  from ultralytics import YOLO
6
  import tempfile
7
  import torch
8
- import spaces # مهم لـ @spaces.GP
 
 
 
 
 
9
 
10
  # =============================
11
- # إعداد مفتاح Gemini (حط المفتاح هنا)
12
  # =============================
13
-
14
  GEMINI_API_KEY = "AIzaSyAvm28ZnTMaZ1Jtg9sYM-EO4qlAN2W4BIQ"
15
 
 
 
 
16
  genai.configure(api_key=GEMINI_API_KEY)
17
 
18
  SYSTEM_PROMPT = (
@@ -25,7 +33,6 @@ SYSTEM_PROMPT = (
25
  "أعد النص فقط بدون شرح."
26
  )
27
 
28
-
29
  def fix_with_gemini(raw_text: str) -> str:
30
  if not raw_text:
31
  return ""
@@ -39,19 +46,17 @@ def fix_with_gemini(raw_text: str) -> str:
39
 
40
 
41
  # =============================
42
- # إعدادات YOLO + متغيرات عامة
43
  # =============================
44
-
45
  WEIGHTS_PATH = "best.pt"
46
- IMG_SIZE = 640 # حجم الإدخال لـ YOLO
47
- CONF_THRESHOLD = 0.15 # مخفض عشان نلتقط أكثر
48
 
49
  MIN_STABLE_FRAMES = 1
50
  FRAME_SKIP = 1
51
  MAX_FRAMES = 1000
52
  WORD_GAP_FRAMES = 10
53
 
54
- # لو حاب تلغي القص المركزي (زوم)، خليه False
55
  CENTER_CROP = True
56
 
57
  arabic_map = {
@@ -92,11 +97,7 @@ arabic_map = {
92
  yolo_model = None
93
  DEVICE = "cpu"
94
 
95
-
96
  def get_model():
97
- """
98
- يحمّل YOLO مرة واحدة، ويحاول يستخدم CUDA لو متوفر.
99
- """
100
  global yolo_model, DEVICE
101
 
102
  if yolo_model is None:
@@ -121,14 +122,33 @@ def get_model():
121
 
122
 
123
  # =============================
124
- # دالة مساعدة: تكبير + قص من الوسط 640x640
125
  # =============================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
 
 
 
 
 
 
127
  def resize_and_center_crop(frame, target: int = 640):
128
- """
129
- - نكبر/نصغر بحيث أقصر ضلع = target
130
- - ثم نقص مربع 640x640 من الوسط (Zoom لطيف على المركز)
131
- """
132
  h, w = frame.shape[:2]
133
  short_side = min(w, h)
134
  if short_side <= 0:
@@ -143,15 +163,11 @@ def resize_and_center_crop(frame, target: int = 640):
143
  h, w = frame.shape[:2]
144
  x1 = max(0, (w - target) // 2)
145
  y1 = max(0, (h - target) // 2)
146
- x2 = x1 + target
147
- y2 = y1 + target
148
-
149
- x2 = min(x2, w)
150
- y2 = min(y2, h)
151
 
152
  crop = frame[y1:y2, x1:x2]
153
 
154
- # لو لسبب ما القص أصغر من 640x640، نرجع نضبطه
155
  ch, cw = crop.shape[:2]
156
  if ch != target or cw != target:
157
  crop = cv2.resize(crop, (target, target), interpolation=cv2.INTER_AREA)
@@ -160,15 +176,9 @@ def resize_and_center_crop(frame, target: int = 640):
160
 
161
 
162
  # =============================
163
- # ضغط الفيديو قبل المعالجة
164
  # =============================
165
-
166
  def preprocess_video(input_path: str, target_short_side: int = 640, target_fps: int = 8) -> str:
167
- """
168
- نضبط الفيديو بحيث:
169
- - أقصر ضلع ≈ target_short_side
170
- - مع خيار قص مركزي 640x640 (Zoom) لو CENTER_CROP = True
171
- """
172
  cap = cv2.VideoCapture(input_path)
173
  if not cap.isOpened():
174
  print("[preprocess] تعذر فتح الفيديو، سنستخدم الملف الأصلي كما هو.")
@@ -186,22 +196,14 @@ def preprocess_video(input_path: str, target_short_side: int = 640, target_fps:
186
  out_fps = orig_fps / frame_step
187
 
188
  short_side = min(w, h)
189
- if short_side <= 0:
190
- scale = 1.0
191
- else:
192
- scale = target_short_side / short_side
193
-
194
  new_w = int(w * scale)
195
  new_h = int(h * scale)
196
 
197
  fd, tmp_path = tempfile.mkstemp(suffix=".mp4")
198
  os.close(fd)
199
 
200
- # لو بنقص 640x640 نخلي الـ writer برضو 640x640
201
- if CENTER_CROP:
202
- out_w, out_h = IMG_SIZE, IMG_SIZE
203
- else:
204
- out_w, out_h = new_w, new_h
205
 
206
  fourcc = cv2.VideoWriter_fourcc(*"mp4v")
207
  out = cv2.VideoWriter(tmp_path, fourcc, out_fps, (out_w, out_h))
@@ -217,7 +219,6 @@ def preprocess_video(input_path: str, target_short_side: int = 640, target_fps:
217
  processed = resize_and_center_crop(frame, target=IMG_SIZE)
218
  else:
219
  processed = cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_AREA)
220
-
221
  out.write(processed)
222
 
223
  frame_idx += 1
@@ -231,7 +232,6 @@ def preprocess_video(input_path: str, target_short_side: int = 640, target_fps:
231
  # =============================
232
  # معالجة فريم واحد
233
  # =============================
234
-
235
  def detect_frame(frame_bgr):
236
  model = get_model()
237
 
@@ -245,7 +245,6 @@ def detect_frame(frame_bgr):
245
  )[0]
246
 
247
  boxes = result.boxes
248
-
249
  num_boxes = 0 if boxes is None else len(boxes)
250
  print(f"[detect_frame] boxes={num_boxes}")
251
 
@@ -266,15 +265,7 @@ def detect_frame(frame_bgr):
266
  labels.append(letter)
267
 
268
  cv2.rectangle(frame_bgr, (x1, y1), (x2, y2), (0, 255, 0), 2)
269
- cv2.putText(
270
- frame_bgr,
271
- letter,
272
- (x1, y1 - 10),
273
- cv2.FONT_HERSHEY_SIMPLEX,
274
- 0.7,
275
- (0, 255, 0),
276
- 2,
277
- )
278
 
279
  return labels, frame_bgr
280
 
@@ -282,7 +273,6 @@ def detect_frame(frame_bgr):
282
  # =============================
283
  # VIDEO → RAW TEXT + OUTPUT VIDEO + DEBUG
284
  # =============================
285
-
286
  def extract_and_render(video_path: str):
287
  cap = cv2.VideoCapture(video_path)
288
  if not cap.isOpened():
@@ -294,7 +284,6 @@ def extract_and_render(video_path: str):
294
  fps = cap.get(cv2.CAP_PROP_FPS)
295
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
296
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
297
-
298
  if fps <= 0:
299
  fps = 8.0
300
 
@@ -384,15 +373,12 @@ def extract_and_render(video_path: str):
384
  # =============================
385
  # Gradio + @spaces.GPU
386
  # =============================
387
-
388
  @spaces.GPU
389
  def run(file):
390
  if file is None:
391
  return "لم يتم رفع فيديو", "", None, "لم يتم رفع فيديو"
392
 
393
  video_path = file.name
394
-
395
- # التكبير + center crop 640x640
396
  light_path = preprocess_video(video_path, target_short_side=640, target_fps=8)
397
 
398
  raw, processed_path, debug_info = extract_and_render(light_path)
@@ -405,7 +391,7 @@ def run(file):
405
 
406
 
407
  with gr.Blocks() as demo:
408
- gr.Markdown("## 🤟 ASL → Arabic (YOLO + Gemini) — مع تكبير أفضل للفيديوهات العرضية")
409
 
410
  inp = gr.File(label="ارفع فيديو الإشارة")
411
  raw = gr.Textbox(label="النص الخام", lines=3)
@@ -414,8 +400,8 @@ with gr.Blocks() as demo:
414
  debug_box = gr.Textbox(label="Debug info", lines=10)
415
 
416
  btn = gr.Button("ابدأ المعالجة")
417
-
418
  btn.click(run, inputs=[inp], outputs=[raw, pretty, video_out, debug_box])
419
 
420
  if __name__ == "__main__":
421
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
5
  from ultralytics import YOLO
6
  import tempfile
7
  import torch
8
+ import spaces
9
+
10
+ import numpy as np
11
+ from PIL import Image, ImageDraw, ImageFont
12
+ import arabic_reshaper
13
+ from bidi.algorithm import get_display
14
 
15
  # =============================
16
+ # Gemini API Key
17
  # =============================
18
+ # ⚠️ الصق مفتاحك محليًا هنا داخل ملفك (لا تنشره بمستودع عام)
19
  GEMINI_API_KEY = "AIzaSyAvm28ZnTMaZ1Jtg9sYM-EO4qlAN2W4BIQ"
20
 
21
+ # خيار "أقل خطورة": لو موجود Secrets/Env استخدمه بدل المكتوب
22
+ # GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") or "PASTE_YOUR_GEMINI_KEY_HERE"
23
+
24
  genai.configure(api_key=GEMINI_API_KEY)
25
 
26
  SYSTEM_PROMPT = (
 
33
  "أعد النص فقط بدون شرح."
34
  )
35
 
 
36
  def fix_with_gemini(raw_text: str) -> str:
37
  if not raw_text:
38
  return ""
 
46
 
47
 
48
  # =============================
49
+ # إعدادات YOLO
50
  # =============================
 
51
  WEIGHTS_PATH = "best.pt"
52
+ IMG_SIZE = 640
53
+ CONF_THRESHOLD = 0.15
54
 
55
  MIN_STABLE_FRAMES = 1
56
  FRAME_SKIP = 1
57
  MAX_FRAMES = 1000
58
  WORD_GAP_FRAMES = 10
59
 
 
60
  CENTER_CROP = True
61
 
62
  arabic_map = {
 
97
  yolo_model = None
98
  DEVICE = "cpu"
99
 
 
100
  def get_model():
 
 
 
101
  global yolo_model, DEVICE
102
 
103
  if yolo_model is None:
 
122
 
123
 
124
  # =============================
125
+ # إصلاح ????: رسم عربي على الفيديو via PIL
126
  # =============================
127
+ FONT_PATH = os.path.join(os.path.dirname(__file__), "NotoNaskhArabic-VariableFont_wght.ttf")
128
+
129
# Fonts already loaded, keyed by point size. draw_arabic_text runs once per
# detected box on every frame, so re-reading the font file on each call is
# wasted disk I/O; this cache loads each size only once.
_ARABIC_FONT_CACHE = {}


def _get_arabic_font(font_size):
    """Return the font to draw with at *font_size*, loading it on first use.

    Tries the TrueType file at FONT_PATH. If loading fails for any reason,
    a warning is printed and PIL's built-in default font is used instead.
    The result (including the fallback) is cached, so the warning is printed
    at most once per size rather than once per drawn label.
    """
    font = _ARABIC_FONT_CACHE.get(font_size)
    if font is None:
        try:
            font = ImageFont.truetype(FONT_PATH, font_size)
        except Exception as e:
            # Deliberate best-effort: a missing/unreadable font must not
            # abort video processing, so fall back to the default font.
            print("⚠️ خطأ تحميل الخط العربي:", e)
            font = ImageFont.load_default()
        _ARABIC_FONT_CACHE[font_size] = font
    return font


def draw_arabic_text(frame_bgr, text, x, y, font_size=36, bgr_color=(0, 255, 0)):
    """Draw *text* onto a BGR frame using PIL and return the resulting frame.

    The frame is converted from BGR to RGB and wrapped in a PIL image, the
    text is passed through ``arabic_reshaper.reshape`` and then
    ``get_display`` before being drawn, and the image is converted back to a
    BGR NumPy array.

    Args:
        frame_bgr: Image array in BGR channel order (as used with cv2).
        text: The string to draw.
        x: Horizontal position passed to PIL's ``draw.text``.
        y: Vertical position passed to PIL's ``draw.text``.
        font_size: Size used when loading the font from FONT_PATH.
        bgr_color: Text color as a (B, G, R) tuple.

    Returns:
        A new BGR array with the text drawn on it. The drawing happens on a
        converted copy, so callers must use the return value.
    """
    img = Image.fromarray(cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB))
    draw = ImageDraw.Draw(img)

    font = _get_arabic_font(font_size)

    # Join the letters into their contextual forms, then reorder for
    # right-to-left display, since the text is drawn as given.
    shaped = arabic_reshaper.reshape(text)
    rtl_text = get_display(shaped)

    # PIL expects RGB, while the caller supplies the color in BGR order.
    rgb_color = (bgr_color[2], bgr_color[1], bgr_color[0])
    draw.text((x, y), rtl_text, font=font, fill=rgb_color)

    return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
146
+
147
+
148
+ # =============================
149
+ # تكبير + قص من الوسط 640x640
150
+ # =============================
151
  def resize_and_center_crop(frame, target: int = 640):
 
 
 
 
152
  h, w = frame.shape[:2]
153
  short_side = min(w, h)
154
  if short_side <= 0:
 
163
  h, w = frame.shape[:2]
164
  x1 = max(0, (w - target) // 2)
165
  y1 = max(0, (h - target) // 2)
166
+ x2 = min(x1 + target, w)
167
+ y2 = min(y1 + target, h)
 
 
 
168
 
169
  crop = frame[y1:y2, x1:x2]
170
 
 
171
  ch, cw = crop.shape[:2]
172
  if ch != target or cw != target:
173
  crop = cv2.resize(crop, (target, target), interpolation=cv2.INTER_AREA)
 
176
 
177
 
178
  # =============================
179
+ # تجهيز الفيديو قبل المعالجة
180
  # =============================
 
181
  def preprocess_video(input_path: str, target_short_side: int = 640, target_fps: int = 8) -> str:
 
 
 
 
 
182
  cap = cv2.VideoCapture(input_path)
183
  if not cap.isOpened():
184
  print("[preprocess] تعذر فتح الفيديو، سنستخدم الملف الأصلي كما هو.")
 
196
  out_fps = orig_fps / frame_step
197
 
198
  short_side = min(w, h)
199
+ scale = 1.0 if short_side <= 0 else (target_short_side / short_side)
 
 
 
 
200
  new_w = int(w * scale)
201
  new_h = int(h * scale)
202
 
203
  fd, tmp_path = tempfile.mkstemp(suffix=".mp4")
204
  os.close(fd)
205
 
206
+ out_w, out_h = (IMG_SIZE, IMG_SIZE) if CENTER_CROP else (new_w, new_h)
 
 
 
 
207
 
208
  fourcc = cv2.VideoWriter_fourcc(*"mp4v")
209
  out = cv2.VideoWriter(tmp_path, fourcc, out_fps, (out_w, out_h))
 
219
  processed = resize_and_center_crop(frame, target=IMG_SIZE)
220
  else:
221
  processed = cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_AREA)
 
222
  out.write(processed)
223
 
224
  frame_idx += 1
 
232
  # =============================
233
  # معالجة فريم واحد
234
  # =============================
 
235
  def detect_frame(frame_bgr):
236
  model = get_model()
237
 
 
245
  )[0]
246
 
247
  boxes = result.boxes
 
248
  num_boxes = 0 if boxes is None else len(boxes)
249
  print(f"[detect_frame] boxes={num_boxes}")
250
 
 
265
  labels.append(letter)
266
 
267
  cv2.rectangle(frame_bgr, (x1, y1), (x2, y2), (0, 255, 0), 2)
268
+ frame_bgr = draw_arabic_text(frame_bgr, letter, x1, max(0, y1 - 45), font_size=36)
 
 
 
 
 
 
 
 
269
 
270
  return labels, frame_bgr
271
 
 
273
  # =============================
274
  # VIDEO → RAW TEXT + OUTPUT VIDEO + DEBUG
275
  # =============================
 
276
  def extract_and_render(video_path: str):
277
  cap = cv2.VideoCapture(video_path)
278
  if not cap.isOpened():
 
284
  fps = cap.get(cv2.CAP_PROP_FPS)
285
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
286
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
 
287
  if fps <= 0:
288
  fps = 8.0
289
 
 
373
  # =============================
374
  # Gradio + @spaces.GPU
375
  # =============================
 
376
  @spaces.GPU
377
  def run(file):
378
  if file is None:
379
  return "لم يتم رفع فيديو", "", None, "لم يتم رفع فيديو"
380
 
381
  video_path = file.name
 
 
382
  light_path = preprocess_video(video_path, target_short_side=640, target_fps=8)
383
 
384
  raw, processed_path, debug_info = extract_and_render(light_path)
 
391
 
392
 
393
  with gr.Blocks() as demo:
394
+ gr.Markdown("## 🤟 ASL → Arabic (YOLO + Gemini) — إصلاح ظهور الحروف العربية داخل الفيديو")
395
 
396
  inp = gr.File(label="ارفع فيديو الإشارة")
397
  raw = gr.Textbox(label="النص الخام", lines=3)
 
400
  debug_box = gr.Textbox(label="Debug info", lines=10)
401
 
402
  btn = gr.Button("ابدأ المعالجة")
 
403
  btn.click(run, inputs=[inp], outputs=[raw, pretty, video_out, debug_box])
404
 
405
  if __name__ == "__main__":
406
  demo.launch(server_name="0.0.0.0", server_port=7860)
407
+