Spaces:

Mr-HASSAN
/

testing

Paused

App Files Files Community

Mr-HASSAN committed on Nov 25, 2025

Commit

649533c

verified ·

1 Parent(s): 6482c9c

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -60

app.py CHANGED Viewed

@@ -5,14 +5,22 @@ import google.generativeai as genai
 from ultralytics import YOLO
 import tempfile
 import torch
-import spaces  # مهم لـ @spaces.GP
 # =============================
-# إعداد مفتاح Gemini (حط المفتاح هنا)
 # =============================
 GEMINI_API_KEY = "AIzaSyAvm28ZnTMaZ1Jtg9sYM-EO4qlAN2W4BIQ"
 genai.configure(api_key=GEMINI_API_KEY)
 SYSTEM_PROMPT = (
@@ -25,7 +33,6 @@ SYSTEM_PROMPT = (
     "أعد النص فقط بدون شرح."
 )
 def fix_with_gemini(raw_text: str) -> str:
     if not raw_text:
         return ""
@@ -39,19 +46,17 @@ def fix_with_gemini(raw_text: str) -> str:
 # =============================
-# إعدادات YOLO + متغيرات عامة
 # =============================
 WEIGHTS_PATH = "best.pt"
-IMG_SIZE = 640          # حجم الإدخال لـ YOLO
-CONF_THRESHOLD = 0.15   # مخفض عشان نلتقط أكثر
 MIN_STABLE_FRAMES = 1
 FRAME_SKIP = 1
 MAX_FRAMES = 1000
 WORD_GAP_FRAMES = 10
-# لو حاب تلغي القص المركزي (زوم)، خليه False
 CENTER_CROP = True
 arabic_map = {
@@ -92,11 +97,7 @@ arabic_map = {
 yolo_model = None
 DEVICE = "cpu"
 def get_model():
-    """
-    يحمّل YOLO مرة واحدة، ويحاول يستخدم CUDA لو متوفر.
-    """
     global yolo_model, DEVICE
     if yolo_model is None:
@@ -121,14 +122,33 @@ def get_model():
 # =============================
-# دالة مساعدة: تكبير + قص من الوسط 640x640
 # =============================
 def resize_and_center_crop(frame, target: int = 640):
-    """
-    - نكبر/نصغر بحيث أقصر ضلع = target
-    - ثم نقص مربع 640x640 من الوسط (Zoom لطيف على المركز)
-    """
     h, w = frame.shape[:2]
     short_side = min(w, h)
     if short_side <= 0:
@@ -143,15 +163,11 @@ def resize_and_center_crop(frame, target: int = 640):
     h, w = frame.shape[:2]
     x1 = max(0, (w - target) // 2)
     y1 = max(0, (h - target) // 2)
-    x2 = x1 + target
-    y2 = y1 + target
-    x2 = min(x2, w)
-    y2 = min(y2, h)
     crop = frame[y1:y2, x1:x2]
-    # لو لسبب ما القص أصغر من 640x640، نرجع نضبطه
     ch, cw = crop.shape[:2]
     if ch != target or cw != target:
         crop = cv2.resize(crop, (target, target), interpolation=cv2.INTER_AREA)
@@ -160,15 +176,9 @@ def resize_and_center_crop(frame, target: int = 640):
 # =============================
-# ضغط الفيديو قبل المعالجة
 # =============================
 def preprocess_video(input_path: str, target_short_side: int = 640, target_fps: int = 8) -> str:
-    """
-    نضبط الفيديو بحيث:
-    - أقصر ضلع ≈ target_short_side
-    - مع خيار قص مركزي 640x640 (Zoom) لو CENTER_CROP = True
-    """
     cap = cv2.VideoCapture(input_path)
     if not cap.isOpened():
         print("[preprocess] تعذر فتح الفيديو، سنستخدم الملف الأصلي كما هو.")
@@ -186,22 +196,14 @@ def preprocess_video(input_path: str, target_short_side: int = 640, target_fps:
         out_fps = orig_fps / frame_step
     short_side = min(w, h)
-    if short_side <= 0:
-        scale = 1.0
-    else:
-        scale = target_short_side / short_side
     new_w = int(w * scale)
     new_h = int(h * scale)
     fd, tmp_path = tempfile.mkstemp(suffix=".mp4")
     os.close(fd)
-    # لو بنقص 640x640 نخلي الـ writer برضو 640x640
-    if CENTER_CROP:
-        out_w, out_h = IMG_SIZE, IMG_SIZE
-    else:
-        out_w, out_h = new_w, new_h
     fourcc = cv2.VideoWriter_fourcc(*"mp4v")
     out = cv2.VideoWriter(tmp_path, fourcc, out_fps, (out_w, out_h))
@@ -217,7 +219,6 @@ def preprocess_video(input_path: str, target_short_side: int = 640, target_fps:
                 processed = resize_and_center_crop(frame, target=IMG_SIZE)
             else:
                 processed = cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_AREA)
             out.write(processed)
         frame_idx += 1
@@ -231,7 +232,6 @@ def preprocess_video(input_path: str, target_short_side: int = 640, target_fps:
 # =============================
 # معالجة فريم واحد
 # =============================
 def detect_frame(frame_bgr):
     model = get_model()
@@ -245,7 +245,6 @@ def detect_frame(frame_bgr):
     )[0]
     boxes = result.boxes
     num_boxes = 0 if boxes is None else len(boxes)
     print(f"[detect_frame] boxes={num_boxes}")
@@ -266,15 +265,7 @@ def detect_frame(frame_bgr):
         labels.append(letter)
         cv2.rectangle(frame_bgr, (x1, y1), (x2, y2), (0, 255, 0), 2)
-        cv2.putText(
-            frame_bgr,
-            letter,
-            (x1, y1 - 10),
-            cv2.FONT_HERSHEY_SIMPLEX,
-            0.7,
-            (0, 255, 0),
-            2,
-        )
     return labels, frame_bgr
@@ -282,7 +273,6 @@ def detect_frame(frame_bgr):
 # =============================
 # VIDEO → RAW TEXT + OUTPUT VIDEO + DEBUG
 # =============================
 def extract_and_render(video_path: str):
     cap = cv2.VideoCapture(video_path)
     if not cap.isOpened():
@@ -294,7 +284,6 @@ def extract_and_render(video_path: str):
     fps = cap.get(cv2.CAP_PROP_FPS)
     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     if fps <= 0:
         fps = 8.0
@@ -384,15 +373,12 @@ def extract_and_render(video_path: str):
 # =============================
 # Gradio + @spaces.GPU
 # =============================
 @spaces.GPU
 def run(file):
     if file is None:
         return "لم يتم رفع فيديو", "", None, "لم يتم رفع فيديو"
     video_path = file.name
-    # التكبير + center crop 640x640
     light_path = preprocess_video(video_path, target_short_side=640, target_fps=8)
     raw, processed_path, debug_info = extract_and_render(light_path)
@@ -405,7 +391,7 @@ def run(file):
 with gr.Blocks() as demo:
-    gr.Markdown("## 🤟 ASL → Arabic (YOLO + Gemini) — مع تكبير أفضل للفيديوهات العرضية")
     inp = gr.File(label="ارفع فيديو الإشارة")
     raw = gr.Textbox(label="النص الخام", lines=3)
@@ -414,8 +400,8 @@ with gr.Blocks() as demo:
     debug_box = gr.Textbox(label="Debug info", lines=10)
     btn = gr.Button("ابدأ المعالجة")
     btn.click(run, inputs=[inp], outputs=[raw, pretty, video_out, debug_box])
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)

 from ultralytics import YOLO
 import tempfile
 import torch
+import spaces
+import numpy as np
+from PIL import Image, ImageDraw, ImageFont
+import arabic_reshaper
+from bidi.algorithm import get_display
 # =============================
+# Gemini API Key
 # =============================
+# ⚠️ الصق مفتاحك محليًا هنا داخل ملفك (لا تنشره بمستودع عام)
 GEMINI_API_KEY = "AIzaSyAvm28ZnTMaZ1Jtg9sYM-EO4qlAN2W4BIQ"
+# خيار "أقل خطورة": لو موجود Secrets/Env استخدمه بدل المكتوب
+# GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") or "PASTE_YOUR_GEMINI_KEY_HERE"
 genai.configure(api_key=GEMINI_API_KEY)
 SYSTEM_PROMPT = (
     "أعد النص فقط بدون شرح."
 )
 def fix_with_gemini(raw_text: str) -> str:
     if not raw_text:
         return ""
 # =============================
+# إعدادات YOLO
 # =============================
 WEIGHTS_PATH = "best.pt"
+IMG_SIZE = 640
+CONF_THRESHOLD = 0.15
 MIN_STABLE_FRAMES = 1
 FRAME_SKIP = 1
 MAX_FRAMES = 1000
 WORD_GAP_FRAMES = 10
 CENTER_CROP = True
 arabic_map = {
 yolo_model = None
 DEVICE = "cpu"
 def get_model():
     global yolo_model, DEVICE
     if yolo_model is None:
 # =============================
+# إصلاح ????: رسم عربي على الفيديو via PIL
 # =============================
+FONT_PATH = os.path.join(os.path.dirname(__file__), "NotoNaskhArabic-VariableFont_wght.ttf")
+def draw_arabic_text(frame_bgr, text, x, y, font_size=36, bgr_color=(0, 255, 0)):
+    img = Image.fromarray(cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB))
+    draw = ImageDraw.Draw(img)
+    try:
+        font = ImageFont.truetype(FONT_PATH, font_size)
+    except Exception as e:
+        print("⚠️ خطأ تحميل الخط العربي:", e)
+        font = ImageFont.load_default()
+    shaped = arabic_reshaper.reshape(text)
+    rtl_text = get_display(shaped)
+    rgb_color = (bgr_color[2], bgr_color[1], bgr_color[0])
+    draw.text((x, y), rtl_text, font=font, fill=rgb_color)
+    return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
+# =============================
+# تكبير + قص من الوسط 640x640
+# =============================
 def resize_and_center_crop(frame, target: int = 640):
     h, w = frame.shape[:2]
     short_side = min(w, h)
     if short_side <= 0:
     h, w = frame.shape[:2]
     x1 = max(0, (w - target) // 2)
     y1 = max(0, (h - target) // 2)
+    x2 = min(x1 + target, w)
+    y2 = min(y1 + target, h)
     crop = frame[y1:y2, x1:x2]
     ch, cw = crop.shape[:2]
     if ch != target or cw != target:
         crop = cv2.resize(crop, (target, target), interpolation=cv2.INTER_AREA)
 # =============================
+# تجهيز الفيديو قبل المعالجة
 # =============================
 def preprocess_video(input_path: str, target_short_side: int = 640, target_fps: int = 8) -> str:
     cap = cv2.VideoCapture(input_path)
     if not cap.isOpened():
         print("[preprocess] تعذر فتح الفيديو، سنستخدم الملف الأصلي كما هو.")
         out_fps = orig_fps / frame_step
     short_side = min(w, h)
+    scale = 1.0 if short_side <= 0 else (target_short_side / short_side)
     new_w = int(w * scale)
     new_h = int(h * scale)
     fd, tmp_path = tempfile.mkstemp(suffix=".mp4")
     os.close(fd)
+    out_w, out_h = (IMG_SIZE, IMG_SIZE) if CENTER_CROP else (new_w, new_h)
     fourcc = cv2.VideoWriter_fourcc(*"mp4v")
     out = cv2.VideoWriter(tmp_path, fourcc, out_fps, (out_w, out_h))
                 processed = resize_and_center_crop(frame, target=IMG_SIZE)
             else:
                 processed = cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_AREA)
             out.write(processed)
         frame_idx += 1
 # =============================
 # معالجة فريم واحد
 # =============================
 def detect_frame(frame_bgr):
     model = get_model()
     )[0]
     boxes = result.boxes
     num_boxes = 0 if boxes is None else len(boxes)
     print(f"[detect_frame] boxes={num_boxes}")
         labels.append(letter)
         cv2.rectangle(frame_bgr, (x1, y1), (x2, y2), (0, 255, 0), 2)
+        frame_bgr = draw_arabic_text(frame_bgr, letter, x1, max(0, y1 - 45), font_size=36)
     return labels, frame_bgr
 # =============================
 # VIDEO → RAW TEXT + OUTPUT VIDEO + DEBUG
 # =============================
 def extract_and_render(video_path: str):
     cap = cv2.VideoCapture(video_path)
     if not cap.isOpened():
     fps = cap.get(cv2.CAP_PROP_FPS)
     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     if fps <= 0:
         fps = 8.0
 # =============================
 # Gradio + @spaces.GPU
 # =============================
 @spaces.GPU
 def run(file):
     if file is None:
         return "لم يتم رفع فيديو", "", None, "لم يتم رفع فيديو"
     video_path = file.name
     light_path = preprocess_video(video_path, target_short_side=640, target_fps=8)
     raw, processed_path, debug_info = extract_and_render(light_path)
 with gr.Blocks() as demo:
+    gr.Markdown("## 🤟 ASL → Arabic (YOLO + Gemini) — إصلاح ظهور الحروف العربية داخل الفيديو")
     inp = gr.File(label="ارفع فيديو الإشارة")
     raw = gr.Textbox(label="النص الخام", lines=3)
     debug_box = gr.Textbox(label="Debug info", lines=10)
     btn = gr.Button("ابدأ المعالجة")
     btn.click(run, inputs=[inp], outputs=[raw, pretty, video_out, debug_box])
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)