Spaces:

Mr-HASSAN
/

testing

Paused

App Files Files Community

Mr-HASSAN committed on Nov 24, 2025

Commit

ed83cdc

verified ·

1 Parent(s): ed4b382

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -43

app.py CHANGED Viewed

@@ -10,17 +10,27 @@ import arabic_reshaper
 from bidi.algorithm import get_display
 import google.generativeai as genai
-import torch  # 👈 مهم للـ GPU
 # ==========================
 # ⚠️ هنا تحط مفتاحك مباشرة
 # ==========================
-GEMINI_API_KEY = "AIzaSyAvm28ZnTMaZ1Jtg9sYM-EO4qlAN2W4BIQ"  # 👈 استبدله بمفتاحك
-# اضبط الـ API
 genai.configure(api_key=GEMINI_API_KEY)
 # ==========================
 # إعدادات YOLO + الثوابت
 # ==========================
@@ -29,17 +39,10 @@ WEIGHTS_PATH = "best.pt"
 IMG_SIZE = 256
 CONF_THRESHOLD = 0.5
-# عدد الإطارات المطلوبة عشان نثبت الحرف
 MIN_STABLE_FRAMES = 3
-# بعد كم ثانية نحذر المستخدم إن الكلمة بتنتهي
 WARN_BEFORE_RESET = 1.5
-# بعد كم ثانية نعتبر الكلمة انتهت ونرسلها لـ Gemini
 RESET_DELAY = 2.5
-# اختيار الجهاز: GPU لو موجود، غيره CPU
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-print("🔥 Using device:", DEVICE)
 arabic_map = {
     "aleff": "ا", "bb": "ب", "ta": "ت", "thaa": "ث", "jeem": "ج",
     "haa": "ح", "khaa": "خ", "dal": "د", "thal": "ذ", "ra": "ر",
@@ -61,39 +64,42 @@ SYSTEM_PROMPT = (
 )
 # ==========================
-# دوال رسم العربي
 # ==========================
-def draw_arabic_text(
-    img,
-    text,
-    x,
-    y,
-    font_path="NotoNaskhArabic-VariableFont_wght.ttf",
-    font_size=24,
-):
-    reshaped = arabic_reshaper.reshape(text)
-    bidi_text = get_display(reshaped)
-    img_pil = Image.fromarray(img)
-    draw = ImageDraw.Draw(img_pil)
-    try:
-        font = ImageFont.truetype(font_path, font_size)
-    except Exception:
-        font = ImageFont.load_default()
-    draw.text((x, y), bidi_text, font=font, fill=(0, 0, 0))
-    return np.array(img_pil)
 def draw_detections(result, frame, names):
     boxes = result.boxes
     detected_labels = []
     if boxes is None or len(boxes) == 0:
         return frame, detected_labels
     for box in boxes:
         x1, y1, x2, y2 = map(int, box.xyxy[0])
         cls_id = int(box.cls[0])
@@ -106,6 +112,7 @@ def draw_detections(result, frame, names):
         ar_label = arabic_map.get(eng_label, eng_label)
         detected_labels.append(ar_label)
         cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
         label_bg_y1 = max(0, y1 - 35)
@@ -118,18 +125,40 @@ def draw_detections(result, frame, names):
             -1,
         )
-        frame = draw_arabic_text(frame, ar_label, x1 + 5, label_bg_y1 + 5)
-    return frame, detected_labels
 # ==========================
-# تحميل YOLO
 # ==========================
 print("🔹 Loading YOLO model...")
 model = YOLO(WEIGHTS_PATH)
-model.to(DEVICE)  # 👈 نحمل الموديل على الـ GPU لو موجود
 print("📚 Classes:", model.names)
@@ -142,7 +171,7 @@ def call_gemini_on_word(word: str) -> str:
         return ""
     try:
-        # نفس الموديل اللي كنت تستخدمه
         model_g = genai.GenerativeModel("gemini-1.5-flash")
         prompt = (
@@ -172,18 +201,19 @@ def process_frame(
     if chat_history is None:
         chat_history = []
-    # نحول من RGB (gradio) إلى BGR (opencv)
     frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
-    # نعكس الصورة أفقياً (كأنها مراية)
     frame_bgr = cv2.flip(frame_bgr, 1)
-    # نستخدم YOLO مع نفس الجهاز (GPU/CPU)
     results = model.predict(
         frame_bgr,
         conf=CONF_THRESHOLD,
         imgsz=IMG_SIZE,
         verbose=False,
-        device=DEVICE,  # 👈 هنا
     )[0]
     annotated, labels = draw_detections(results, frame_bgr, model.names)
@@ -207,19 +237,15 @@ def process_frame(
         elapsed = time.time() - last_letter_time
         if elapsed > RESET_DELAY:
-            # نعتبر الكلمة انتهت
             final_text = current_word
-            # نضيفها للشات كرسالة من المريض
             chat_history.append(["🖐️ من الإشارات", final_text])
-            # نرسلها لـ Gemini
             gpt_reply = call_gemini_on_word(final_text)
             if gpt_reply:
                 chat_history.append(["🤖 المساعد", gpt_reply])
-            # نرجع نفضّي الحالة
             current_word = ""
             last_label = None
             stable_count = 0
@@ -293,4 +319,6 @@ with gr.Blocks() as demo:
 if __name__ == "__main__":
-    demo.launch()

 from bidi.algorithm import get_display
 import google.generativeai as genai
+import torch
 # ==========================
 # ⚠️ هنا تحط مفتاحك مباشرة
 # ==========================
+GEMINI_API_KEY = "YOUR_GEMINI_API_KEY_HERE"  # 👈 استبدله بمفتاحك
 genai.configure(api_key=GEMINI_API_KEY)
+# ==========================
+# إعدادات أداء PyTorch / GPU
+# ==========================
+# تفعيل تسريع CUDNN
+torch.backends.cudnn.benchmark = True
+DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
+USE_HALF = DEVICE.startswith("cuda")
+print("🔥 Using device:", DEVICE)
 # ==========================
 # إعدادات YOLO + الثوابت
 # ==========================
 IMG_SIZE = 256
 CONF_THRESHOLD = 0.5
 MIN_STABLE_FRAMES = 3
 WARN_BEFORE_RESET = 1.5
 RESET_DELAY = 2.5
 arabic_map = {
     "aleff": "ا", "bb": "ب", "ta": "ت", "thaa": "ث", "jeem": "ج",
     "haa": "ح", "khaa": "خ", "dal": "د", "thal": "ذ", "ra": "ر",
 )
 # ==========================
+# إعداد خط عربي مرة وحدة (بدون تكرار)
 # ==========================
+DEFAULT_FONT_SIZE = 24
+DEFAULT_FONT_PATH = "NotoNaskhArabic-VariableFont_wght.ttf"
+try:
+    FONT_AR = ImageFont.truetype(DEFAULT_FONT_PATH, DEFAULT_FONT_SIZE)
+except Exception:
+    FONT_AR = ImageFont.load_default()
+def prepare_arabic(text: str) -> str:
+    """Prepare the Arabic text (reshaper + bidi) without drawing it."""
+    reshaped = arabic_reshaper.reshape(text)
+    bidi_text = get_display(reshaped)
+    return bidi_text
+# ==========================
+# رسم الديتكشن بشكل أسرع
+# ==========================
 def draw_detections(result, frame, names):
+    """
+    نرسم البوكسات بـ OpenCV،
+    وبعدين نحول الصورة إلى PIL مرة واحدة ونرسم كل النصوص.
+    """
     boxes = result.boxes
     detected_labels = []
     if boxes is None or len(boxes) == 0:
         return frame, detected_labels
+    label_infos = []
     for box in boxes:
         x1, y1, x2, y2 = map(int, box.xyxy[0])
         cls_id = int(box.cls[0])
         ar_label = arabic_map.get(eng_label, eng_label)
         detected_labels.append(ar_label)
+        # رسم البوكس
         cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
         label_bg_y1 = max(0, y1 - 35)
             -1,
         )
+        label_infos.append(
+            (
+                prepare_arabic(ar_label),  # نص عربي جاهز للرسم
+                x1 + 5,
+                label_bg_y1 + 5,
+            )
+        )
+    # نحول لمرة وحدة إلى PIL ونرسم كل النصوص
+    img_pil = Image.fromarray(frame)
+    draw = ImageDraw.Draw(img_pil)
+    for bidi_text, tx, ty in label_infos:
+        draw.text((tx, ty), bidi_text, font=FONT_AR, fill=(0, 0, 0))
+    return np.array(img_pil), detected_labels
 # ==========================
+# تحميل YOLO على GPU + half
 # ==========================
 print("🔹 Loading YOLO model...")
 model = YOLO(WEIGHTS_PATH)
+model.to(DEVICE)
+# تفعيل half precision لو معنا GPU
+if USE_HALF:
+    try:
+        model.model.half()
+        print("⚡ Using half precision for YOLO on GPU")
+    except Exception as e:
+        print("⚠️ Could not enable half precision:", e)
 print("📚 Classes:", model.names)
         return ""
     try:
+        # flash سريع أصلا، نخليه كما هو
         model_g = genai.GenerativeModel("gemini-1.5-flash")
         prompt = (
     if chat_history is None:
         chat_history = []
+    # Gradio يعطينا RGB، نحوله BGR لـ OpenCV
     frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+    # نعكس الصورة أفقياً
     frame_bgr = cv2.flip(frame_bgr, 1)
+    # YOLO على GPU + نصف دقة لو متوفر
     results = model.predict(
         frame_bgr,
         conf=CONF_THRESHOLD,
         imgsz=IMG_SIZE,
         verbose=False,
+        device=DEVICE,
+        half=USE_HALF,
     )[0]
     annotated, labels = draw_detections(results, frame_bgr, model.names)
         elapsed = time.time() - last_letter_time
         if elapsed > RESET_DELAY:
             final_text = current_word
             chat_history.append(["🖐️ من الإشارات", final_text])
             gpt_reply = call_gemini_on_word(final_text)
             if gpt_reply:
                 chat_history.append(["🤖 المساعد", gpt_reply])
             current_word = ""
             last_label = None
             stable_count = 0
 if __name__ == "__main__":
+    # لو أنت على Hugging Face Spaces وطلع لك موضوع الـ hot-reload،
+    # نقدر نضيف باتش بسيط هنا، بس حالياً نخليها عادية:
+    demo.launch(server_name="0.0.0.0", server_port=7860)