Spaces:

GannaEslam38
/

whisper_code-switching

Sleeping

App Files Files Community

GannaEslam38 committed on Nov 30, 2025

Commit

301c4de

verified ·

1 Parent(s): e629d9f

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -17

app.py CHANGED Viewed

@@ -7,28 +7,34 @@ import os
 MODEL_ID = "MohamedRashad/Arabic-Whisper-CodeSwitching-Edition"
 # تحديد الجهاز المستخدم (GPU أو CPU)
-# يفضل استخدام 0 للجهاز الأول (GPU) إذا كان متاحاً، وإلا نستخدم "cpu"
 device = 0 if torch.cuda.is_available() else "cpu"
 print(f"Device set to use: {device}")
-# 2. تحميل النموذج عبر الـ Pipeline
 try:
-    print("Loading ASR pipeline (Whisper)...")
-    # تحميل النموذج باستخدام pipeline
     asr_pipeline = pipeline(
         "automatic-speech-recognition",
         model=MODEL_ID,
         device=device,
     )
-    print("Pipeline loaded successfully.")
 except Exception as e:
-    # هذا الجزء يمكن حذفه إذا كنت واثقاً من تحميل الـ pipeline
-    print(f"Error loading pipeline: {e}. Falling back to error message.")
     asr_pipeline = None
-# 3. دالة النسخ الصوتي (مع معالجة التقسيم للملفات الطويلة)
 def transcribe_audio(audio_path):
     """تحول ملف الصوت إلى نص باستخدام نموذج Whisper ASR، مع تقسيم للملفات الطويلة."""
     if asr_pipeline is None:
@@ -40,18 +46,14 @@ def transcribe_audio(audio_path):
     print(f"Processing audio file: {audio_path}")
     try:
-        # استخدام التقسيم (Chunking) لمعالجة الملفات الطويلة (أفضل دقة واستقرار)
-        # تقسيم الصوت إلى مقاطع 30 ثانية، مع تداخل 5 ثواني لضمان عدم فقدان السياق
         result = asr_pipeline(
             audio_path,
-            chunk_length_s=30,      # طول المقطع الأقصى (30 ثانية)
-            stride=(5, 5),          # تداخل 5 ثواني بين المقاطع
-            return_timestamps=True  # هذا ضروري لتشغيل وضع التقسيم
         )
-        # النتيجة تحتوي على قاموس يضم key اسمه 'chunks' أو 'text'
-        # نجمع النصوص من جميع المقاطع لإنتاج النص النهائي
         if 'chunks' in result and isinstance(result['chunks'], list):
             segments_text = [seg['text'] for seg in result['chunks']]
             final_text = " ".join(segments_text).strip()
@@ -63,7 +65,6 @@ def transcribe_audio(audio_path):
         return final_text
     except Exception as e:
-        # قد يحدث هذا الخطأ إذا كان الملف غير صالح أو واجه النموذج مشكلة في الذاكرة
         return f"حدث خطأ أثناء معالجة الصوت: {e}"

 MODEL_ID = "MohamedRashad/Arabic-Whisper-CodeSwitching-Edition"
 # تحديد الجهاز المستخدم (GPU أو CPU)
 device = 0 if torch.cuda.is_available() else "cpu"
 print(f"Device set to use: {device}")
+# 2. تحميل النموذج عبر الـ Pipeline (مع تحديد وسائط التقسيم عند التحميل)
 try:
+    print("Loading ASR pipeline (Whisper) with chunking parameters...")
+    # تحديد وسائط التقسيم هنا لتجنب تمريرها كـ model_kwargs غير مستخدمة
+    pipeline_kwargs = {
+        "chunk_length_s": 30,  # طول المقطع الأقصى (30 ثانية)
+        "stride": (5, 5),      # تداخل 5 ثواني بين المقاطع
+    }
     asr_pipeline = pipeline(
         "automatic-speech-recognition",
         model=MODEL_ID,
         device=device,
+        **pipeline_kwargs # تمرير وسائط التقسيم هنا
     )
+    print("Pipeline loaded successfully with chunking enabled.")
 except Exception as e:
+    print(f"Error loading pipeline: {e}.")
+    # إذا فشل التحميل، نستخدم pipeline فارغ لمنع تعطل التطبيق
     asr_pipeline = None
+# 3. دالة النسخ الصوتي
 def transcribe_audio(audio_path):
     """تحول ملف الصوت إلى نص باستخدام نموذج Whisper ASR، مع تقسيم للملفات الطويلة."""
     if asr_pipeline is None:
     print(f"Processing audio file: {audio_path}")
     try:
+        # 💡 التعديل هنا: نزيل 'chunk_length_s' و 'stride' ونبقي فقط 'return_timestamps'
+        # وسائط التقسيم تم تحديدها بالفعل في خطوة التحميل (Initialisation)
         result = asr_pipeline(
             audio_path,
+            return_timestamps=True  # ضروري لعملية التقسيم
         )
+        # استخراج النص من النتيجة (النتيجة تحتوي على 'chunks' عند استخدام return_timestamps=True)
         if 'chunks' in result and isinstance(result['chunks'], list):
             segments_text = [seg['text'] for seg in result['chunks']]
             final_text = " ".join(segments_text).strip()
         return final_text
     except Exception as e:
         return f"حدث خطأ أثناء معالجة الصوت: {e}"