Spaces:

KIMOSSINO
/

TRANSCRIPTIONV4

Running

App Files Community

KIMOSSINO committed on Dec 11, 2024

Commit

36abef4

verified ·

1 Parent(s): 37a7a82

Update app.py

Browse files

Files changed (1) hide show

app.py +85 -28

app.py CHANGED Viewed

@@ -1,12 +1,14 @@
 import os
 import json
 import tempfile
 import requests
 import gradio as gr
 import whisper
 import torch
-from gtts import gTTS
 from pathlib import Path
 # تهيئة النماذج
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -18,26 +20,66 @@ GEMINI_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini
 # قاموس للغات المدعومة
 SUPPORTED_LANGUAGES = {
-    "ar": "العربية",
-    "en": "English",
-    "fr": "Français",
-    "es": "Español"
 }
 # قاموس لأنواع الأصوات
 VOICE_TYPES = {
-    "رجل": {"speed": 0.9, "pitch": 0.8},
-    "امرأة": {"speed": 1.0, "pitch": 1.2},
-    "طفل": {"speed": 1.1, "pitch": 1.5}
 }
-def transcribe_audio(audio_file, source_lang):
-    """تحويل الصوت إلى نص باستخدام Whisper"""
     try:
-        result = whisper_model.transcribe(audio_file, language=source_lang)
         return result["text"]
     except Exception as e:
-        return f"خطأ في التحويل: {str(e)}"
 def translate_text(text, source_lang, target_lang):
     """ترجمة النص باستخدام Gemini API"""
@@ -45,7 +87,7 @@ def translate_text(text, source_lang, target_lang):
         return text
     try:
-        prompt = f"Translate the following text from {SUPPORTED_LANGUAGES[source_lang]} to {SUPPORTED_LANGUAGES[target_lang]}. Only provide the translation without any additional text or explanation:\n\n{text}"
         payload = {
             "contents": [{
@@ -73,45 +115,60 @@ def translate_text(text, source_lang, target_lang):
     except Exception as e:
         return f"خطأ في الترجمة: {str(e)}"
-def text_to_speech(text, language, voice_type):
-    """تحويل النص إلى صوت"""
     try:
-        # إنشاء مجلد مؤقت للملفات الصوتية إذا لم يكن موجوداً
         temp_dir = Path("temp_audio")
         temp_dir.mkdir(exist_ok=True)
         # إنشاء ملف صوتي مؤقت
-        temp_file = temp_dir / f"output_{voice_type}_{language}.mp3"
-        # تحويل النص إلى صوت مع تطبيق إعدادات نوع الصوت
-        voice_settings = VOICE_TYPES[voice_type]
-        tts = gTTS(text=text, lang=language, slow=False)
-        tts.save(str(temp_file))
-        return str(temp_file)
     except Exception as e:
         return f"خطأ في تحويل النص إلى صوت: {str(e)}"
 # إنشاء واجهة Gradio
 with gr.Blocks(title="معالج الصوت والترجمة", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# معالج الصوت والترجمة متعدد اللغات")
-    with gr.Tab("تحويل الصوت إلى نص"):
         with gr.Row():
-            audio_input = gr.Audio(type="filepath", label="الملف الصوتي")
             source_lang = gr.Dropdown(
                 choices=list(SUPPORTED_LANGUAGES.keys()),
                 value="ar",
-                label="لغة الملف الصوتي"
             )
         transcribe_btn = gr.Button("تحويل إلى نص")
         transcribed_text = gr.Textbox(label="النص المستخرج", lines=5)
         transcribe_btn.click(
-            fn=transcribe_audio,
-            inputs=[audio_input, source_lang],
             outputs=transcribed_text
         )
@@ -160,7 +217,7 @@ with gr.Blocks(title="معالج الصوت والترجمة", theme=gr.themes.S
         tts_btn = gr.Button("تحويل إلى صوت")
         tts_btn.click(
-            fn=text_to_speech,
             inputs=[tts_text, tts_lang, voice_type],
             outputs=tts_output
         )

 import os
 import json
+import asyncio
 import tempfile
 import requests
 import gradio as gr
 import whisper
 import torch
+import edge_tts
 from pathlib import Path
+from moviepy.editor import VideoFileClip
 # تهيئة النماذج
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 # قاموس للغات المدعومة
 SUPPORTED_LANGUAGES = {
+    "ar": {"name": "العربية", "code": "ar-SA"},
+    "en": {"name": "English", "code": "en-US"},
+    "fr": {"name": "Français", "code": "fr-FR"},
+    "es": {"name": "Español", "code": "es-ES"}
 }
 # قاموس لأنواع الأصوات
 VOICE_TYPES = {
+    "رجل": {
+        "ar": "ar-SA-HamedNeural",
+        "en": "en-US-ChristopherNeural",
+        "fr": "fr-FR-HenriNeural",
+        "es": "es-ES-AlvaroNeural"
+    },
+    "امرأة": {
+        "ar": "ar-SA-ZariyahNeural",
+        "en": "en-US-JennyNeural",
+        "fr": "fr-FR-DeniseNeural",
+        "es": "es-ES-ElviraNeural"
+    },
+    "طفل": {
+        "ar": "ar-SA-ZariyahNeural",  # نستخدم صوت المرأة مع تعديل النبرة
+        "en": "en-US-JennyNeural",
+        "fr": "fr-FR-DeniseNeural",
+        "es": "es-ES-ElviraNeural"
+    }
 }
+def extract_audio_from_video(video_path):
+    """استخراج الصوت من الفيديو"""
     try:
+        video = VideoFileClip(video_path)
+        temp_audio_path = tempfile.mktemp(suffix=".mp3")
+        video.audio.write_audiofile(temp_audio_path, codec='mp3')
+        video.close()
+        return temp_audio_path
+    except Exception as e:
+        raise Exception(f"خطأ في استخراج الصوت من الفيديو: {str(e)}")
+def process_media_file(file_path, source_lang):
+    """معالجة ملف الوسائط (صوت أو فيديو)"""
+    try:
+        # التحقق من نوع الملف
+        if file_path.endswith(('.mp4', '.avi', '.mov', '.mkv')):
+            # إذا كان فيديو، استخرج الصوت منه
+            audio_path = extract_audio_from_video(file_path)
+        else:
+            # إذا كان ملف صوتي، استخدمه مباشرة
+            audio_path = file_path
+        # تحويل الصوت إلى نص
+        result = whisper_model.transcribe(audio_path, language=source_lang)
+        # حذف الملف المؤقت إذا كان فيديو
+        if file_path.endswith(('.mp4', '.avi', '.mov', '.mkv')):
+            os.remove(audio_path)
         return result["text"]
     except Exception as e:
+        return f"خطأ في معالجة الملف: {str(e)}"
 def translate_text(text, source_lang, target_lang):
     """ترجمة النص باستخدام Gemini API"""
         return text
     try:
+        prompt = f"Translate the following text from {SUPPORTED_LANGUAGES[source_lang]['name']} to {SUPPORTED_LANGUAGES[target_lang]['name']}. Only provide the translation without any additional text or explanation:\n\n{text}"
         payload = {
             "contents": [{
     except Exception as e:
         return f"خطأ في الترجمة: {str(e)}"
+async def text_to_speech(text, language, voice_type):
+    """تحويل النص إلى صوت باستخدام Edge TTS"""
     try:
+        # إنشاء مجلد مؤقت للملفات الصوتية
         temp_dir = Path("temp_audio")
         temp_dir.mkdir(exist_ok=True)
+        # اختيار الصوت المناسب
+        voice = VOICE_TYPES[voice_type][language]
+        # تعديل السرعة والنبرة حسب نوع الصوت
+        rate = "+0%" if voice_type != "طفل" else "+15%"
+        pitch = "+0Hz" if voice_type == "رجل" else "+50Hz" if voice_type == "امرأة" else "+100Hz"
         # إنشاء ملف صوتي مؤقت
+        output_file = temp_dir / f"output_{voice_type}_{language}.mp3"
+        # تكوين كائن communicate
+        communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
+        # حفظ الملف الصوتي
+        await communicate.save(str(output_file))
+        return str(output_file)
     except Exception as e:
         return f"خطأ في تحويل النص إلى صوت: {str(e)}"
+def text_to_speech_wrapper(text, language, voice_type):
+    """wrapper function لتشغيل الدالة غير المتزامنة"""
+    return asyncio.run(text_to_speech(text, language, voice_type))
 # إنشاء واجهة Gradio
 with gr.Blocks(title="معالج الصوت والترجمة", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# معالج الصوت والترجمة متعدد اللغات")
+    with gr.Tab("تحويل الوسائط إلى نص"):
         with gr.Row():
+            media_input = gr.File(
+                label="ملف صوتي أو فيديو",
+                file_types=["audio/*", "video/*"]
+            )
             source_lang = gr.Dropdown(
                 choices=list(SUPPORTED_LANGUAGES.keys()),
                 value="ar",
+                label="لغة الملف"
             )
         transcribe_btn = gr.Button("تحويل إلى نص")
         transcribed_text = gr.Textbox(label="النص المستخرج", lines=5)
         transcribe_btn.click(
+            fn=process_media_file,
+            inputs=[media_input, source_lang],
             outputs=transcribed_text
         )
         tts_btn = gr.Button("تحويل إلى صوت")
         tts_btn.click(
+            fn=text_to_speech_wrapper,
             inputs=[tts_text, tts_lang, voice_type],
             outputs=tts_output
         )