Spaces:

mohmmed5787
/

samirmo

Build error

App Files Community

mohmmed5787 committed on Feb 2

Commit

79bb5bd

verified ·

1 Parent(s): 5e7e544

Upload app.py

Browse files

Files changed (1) hide show

app.py +318 -0

app.py ADDED Viewed

	@@ -0,0 +1,318 @@

+import gradio as gr
+import torch
+import os
+import numpy as np
+from pathlib import Path
+import tempfile
+import cv2
+from PIL import Image
+import torchaudio
+import subprocess
+import warnings
+warnings.filterwarnings('ignore')
+# تحميل النماذج
+print("🔄 جاري تحميل النماذج...")
+# SadTalker للوجه المتحرك
+try:
+    from sadtalker.test_audio2coeff import Audio2Coeff
+    from sadtalker.facerender.animate import AnimateFromCoeff
+    from sadtalker.test_audio2video import audio2video
+    print("✅ SadTalker جاهز")
+except:
+    print("⚠️ SadTalker غير متوفر، سيتم استخدام Wav2Lip")
+# Wav2Lip كخيار احتياطي
+try:
+    import sys
+    sys.path.append('./Wav2Lip')
+    from models import Wav2Lip
+    from inference import load_checkpoint
+    print("✅ Wav2Lip جاهز")
+except:
+    print("⚠️ Wav2Lip غير متوفر")
+# TTS العربي
+try:
+    from TTS.api import TTS
+    tts_model = TTS("tts_models/ara/fairseq/vits")
+    print("✅ Arabic TTS جاهز")
+except:
+    print("⚠️ TTS غير متوفر")
+def process_lipsync(
+    image_file,
+    audio_file=None,
+    text_input=None,
+    emotional_intensity=80,
+    stability=90,
+    lip_sync_accuracy=95
+):
+    """
+    معالجة مزامنة الشفاه
+    """
+    try:
+        # التحقق من المدخلات
+        if image_file is None:
+            return None, "❌ الرجاء رفع صورة"
+        if audio_file is None and not text_input:
+            return None, "❌ الرجاء إدخال صوت أو نص"
+        # إنشاء مجلد مؤقت
+        temp_dir = tempfile.mkdtemp()
+        # معالجة الصورة
+        image_path = os.path.join(temp_dir, "image.png")
+        if isinstance(image_file, str):
+            img = Image.open(image_file)
+        else:
+            img = Image.fromarray(image_file)
+        img.save(image_path)
+        # معالجة الصوت
+        if text_input and audio_file is None:
+            # تحويل النص إلى صوت
+            audio_path = os.path.join(temp_dir, "audio.wav")
+            try:
+                tts_model.tts_to_file(
+                    text=text_input,
+                    file_path=audio_path
+                )
+            except Exception as e:
+                return None, f"❌ خطأ في تحويل النص إلى صوت: {str(e)}"
+        else:
+            audio_path = audio_file
+        # تطبيق SadTalker
+        output_video = os.path.join(temp_dir, "output.mp4")
+        try:
+            # محاولة SadTalker أولاً
+            from sadtalker.inference import inference
+            result = inference(
+                source_image=image_path,
+                driven_audio=audio_path,
+                result_dir=temp_dir,
+                pose_style=int(emotional_intensity / 100 * 46),  # 0-46
+                still=stability > 50,
+                preprocess='crop',
+                expression_scale=lip_sync_accuracy / 100,
+            )
+            output_video = result
+        except Exception as e:
+            # استخدام Wav2Lip كبديل
+            try:
+                checkpoint_path = './Wav2Lip/checkpoints/wav2lip_gan.pth'
+                cmd = f"""
+                cd Wav2Lip && python inference.py \
+                    --checkpoint_path {checkpoint_path} \
+                    --face {image_path} \
+                    --audio {audio_path} \
+                    --outfile {output_video} \
+                    --pads 0 10 0 0 \
+                    --fps 25 \
+                    --resize_factor {stability / 100} \
+                    --nosmooth
+                """
+                subprocess.run(cmd, shell=True, check=True)
+            except Exception as e2:
+                return None, f"❌ خطأ في الإنتاج: {str(e2)}"
+        # تحسين الفيديو
+        enhanced_video = os.path.join(temp_dir, "enhanced.mp4")
+        enhance_video_quality(output_video, enhanced_video, lip_sync_accuracy)
+        return enhanced_video, "✅ تم الإنتاج بنجاح!"
+    except Exception as e:
+        return None, f"❌ خطأ: {str(e)}"
+def enhance_video_quality(input_video, output_video, quality_level):
+    """
+    تحسين جودة الفيديو
+    """
+    try:
+        # استخدام FFmpeg لتحسين الجودة
+        cmd = f"""
+        ffmpeg -i {input_video} \
+            -vf "unsharp=5:5:1.0:5:5:0.0" \
+            -c:v libx264 \
+            -preset slow \
+            -crf {int((100 - quality_level) / 5)} \
+            -c:a aac \
+            -b:a 192k \
+            {output_video} \
+            -y
+        """
+        subprocess.run(cmd, shell=True, check=True)
+        return True
+    except:
+        # إذا فشل، انسخ الملف الأ��لي
+        import shutil
+        shutil.copy(input_video, output_video)
+        return False
+# إنشاء الواجهة
+print("🎨 إنشاء الواجهة...")
+# قراءة HTML
+html_file = Path(__file__).parent / "index.html"
+if html_file.exists():
+    with open(html_file, 'r', encoding='utf-8') as f:
+        custom_html = f.read()
+else:
+    custom_html = """
+    <div style="text-align:center; padding:20px; background:linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius:15px;">
+        <h1 style="color:white; font-size:3em;">🎬 استوديو مزامنة الشفاه</h1>
+        <p style="color:white; font-size:1.2em;">⚡ Powered by Hugging Face AI</p>
+    </div>
+    """
+# Gradio Interface مع HTML مخصص
+with gr.Blocks(
+    theme=gr.themes.Soft(
+        primary_hue="purple",
+        secondary_hue="pink",
+    ),
+    css="""
+    .gradio-container {
+        max-width: 1200px !important;
+        margin: auto !important;
+    }
+    """,
+    title="استوديو مزامنة الشفاه"
+) as app:
+    # إضافة HTML المخصص
+    gr.HTML(custom_html)
+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown("### 🖼️ الصورة")
+            image_input = gr.Image(
+                label="ارفع صورة الوجه",
+                type="filepath",
+                height=400
+            )
+        with gr.Column(scale=1):
+            gr.Markdown("### 🎤 الصوت أو النص")
+            mode_selector = gr.Radio(
+                ["رفع ملف صوتي", "إدخال نص"],
+                value="رفع ملف صوتي",
+                label="اختر الطريقة"
+            )
+            audio_input = gr.Audio(
+                label="ارفع ملف صوتي",
+                type="filepath",
+                visible=True
+            )
+            text_input = gr.Textbox(
+                label="أو اكتب النص (سيتم تحويله لصوت)",
+                placeholder="اكتب النص هنا...",
+                lines=5,
+                visible=False
+            )
+    # الإعدادات المتقدمة
+    with gr.Accordion("⚙️ إعدادات متقدمة", open=False):
+        with gr.Row():
+            emotional_intensity = gr.Slider(
+                minimum=0,
+                maximum=100,
+                value=80,
+                step=1,
+                label="💫 التعبير العاطفي (Emotional Intensity)"
+            )
+            stability = gr.Slider(
+                minimum=0,
+                maximum=100,
+                value=90,
+                step=1,
+                label="🎯 ثبات الوجه (Face Stability)"
+            )
+            lip_sync_accuracy = gr.Slider(
+                minimum=0,
+                maximum=100,
+                value=95,
+                step=1,
+                label="💋 دقة مزامنة الشفاه (Lip Sync Precision)"
+            )
+    # زر الإنتاج
+    generate_btn = gr.Button(
+        "🎬 إنتاج الفيديو الآن",
+        variant="primary",
+        size="lg"
+    )
+    # النتائج
+    with gr.Row():
+        with gr.Column():
+            output_video = gr.Video(label="✨ الفيديو الناتج")
+            status_message = gr.Textbox(label="الحالة", interactive=False)
+    # معالجة تغيير الوضع
+    def toggle_mode(mode):
+        if mode == "رفع ملف صوتي":
+            return gr.update(visible=True), gr.update(visible=False)
+        else:
+            return gr.update(visible=False), gr.update(visible=True)
+    mode_selector.change(
+        fn=toggle_mode,
+        inputs=[mode_selector],
+        outputs=[audio_input, text_input]
+    )
+    # معالجة الإنتاج
+    generate_btn.click(
+        fn=process_lipsync,
+        inputs=[
+            image_input,
+            audio_input,
+            text_input,
+            emotional_intensity,
+            stability,
+            lip_sync_accuracy
+        ],
+        outputs=[output_video, status_message]
+    )
+    # دليل الاستخدام
+    gr.Markdown("""
+    ## 📚 كيفية الاستخدام
+    1. **ارفع صورة**: اختر صورة واضحة للوجه (يفضل بورتريه)
+    2. **أضف الصوت**: ارفع ملف صوتي أو اكتب نصاً
+    3. **اضبط الإعدادات**: (اختياري) عدّل الإعدادات المتقدمة
+    4. **اضغط إنتاج**: انتظر النتيجة المذهلة!
+    ---
+    ⚡ **Powered by**: Hugging Face AI
+    🎭 **التقنيات**: SadTalker, Wav2Lip, MMS-TTS-Arabic
+    💡 **نمط**: Hedra Emotional + HeyGen Stability
+    """)
+print("✅ التطبيق جاهز!")
+if __name__ == "__main__":
+    app.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False
+    )