Spaces:

mohmmed5787
/

samirmo

Build error

App Files Files Community

samirmo / app.py

mohmmed5787

Upload app.py

79bb5bd verified 3 months ago

raw

history blame contribute delete

9.91 kB

	import gradio as gr
	import torch
	import os
	import numpy as np
	from pathlib import Path
	import tempfile
	import cv2
	from PIL import Image
	import torchaudio
	import subprocess
	import warnings
	warnings.filterwarnings('ignore')

	# Load the optional model back-ends.
	# Every back-end is best-effort: a failure is reported on stdout and start-up
	# continues, so the UI can still come up without it.
	print("🔄 جاري تحميل النماذج...")

	# SadTalker: animated talking face (tried first when generating a video).
	try:
	    from sadtalker.test_audio2coeff import Audio2Coeff
	    from sadtalker.facerender.animate import AnimateFromCoeff
	    from sadtalker.test_audio2video import audio2video
	    print("✅ SadTalker جاهز")
	except Exception:
	    # `Exception` rather than a bare `except:` so Ctrl-C / SystemExit during
	    # a slow import still stop the process.
	    print("⚠️ SadTalker غير متوفر، سيتم استخدام Wav2Lip")

	# Wav2Lip: fallback back-end, imported from a local ./Wav2Lip checkout.
	try:
	    import sys
	    sys.path.append('./Wav2Lip')
	    from models import Wav2Lip
	    from inference import load_checkpoint
	    print("✅ Wav2Lip جاهز")
	except Exception:
	    print("⚠️ Wav2Lip غير متوفر")

	# Arabic text-to-speech.
	try:
	    from TTS.api import TTS
	    tts_model = TTS("tts_models/ara/fairseq/vits")
	    print("✅ Arabic TTS جاهز")
	except Exception:
	    # Keep the name defined so later code sees "no TTS" as None instead of
	    # hitting a NameError.
	    tts_model = None
	    print("⚠️ TTS غير متوفر")

	def _run_wav2lip(image_path, audio_path, output_video, stability):
	    """Render the lip-synced video with the Wav2Lip ``inference.py`` script.

	    The script is run from inside the ``Wav2Lip`` directory, so every path is
	    made absolute first. Raises ``subprocess.CalledProcessError`` or
	    ``OSError`` when the script fails or cannot be started.
	    """
	    import sys

	    # NOTE(review): upstream Wav2Lip declares --resize_factor as an integer
	    # down-scaling factor, so a fraction such as 0.9 is rejected by its
	    # argument parser. Until the slider is given a real mapping, pass an
	    # integer that is never below 1 (no down-scaling) - confirm upstream.
	    resize_factor = max(1, int(stability / 100))

	    cmd = [
	        sys.executable, "inference.py",
	        # Resolved against the app's working directory *before* the child
	        # changes into Wav2Lip/, otherwise it would point at
	        # Wav2Lip/Wav2Lip/checkpoints/.
	        "--checkpoint_path", os.path.abspath('./Wav2Lip/checkpoints/wav2lip_gan.pth'),
	        "--face", os.path.abspath(image_path),
	        "--audio", os.path.abspath(audio_path),
	        "--outfile", os.path.abspath(output_video),
	        "--pads", "0", "10", "0", "0",
	        "--fps", "25",
	        "--resize_factor", str(resize_factor),
	        "--nosmooth",
	    ]
	    # Argument list + cwd instead of a shell string: paths containing spaces
	    # or shell metacharacters are passed through untouched.
	    subprocess.run(cmd, cwd="Wav2Lip", check=True)


	def process_lipsync(
	    image_file,
	    audio_file=None,
	    text_input=None,
	    emotional_intensity=80,
	    stability=90,
	    lip_sync_accuracy=95
	):
	    """Generate a lip-synced talking-face video from an image plus audio or text.

	    Args:
	        image_file: Path to the face image, or an array accepted by
	            ``PIL.Image.fromarray``.
	        audio_file: Path to the driving audio. When given it takes precedence
	            over ``text_input``.
	        text_input: Text to synthesize with the module-level ``tts_model``
	            when no audio file is supplied. Whitespace-only text counts as
	            missing.
	        emotional_intensity: 0-100 slider value, scaled to SadTalker's
	            ``pose_style``.
	        stability: 0-100 slider value; values above 50 enable SadTalker's
	            ``still`` mode.
	        lip_sync_accuracy: 0-100 slider value, used as SadTalker's
	            ``expression_scale`` (divided by 100) and as the quality level of
	            the final re-encode.

	    Returns:
	        ``(video_path, status_message)``. ``video_path`` is ``None`` on any
	        failure; this function never raises.
	    """
	    import shutil

	    temp_dir = None
	    succeeded = False
	    try:
	        # Validate the inputs.
	        if image_file is None:
	            return None, "❌ الرجاء رفع صورة"

	        if isinstance(text_input, str):
	            text_input = text_input.strip()
	        if audio_file is None and not text_input:
	            return None, "❌ الرجاء إدخال صوت أو نص"

	        # Scratch directory for every intermediate and output file.
	        temp_dir = tempfile.mkdtemp()

	        # Normalize the image to a PNG on disk.
	        image_path = os.path.join(temp_dir, "image.png")
	        if isinstance(image_file, str):
	            img = Image.open(image_file)
	        else:
	            img = Image.fromarray(image_file)
	        img.save(image_path)

	        # Resolve the driving audio: uploaded file, or speech synthesized
	        # from the text.
	        if text_input and audio_file is None:
	            audio_path = os.path.join(temp_dir, "audio.wav")
	            try:
	                tts_model.tts_to_file(
	                    text=text_input,
	                    file_path=audio_path
	                )
	            except Exception as e:
	                return None, f"❌ خطأ في تحويل النص إلى صوت: {str(e)}"
	        else:
	            audio_path = audio_file

	        output_video = os.path.join(temp_dir, "output.mp4")

	        try:
	            # Try SadTalker first.
	            from sadtalker.inference import inference

	            output_video = inference(
	                source_image=image_path,
	                driven_audio=audio_path,
	                result_dir=temp_dir,
	                pose_style=int(emotional_intensity / 100 * 46),  # 0-46
	                still=stability > 50,
	                preprocess='crop',
	                expression_scale=lip_sync_accuracy / 100,
	            )
	        except Exception:
	            # SadTalker missing or failed: fall back to Wav2Lip.
	            try:
	                _run_wav2lip(image_path, audio_path, output_video, stability)
	            except Exception as e2:
	                return None, f"❌ خطأ في الإنتاج: {str(e2)}"

	        # Post-process the raw video.
	        enhanced_video = os.path.join(temp_dir, "enhanced.mp4")
	        enhance_video_quality(output_video, enhanced_video, lip_sync_accuracy)

	        succeeded = True
	        return enhanced_video, "✅ تم الإنتاج بنجاح!"

	    except Exception as e:
	        return None, f"❌ خطأ: {str(e)}"

	    finally:
	        # On success the directory must outlive this call because the
	        # returned video lives inside it; on failure nothing in it is needed.
	        if temp_dir is not None and not succeeded:
	            shutil.rmtree(temp_dir, ignore_errors=True)

	def enhance_video_quality(input_video, output_video, quality_level):
	    """Sharpen and re-encode a video with FFmpeg, falling back to a plain copy.

	    Args:
	        input_video: Path of the video to process.
	        output_video: Path the result is written to (overwritten if present).
	        quality_level: 0-100 value; higher means a lower x264 CRF.

	    Returns:
	        ``True`` when FFmpeg produced ``output_video``; ``False`` when FFmpeg
	        was unavailable or failed and ``input_video`` was copied instead.

	    Raises:
	        OSError: if the fallback copy itself fails (for example the input
	            file does not exist).
	    """
	    import shutil

	    try:
	        # Map the 0-100 quality onto a CRF and clamp it to 0-51 (the range
	        # 8-bit x264 accepts), so an out-of-range quality cannot yield an
	        # invalid -crf.
	        crf = max(0, min(51, int((100 - quality_level) / 5)))
	        cmd = [
	            "ffmpeg",
	            "-y",  # global option: goes before the inputs/outputs
	            "-i", str(input_video),
	            "-vf", "unsharp=5:5:1.0:5:5:0.0",
	            "-c:v", "libx264",
	            "-preset", "slow",
	            "-crf", str(crf),
	            "-c:a", "aac",
	            "-b:a", "192k",
	            str(output_video),
	        ]
	        # Argument list (no shell) so paths with spaces or metacharacters are
	        # safe; stdin is detached because FFmpeg otherwise reads it for
	        # interactive commands.
	        subprocess.run(cmd, check=True, stdin=subprocess.DEVNULL)
	        return True
	    except (subprocess.SubprocessError, OSError, TypeError, ValueError):
	        # Best-effort: if enhancement is impossible, hand back the original.
	        shutil.copy(input_video, output_video)
	        return False

	# Build the interface.
	print("🎨 إنشاء الواجهة...")

	# Page banner: start from the built-in markup, then prefer an index.html
	# that sits next to this script when one exists.
	custom_html = """
	<div style="text-align:center; padding:20px; background:linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius:15px;">
	<h1 style="color:white; font-size:3em;">🎬 استوديو مزامنة الشفاه</h1>
	<p style="color:white; font-size:1.2em;">⚡ Powered by Hugging Face AI</p>
	</div>
	"""
	html_file = Path(__file__).parent / "index.html"
	if html_file.exists():
	    custom_html = html_file.read_text(encoding='utf-8')

	# Gradio interface with the custom HTML banner.
	with gr.Blocks(
	    theme=gr.themes.Soft(
	        primary_hue="purple",
	        secondary_hue="pink",
	    ),
	    css="""
	.gradio-container {
	max-width: 1200px !important;
	margin: auto !important;
	}
	""",
	    title="استوديو مزامنة الشفاه"
	) as app:

	    # Custom banner.
	    gr.HTML(custom_html)

	    with gr.Row():
	        with gr.Column(scale=1):
	            gr.Markdown("### 🖼️ الصورة")
	            image_input = gr.Image(
	                label="ارفع صورة الوجه",
	                type="filepath",
	                height=400
	            )

	        with gr.Column(scale=1):
	            gr.Markdown("### 🎤 الصوت أو النص")

	            mode_selector = gr.Radio(
	                ["رفع ملف صوتي", "إدخال نص"],
	                value="رفع ملف صوتي",
	                label="اختر الطريقة"
	            )

	            audio_input = gr.Audio(
	                label="ارفع ملف صوتي",
	                type="filepath",
	                visible=True
	            )

	            text_input = gr.Textbox(
	                label="أو اكتب النص (سيتم تحويله لصوت)",
	                placeholder="اكتب النص هنا...",
	                lines=5,
	                visible=False
	            )

	    # Advanced settings.
	    with gr.Accordion("⚙️ إعدادات متقدمة", open=False):
	        with gr.Row():
	            emotional_intensity = gr.Slider(
	                minimum=0,
	                maximum=100,
	                value=80,
	                step=1,
	                label="💫 التعبير العاطفي (Emotional Intensity)"
	            )

	            stability = gr.Slider(
	                minimum=0,
	                maximum=100,
	                value=90,
	                step=1,
	                label="🎯 ثبات الوجه (Face Stability)"
	            )

	            lip_sync_accuracy = gr.Slider(
	                minimum=0,
	                maximum=100,
	                value=95,
	                step=1,
	                label="💋 دقة مزامنة الشفاه (Lip Sync Precision)"
	            )

	    # Generate button.
	    generate_btn = gr.Button(
	        "🎬 إنتاج الفيديو الآن",
	        variant="primary",
	        size="lg"
	    )

	    # Results.
	    with gr.Row():
	        with gr.Column():
	            output_video = gr.Video(label="✨ الفيديو الناتج")
	            status_message = gr.Textbox(label="الحالة", interactive=False)

	    def toggle_mode(mode):
	        """Show the input widget that matches the selected mode, hide the other."""
	        if mode == "رفع ملف صوتي":
	            return gr.update(visible=True), gr.update(visible=False)
	        else:
	            return gr.update(visible=False), gr.update(visible=True)

	    mode_selector.change(
	        fn=toggle_mode,
	        inputs=[mode_selector],
	        outputs=[audio_input, text_input]
	    )

	    def run_generation(mode, image, audio, text, intensity, face_stability, accuracy):
	        """Forward only the input that belongs to the selected mode.

	        Hiding a widget does not clear its value, so without this filter an
	        audio file uploaded earlier would still override text typed in text
	        mode (process_lipsync prefers audio whenever it is present).
	        """
	        if mode == "رفع ملف صوتي":
	            text = None
	        else:
	            audio = None
	        return process_lipsync(
	            image, audio, text, intensity, face_stability, accuracy
	        )

	    # Wire the generate button.
	    generate_btn.click(
	        fn=run_generation,
	        inputs=[
	            mode_selector,
	            image_input,
	            audio_input,
	            text_input,
	            emotional_intensity,
	            stability,
	            lip_sync_accuracy
	        ],
	        outputs=[output_video, status_message]
	    )

	    # Usage guide.
	    gr.Markdown("""
	## 📚 كيفية الاستخدام

	1. ارفع صورة: اختر صورة واضحة للوجه (يفضل بورتريه)
	2. أضف الصوت: ارفع ملف صوتي أو اكتب نصاً
	3. اضبط الإعدادات: (اختياري) عدّل الإعدادات المتقدمة
	4. اضغط إنتاج: انتظر النتيجة المذهلة!

	---

	⚡ Powered by: Hugging Face AI
	🎭 التقنيات: SadTalker, Wav2Lip, MMS-TTS-Arabic
	💡 نمط: Hedra Emotional + HeyGen Stability
	""")

	print("✅ التطبيق جاهز!")

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    # Listen on every interface at port 7860, without a public share link.
	    launch_options = {
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "share": False,
	    }
	    app.launch(**launch_options)