Spaces:

Mustafa-albakkar
/

Videoagent

Running

App Files Files Community

Mustafa-albakkar committed on Jan 31

Commit

95f614f

verified ·

1 Parent(s): dfc939f

Upload 4 files

Browse files

Files changed (4) hide show

app.py +577 -0
kokoro_engine.py +207 -0
packages.txt +1 -0
requirements.txt +9 -0

app.py ADDED Viewed

	@@ -0,0 +1,577 @@

+# === FILE: app.py (Video Agent - Kokoro TTS with Voice Rotation) ===
+# Updated version - Fixed for Hugging Face Spaces
+import os
+import io
+import json
+import base64
+import logging
+from typing import Optional, Dict, Any, Tuple, List
+from datetime import datetime
+import tempfile
+import gradio as gr
+import numpy as np
+from PIL import Image
+# استيراد مكتبات معالجة الصوت والفيديو
+from kokoro_engine import KokoroEngine
+KOKORO_AVAILABLE = True
+try:
+    from moviepy.editor import ImageClip, AudioFileClip
+    MOVIEPY_AVAILABLE = True
+except ImportError:
+    MOVIEPY_AVAILABLE = False
+    logging.warning("⚠️ MoviePy not available. Install with: pip install moviepy")
# ---------------- Logging Setup ----------------
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
log = logging.getLogger("video_agent")

# ---------------- Environment Variables ----------------
# Each setting can be overridden through the environment; the second
# argument is the value used when the variable is not set.
VIDEO_HISTORY_DIR = os.getenv("VIDEO_HISTORY_DIR", "video_history")
MAX_HISTORY_COUNT = int(os.getenv("MAX_HISTORY_COUNT", "10"))
VOICE_STATE_FILE = os.getenv("VOICE_STATE_FILE", "voice_rotation_state.json")

# ---------------- Kokoro Voices List ----------------
# Order matters: the rotation manager walks this list front to back.
KOKORO_VOICES = [
    # British Female
    "bf_alice", "bf_emma", "bf_isabella", "bf_lily",
    # American Female
    "af_alloy", "af_aoede", "af_bella", "af_heart",
    "af_jessica", "af_kore", "af_nicole", "af_nova",
    "af_river", "af_sarah", "af_sky",
    # British Male
    "bm_daniel", "bm_fable", "bm_george", "bm_lewis",
    # American Male
    "am_adam", "am_echo", "am_eric", "am_fenrir",
    "am_liam", "am_michael", "am_onyx", "am_puck",
]

# ---------------- Initialization Check ----------------
IS_SERVICE_READY = True
+# ---------------- Voice Rotation Manager ----------------
class VoiceRotationManager:
    """Hand out voices from a fixed list in round-robin order.

    The position in the list is persisted to a small JSON file
    (``{"current_voice_index": <int>}``) so the rotation can continue
    where it left off after a restart.
    """

    # Same logger name the rest of the application logs under.
    _log = logging.getLogger("video_agent")

    def __init__(self, state_file: str, voices: List[str]):
        """Create the manager and restore any persisted position.

        Args:
            state_file: Path of the JSON file holding the rotation index.
            voices: Ordered list of voice names to rotate through.
        """
        self.state_file = state_file
        self.voices = voices
        self.current_index = 0
        self.load_state()

    def load_state(self):
        """Load the rotation index from ``state_file``.

        Any problem (missing file, unreadable JSON, unexpected structure,
        or an index that is not an integer inside ``range(len(voices))``)
        leaves the rotation at index 0 instead of raising.
        """
        if not os.path.exists(self.state_file):
            self._log.info("No voice rotation state file found, starting from index 0")
            return
        try:
            with open(self.state_file, "r", encoding="utf-8") as f:
                data = json.load(f)
            index = data.get("current_voice_index", 0)
        except Exception as e:
            self._log.warning(f"Could not load voice rotation state: {e}")
            self.current_index = 0
            return
        # bool is a subclass of int, so exclude it explicitly; negative and
        # fractional values would otherwise slip past a plain upper-bound check.
        is_valid = (
            isinstance(index, int)
            and not isinstance(index, bool)
            and 0 <= index < len(self.voices)
        )
        if not is_valid:
            self._log.warning(f"Ignoring invalid voice rotation index: {index!r}")
            index = 0
        self.current_index = index
        self._log.info(f"Voice rotation state loaded: index={self.current_index}")

    def save_state(self):
        """Write the current index to ``state_file``; failures are logged, not raised."""
        try:
            with open(self.state_file, "w", encoding="utf-8") as f:
                json.dump({"current_voice_index": self.current_index}, f)
            self._log.info(f"Voice rotation state saved: index={self.current_index}")
        except Exception as e:
            self._log.error(f"Failed to save voice rotation state: {e}")

    def get_next_voice(self) -> str:
        """Return the current voice, then advance (wrapping) and persist.

        Raises:
            IndexError: If the voice list is empty.
        """
        voice = self.voices[self.current_index]
        self._log.info(f"Selected voice: {voice} (index: {self.current_index}/{len(self.voices)-1})")
        self.current_index = (self.current_index + 1) % len(self.voices)
        self.save_state()
        return voice

    def get_current_voice(self) -> str:
        """Return the voice at the current index without advancing."""
        return self.voices[self.current_index]

    def reset(self):
        """Move the rotation back to the first voice and persist that."""
        self.current_index = 0
        self.save_state()
        self._log.info("Voice rotation reset to index 0")
+# ---------------- Video Agent Class ----------------
class VideoAgent:
    """Build a narrated MP4 from a still image and a quote.

    The quote is turned into a spoken sentence, synthesized with the
    Kokoro engine (voice chosen explicitly or via the rotation manager),
    and combined with the image by MoviePy. Finished videos are written
    to ``VIDEO_HISTORY_DIR`` and the most recent ``MAX_HISTORY_COUNT``
    results are kept in an in-memory history list.
    """

    # Values (compared case-insensitively, after trimming) that mean
    # "no real author / culture was supplied".
    _PLACEHOLDER_AUTHORS = frozenset(
        {"unknown", "unknown speaker", "manual post", "custom", ""}
    )
    _PLACEHOLDER_CULTURES = frozenset(
        {"diverse", "diverse culture", "custom", "unknown", ""}
    )

    def __init__(self):
        """Set up the history directory, voice rotation and TTS engine.

        A TTS initialization failure is logged and leaves the agent in a
        degraded state (``tts is None``, ``kokoro_available is False``)
        rather than raising.
        """
        self.log = logging.getLogger("video_agent")
        self.history = []
        self._setup_history_dir()
        self.voice_manager = VoiceRotationManager(VOICE_STATE_FILE, KOKORO_VOICES)
        self.tts = None
        self.kokoro_available = False
        self.log.info("Initializing Kokoro TTS (ONNX - NeuML model)...")
        try:
            self.tts = KokoroEngine()
            self.kokoro_available = True
            self.log.info("✅ Kokoro-ONNX initialized successfully (NeuML model)")
        except Exception:
            self.tts = None
            self.kokoro_available = False
            # .exception() records the message together with the traceback.
            self.log.exception("❌ Kokoro TTS initialization failed")

    def _setup_history_dir(self):
        """Create the video history directory if it does not exist yet."""
        if not os.path.isdir(VIDEO_HISTORY_DIR):
            # exist_ok guards against another process creating it in between.
            os.makedirs(VIDEO_HISTORY_DIR, exist_ok=True)
            self.log.info(f"Created video history directory: {VIDEO_HISTORY_DIR}")

    @staticmethod
    def _slugify_quote(quote: str, max_length: int = 30) -> str:
        """Return a filesystem-safe fragment derived from ``quote``.

        Every run of characters other than word characters and ``-`` is
        collapsed to a single underscore; the result is cut to
        ``max_length`` characters. Falls back to ``"quote"`` when nothing
        usable remains.
        """
        import re

        slug = re.sub(r"[^\w\-]+", "_", quote or "")[:max_length].strip("_")
        return slug or "quote"

    def build_speech_text(self, quote: str, author: str, culture: str) -> str:
        """Compose the sentence to be spoken.

        Uses the author when one is given, otherwise the culture, otherwise
        a generic introduction. Placeholder values such as "Unknown" or
        "Diverse" (any case, surrounding whitespace ignored) count as
        "not given".
        """
        quote_clean = quote.strip().strip('"\'')
        author_clean = (author or "").strip()
        culture_clean = (culture or "").strip()
        if author_clean.lower() not in self._PLACEHOLDER_AUTHORS:
            return f"{author_clean} once said: {quote_clean}"
        if culture_clean.lower() not in self._PLACEHOLDER_CULTURES:
            return f"In {culture_clean} culture, they say: {quote_clean}"
        return f"There is an old saying: {quote_clean}"

    def generate_audio(self, text: str, output_path: str, voice: Optional[str] = None, speed: float = 1.0) -> Tuple[bool, str]:
        """Synthesize ``text`` to a 16-bit WAV file at ``output_path``.

        Args:
            text: Sentence to speak.
            output_path: Destination WAV path.
            voice: Voice name; ``None`` or an unknown name takes the next
                voice from the rotation.
            speed: Speech speed passed to the engine.

        Returns:
            ``(True, voice_used)`` on success, ``(False, "")`` on any failure.
        """
        if not self.kokoro_available or self.tts is None:
            self.log.error("Kokoro TTS not available.")
            return False, ""
        try:
            if voice is None:
                voice = self.voice_manager.get_next_voice()
            elif voice not in KOKORO_VOICES:
                self.log.warning(f"Voice '{voice}' not found, using next voice from rotation")
                voice = self.voice_manager.get_next_voice()
            self.log.info(f"Generating audio with voice: {voice}")
            self.log.info(f"Text: {text[:50]}...")
            self.tts.set_voice(voice)
            audio_data = self.tts.synthesize(text, speed=speed)
            import scipy.io.wavfile as wavfile
            sample_rate = 24000
            # Scale the float samples to the int16 range, clipping overshoot.
            audio_int16 = np.clip(audio_data * 32767, -32768, 32767).astype(np.int16)
            wavfile.write(output_path, sample_rate, audio_int16)
            self.log.info(f"✅ Audio generated successfully: {output_path}")
            return True, voice
        except Exception as e:
            self.log.exception(f"Audio generation failed: {e}")
            return False, ""

    def create_video(self, image_bytes: bytes, audio_path: str, output_path: str) -> bool:
        """Render a still-image video whose length matches the audio track.

        Args:
            image_bytes: Encoded image data (any format Pillow can open).
            audio_path: Path of the narration audio file.
            output_path: Destination MP4 path.

        Returns:
            ``True`` on success, ``False`` on any failure (which is logged).
        """
        if not MOVIEPY_AVAILABLE:
            self.log.error("MoviePy not available.")
            return False
        audio = None
        video = None
        try:
            # Normalize palette/grayscale/alpha/CMYK inputs to plain RGB frames.
            image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
            img_array = np.array(image)
            if not os.path.exists(audio_path):
                raise FileNotFoundError(f"Audio file not found: {audio_path}")
            audio = AudioFileClip(audio_path)
            audio_duration = audio.duration
            self.log.info(f"Creating video with duration: {audio_duration:.2f}s")
            image_clip = ImageClip(img_array, duration=audio_duration)
            video = image_clip.set_audio(audio)
            video.write_videofile(
                output_path,
                fps=24,
                codec='libx264',
                audio_codec='aac',
                # Per-output temp name so concurrent renders cannot clobber
                # each other's intermediate audio file.
                temp_audiofile=f"{output_path}.temp-audio.m4a",
                remove_temp=True,
                verbose=False,
                logger=None
            )
            self.log.info(f"✅ Video created successfully: {output_path}")
            return True
        except Exception as e:
            self.log.exception(f"Video creation failed: {e}")
            return False
        finally:
            # Release clip resources on the failure path as well.
            for clip in (audio, video):
                if clip is not None:
                    try:
                        clip.close()
                    except Exception as close_err:
                        self.log.debug(f"Ignoring error while closing clip: {close_err}")

    def process_request(
        self,
        image_base64: str,
        quote: str,
        author: str,
        culture: str,
        voice: Optional[str] = None,
        speed: float = 1.0
    ) -> Dict[str, Any]:
        """Run the full pipeline: decode image, synthesize speech, render video.

        Args:
            image_base64: Base64-encoded image; a ``data:...;base64,`` prefix
                is tolerated and stripped.
            quote: Quote text.
            author: Author name (may be a placeholder).
            culture: Culture name (may be a placeholder).
            voice: Optional explicit voice; ``None`` uses the rotation.
            speed: Speech speed.

        Returns:
            Dict with ``video_base64``, ``video_path``, ``speech_text``,
            ``voice_used``, ``status`` and ``message``.

        Raises:
            RuntimeError: If TTS is unavailable or any pipeline step fails.
        """
        if not self.kokoro_available or self.tts is None:
            raise RuntimeError("Kokoro TTS not available")
        now = datetime.now()
        timestamp = now.strftime("%Y%m%d_%H%M%S")
        safe_quote = self._slugify_quote(quote)
        # mkstemp yields a name no other request can be using.
        fd, audio_path = tempfile.mkstemp(prefix=f"audio_{timestamp}_", suffix=".wav")
        os.close(fd)
        # Microseconds keep two videos made within the same second apart.
        video_filename = f"{timestamp}_{now.strftime('%f')}_{safe_quote}.mp4"
        video_path = os.path.join(VIDEO_HISTORY_DIR, video_filename)
        try:
            if image_base64.startswith("data:") and "," in image_base64:
                # Drop a data-URL header; b64decode would otherwise silently
                # discard its punctuation and decode garbage.
                image_base64 = image_base64.split(",", 1)[1]
            image_bytes = base64.b64decode(image_base64)
            speech_text = self.build_speech_text(quote, author, culture)
            success, voice_used = self.generate_audio(speech_text, audio_path, voice, speed)
            if not success:
                raise RuntimeError("Failed to generate audio.")
            if not self.create_video(image_bytes, audio_path, video_path):
                raise RuntimeError("Failed to create video.")
            with open(video_path, "rb") as f:
                video_bytes = f.read()
            video_base64 = base64.b64encode(video_bytes).decode('utf-8')
            entry = {
                "timestamp": timestamp,
                "quote": quote,
                "author": author,
                "culture": culture,
                "voice": voice_used,
                "video_path": video_path,
                "speech_text": speech_text,
                "duration": self._get_video_duration(video_path)
            }
            # Newest first, capped at MAX_HISTORY_COUNT entries.
            self.history.insert(0, entry)
            self.history = self.history[:MAX_HISTORY_COUNT]
            self.log.info(f"✅ Video processing completed: {video_filename}")
            return {
                "video_base64": video_base64,
                "video_path": video_path,
                "speech_text": speech_text,
                "voice_used": voice_used,
                "status": "success",
                "message": f"Video generated successfully with voice: {voice_used}"
            }
        except Exception as e:
            self.log.error(f"Video processing failed: {e}")
            raise RuntimeError(f"Video generation failed: {str(e)}") from e
        finally:
            # The intermediate WAV is never needed after this call.
            try:
                os.remove(audio_path)
            except OSError:
                pass

    def _get_video_duration(self, video_path: str) -> float:
        """Return the video's duration in seconds, or 0.0 if it cannot be read."""
        if not MOVIEPY_AVAILABLE:
            return 0.0
        try:
            from moviepy.editor import VideoFileClip
            clip = VideoFileClip(video_path)
            try:
                return clip.duration
            finally:
                clip.close()
        except Exception as e:
            self.log.warning(f"Could not get video duration: {e}")
        return 0.0

    def get_history(self) -> List[Dict[str, Any]]:
        """Return the in-memory history list (newest entry first)."""
        return self.history

    def get_voice_rotation_info(self) -> Dict[str, Any]:
        """Return the rotation's current voice and index plus the full voice list."""
        return {
            "current_voice": self.voice_manager.get_current_voice(),
            "current_index": self.voice_manager.current_index,
            "total_voices": len(KOKORO_VOICES),
            "all_voices": KOKORO_VOICES
        }
+# ---------------- Global Agent Instance ----------------
agent = VideoAgent()

# Banner text shown in the UI: reflects the readiness flag and whether the
# TTS engine came up. The agent is only inspected when the service is ready.
if not IS_SERVICE_READY:
    STATUS_MESSAGE = "❌ Service not ready"
elif agent.tts is None:
    STATUS_MESSAGE = "⚠️ Video Agent ready but Kokoro-ONNX failed to initialize."
else:
    current_voice = agent.voice_manager.get_current_voice()
    STATUS_MESSAGE = f"✅ Video Agent ready with Kokoro-ONNX. Next voice: {current_voice}"
+# ---------------- Gradio Functions ----------------
def gradio_generate_video(
    image_input,
    quote: str,
    author: str,
    culture: str,
    voice_override: str,
    speed: float
) -> Tuple[Optional[str], str, str]:
    """Gradio callback for the "Generate Video" tab.

    Args:
        image_input: Uploaded image, either a file path or an object with a
            PIL-style ``save(fp, format=...)`` method.
        quote: Quote text (required).
        author: Author name; empty means "Unknown".
        culture: Culture name; empty means "Diverse".
        voice_override: Voice name, or "Auto"/empty to use the rotation.
        speed: Speech speed.

    Returns:
        ``(video_path, status_text, voice_rotation_text)``. On any failure
        ``video_path`` is ``None`` and ``status_text`` carries the error.
    """
    if not IS_SERVICE_READY:
        return None, f"❌ Service not ready: {STATUS_MESSAGE}", ""
    # Test for absence explicitly instead of relying on the truthiness of
    # an arbitrary image object.
    if image_input is None or (isinstance(image_input, str) and not image_input):
        return None, "❌ Please provide an image.", ""
    if not quote or not quote.strip():
        return None, "❌ Please provide a quote.", ""
    try:
        if isinstance(image_input, str):
            with open(image_input, "rb") as f:
                image_bytes = f.read()
        else:
            buffered = io.BytesIO()
            # PNG rather than JPEG: the JPEG writer rejects images with an
            # alpha channel or a palette, so such uploads used to fail here.
            image_input.save(buffered, format="PNG")
            image_bytes = buffered.getvalue()
        image_base64 = base64.b64encode(image_bytes).decode('utf-8')
        voice = None if voice_override == "Auto" or not voice_override else voice_override
        result = agent.process_request(
            image_base64=image_base64,
            quote=quote,
            author=author or "Unknown",
            culture=culture or "Diverse",
            voice=voice,
            speed=speed
        )
        video_path = result["video_path"]
        voice_used = result["voice_used"]
        # Read after the request so it reflects the already-advanced rotation.
        voice_info = agent.get_voice_rotation_info()
        next_voice = voice_info["current_voice"]
        current_index = voice_info["current_index"]
        total_voices = voice_info["total_voices"]
        status_msg = (
            f"✅ {result['message']}\n\n"
            f"**Speech:** {result['speech_text']}\n"
            f"**Duration:** {result.get('duration', 0):.2f}s"
        )
        voice_rotation_msg = (
            f"**Voice Used:** {voice_used}\n"
            f"**Next Voice:** {next_voice}\n"
            f"**Progress:** {current_index}/{total_voices}"
        )
        return video_path, status_msg, voice_rotation_msg
    except Exception as e:
        # .exception() logs the message together with the traceback.
        log.exception(f"Video generation failed: {e}")
        return None, f"❌ Error: {str(e)}", ""
def gradio_api_endpoint(
    image_base64: str,
    quote: str,
    author: str,
    culture: str,
    voice: Optional[str] = None,
    speed: float = 1.0
) -> Dict[str, Any]:
    """API entry point: validate the inputs and delegate to the agent.

    Args:
        image_base64: Base64-encoded image (required).
        quote: Quote text (required).
        author: Author name; ``None`` is treated as empty.
        culture: Culture name; ``None`` is treated as empty.
        voice: Optional explicit voice; ``None`` uses the rotation.
        speed: Speech speed.

    Returns:
        The result dict produced by ``agent.process_request``.

    Raises:
        RuntimeError: If the service is not ready, a required input is
            missing, or video generation fails.
    """
    if not IS_SERVICE_READY:
        raise RuntimeError(f"Service not ready: {STATUS_MESSAGE}")
    # Reject unusable input before any speech is synthesized, so a bad
    # request neither wastes a TTS run nor consumes a voice from the rotation.
    if not image_base64 or not image_base64.strip():
        raise RuntimeError("image_base64 is required")
    if not quote or not quote.strip():
        raise RuntimeError("quote is required")
    log.info(f"API request received for quote: {quote[:50]}...")
    return agent.process_request(
        image_base64=image_base64,
        quote=quote,
        author=author or "",
        culture=culture or "",
        voice=voice,
        speed=speed
    )
def format_history_for_gallery() -> List[Tuple[str, str]]:
    """Return ``(video_path, caption)`` pairs for the history gallery.

    Entries whose video file no longer exists on disk are skipped. The
    caption shows at most the first 50 characters of the quote; an ellipsis
    is added only when the quote was actually shortened.
    """
    formatted = []
    for entry in agent.get_history():
        video_path = entry.get("video_path")
        if not video_path or not os.path.exists(video_path):
            continue
        quote = entry["quote"]
        preview = quote if len(quote) <= 50 else quote[:50] + "..."
        caption = (
            f'"{preview}" - {entry["author"]}\n'
            f'Voice: {entry.get("voice", "N/A")} | Duration: {entry.get("duration", 0):.1f}s'
        )
        formatted.append((video_path, caption))
    return formatted
def gradio_refresh_history():
    """Gradio callback: rebuild the gallery items from the agent's history."""
    gallery_items = format_history_for_gallery()
    return gallery_items
def gradio_reset_voice_rotation():
    """Gradio callback: restart the rotation and report the upcoming voice."""
    manager = agent.voice_manager
    manager.reset()
    upcoming = agent.get_voice_rotation_info()["current_voice"]
    return f"✅ Voice rotation reset! Next voice: {upcoming}"
def gradio_get_voice_info():
    """Gradio callback: describe the rotation's current voice, index and size."""
    info = agent.get_voice_rotation_info()
    lines = [
        f"**Current Voice:** {info['current_voice']}",
        f"**Index:** {info['current_index']}/{info['total_voices']}",
        f"**Total Voices:** {len(info['all_voices'])}",
    ]
    return "\n".join(lines)
+# ---------------- Gradio Interface ----------------
# Component creation order inside each container determines the on-screen
# layout, so the statements below are kept in display order.
with gr.Blocks(title="Video Agent - Kokoro TTS") as demo:
    gr.Markdown("# 🎬 Video Agent - Kokoro-ONNX with Voice Rotation")
    gr.Markdown(f"**Status:** {STATUS_MESSAGE}")
    if IS_SERVICE_READY:
        gr.Markdown(f"**Voices:** {len(KOKORO_VOICES)} available | **Rotation:** Automatic | **Engine:** Kokoro-ONNX (stable)")
    gr.Markdown("---")
    # --- Tab 1: interactive generation ---
    with gr.Tab("Generate Video"):
        with gr.Row():
            with gr.Column(scale=1):
                image_input = gr.Image(label="Upload Image", type="pil")
                quote_input = gr.Textbox(label="Quote", lines=3)
                author_input = gr.Textbox(label="Author")
                culture_input = gr.Textbox(label="Culture")
                voice_dropdown = gr.Dropdown(
                    choices=["Auto"] + KOKORO_VOICES,
                    value="Auto",
                    label="Voice (Auto = rotation)"
                )
                speed_slider = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    value=1.0,
                    step=0.1,
                    label="Speech Speed"
                )
                generate_btn = gr.Button("🎬 Generate", variant="primary")
                status_output = gr.Textbox(label="Status", lines=4)
                voice_rotation_output = gr.Textbox(label="Voice Info", lines=3)
            with gr.Column(scale=1):
                video_output = gr.Video(label="Generated Video")
        generate_btn.click(
            fn=gradio_generate_video,
            inputs=[image_input, quote_input, author_input, culture_input, voice_dropdown, speed_slider],
            outputs=[video_output, status_output, voice_rotation_output]
        )
    # --- Tab 2: recently generated videos ---
    with gr.Tab("History"):
        refresh_btn = gr.Button("🔄 Refresh")
        history_gallery = gr.Gallery(label="Recent Videos", columns=2)
        refresh_btn.click(fn=gradio_refresh_history, outputs=[history_gallery])
    # --- Tab 3: inspect / reset the voice rotation ---
    with gr.Tab("Voice Management"):
        with gr.Row():
            voice_info_btn = gr.Button("📊 Get Info")
            reset_btn = gr.Button("🔄 Reset Rotation")
        voice_mgmt_output = gr.Textbox(label="Voice Info", lines=5)
        voice_info_btn.click(fn=gradio_get_voice_info, outputs=[voice_mgmt_output])
        reset_btn.click(fn=gradio_reset_voice_rotation, outputs=[voice_mgmt_output])
        gr.Markdown(f"### {len(KOKORO_VOICES)} Voices Available")
        # Group sizes are derived from KOKORO_VOICES so the headings cannot
        # drift out of sync when the voice list is edited.
        for group_label, group_prefix in (
            ("British Female", "bf_"),
            ("American Female", "af_"),
            ("British Male", "bm_"),
            ("American Male", "am_"),
        ):
            group_voices = [v for v in KOKORO_VOICES if v.startswith(group_prefix)]
            gr.Markdown(f"**{group_label} ({len(group_voices)}):** " + ", ".join(group_voices))
    # --- Tab 4: manual test form that also registers the named API route ---
    with gr.Tab("API"):
        api_image = gr.Textbox(label="Image (Base64)", lines=2)
        api_quote = gr.Textbox(label="Quote")
        api_author = gr.Textbox(label="Author")
        api_culture = gr.Textbox(label="Culture")
        api_voice = gr.Dropdown(choices=["None"] + KOKORO_VOICES, value="None")
        api_speed = gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speed")
        api_output = gr.JSON(label="Response")
        gr.Button("Test API").click(
            # The dropdown's "None" entry is a string; map it to a real None
            # so the endpoint falls back to the voice rotation.
            fn=lambda img, q, a, c, v, s: gradio_api_endpoint(img, q, a, c, None if v == "None" else v, s),
            inputs=[api_image, api_quote, api_author, api_culture, api_voice, api_speed],
            outputs=[api_output],
            api_name="generate_video"
        )
if __name__ == "__main__":
    log.info(f"Starting Video Agent with Kokoro-ONNX ({len(KOKORO_VOICES)} voices)...")
    # The PORT environment variable overrides the default of 7860.
    PORT = int(os.getenv("PORT", "7860"))
    # Bind on all interfaces.
    demo.launch(
        server_name="0.0.0.0",
        server_port=PORT,
    )

kokoro_engine.py ADDED Viewed

	@@ -0,0 +1,207 @@

+"""
+Kokoro TTS Engine using ONNX
+Simple, stable, works on Python 3.9+
+Uses NeuML/kokoro-base-onnx model
+"""
+import os
+import logging
+import json
+import numpy as np
+from huggingface_hub import hf_hub_download
+log = logging.getLogger("kokoro_engine")
class KokoroEngine:
    """Kokoro text-to-speech engine running an ONNX model via onnxruntime.

    The model file and ``voices.json`` are fetched from the
    ``NeuML/kokoro-base-onnx`` repository with ``hf_hub_download``. Text is
    tokenized with ``ttstokenizer`` when available.
    """

    # Longest token sequence accepted before the two padding tokens are added.
    MAX_TOKENS = 510

    def __init__(self, voice: str = "af_alloy", sample_rate: int = 24000):
        """Download the model files and create the inference session.

        Args:
            voice: Default voice name.
            sample_rate: Sample rate used when reporting audio duration.

        Raises:
            Exception: Whatever download, session creation or tokenizer
                setup raised (logged first, then re-raised).
        """
        self.sample_rate = sample_rate
        self.voice = voice
        # Kept in sync by set_voice(); initialised here so it always exists.
        self.voice_prefix = voice.split('_')[0]
        self.session = None
        self.voices_data = None
        self.tokenizer = None
        try:
            self._download_and_setup()
            log.info(f"✅ Kokoro-ONNX initialized successfully")
        except Exception as e:
            log.error(f"❌ Failed to initialize Kokoro-ONNX: {e}")
            raise

    def _download_and_setup(self):
        """Fetch model and voice files, then build the session and tokenizer."""
        try:
            model_path = hf_hub_download(
                repo_id="NeuML/kokoro-base-onnx",
                filename="model.onnx",
                cache_dir="./models"
            )
            log.info(f"Model downloaded: {model_path}")
            voices_json_path = hf_hub_download(
                repo_id="NeuML/kokoro-base-onnx",
                filename="voices.json",
                cache_dir="./models"
            )
            log.info(f"Voices file downloaded: {voices_json_path}")
            with open(voices_json_path, 'r', encoding='utf-8') as f:
                self.voices_data = json.load(f)
            import onnxruntime
            self.session = onnxruntime.InferenceSession(
                model_path,
                providers=['CPUExecutionProvider']
            )
            self._setup_tokenizer()
        except Exception as e:
            log.error(f"Failed to download/setup model: {e}")
            raise

    def _setup_tokenizer(self):
        """Pick a tokenizer: ``ttstokenizer`` first, ``misaki`` as fallback.

        Raises:
            RuntimeError: If neither package can be imported.
        """
        try:
            from ttstokenizer import IPATokenizer
            self.tokenizer = IPATokenizer()
            log.info("Using ttstokenizer for tokenization")
        except ImportError:
            try:
                # NOTE(review): confirm that misaki exposes this callable and
                # that it yields integer token ids; _text_to_tokens requires
                # values convertible with int().
                from misaki.g2p import grapheme_to_phoneme
                self.tokenizer = grapheme_to_phoneme
                log.info("Using misaki for tokenization")
            except ImportError:
                log.error("No tokenizer available. Install ttstokenizer or misaki.")
                raise RuntimeError("Tokenizer not available")

    def _text_to_tokens(self, text: str) -> list:
        """Convert ``text`` into a plain Python list of integer token ids.

        The tokenizer's result is always materialised as a ``list`` of
        ``int``. If it were left as a NumPy array, ``[0] + tokens + [0]``
        would perform element-wise addition instead of adding padding.
        When no callable tokenizer is set, the UTF-8 byte values of the text
        are used.
        """
        try:
            if callable(self.tokenizer):
                tokens = self.tokenizer(text)
            else:
                tokens = text.encode('utf-8')
            return [int(t) for t in tokens]
        except Exception as e:
            log.error(f"Tokenization failed: {e}")
            raise

    def _resolve_voice_key(self, voice):
        """Return the ``voices_data`` key to use for ``voice``, or ``None``.

        An entry stored under the full voice name wins; otherwise the part
        before the first underscore (e.g. ``"bf"`` for ``"bf_alice"``) is
        tried.
        """
        if not self.voices_data or not voice:
            return None
        if voice in self.voices_data:
            return voice
        prefix = voice.split('_')[0]
        return prefix if prefix in self.voices_data else None

    def set_voice(self, voice: str):
        """Switch to ``voice`` if a style entry can be found for it.

        An unknown voice is logged and the current voice is kept.
        """
        if self._resolve_voice_key(voice) is None:
            log.warning(f"Voice {voice} not found, keeping current voice")
            return
        self.voice = voice
        self.voice_prefix = voice.split('_')[0]
        log.debug(f"Voice changed to: {voice} (prefix: {self.voice_prefix})")

    def synthesize(self, text: str, speed: float = 1.0) -> np.ndarray:
        """Synthesize ``text`` and return the audio as a flat float32 array.

        Args:
            text: Text to speak. Token sequences longer than ``MAX_TOKENS``
                are truncated with a warning.
            speed: Speaking speed (1.0 = normal); must be positive.

        Raises:
            RuntimeError: If the engine has no inference session.
            ValueError: If ``speed`` is not positive.
            Exception: Any tokenizer or model error (logged, then re-raised).
        """
        if self.session is None:
            raise RuntimeError("Kokoro engine not initialized")
        if not speed or speed <= 0:
            raise ValueError(f"speed must be positive, got {speed!r}")
        try:
            tokens = self._text_to_tokens(text)
            if len(tokens) > self.MAX_TOKENS:
                log.warning(f"Text too long ({len(tokens)} tokens), truncating to 510")
                tokens = tokens[:self.MAX_TOKENS]
            # Surround the sequence with padding token 0 on both sides.
            tokens_input = np.array([[0, *tokens, 0]], dtype=np.int64)
            # Fall back to the 'af' entry when the current voice has none.
            voice_key = self._resolve_voice_key(self.voice) or 'af'
            voice_array = np.array(self.voices_data[voice_key], dtype=np.float32)
            # The style row is selected by token count. Presumably the table
            # has one row per possible length (TODO confirm against
            # voices.json); clamping keeps a maximum-length input from
            # indexing past the last row.
            style_index = min(len(tokens), len(voice_array) - 1)
            style = voice_array[style_index].reshape(1, -1)
            speed_array = np.array([speed], dtype=np.float32)
            outputs = self.session.run(
                None,
                {
                    "tokens": tokens_input,
                    "style": style,
                    "speed": speed_array
                }
            )
            audio = outputs[0].flatten()
            log.info(f"✅ Audio generated: {len(audio)} samples, duration: {len(audio)/self.sample_rate:.2f}s")
            return audio.astype(np.float32)
        except Exception as e:
            # .exception() logs the message together with the traceback.
            log.exception(f"❌ Synthesis failed: {e}")
            raise

    def get_available_voices(self):
        """Return the list of voice names this engine advertises."""
        return [
            # British Female
            "bf_alice", "bf_emma", "bf_isabella", "bf_lily",
            # American Female
            "af_alloy", "af_aoede", "af_bella", "af_heart",
            "af_jessica", "af_kore", "af_nicole", "af_nova",
            "af_river", "af_sarah", "af_sky",
            # British Male
            "bm_daniel", "bm_fable", "bm_george", "bm_lewis",
            # American Male
            "am_adam", "am_echo", "am_eric", "am_fenrir",
            "am_liam", "am_michael", "am_onyx", "am_puck"
        ]

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ ffmpeg

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+gradio>=4.0.0
+numpy>=1.24.0
+scipy>=1.11.0
+onnxruntime>=1.16.0
+soundfile>=0.12.1
+moviepy==1.0.3
+Pillow>=10.0.0
+huggingface_hub
+ttstokenizer>=1.0.0