"""FastAPI service that exposes OpenVoice base-speaker text-to-speech.

On import the module makes sure the OpenVoice checkpoints are present
(downloading and extracting them when the checkpoint directory is missing),
loads the English base speaker model, and registers a single ``POST /tts``
endpoint that returns WAV audio.
"""

import math
import os
import tempfile
import urllib.request
from zipfile import ZipFile

import torch
from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse

from OpenVoice import se_extractor  # kept for later use if voice cloning is added
from OpenVoice.api import BaseSpeakerTTS, ToneColorConverter

app = FastAPI()

# -------- Model setup when the Space starts --------
CHECKPOINT_ZIP_URL = "https://myshell-public-repo-hosting.s3.amazonaws.com/checkpoints_1226.zip"
CHECKPOINT_ZIP_PATH = "ckpt.zip"
CKPT_DIR = "checkpoints"
EN_CKPT_BASE = os.path.join(CKPT_DIR, "base_speakers", "EN")
CONVERTER_CKPT = os.path.join(CKPT_DIR, "converter")
OUTPUT_DIR = "outputs"

os.makedirs(OUTPUT_DIR, exist_ok=True)


def _ensure_checkpoints():
    """Download and extract the OpenVoice checkpoints if CKPT_DIR is missing.

    Raises:
        urllib.error.URLError: if the archive cannot be downloaded.
        zipfile.BadZipFile: if the downloaded file is not a valid archive.
    """
    if os.path.exists(CKPT_DIR):
        return

    print("Downloading OpenVoice checkpoints ...")
    # urlretrieve raises on HTTP/network errors, whereas the exit status of a
    # shelled-out ``wget`` was silently ignored and required the binary to exist.
    urllib.request.urlretrieve(CHECKPOINT_ZIP_URL, CHECKPOINT_ZIP_PATH)

    print("Extracting checkpoints ...")
    # Context manager guarantees the archive handle is closed after extraction.
    with ZipFile(CHECKPOINT_ZIP_PATH) as archive:
        archive.extractall()
    print("Checkpoints ready.")


_ensure_checkpoints()

device = "cuda" if torch.cuda.is_available() else "cpu"

# Base TTS (primarily English – Arabic would go through cross-lingual cloning)
base_speaker_tts = BaseSpeakerTTS(f"{EN_CKPT_BASE}/config.json", device=device)
base_speaker_tts.load_ckpt(f"{EN_CKPT_BASE}/checkpoint.pth")

# If you later want to clone a specific voice:
# tone_color_converter = ToneColorConverter(f"{CONVERTER_CKPT}/config.json", device=device)
# tone_color_converter.load_ckpt(f"{CONVERTER_CKPT}/checkpoint.pth")


# -------- API Endpoint --------
@app.post("/tts")
async def tts_endpoint(
    text: str,
    speaker: str = "default",  # you can try: default, cheerful, sad, angry ...
    speed: float = 1.0,
):
    """Convert text into WAV audio using OpenVoice.

    Args:
        text: The text to synthesize. Must contain non-whitespace characters.
        speaker: Speaker style name passed through to ``BaseSpeakerTTS.tts``.
        speed: Speech speed passed through to ``BaseSpeakerTTS.tts``; must be
            a positive, finite number.

    Returns:
        A ``StreamingResponse`` with media type ``audio/wav``.

    Raises:
        HTTPException: 400 if ``text`` is blank or ``speed`` is not a
            positive, finite number.
    """
    if not text.strip():
        raise HTTPException(status_code=400, detail="text must not be empty")
    if not math.isfinite(speed) or speed <= 0:
        raise HTTPException(
            status_code=400, detail="speed must be a positive, finite number"
        )

    # Default language is EN – Arabic text may still produce output, but at
    # lower quality.
    language = "English"

    # Each request gets its own output file: a single shared path could be
    # overwritten by a later request while an earlier response was still
    # being streamed from it.
    fd, out_path = tempfile.mkstemp(suffix=".wav", dir=OUTPUT_DIR)
    os.close(fd)
    try:
        # NOTE(review): this call is synchronous and runs on the event loop,
        # so other requests wait while synthesis is in progress. Moving it to
        # a worker thread would need confirmation that the model is safe to
        # call concurrently.
        base_speaker_tts.tts(
            text=text,
            output_path=out_path,
            speaker=speaker,
            language=language,
            speed=speed,
        )
        with open(out_path, "rb") as audio_file:
            audio_bytes = audio_file.read()
    finally:
        # Always remove the temporary file so outputs/ does not grow unbounded.
        try:
            os.remove(out_path)
        except FileNotFoundError:
            pass

    # The audio is already in memory, so no file handle is left open for the
    # lifetime of the response.
    return StreamingResponse(iter([audio_bytes]), media_type="audio/wav")


# Very important for Hugging Face Spaces
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)