alidw committed on
Commit
5b75713
·
verified ·
1 Parent(s): f18bb69

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -0
app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from zipfile import ZipFile
3
+
4
+ import torch
5
+ from fastapi import FastAPI
6
+ from fastapi.responses import StreamingResponse
7
+
8
+ from OpenVoice import se_extractor # لاحقاً لو حبيت تعمل voice cloning
9
+ from OpenVoice.api import BaseSpeakerTTS, ToneColorConverter
10
+
11
app = FastAPI()

# -------- Model setup, executed once when the Space starts --------

CHECKPOINT_ZIP_URL = "https://myshell-public-repo-hosting.s3.amazonaws.com/checkpoints_1226.zip"
CKPT_DIR = "checkpoints"
EN_CKPT_BASE = os.path.join(CKPT_DIR, "base_speakers", "EN")
CONVERTER_CKPT = os.path.join(CKPT_DIR, "converter")

# Directory the /tts endpoint writes its synthesized audio into.
os.makedirs("outputs", exist_ok=True)

# Fetch and unpack the checkpoints only when they are not already on disk.
if not os.path.exists(CKPT_DIR):
    print("Downloading OpenVoice checkpoints ...")
    # os.system returns the command's exit status; stop here on failure so a
    # partial/empty ckpt.zip does not surface later as a confusing BadZipFile.
    if os.system(f"wget {CHECKPOINT_ZIP_URL} -O ckpt.zip") != 0:
        raise RuntimeError(
            f"Failed to download OpenVoice checkpoints from {CHECKPOINT_ZIP_URL}"
        )
    print("Extracting checkpoints ...")
    # Context manager guarantees the archive handle is closed after extraction.
    with ZipFile("ckpt.zip") as checkpoint_archive:
        checkpoint_archive.extractall()
    print("Checkpoints ready.")

device = "cuda" if torch.cuda.is_available() else "cpu"

# Base TTS (primarily English; Arabic would go through cross-lingual cloning).
base_speaker_tts = BaseSpeakerTTS(f"{EN_CKPT_BASE}/config.json", device=device)
base_speaker_tts.load_ckpt(f"{EN_CKPT_BASE}/checkpoint.pth")

# To clone a specific voice later, enable the tone color converter:
# tone_color_converter = ToneColorConverter(f"{CONVERTER_CKPT}/config.json", device=device)
# tone_color_converter.load_ckpt(f"{CONVERTER_CKPT}/checkpoint.pth")
38
+
39
+
40
+ # -------- API Endpoint --------
41
+
42
@app.post("/tts")
async def tts_endpoint(
    text: str,
    speaker: str = "default",  # values to try: default, cheerful, sad, angry ...
    speed: float = 1.0,
):
    """Convert text to WAV audio using OpenVoice.

    Args:
        text: The text to synthesize.
        speaker: Speaker/style name forwarded to ``base_speaker_tts.tts``.
        speed: Speed value forwarded to ``base_speaker_tts.tts``.

    Returns:
        A ``StreamingResponse`` with media type ``audio/wav`` carrying the
        synthesized audio.
    """
    # Function-scope import keeps this block self-contained.
    import uuid

    # Use a unique file per request: a single shared "outputs/out.wav" can be
    # overwritten by a later request while an earlier response is still being
    # streamed from it.
    out_path = os.path.join("outputs", f"{uuid.uuid4().hex}.wav")

    # Default language is English; Arabic text may still produce output, but
    # at lower quality.
    language = "English"

    try:
        # Synthesize with the base speaker model, writing the WAV to out_path.
        base_speaker_tts.tts(
            text=text,
            output_path=out_path,
            speaker=speaker,
            language=language,
            speed=speed,
        )
        # Read the audio eagerly so the file handle is closed deterministically
        # instead of being left open for the lifetime of the response.
        with open(out_path, "rb") as audio_file:
            audio_bytes = audio_file.read()
    finally:
        # Remove the per-request file whether synthesis succeeded or failed,
        # so the outputs directory does not grow without bound.
        if os.path.exists(out_path):
            os.remove(out_path)

    return StreamingResponse(iter([audio_bytes]), media_type="audio/wav")
67
+
68
+
69
+ # مهم جدًا لـ Hugging Face Spaces
70
if __name__ == "__main__":
    # Start the ASGI server only when this file is executed directly.
    # The original author marks this launch step as essential for
    # Hugging Face Spaces; it listens on all interfaces, port 7860.
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=7860)