File size: 2,323 Bytes
5b75713
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import os
from zipfile import ZipFile

import torch
from fastapi import FastAPI
from fastapi.responses import StreamingResponse

from OpenVoice import se_extractor  # لاحقاً لو حبيت تعمل voice cloning
from OpenVoice.api import BaseSpeakerTTS, ToneColorConverter

app = FastAPI()

# -------- Model setup, executed when the Space starts --------

# Archive holding the OpenVoice checkpoints and the directory it unpacks into.
CHECKPOINT_ZIP_URL = "https://myshell-public-repo-hosting.s3.amazonaws.com/checkpoints_1226.zip"
CKPT_DIR = "checkpoints"
EN_CKPT_BASE = os.path.join(CKPT_DIR, "base_speakers", "EN")
CONVERTER_CKPT = os.path.join(CKPT_DIR, "converter")

# Directory the /tts endpoint writes its generated audio into.
os.makedirs("outputs", exist_ok=True)

# Fetch the checkpoints only when the checkpoint directory is not present yet.
if not os.path.exists(CKPT_DIR):
    print("Downloading OpenVoice checkpoints ...")
    # os.system returns the command's exit status; stop here on a failed
    # download instead of handing a missing/partial archive to ZipFile.
    wget_status = os.system(f"wget {CHECKPOINT_ZIP_URL} -O ckpt.zip")
    if wget_status != 0:
        raise RuntimeError(
            f"Downloading checkpoints failed (wget status {wget_status}): "
            f"{CHECKPOINT_ZIP_URL}"
        )
    print("Extracting checkpoints ...")
    # The context manager closes the archive handle once extraction is done.
    with ZipFile("ckpt.zip") as archive:
        archive.extractall()
    print("Checkpoints ready.")

# Use CUDA when torch reports it as available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Base TTS (primarily English – Arabic via cross-lingual cloning)
base_speaker_tts = BaseSpeakerTTS(
    f"{EN_CKPT_BASE}/config.json",
    device=device,
)
base_speaker_tts.load_ckpt(f"{EN_CKPT_BASE}/checkpoint.pth")

# If you later want to clone a specific voice:
# tone_color_converter = ToneColorConverter(f"{CONVERTER_CKPT}/config.json", device=device)
# tone_color_converter.load_ckpt(f"{CONVERTER_CKPT}/checkpoint.pth")


# -------- API Endpoint --------

@app.post("/tts")
async def tts_endpoint(
    text: str,
    speaker: str = "default",  # values to try: default, cheerful, sad, angry ...
    speed: float = 1.0,
):
    """Convert text to WAV audio using OpenVoice.

    Args:
        text: The text to synthesize.
        speaker: Speaker name forwarded unchanged to the base TTS model.
        speed: Speed value forwarded to the base TTS model; must be
            greater than zero.

    Returns:
        A ``StreamingResponse`` with media type ``audio/wav`` carrying the
        generated audio.

    Raises:
        HTTPException: With status 400 when ``speed`` is not a positive number.
    """
    import tempfile

    from fastapi import HTTPException

    # `not speed > 0` also rejects NaN. NOTE(review): a non-positive speed is
    # presumed to be meaningless to the model, so it is rejected up front
    # rather than left to fail inside the synthesis call.
    if not speed > 0:
        raise HTTPException(status_code=400, detail="speed must be greater than 0")

    # Default language is English – Arabic text may still produce output,
    # but at lower quality.
    language = "English"

    # Each request gets its own file: a single shared path would let a later
    # request overwrite the audio while an earlier response is still being sent.
    fd, out_path = tempfile.mkstemp(suffix=".wav", dir="outputs")
    os.close(fd)
    try:
        # tts() of BaseSpeakerTTS writes the synthesized audio to output_path.
        base_speaker_tts.tts(
            text=text,
            output_path=out_path,
            speaker=speaker,
            language=language,
            speed=speed,
        )
        # Read the result fully so the handle is closed and the temporary
        # file can be removed before the response is sent.
        with open(out_path, "rb") as audio_file:
            audio_bytes = audio_file.read()
    finally:
        if os.path.exists(out_path):
            os.remove(out_path)

    return StreamingResponse(iter([audio_bytes]), media_type="audio/wav")


# Script entry point; the original author flags this as very important for
# Hugging Face Spaces.
if __name__ == "__main__":
    from uvicorn import run as serve

    # Listen on all interfaces, port 7860.
    serve(app, host="0.0.0.0", port=7860)