|
|
import base64 |
|
|
from io import BytesIO |
|
|
|
|
|
import numpy as np |
|
|
import gradio as gr |
|
|
import soundfile as sf |
|
|
from fastapi import FastAPI, Request, HTTPException |
|
|
from fastapi.responses import StreamingResponse |
|
|
from tts_arabic import tts as arabic_tts |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os

# Shared secret that callers of POST /tts must send in the "x-api-key" header.
# The TTS_API_KEY environment variable takes precedence so the key can be
# rotated without a code change; an unset or empty variable falls back to the
# built-in value so existing deployments keep working.
# NOTE(review): the fallback is a credential committed to source control —
# rotate it and drop the literal once every deployment sets TTS_API_KEY.
API_KEY = os.environ.get("TTS_API_KEY") or "nGHjs7oK8jp7OvxZ5dVZdY6JEf3DVwRF"

# Sample rate (Hz) attached to every waveform this service returns.
# NOTE(review): assumed to match the tts_arabic output rate — confirm this
# holds for every selectable vocoder_id (e.g. "vocos44").
SAMPLE_RATE = 22050
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def tts_core(
    text: str,
    speaker: str,
    pace: float,
    denoise: float,
    volume: float,
    vowelizer: str,
    model_id: str,
    vocoder_id: str,
):
    """Synthesize speech for *text* and return an ``(audio, status)`` pair.

    Shared by the Gradio callback and the HTTP endpoint, so every argument is
    treated as untrusted: it may arrive as a string, a number or ``None``.

    Args:
        text: Text to synthesize. ``None``, non-string values and
            whitespace-only strings are rejected with an error status.
        speaker: Speaker id; converted with ``int()`` before synthesis.
        pace: Speech pace; converted with ``float()``.
        denoise: Denoiser strength; converted with ``float()``.
        volume: Output volume; converted with ``float()``.
        vowelizer: Vowelizer name, or the UI sentinel ``"بدون تشكيل"``
            meaning "no vowelizer" (forwarded to the engine as ``None``).
        model_id: Forwarded unchanged to ``arabic_tts``.
        vocoder_id: Forwarded unchanged to ``arabic_tts``.

    Returns:
        ``((SAMPLE_RATE, wave), status)`` on success, where ``wave`` is a
        float32 NumPy array scaled down so its peak does not exceed 1.0;
        ``(None, status)`` on any failure. ``status`` is always a
        user-facing message. This function does not raise for bad input or
        synthesis errors; they are reported through ``status``.
    """
    # JSON callers can send any type; a non-string value cannot be stripped,
    # so it is treated the same as missing text instead of crashing.
    if not isinstance(text, str):
        text = ""
    text = text.strip()
    if not text:
        return None, "❌ الرجاء إدخال نص عربي."

    try:
        pace = float(pace)
        denoise = float(denoise)
        volume = float(volume)
    except (TypeError, ValueError):
        # float() raises TypeError for None/list/dict and ValueError for
        # unparsable strings; both mean the caller sent a bad number.
        return None, "❌ قيم pace / denoise / volume غير صحيحة."

    # The UI's "no vowelizer" choice is passed to the engine as None.
    vowelizer_arg = None if vowelizer == "بدون تشكيل" else vowelizer

    try:
        wave = arabic_tts(
            text,
            speaker=int(speaker),
            pace=pace,
            denoise=denoise,
            volume=volume,
            play=False,
            pitch_mul=1.0,
            pitch_add=0.0,
            vowelizer=vowelizer_arg,
            model_id=model_id,
            vocoder_id=vocoder_id,
            cuda=None,
            save_to=None,
        )

        # One conversion handles lists, ndarrays and any other array-like.
        wave = np.asarray(wave, dtype=np.float32)

        if wave.size == 0:
            return None, "❌ الموجة الصوتية فارغة."

        # Scale down only when the peak exceeds full scale; quieter audio is
        # returned untouched.
        max_abs = float(np.max(np.abs(wave)))
        if max_abs > 1.0:
            wave = wave / max_abs

        return (SAMPLE_RATE, wave), "✅ تم توليد الصوت بنجاح."

    except Exception as e:
        # Boundary handler: any engine failure (or a non-integer speaker id)
        # is reported to the caller as a status message instead of raising.
        print("TTS ERROR:", repr(e))
        return None, f"❌ حدث خطأ أثناء التوليد: {e}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def gradio_generate_tts(
    text,
    speaker,
    pace,
    denoise,
    volume,
    vowelizer,
    model_id,
    vocoder_id,
):
    """Gradio callback: hand the form values, unchanged, to ``tts_core``.

    Returns whatever ``tts_core`` returns, i.e. the ``(audio, status)`` pair
    consumed by the interface's two output components.
    """
    return tts_core(
        text=text,
        speaker=speaker,
        pace=pace,
        denoise=denoise,
        volume=volume,
        vowelizer=vowelizer,
        model_id=model_id,
        vocoder_id=vocoder_id,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Gradio form. The input components are listed in the same order as the
# parameters of gradio_generate_tts, and the two outputs receive the
# (audio, status) pair it returns.
demo = gr.Interface(
    title="Arabic TTS (ONNX / CPU)",
    description="نموذج tts_arabic لتحويل النص العربي إلى كلام على CPU.",
    fn=gradio_generate_tts,
    inputs=[
        # text
        gr.Textbox(
            lines=4,
            label="النص العربي",
            placeholder="اكتب هنا الجملة أو الفقرة التي تريد تحويلها إلى صوت...",
        ),
        # speaker (sent as a string; converted to int inside tts_core)
        gr.Dropdown(
            label="المتحدث (Speaker ID)",
            choices=["0", "1", "2", "3"],
            value="1",
        ),
        # pace
        gr.Slider(
            minimum=0.6,
            maximum=1.4,
            step=0.05,
            value=1.0,
            label="سرعة الكلام (pace)",
        ),
        # denoise
        gr.Slider(
            minimum=0.0,
            maximum=0.02,
            step=0.001,
            value=0.005,
            label="إزالة الضوضاء (denoise)",
        ),
        # volume
        gr.Slider(
            minimum=0.4,
            maximum=1.0,
            step=0.05,
            value=0.9,
            label="مستوى الصوت (volume)",
        ),
        # vowelizer (the first choice is the "no vowelizer" sentinel)
        gr.Dropdown(
            label="تشكيل تلقائي للنص (Vowelizer)",
            choices=["بدون تشكيل", "shakkelha", "catt_eo"],
            value="بدون تشكيل",
        ),
        # model_id
        gr.Radio(
            label="موديل Text→Mel (model_id)",
            choices=["fastpitch", "mixer128", "mixer80"],
            value="fastpitch",
        ),
        # vocoder_id
        gr.Radio(
            label="Vocoder (vocoder_id)",
            choices=["hifigan", "vocos", "vocos44"],
            value="hifigan",
        ),
    ],
    outputs=[
        gr.Audio(label="الصوت الناتج", type="numpy"),
        gr.Textbox(interactive=False, label="الحالة"),
    ],
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ASGI application that hosts the JSON API route defined below.
app = FastAPI()


@app.post("/tts")
async def tts_api(request: Request):
    """Synthesize speech from a JSON request and return it as a WAV file.

    POST /tts

    Headers:
      - x-api-key: must equal ``API_KEY``
      - Content-Type: application/json

    Body JSON (every field is optional; a missing or ``null`` field takes
    the default shown):
      {
        "text": "",
        "speaker": "1",
        "pace": 1.0,
        "denoise": 0.005,
        "volume": 0.9,
        "vowelizer": "بدون تشكيل",
        "model_id": "fastpitch",
        "vocoder_id": "hifigan"
      }

    Response:
      - Binary ``audio/wav`` (StreamingResponse), sent as an attachment
        named ``tts.wav``.

    Raises:
        HTTPException: 401 when the API key is missing or wrong; 400 when
            the body is not a JSON object or when ``tts_core`` reports a
            failure (its status message becomes the error detail).
    """
    # Function-scope imports keep this handler self-contained.
    import hmac

    from fastapi.concurrency import run_in_threadpool

    # Constant-time comparison so response timing does not leak how much of
    # the key matched. Bytes are compared because compare_digest rejects
    # non-ASCII str input.
    supplied_key = request.headers.get("x-api-key")
    if supplied_key is None or not hmac.compare_digest(
        supplied_key.encode("utf-8"), API_KEY.encode("utf-8")
    ):
        raise HTTPException(status_code=401, detail="Invalid or missing API Key")

    # A malformed payload is the client's fault: answer 400, not 500.
    # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
    try:
        body = await request.json()
    except ValueError:
        raise HTTPException(
            status_code=400, detail="Request body must be valid JSON"
        ) from None
    if not isinstance(body, dict):
        raise HTTPException(
            status_code=400, detail="Request body must be a JSON object"
        )

    defaults = {
        "text": "",
        "speaker": "1",
        "pace": 1.0,
        "denoise": 0.005,
        "volume": 0.9,
        "vowelizer": "بدون تشكيل",
        "model_id": "fastpitch",
        "vocoder_id": "hifigan",
    }
    # An explicit JSON null is treated like an absent field.
    params = {}
    for name, fallback in defaults.items():
        value = body.get(name)
        params[name] = fallback if value is None else value

    # Synthesis is blocking, CPU-bound work; running it in the thread pool
    # keeps the event loop (and the mounted UI) responsive meanwhile.
    audio, status = await run_in_threadpool(tts_core, **params)

    if audio is None:
        raise HTTPException(status_code=400, detail=status)

    sr, data = audio

    # Encode to WAV in memory and rewind so the response streams from byte 0.
    buffer = BytesIO()
    sf.write(buffer, data, sr, format="WAV")
    buffer.seek(0)

    headers = {
        "Content-Disposition": 'attachment; filename="tts.wav"'
    }

    return StreamingResponse(buffer, media_type="audio/wav", headers=headers)
|
|
|
|
|
|
|
|
|
|
|
# Attach the Gradio UI to the FastAPI application at the site root; the
# returned application object replaces the module-level `app`.
app = gr.mount_gradio_app(app=app, blocks=demo, path="/")


if __name__ == "__main__":
    # uvicorn is imported only when the file is executed as a script.
    import uvicorn

    # Listen on all interfaces, port 7860.
    uvicorn.run(app, port=7860, host="0.0.0.0")
|
|
|