voice / app.py
alidw's picture
Update app.py
0029fbb verified
import base64
import hmac
import os
from io import BytesIO

import gradio as gr
import numpy as np
import soundfile as sf
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import StreamingResponse
from tts_arabic import tts as arabic_tts
# --------------------------
# General settings
# --------------------------
# Shared secret that callers of POST /tts must send in the ``x-api-key`` header.
# SECURITY: prefer supplying it through the TTS_API_KEY environment variable.
# The literal below is kept only as a backward-compatible fallback; because it
# is committed to source it should be treated as leaked and rotated.
API_KEY = os.environ.get("TTS_API_KEY") or "nGHjs7oK8jp7OvxZ5dVZdY6JEf3DVwRF"

# Sample rate (Hz) reported alongside every generated waveform.
SAMPLE_RATE = 22050
# --------------------------
# TTS core (main synthesis function)
# --------------------------
def tts_core(
    text: str,
    speaker: str,
    pace: float,
    denoise: float,
    volume: float,
    vowelizer: str,
    model_id: str,
    vocoder_id: str,
):
    """Synthesize speech for *text* and return it together with a status message.

    Args:
        text: Text to synthesize; surrounding whitespace is stripped. Empty,
            ``None`` or non-string input is rejected with an error status.
        speaker: Speaker id; converted with ``int()`` before the TTS call.
        pace: Converted with ``float()`` and forwarded to the TTS call.
        denoise: Converted with ``float()`` and forwarded to the TTS call.
        volume: Converted with ``float()`` and forwarded to the TTS call.
        vowelizer: Vowelizer name; the UI value "بدون تشكيل" is mapped to
            ``None`` (no vowelizer).
        model_id: Forwarded unchanged to the TTS call.
        vocoder_id: Forwarded unchanged to the TTS call.

    Returns:
        ``((SAMPLE_RATE, wave), status)`` on success, where ``wave`` is a
        float32 NumPy array, rescaled by its peak only when the peak exceeds
        1.0. ``(None, status)`` on any failure; errors are reported through
        the status string instead of being raised.
    """
    # A non-string value (e.g. a JSON number) has no ``.strip()``; treat it
    # like missing text instead of crashing with AttributeError.
    text = text.strip() if isinstance(text, str) else ""
    if not text:
        return None, "❌ الرجاء إدخال نص عربي."

    try:
        pace = float(pace)
        denoise = float(denoise)
        volume = float(volume)
    except (TypeError, ValueError, OverflowError):
        # TypeError covers None / list / dict coming from JSON bodies;
        # OverflowError covers integers too large for a float.
        return None, "❌ قيم pace / denoise / volume غير صحيحة."

    vowelizer_arg = None if vowelizer == "بدون تشكيل" else vowelizer

    try:
        wave = arabic_tts(
            text,
            speaker=int(speaker),
            pace=pace,
            denoise=denoise,
            volume=volume,
            play=False,
            pitch_mul=1.0,
            pitch_add=0.0,
            vowelizer=vowelizer_arg,
            model_id=model_id,
            vocoder_id=vocoder_id,
            cuda=None,
            save_to=None,
        )

        # One conversion handles lists, ndarrays and other array-likes alike.
        wave = np.array(wave, dtype=np.float32)
        if wave.size == 0:
            return None, "❌ الموجة الصوتية فارغة."

        # Only scale down when the signal would clip; quieter audio is kept as is.
        peak = float(np.max(np.abs(wave)))
        if peak > 1.0:
            wave = wave / peak

        # NOTE(review): SAMPLE_RATE is a fixed constant; confirm it matches the
        # output rate of every selectable vocoder_id (e.g. "vocos44").
        return (SAMPLE_RATE, wave), "✅ تم توليد الصوت بنجاح."
    except Exception as e:
        # Boundary handler: any synthesis failure becomes an error status.
        print("TTS ERROR:", repr(e))
        return None, f"❌ حدث خطأ أثناء التوليد: {e}"
# --------------------------
# Gradio callback (delegates to the TTS core)
# --------------------------
def gradio_generate_tts(
    text,
    speaker,
    pace,
    denoise,
    volume,
    vowelizer,
    model_id,
    vocoder_id,
):
    """Forward the Gradio form values to ``tts_core`` and return its result unchanged."""
    return tts_core(
        text=text,
        speaker=speaker,
        pace=pace,
        denoise=denoise,
        volume=volume,
        vowelizer=vowelizer,
        model_id=model_id,
        vocoder_id=vocoder_id,
    )
# --------------------------
# Gradio interface
# --------------------------
demo = gr.Interface(
    title="Arabic TTS (ONNX / CPU)",
    description="نموذج tts_arabic لتحويل النص العربي إلى كلام على CPU.",
    fn=gradio_generate_tts,
    # The inputs are listed in the same order as the parameters of
    # gradio_generate_tts: text, speaker, pace, denoise, volume, vowelizer,
    # model_id, vocoder_id.
    inputs=[
        gr.Textbox(
            label="النص العربي",
            placeholder="اكتب هنا الجملة أو الفقرة التي تريد تحويلها إلى صوت...",
            lines=4,
        ),
        gr.Dropdown(
            label="المتحدث (Speaker ID)",
            choices=["0", "1", "2", "3"],
            value="1",
        ),
        gr.Slider(
            minimum=0.6,
            maximum=1.4,
            value=1.0,
            step=0.05,
            label="سرعة الكلام (pace)",
        ),
        gr.Slider(
            minimum=0.0,
            maximum=0.02,
            value=0.005,
            step=0.001,
            label="إزالة الضوضاء (denoise)",
        ),
        gr.Slider(
            minimum=0.4,
            maximum=1.0,
            value=0.9,
            step=0.05,
            label="مستوى الصوت (volume)",
        ),
        gr.Dropdown(
            label="تشكيل تلقائي للنص (Vowelizer)",
            choices=["بدون تشكيل", "shakkelha", "catt_eo"],
            value="بدون تشكيل",
        ),
        gr.Radio(
            label="موديل Text→Mel (model_id)",
            choices=["fastpitch", "mixer128", "mixer80"],
            value="fastpitch",
        ),
        gr.Radio(
            label="Vocoder (vocoder_id)",
            choices=["hifigan", "vocos", "vocos44"],
            value="hifigan",
        ),
    ],
    # Two outputs: the generated audio and the status message.
    outputs=[
        gr.Audio(label="الصوت الناتج", type="numpy"),
        gr.Textbox(label="الحالة", interactive=False),
    ],
)
# --------------------------
# FastAPI application + /tts endpoint
# --------------------------
app = FastAPI()


@app.post("/tts")
async def tts_api(request: Request):
    """Synthesize speech from a JSON request and return it as a WAV download.

    POST /tts
    Headers:
    - x-api-key: must equal API_KEY
    - Content-Type: application/json
    Body JSON (every field is optional; the defaults are shown):
    {
        "text": "",
        "speaker": "1",
        "pace": 1.0,
        "denoise": 0.005,
        "volume": 0.9,
        "vowelizer": "بدون تشكيل",
        "model_id": "fastpitch",
        "vocoder_id": "hifigan"
    }
    Response:
    - Binary audio/wav (StreamingResponse) with an attachment filename of
      "tts.wav".

    Raises:
        HTTPException(401): the API key header is missing or wrong.
        HTTPException(400): the body is not a JSON object, or ``tts_core``
            reported a failure (its status text becomes the detail).
    """
    supplied_key = request.headers.get("x-api-key")
    # Constant-time comparison so response timing does not leak how much of
    # the key matched. Bytes are compared because compare_digest rejects
    # non-ASCII str arguments.
    if supplied_key is None or not hmac.compare_digest(
        supplied_key.encode("utf-8"), API_KEY.encode("utf-8")
    ):
        raise HTTPException(status_code=401, detail="Invalid or missing API Key")

    try:
        body = await request.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors;
        # report a client error instead of an unhandled 500.
        raise HTTPException(
            status_code=400, detail="Request body must be valid JSON"
        ) from None
    if not isinstance(body, dict):
        # A JSON list or scalar has no .get(); reject it explicitly.
        raise HTTPException(
            status_code=400, detail="Request body must be a JSON object"
        )

    text = body.get("text", "")
    speaker = body.get("speaker", "1")
    pace = body.get("pace", 1.0)
    denoise = body.get("denoise", 0.005)
    volume = body.get("volume", 0.9)
    vowelizer = body.get("vowelizer", "بدون تشكيل")
    model_id = body.get("model_id", "fastpitch")
    vocoder_id = body.get("vocoder_id", "hifigan")

    # NOTE(review): tts_core is a synchronous call made directly inside this
    # async handler, so the event loop is occupied until synthesis finishes.
    audio, status = tts_core(
        text, speaker, pace, denoise, volume, vowelizer, model_id, vocoder_id
    )
    if audio is None:
        raise HTTPException(status_code=400, detail=status)

    sr, data = audio

    # Encode the NumPy waveform as a WAV file held in memory.
    buffer = BytesIO()
    sf.write(buffer, data, sr, format="WAV")
    buffer.seek(0)

    headers = {
        "Content-Disposition": 'attachment; filename="tts.wav"'
    }
    # StreamingResponse sends the buffer back as a real audio file.
    return StreamingResponse(buffer, media_type="audio/wav", headers=headers)
# Mount the Gradio UI at the root path "/" of the FastAPI application.
app = gr.mount_gradio_app(app=app, blocks=demo, path="/")


if __name__ == "__main__":
    # Imported here so uvicorn is only loaded when the file is run as a script.
    import uvicorn

    uvicorn.run(app, port=7860, host="0.0.0.0")