Spaces:

alidw
/

voice

Sleeping

File size: 5,982 Bytes

4959c3c
 
 
4a37de7
83c95ae
4959c3c
83c95ae
0029fbb
4a37de7
 
83c95ae
 
 
c6b5946
83c95ae
4a37de7
 
8dcfafa
83c95ae
 
 
 
 
8dcfafa
 
 
 
 
 
 
 
4a37de7
 
 
 
 
8dcfafa
 
 
 
 
 
 
83c95ae
4a37de7
 
 
 
83c95ae
 
 
 
 
8dcfafa
 
83c95ae
 
 
 
 
4a37de7
 
 
 
8dcfafa
 
 
 
 
83c95ae
 
 
 
8dcfafa
 
4a37de7
 
c6b5946
4a37de7
8dcfafa
4a37de7
 
 
83c95ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
694dc03
c6b5946
83c95ae
c6b5946
8dcfafa
 
 
 
 
 
 
 
 
 
83c95ae
 
 
8dcfafa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c6b5946
 
83c95ae
 
c6b5946
 
83c95ae
c6b5946
 
83c95ae
 
 
 
 
 
 
 
 
 
 
 
0029fbb
83c95ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4959c3c
 
0029fbb
83c95ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4959c3c
0029fbb
4959c3c
 
0029fbb
4959c3c
0029fbb
 
83c95ae
 
0029fbb
 
 
83c95ae
 
 
 
 
4a37de7
83c95ae

import base64
import hmac
import os
from io import BytesIO

import gradio as gr
import numpy as np
import soundfile as sf
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import StreamingResponse
from tts_arabic import tts as arabic_tts

# --------------------------
# إعدادات عامة
# --------------------------

# Shared secret that /tts callers must send in the "x-api-key" header.
# It is read from the TTS_API_KEY environment variable when that is set, so
# the key can be configured per deployment without editing the source.
# NOTE(review): the literal fallback only preserves the previous behaviour;
# it is committed to the repository and should be treated as leaked —
# rotate it and remove the fallback once the environment variable is set.
API_KEY = os.environ.get("TTS_API_KEY", "nGHjs7oK8jp7OvxZ5dVZdY6JEf3DVwRF")
# Sample rate (Hz) returned together with generated audio and used when the
# waveform is encoded as WAV.
SAMPLE_RATE = 22050


# --------------------------
# قلب الــ TTS (دالة أساسية)
# --------------------------

def tts_core(
    text: str,
    speaker: str,
    pace: float,
    denoise: float,
    volume: float,
    vowelizer: str,
    model_id: str,
    vocoder_id: str,
):
    """Synthesize speech for ``text`` and report the outcome as a status string.

    This is the single synthesis routine shared by the Gradio callback and the
    ``/tts`` HTTP endpoint. Invalid input and synthesis failures are reported
    through the returned status message instead of being raised.

    Args:
        text: Text to synthesize. ``None``, non-string values and strings that
            are empty after stripping whitespace are rejected.
        speaker: Speaker id; converted with ``int()`` before being passed to
            the engine.
        pace: Speech pace; must be convertible with ``float()``.
        denoise: Denoise strength; must be convertible with ``float()``.
        volume: Output volume; must be convertible with ``float()``.
        vowelizer: Vowelizer name forwarded to the engine. The UI choice
            "بدون تشكيل" (no vowelization) is forwarded as ``None``.
        model_id: Text-to-mel model id, forwarded unchanged.
        vocoder_id: Vocoder id, forwarded unchanged.

    Returns:
        ``((SAMPLE_RATE, wave), status)`` on success, where ``wave`` is a
        float32 NumPy array scaled down so that its peak magnitude does not
        exceed 1.0; ``(None, status)`` on any failure.
    """
    # JSON clients may send null, numbers or lists here; treat anything that
    # is not a string the same way as missing text instead of crashing on
    # ``.strip()``.
    if not isinstance(text, str):
        text = ""
    text = text.strip()
    if not text:
        return None, "❌ الرجاء إدخال نص عربي."

    try:
        pace = float(pace)
        denoise = float(denoise)
        volume = float(volume)
    except (TypeError, ValueError):
        # float(None) / float([...]) raise TypeError rather than ValueError,
        # so both must be caught to reject e.g. a JSON null cleanly.
        return None, "❌ قيم pace / denoise / volume غير صحيحة."

    # The UI's "no vowelization" choice is passed to the engine as None.
    vowelizer_arg = None if vowelizer == "بدون تشكيل" else vowelizer

    try:
        wave = arabic_tts(
            text,
            speaker=int(speaker),
            pace=pace,
            denoise=denoise,
            volume=volume,
            play=False,
            pitch_mul=1.0,
            pitch_add=0.0,
            vowelizer=vowelizer_arg,
            model_id=model_id,
            vocoder_id=vocoder_id,
            cuda=None,
            save_to=None,
        )

        # np.array copies and converts lists, ndarrays and other array-likes
        # alike, so a single call covers every return type handled before.
        wave = np.array(wave, dtype=np.float32)

        if wave.size == 0:
            return None, "❌ الموجة الصوتية فارغة."

        # Only scale down when the signal would clip; quieter audio is kept
        # at the level the engine produced.
        max_abs = float(np.max(np.abs(wave)))
        if max_abs > 1.0:
            wave = wave / max_abs

        # NOTE(review): SAMPLE_RATE is a fixed constant — confirm that every
        # selectable vocoder_id (e.g. "vocos44") really emits audio at this
        # rate, otherwise playback speed/pitch will be wrong.
        return (SAMPLE_RATE, wave), "✅ تم توليد الصوت بنجاح."

    except Exception as e:
        # Boundary handler: whatever the engine (or int(speaker)) raises is
        # logged and turned into a status message for the caller.
        print("TTS ERROR:", repr(e))
        return None, f"❌ حدث خطأ أثناء التوليد: {e}"


# --------------------------
# دالة Gradio (تستدعي القلب)
# --------------------------

def gradio_generate_tts(
    text, speaker, pace, denoise, volume, vowelizer, model_id, vocoder_id,
):
    """Gradio callback: forward the UI values to ``tts_core`` unchanged.

    The parameters arrive in the order of the interface's input components,
    and the ``(audio, status)`` pair produced by ``tts_core`` is returned
    as-is.
    """
    ui_values = (
        text,
        speaker,
        pace,
        denoise,
        volume,
        vowelizer,
        model_id,
        vocoder_id,
    )
    return tts_core(*ui_values)


# --------------------------
# واجهة Gradio
# --------------------------

demo = gr.Interface(
    title="Arabic TTS (ONNX / CPU)",
    description="نموذج tts_arabic لتحويل النص العربي إلى كلام على CPU.",
    fn=gradio_generate_tts,
    # Input components are listed in the same order as the parameters of
    # gradio_generate_tts.
    inputs=[
        # text
        gr.Textbox(
            placeholder="اكتب هنا الجملة أو الفقرة التي تريد تحويلها إلى صوت...",
            lines=4,
            label="النص العربي",
        ),
        # speaker (string ids)
        gr.Dropdown(
            label="المتحدث (Speaker ID)",
            value="1",
            choices=["0", "1", "2", "3"],
        ),
        # pace
        gr.Slider(
            minimum=0.6,
            maximum=1.4,
            step=0.05,
            value=1.0,
            label="سرعة الكلام (pace)",
        ),
        # denoise
        gr.Slider(
            minimum=0.0,
            maximum=0.02,
            step=0.001,
            value=0.005,
            label="إزالة الضوضاء (denoise)",
        ),
        # volume
        gr.Slider(
            minimum=0.4,
            maximum=1.0,
            step=0.05,
            value=0.9,
            label="مستوى الصوت (volume)",
        ),
        # vowelizer; the first choice is the "no vowelization" option
        gr.Dropdown(
            label="تشكيل تلقائي للنص (Vowelizer)",
            value="بدون تشكيل",
            choices=["بدون تشكيل", "shakkelha", "catt_eo"],
        ),
        # model_id
        gr.Radio(
            label="موديل Text→Mel (model_id)",
            value="fastpitch",
            choices=["fastpitch", "mixer128", "mixer80"],
        ),
        # vocoder_id
        gr.Radio(
            label="Vocoder (vocoder_id)",
            value="hifigan",
            choices=["hifigan", "vocos", "vocos44"],
        ),
    ],
    # Two outputs, matching the (audio, status) pair the callback returns.
    outputs=[
        gr.Audio(label="الصوت الناتج", type="numpy"),
        gr.Textbox(interactive=False, label="الحالة"),
    ],
)


# --------------------------
# تطبيق FastAPI + Endpoint /tts
# --------------------------

# ASGI application that carries the JSON /tts endpoint defined below; the
# Gradio UI is mounted onto it later in this file.
app = FastAPI()


@app.post("/tts")
async def tts_api(request: Request):
    """
    Synthesize speech from a JSON request and return it as a WAV download.

    POST /tts

    Headers:
      - x-api-key: must match the configured API_KEY
      - Content-Type: application/json

    Body JSON (a JSON object; omitted fields fall back to the values shown,
    except "text", which defaults to empty and is then rejected):
    {
      "text": "...",
      "speaker": "1",
      "pace": 1.0,
      "denoise": 0.005,
      "volume": 0.9,
      "vowelizer": "بدون تشكيل",
      "model_id": "fastpitch",
      "vocoder_id": "hifigan"
    }

    Response:
      - Binary audio/wav (StreamingResponse), sent as attachment "tts.wav"

    Raises:
      - HTTPException 401: the x-api-key header is missing or wrong.
      - HTTPException 400: the body is not valid JSON or not a JSON object,
        or tts_core reported a failure (its status message is the detail).
    """
    supplied_key = request.headers.get("x-api-key")
    # Compare in constant time so response timing does not leak how much of
    # the key matched. Bytes are compared because compare_digest rejects
    # str arguments containing non-ASCII characters.
    if supplied_key is None or not hmac.compare_digest(
        supplied_key.encode("utf-8"), API_KEY.encode("utf-8")
    ):
        raise HTTPException(status_code=401, detail="Invalid or missing API Key")

    try:
        body = await request.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses; a malformed body is a client error, not a server error.
        raise HTTPException(
            status_code=400, detail="Request body must be valid JSON"
        ) from None
    if not isinstance(body, dict):
        # Valid JSON that is a list/number/string has no fields to read.
        raise HTTPException(
            status_code=400, detail="Request body must be a JSON object"
        )

    text = body.get("text", "")
    speaker = body.get("speaker", "1")
    pace = body.get("pace", 1.0)
    denoise = body.get("denoise", 0.005)
    volume = body.get("volume", 0.9)
    vowelizer = body.get("vowelizer", "بدون تشكيل")
    model_id = body.get("model_id", "fastpitch")
    vocoder_id = body.get("vocoder_id", "hifigan")

    # NOTE(review): tts_core is a synchronous call made directly inside this
    # coroutine, so the event loop is blocked while audio is generated.
    # Consider moving it to a worker thread once the engine's thread-safety
    # has been confirmed.
    audio, status = tts_core(
        text, speaker, pace, denoise, volume, vowelizer, model_id, vocoder_id
    )

    if audio is None:
        raise HTTPException(status_code=400, detail=status)

    sr, data = audio

    # Encode the NumPy waveform as a WAV file held entirely in memory.
    buffer = BytesIO()
    sf.write(buffer, data, sr, format="WAV")
    buffer.seek(0)  # rewind so the response streams from the first byte

    headers = {
        "Content-Disposition": 'attachment; filename="tts.wav"'
    }

    # StreamingResponse sends the buffer back as an actual audio file.
    return StreamingResponse(buffer, media_type="audio/wav", headers=headers)


# Mount the Gradio UI at the root path "/" of the FastAPI application; `app`
# is rebound to whatever mount_gradio_app returns.
app = gr.mount_gradio_app(app, demo, path="/")


if __name__ == "__main__":
    # Script entry point: serve the combined FastAPI + Gradio application on
    # all interfaces, port 7860. uvicorn is imported here, inside the guard,
    # so it is only needed when the file is run directly.
    from uvicorn import run as serve

    serve(app, port=7860, host="0.0.0.0")