Spaces:

mohamedtsou
/

speech

Sleeping

File size: 1,617 Bytes

import os
import subprocess
import tempfile

import uvicorn
from fastapi import FastAPI, File, HTTPException, UploadFile
from gtts import gTTS
from transformers import pipeline

# ASGI application object that the route decorators below attach to.
app = FastAPI()

# Speech -> text (Whisper); decoding is forced to Arabic via generate_kwargs.
stt = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-tiny",
    generate_kwargs=dict(language="arabic"),
)

# Text -> text (chat): produces the reply for the transcribed utterance.
chat = pipeline(task="text2text-generation", model="google/flan-t5-base")

@app.get("/")
def root():
    """Liveness probe: always answers with a fixed ``{"status": "ok"}`` body."""
    health = dict(status="ok")
    return health

@app.post("/voice")
async def voice(file: UploadFile = File(...)):
    """Transcribe an uploaded recording, generate a reply, and synthesize it.

    Pipeline: save upload -> ffmpeg converts it to WAV -> Whisper transcribes
    -> chat model answers -> gTTS renders the answer as Arabic speech.

    Args:
        file: The uploaded audio recording (any container ffmpeg can decode,
            e.g. m4a / mp3 / wav).

    Returns:
        A dict with ``heard_text`` (the transcription), ``reply_text`` (the
        generated answer) and ``audio_file`` (server-side path of the
        synthesized MP3, or ``None`` when the reply is blank and there is
        nothing to speak).

    Raises:
        HTTPException: 400 if the upload is empty or ffmpeg cannot decode it;
            500 if the ffmpeg executable is not available on the server.
    """
    # NOTE(review): ffmpeg, both model calls and gTTS run synchronously inside
    # this coroutine, so the event loop is blocked while a request is served.

    # Read first so a failed/empty upload never leaves a temp file behind.
    payload = await file.read()
    if not payload:
        raise HTTPException(status_code=400, detail="Uploaded audio file is empty")

    # 1) Store the audio exactly as received (m4a / mp3 / wav).
    with tempfile.NamedTemporaryFile(delete=False) as f:
        f.write(payload)
        audio_in = f.name
    audio_wav = audio_in + ".wav"

    try:
        # 2) Convert whatever was uploaded to WAV (works around phone formats).
        try:
            conversion = subprocess.run(
                ["ffmpeg", "-y", "-i", audio_in, audio_wav],
                stdin=subprocess.DEVNULL,  # never let ffmpeg wait on the server's stdin
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except FileNotFoundError as err:
            raise HTTPException(
                status_code=500,
                detail="ffmpeg is not installed on the server",
            ) from err
        if conversion.returncode != 0:
            # Without this check the failure would only surface later as an
            # opaque error from the speech-to-text pipeline.
            raise HTTPException(
                status_code=400,
                detail="Could not decode the uploaded audio file",
            )

        # 3) Speech -> text
        text_in = stt(audio_wav)["text"]
    finally:
        # The intermediate files were created with delete=False / by ffmpeg,
        # so nothing else removes them; clean up on success and on failure.
        for path in (audio_in, audio_wav):
            try:
                os.remove(path)
            except FileNotFoundError:
                # audio_wav does not exist when ffmpeg failed before writing it.
                pass

    # 4) Chat response
    reply = chat(text_in, max_new_tokens=80)[0]["generated_text"]

    # 5) Text -> speech (Arabic). gTTS rejects empty text, so skip synthesis
    #    when the model produced nothing speakable.
    audio_out = None
    if reply.strip():
        # audio_in is already deleted; its unique name is reused only as a
        # collision-free prefix. The reply file is kept because its path is
        # part of the response.
        audio_out = audio_in + "_reply.mp3"
        tts = gTTS(reply, lang="ar")
        tts.save(audio_out)

    return {
        "heard_text": text_in,
        "reply_text": reply,
        "audio_file": audio_out
    }

if __name__ == "__main__":
    # Listen on every interface; the port is taken from $PORT when set and
    # falls back to 7860 otherwise.
    listen_port = int(os.environ.get("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)