"""Voice chat service: uploaded speech in, Arabic spoken reply out.

Pipeline for ``POST /voice``: save the upload, convert it to WAV with ffmpeg,
transcribe it with Whisper, generate a reply with FLAN-T5, and synthesize the
reply to an MP3 file with gTTS.
"""
import contextlib
import os
import subprocess
import tempfile
import threading

import uvicorn
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.concurrency import run_in_threadpool
from gtts import gTTS
from transformers import pipeline

app = FastAPI()

# Speech -> Text (Whisper) -- forced to Arabic.
stt = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-tiny",
    generate_kwargs={"language": "arabic"},
)

# Text -> Text (chat).
chat = pipeline(
    "text2text-generation",
    model="google/flan-t5-base",
)

# Requests are handled in worker threads (see ``voice``); this lock keeps model
# inference one-at-a-time, as it was when the handler ran on the event loop.
_inference_lock = threading.Lock()


@app.get("/")
def root():
    """Health check endpoint."""
    return {"status": "ok"}


def _remove_quietly(path):
    """Delete *path*, ignoring errors (best-effort temp-file cleanup)."""
    with contextlib.suppress(OSError):
        os.remove(path)


def _process_voice(audio_in):
    """Run the blocking speech -> text -> reply -> speech pipeline.

    Args:
        audio_in: Path of the uploaded audio file, in whatever container
            format the client sent.

    Returns:
        The response payload for ``/voice``: the transcribed text, the reply
        text, and the path of the synthesized reply MP3.

    Raises:
        HTTPException: 400 if ffmpeg cannot decode the upload, 422 if no
            speech was recognized, 502 if the chat model returned no text.
    """
    # Convert whatever was uploaded to WAV (works around phone recording
    # formats).
    audio_wav = audio_in + ".wav"
    try:
        try:
            subprocess.run(
                ["ffmpeg", "-y", "-i", audio_in, audio_wav],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                check=True,
            )
        except subprocess.CalledProcessError as err:
            raise HTTPException(
                status_code=400,
                detail="Could not decode the uploaded audio",
            ) from err

        # Speech -> Text, then chat response.
        with _inference_lock:
            text_in = stt(audio_wav)["text"]
            if not text_in.strip():
                raise HTTPException(
                    status_code=422,
                    detail="No speech recognized in the uploaded audio",
                )
            reply = chat(text_in, max_new_tokens=80)[0]["generated_text"]
    finally:
        # The intermediate WAV is only needed for transcription.
        _remove_quietly(audio_wav)

    # gTTS rejects empty text, so fail with a clear error instead.
    if not reply.strip():
        raise HTTPException(
            status_code=502,
            detail="Chat model returned an empty reply",
        )

    # Text -> Speech (Arabic). This file is kept: its path is returned to
    # the caller.
    audio_out = audio_in + "_reply.mp3"
    tts = gTTS(reply, lang="ar")
    tts.save(audio_out)

    return {
        "heard_text": text_in,
        "reply_text": reply,
        "audio_file": audio_out,
    }


@app.post("/voice")
async def voice(file: UploadFile = File(...)):
    """Transcribe an uploaded recording and answer it with Arabic speech.

    Args:
        file: The uploaded audio (e.g. m4a / mp3 / wav).

    Returns:
        A dict with ``heard_text`` (transcription), ``reply_text`` (chat
        reply) and ``audio_file`` (server-side path of the reply MP3).

    Raises:
        HTTPException: 400 for an empty or undecodable upload, 422 when no
            speech is recognized, 502 when the chat model returns no text.
    """
    payload = await file.read()
    if not payload:
        raise HTTPException(
            status_code=400,
            detail="Uploaded audio file is empty",
        )

    # Save the audio exactly as received (m4a / mp3 / wav).
    with tempfile.NamedTemporaryFile(delete=False) as f:
        f.write(payload)
        audio_in = f.name

    try:
        # ffmpeg, model inference and gTTS all block; run them off the
        # event loop so other requests stay responsive.
        return await run_in_threadpool(_process_voice, audio_in)
    finally:
        _remove_quietly(audio_in)


if __name__ == "__main__":
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=int(os.environ.get("PORT", 7860)),
    )