Spaces:

mohamedtsou
/

speech

Sleeping

mohamedtsou committed on Jan 29

Commit

a4f74bd

verified ·

1 Parent(s): 9224c83

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,18 +1,21 @@
 from fastapi import FastAPI, File, UploadFile
 from transformers import pipeline
 from gtts import gTTS
-import tempfile, os, uvicorn
 app = FastAPI()
-# 🎤 صوت → نص (عربي)
 stt = pipeline(
     "automatic-speech-recognition",
     model="openai/whisper-tiny",
     generate_kwargs={"language": "arabic"}
 )
-# 🧠 نص → جواب
 chat = pipeline(
     "text2text-generation",
     model="google/flan-t5-base"
@@ -24,19 +27,27 @@ def root():
 @app.post("/voice")
 async def voice(file: UploadFile = File(...)):
-    # حفظ الصوت
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
         f.write(await file.read())
         audio_in = f.name
-    # 1) STT
-    text_in = stt(audio_in)["text"]
-    # 2) Chat
     reply = chat(text_in, max_new_tokens=80)[0]["generated_text"]
-    # 3) TTS عربي (gTTS)
-    audio_out = audio_in.replace(".wav", "_reply.mp3")
     tts = gTTS(reply, lang="ar")
     tts.save(audio_out)
@@ -47,5 +58,8 @@ async def voice(file: UploadFile = File(...)):
     }
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0",
-                port=int(os.environ.get("PORT", 7860)))

 from fastapi import FastAPI, File, UploadFile
 from transformers import pipeline
 from gtts import gTTS
+import tempfile
+import os
+import uvicorn
+import subprocess
 app = FastAPI()
+# 🎤 Speech → Text (Whisper) — forced to Arabic
 stt = pipeline(
     "automatic-speech-recognition",
     model="openai/whisper-tiny",
     generate_kwargs={"language": "arabic"}
 )
+# 🧠 Text → Text (Chat)
 chat = pipeline(
     "text2text-generation",
     model="google/flan-t5-base"
 @app.post("/voice")
 async def voice(file: UploadFile = File(...)):
+    # 1️⃣ Save the uploaded audio file as received (m4a / mp3 / wav)
+    with tempfile.NamedTemporaryFile(delete=False) as f:
         f.write(await file.read())
         audio_in = f.name
+    # 2️⃣ Convert any audio to WAV (fixes issues with phone recordings)
+    audio_wav = audio_in + ".wav"
+    subprocess.run(
+        ["ffmpeg", "-y", "-i", audio_in, audio_wav],
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.DEVNULL
+    )
+    # 3️⃣ Speech → Text
+    text_in = stt(audio_wav)["text"]
+    # 4️⃣ Chat response
     reply = chat(text_in, max_new_tokens=80)[0]["generated_text"]
+    # 5️⃣ Text → Speech (Arabic)
+    audio_out = audio_in + "_reply.mp3"
     tts = gTTS(reply, lang="ar")
     tts.save(audio_out)
     }
 if __name__ == "__main__":
+    uvicorn.run(
+        app,
+        host="0.0.0.0",
+        port=int(os.environ.get("PORT", 7860))
+    )