Spaces:

mohamedtsou
/

speech

Sleeping

speech / app.py

Update app.py

a4f74bd verified 19 days ago

1.62 kB

	import os
	import subprocess
	import tempfile

	import uvicorn
	from fastapi import FastAPI, File, HTTPException, UploadFile
	from gtts import gTTS
	from transformers import pipeline

	# FastAPI application object; the route decorators in this file attach to it.
	app = FastAPI()

	# Speech -> text: Whisper (tiny checkpoint), with generation forced to Arabic.
	stt = pipeline(
	    task="automatic-speech-recognition",
	    model="openai/whisper-tiny",
	    generate_kwargs=dict(language="arabic"),
	)

	# Text -> text: FLAN-T5 (base checkpoint), used to produce the chat reply.
	chat = pipeline(task="text2text-generation", model="google/flan-t5-base")

	@app.get("/")
	def root():
	    """Respond to ``GET /`` with a fixed status payload."""
	    status_payload = {"status": "ok"}
	    return status_payload

	@app.post("/voice")
	async def voice(file: UploadFile = File(...)):
	    """Transcribe an uploaded audio clip, generate a reply, and synthesize it.

	    The upload is written to a temporary file, converted to WAV with ffmpeg,
	    transcribed by ``stt``, answered by ``chat`` and spoken with gTTS.

	    Args:
	        file: Uploaded audio, saved exactly as received (m4a / mp3 / wav).

	    Returns:
	        A dict with ``heard_text`` (the transcription), ``reply_text`` (the
	        generated reply) and ``audio_file`` (path of the saved reply MP3).

	    Raises:
	        HTTPException: 400 when ffmpeg fails to convert the upload to WAV.
	    """
	    # NOTE(review): the ffmpeg, model and gTTS calls below are plain
	    # synchronous calls inside an async handler, so they presumably hold up
	    # the event loop while they run -- confirm whether that is acceptable.

	    # 1. Save the upload exactly as it arrived. Read it first so a failed
	    #    read does not leave an empty temp file behind.
	    payload = await file.read()
	    with tempfile.NamedTemporaryFile(delete=False) as f:
	        f.write(payload)
	        audio_in = f.name
	    audio_wav = audio_in + ".wav"

	    try:
	        # 2. Convert whatever was uploaded to WAV (works around problems
	        #    with recordings coming from phones).
	        try:
	            subprocess.run(
	                ["ffmpeg", "-y", "-i", audio_in, audio_wav],
	                stdout=subprocess.DEVNULL,
	                stderr=subprocess.DEVNULL,
	                check=True,  # fail here instead of later inside the STT call
	            )
	        except subprocess.CalledProcessError as err:
	            raise HTTPException(
	                status_code=400,
	                detail="Could not decode the uploaded audio file.",
	            ) from err

	        # 3. Speech -> text.
	        text_in = stt(audio_wav)["text"]
	    finally:
	        # The raw upload and the intermediate WAV are not needed once
	        # transcription has finished (or failed); remove them so temp files
	        # do not pile up with every request.
	        for path in (audio_in, audio_wav):
	            try:
	                os.remove(path)
	            except FileNotFoundError:
	                # ffmpeg may not have produced the WAV; nothing to clean up.
	                pass

	    # 4. Chat response.
	    reply = chat(text_in, max_new_tokens=80)[0]["generated_text"]

	    # 5. Text -> speech (Arabic). This file is kept on disk because its path
	    #    is part of the response.
	    audio_out = audio_in + "_reply.mp3"
	    tts = gTTS(reply, lang="ar")
	    tts.save(audio_out)

	    return {
	        "heard_text": text_in,
	        "reply_text": reply,
	        "audio_file": audio_out,
	    }

	if __name__ == "__main__":
	    # Listen on all interfaces; the port is read from the PORT environment
	    # variable and falls back to 7860 when it is not set.
	    listen_port = int(os.getenv("PORT", 7860))
	    uvicorn.run(app, host="0.0.0.0", port=listen_port)