Spaces:

alidw
/

voice

Sleeping

App Files Files Community

voice / app.py

alidw

Update app.py

0029fbb verified about 2 months ago

raw

history blame contribute delete

5.98 kB

	import base64
	import hmac
	import os
	from io import BytesIO

	import numpy as np
	import gradio as gr
	import soundfile as sf
	from fastapi import FastAPI, Request, HTTPException
	from fastapi.responses import StreamingResponse
	from tts_arabic import tts as arabic_tts

	# --------------------------
	# إعدادات عامة
	# --------------------------

	# Shared secret that /tts callers must send in the "x-api-key" header.
	# Read from the TTS_API_KEY environment variable when it is set and non-empty.
	# NOTE(security): the fallback literal is kept only for backward compatibility;
	# it is committed to source control, so it should be rotated and supplied via
	# the environment instead.
	API_KEY = os.environ.get("TTS_API_KEY") or "nGHjs7oK8jp7OvxZ5dVZdY6JEf3DVwRF"
	# Sample rate (Hz) attached to every generated waveform.
	SAMPLE_RATE = 22050


	# --------------------------
	# قلب الــ TTS (دالة أساسية)
	# --------------------------

	def tts_core(
	    text: str,
	    speaker: str,
	    pace: float,
	    denoise: float,
	    volume: float,
	    vowelizer: str,
	    model_id: str,
	    vocoder_id: str,
	):
	    """Synthesize ``text`` with ``arabic_tts`` and return ``(audio, status)``.

	    Args:
	        text: Text to synthesize. Blank, whitespace-only or non-string input
	            is rejected with an error status.
	        speaker: Speaker id; converted with ``int()`` before the call.
	        pace: Converted with ``float()`` and forwarded as ``pace``.
	        denoise: Converted with ``float()`` and forwarded as ``denoise``.
	        volume: Converted with ``float()`` and forwarded as ``volume``.
	        vowelizer: Vowelizer name; the UI label "بدون تشكيل" is mapped to
	            ``None`` before being forwarded.
	        model_id: Forwarded unchanged as ``model_id``.
	        vocoder_id: Forwarded unchanged as ``vocoder_id``.

	    Returns:
	        A 2-tuple ``(audio, status)``. On success ``audio`` is
	        ``(SAMPLE_RATE, wave)`` with ``wave`` a float32 NumPy array whose peak
	        magnitude is at most 1.0; on any failure ``audio`` is ``None``.
	        ``status`` is always a human-readable Arabic message.

	    This function does not raise for bad input or synthesis errors; they are
	    reported through ``status``.
	    """
	    # Non-string input (e.g. a JSON number or null) is treated as missing
	    # text instead of crashing on ``.strip()``.
	    if not isinstance(text, str):
	        text = ""
	    text = text.strip()
	    if not text:
	        return None, "❌ الرجاء إدخال نص عربي."

	    try:
	        pace = float(pace)
	        denoise = float(denoise)
	        volume = float(volume)
	    except (TypeError, ValueError):
	        # TypeError covers None / list / dict values, ValueError covers
	        # unparsable strings.
	        return None, "❌ قيم pace / denoise / volume غير صحيحة."

	    vowelizer_arg = None if vowelizer == "بدون تشكيل" else vowelizer

	    try:
	        wave = arabic_tts(
	            text,
	            speaker=int(speaker),
	            pace=pace,
	            denoise=denoise,
	            volume=volume,
	            play=False,
	            pitch_mul=1.0,
	            pitch_add=0.0,
	            vowelizer=vowelizer_arg,
	            model_id=model_id,
	            vocoder_id=vocoder_id,
	            cuda=None,
	            save_to=None,
	        )

	        # Always work on a fresh float32 copy, whatever sequence type the
	        # library returned.
	        wave = np.array(wave, dtype=np.float32)

	        if wave.size == 0:
	            return None, "❌ الموجة الصوتية فارغة."

	        # Scale down only when the signal exceeds full scale, so quieter
	        # output keeps its requested volume.
	        max_abs = float(np.max(np.abs(wave)))
	        if max_abs > 1.0:
	            wave = wave / max_abs

	        # NOTE(review): SAMPLE_RATE is used for every vocoder_id; confirm that
	        # all supported vocoders (e.g. "vocos44") really emit audio at this rate.
	        return (SAMPLE_RATE, wave), "✅ تم توليد الصوت بنجاح."

	    except Exception as e:
	        # Boundary handler: any synthesis failure (including a non-integer
	        # speaker) is logged and reported as a status message.
	        print("TTS ERROR:", repr(e))
	        return None, f"❌ حدث خطأ أثناء التوليد: {e}"


	# --------------------------
	# دالة Gradio (تستدعي القلب)
	# --------------------------

	def gradio_generate_tts(
	    text,
	    speaker,
	    pace,
	    denoise,
	    volume,
	    vowelizer,
	    model_id,
	    vocoder_id,
	):
	    """Gradio callback: hand the eight widget values to ``tts_core`` as-is.

	    Returns exactly what ``tts_core`` returns, i.e. an ``(audio, status)``
	    pair that feeds the interface's audio and status outputs.
	    """
	    return tts_core(
	        text=text,
	        speaker=speaker,
	        pace=pace,
	        denoise=denoise,
	        volume=volume,
	        vowelizer=vowelizer,
	        model_id=model_id,
	        vocoder_id=vocoder_id,
	    )


	# --------------------------
	# واجهة Gradio
	# --------------------------

	# Input widgets, listed in the same order as gradio_generate_tts's parameters.
	_demo_inputs = [
	    gr.Textbox(
	        label="النص العربي",
	        lines=4,
	        placeholder="اكتب هنا الجملة أو الفقرة التي تريد تحويلها إلى صوت...",
	    ),
	    gr.Dropdown(choices=["0", "1", "2", "3"], value="1", label="المتحدث (Speaker ID)"),
	    gr.Slider(0.6, 1.4, value=1.0, step=0.05, label="سرعة الكلام (pace)"),
	    gr.Slider(0.0, 0.02, value=0.005, step=0.001, label="إزالة الضوضاء (denoise)"),
	    gr.Slider(0.4, 1.0, value=0.9, step=0.05, label="مستوى الصوت (volume)"),
	    gr.Dropdown(
	        choices=["بدون تشكيل", "shakkelha", "catt_eo"],
	        value="بدون تشكيل",
	        label="تشكيل تلقائي للنص (Vowelizer)",
	    ),
	    gr.Radio(
	        choices=["fastpitch", "mixer128", "mixer80"],
	        value="fastpitch",
	        label="موديل Text→Mel (model_id)",
	    ),
	    gr.Radio(
	        choices=["hifigan", "vocos", "vocos44"],
	        value="hifigan",
	        label="Vocoder (vocoder_id)",
	    ),
	]

	# Output widgets: the generated audio followed by the status message.
	_demo_outputs = [
	    gr.Audio(type="numpy", label="الصوت الناتج"),
	    gr.Textbox(label="الحالة", interactive=False),
	]

	# Gradio interface wiring the widgets above to gradio_generate_tts.
	demo = gr.Interface(
	    fn=gradio_generate_tts,
	    inputs=_demo_inputs,
	    outputs=_demo_outputs,
	    title="Arabic TTS (ONNX / CPU)",
	    description="نموذج tts_arabic لتحويل النص العربي إلى كلام على CPU.",
	)


	# --------------------------
	# تطبيق FastAPI + Endpoint /tts
	# --------------------------

	# FastAPI application object; the POST /tts route below is registered on it.
	app = FastAPI()


	@app.post("/tts")
	async def tts_api(request: Request):
	    """
	    Synthesize speech for a JSON request and return it as a WAV download.

	    POST /tts

	    Headers:
	    - x-api-key: must match the module-level API_KEY
	    - Content-Type: application/json

	    Body JSON (a JSON object; every field is optional, defaults shown):
	    {
	        "text": "",
	        "speaker": "1",
	        "pace": 1.0,
	        "denoise": 0.005,
	        "volume": 0.9,
	        "vowelizer": "بدون تشكيل",
	        "model_id": "fastpitch",
	        "vocoder_id": "hifigan"
	    }

	    Response:
	    - Binary audio/wav (StreamingResponse), sent as attachment "tts.wav"

	    Raises:
	        HTTPException(401): the x-api-key header is missing or wrong.
	        HTTPException(400): the body is not a valid JSON object, or tts_core
	            returned no audio (its status message becomes the detail).
	    """
	    key = request.headers.get("x-api-key")
	    # Compare as bytes in constant time so the check neither leaks timing
	    # information nor fails on non-ASCII header values.
	    if key is None or not hmac.compare_digest(
	        key.encode("utf-8"), API_KEY.encode("utf-8")
	    ):
	        raise HTTPException(status_code=401, detail="Invalid or missing API Key")

	    try:
	        body = await request.json()
	    except ValueError:
	        # Covers malformed JSON and undecodable bytes; report a client error
	        # instead of an unhandled 500.
	        raise HTTPException(
	            status_code=400, detail="Request body must be valid JSON"
	        ) from None
	    if not isinstance(body, dict):
	        # A JSON array/string/number has no .get(); reject it explicitly.
	        raise HTTPException(
	            status_code=400, detail="Request body must be a JSON object"
	        )

	    text = body.get("text", "")
	    speaker = body.get("speaker", "1")
	    pace = body.get("pace", 1.0)
	    denoise = body.get("denoise", 0.005)
	    volume = body.get("volume", 0.9)
	    vowelizer = body.get("vowelizer", "بدون تشكيل")
	    model_id = body.get("model_id", "fastpitch")
	    vocoder_id = body.get("vocoder_id", "hifigan")

	    # NOTE(review): tts_core is a plain synchronous call inside this async
	    # handler, so the event loop is occupied while synthesis runs.
	    audio, status = tts_core(
	        text, speaker, pace, denoise, volume, vowelizer, model_id, vocoder_id
	    )

	    if audio is None:
	        raise HTTPException(status_code=400, detail=status)

	    sr, data = audio

	    # Encode the NumPy waveform as an in-memory WAV file.
	    buffer = BytesIO()
	    sf.write(buffer, data, sr, format="WAV")
	    buffer.seek(0)

	    headers = {
	        "Content-Disposition": 'attachment; filename="tts.wav"'
	    }

	    # Return the buffer as a real audio file rather than a JSON payload.
	    return StreamingResponse(buffer, media_type="audio/wav", headers=headers)


	# Mount the Gradio interface at the root path "/" and rebind `app` to the
	# application returned by mount_gradio_app.
	app = gr.mount_gradio_app(app, demo, path="/")


	if __name__ == "__main__":
	    # uvicorn is only needed when this file is executed directly as a script,
	    # so it is imported here rather than at module import time.
	    from uvicorn import run as _serve

	    # Listen on all interfaces on port 7860.
	    _serve(app, host="0.0.0.0", port=7860)