Spaces:

LonewolfT141
/

Text_To_Speech_Model

Sleeping

App Files Files Community

Text_To_Speech_Model / app.py

LonewolfT141

Create app.py

1166ec4 verified 11 months ago

raw

history blame contribute delete

2.7 kB



	# ==================================
	# 2) IMPORT LIBRARIES
	# ==================================
	import os
	import tempfile

	import gradio as gr
	import whisper
	from zyphra import ZyphraClient # Assumes the Zyphra package provides this client

	# ==================================
	# 3) LOAD WHISPER MODEL
	# ==================================
	# Name of the Whisper checkpoint to load; kept in one place so it is easy to swap.
	_WHISPER_CHECKPOINT = "base"

	# Loaded once at import time and shared by every call to process_media.
	model = whisper.load_model(_WHISPER_CHECKPOINT)

	# ==================================
	# 4) DEFINE PROCESSING FUNCTION
	# ==================================
	def process_media(media_file):
	    """Translate uploaded media to English text and synthesize it as speech.

	    The upload is transcribed and translated into English with the
	    module-level Whisper ``model``. The resulting text is sent to the Zyphra
	    TTS service through ``ZyphraClient`` and the returned audio is written to
	    a temporary ``.wav`` file that is left on disk for the UI to read.

	    The Zyphra API key is read from the ``ZYPHRA_API_KEY`` environment
	    variable; credentials must not be embedded in source code.

	    Args:
	        media_file: Path to the uploaded audio/video file (``str`` or
	            ``os.PathLike``), or an object exposing that path as ``.name``.
	            ``None`` means nothing was uploaded.

	    Returns:
	        ``(audio_path, english_text)`` on success. On any failure the result
	        is ``(None, "Error: <reason>")``; errors are reported to the caller
	        this way rather than raised.
	    """
	    if media_file is None:
	        return None, "Error: No media file provided."

	    # Fail fast, before the expensive transcription, if no credential is configured.
	    api_key = os.environ.get("ZYPHRA_API_KEY")
	    if not api_key:
	        return None, "Error: ZYPHRA_API_KEY environment variable is not set."

	    # Whisper needs a filesystem path. Accept a plain path directly, and
	    # otherwise fall back to the ``.name`` attribute of a file-like wrapper.
	    if isinstance(media_file, (str, os.PathLike)):
	        media_path = os.fspath(media_file)
	    else:
	        media_path = getattr(media_file, "name", media_file)

	    try:
	        # Transcribe and translate the media into English.
	        result = model.transcribe(media_path, task="translate")
	        english_transcription = result["text"]

	        with ZyphraClient(api_key=api_key) as client:
	            # Get audio bytes for the given text; adjust speaking_rate if desired.
	            audio_data = client.audio.speech.create(
	                text=english_transcription,
	                speaking_rate=15,
	            )

	        # delete=False keeps the file after the handle closes so the caller
	        # can still read it by path; the context manager guarantees the
	        # handle is closed even if the write fails.
	        # NOTE(review): the file is named .wav but no output format is
	        # requested from the TTS call -- confirm the service's default format.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
	            temp_audio.write(audio_data)
	            synthesized_audio = temp_audio.name

	        return synthesized_audio, english_transcription

	    except Exception as e:
	        # Top-level boundary for the UI callback: log and surface the message
	        # in the text output instead of crashing the request.
	        print("Error during processing:", e)
	        return None, f"Error: {str(e)}"

	# ==================================
	# 5) BUILD GRADIO INTERFACE
	# ==================================
	# Input widget: a single upload restricted to audio and video files.
	_media_upload = gr.File(label="Upload Audio or Video", file_types=["audio", "video"])

	# Output widgets, in the order process_media returns its values:
	# the synthesized audio file path first, then the English text.
	_result_widgets = [
	    gr.Audio(type="filepath", label="Synthesized English Audio"),
	    gr.Textbox(label="English Subtitles"),
	]

	# Blurb shown under the page title.
	_app_description = (
	    "Upload an audio or video file in any language. The file is transcribed and translated into "
	    "English using Whisper, then converted to speech via the Zyphra TTS service using ZyphraClient."
	)

	interface = gr.Interface(
	    fn=process_media,
	    inputs=_media_upload,
	    outputs=_result_widgets,
	    title="Multilingual Media to English TTS Pipeline (Zyphra)",
	    description=_app_description,
	)

	# ==================================
	# 6) LAUNCH THE APP
	# ==================================
	# Start the Gradio server only when this file is executed as a script, so
	# that importing the module (for example from a test) does not launch the app.
	if __name__ == "__main__":
	    interface.launch(debug=True)