import gradio as gr
import torch
from transformers import pipeline
|
| |
|
| | |
# Pick the compute device for inference: prefer a CUDA GPU when present.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Whisper large-v3-turbo ASR pipeline, loaded once at module import time.
speech_to_text = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large-v3-turbo",
    device=device,
)
| |
|
def audio_to_audio_chatbot(audio):
    """Transcribe a recorded audio file and return the recognized text.

    Parameters
    ----------
    audio : str | None
        Filesystem path to the recording (``gr.Microphone(type="filepath")``),
        or ``None`` when no recording exists yet.

    Returns
    -------
    str
        The recognized text, or a German prompt asking for an audio file.
    """
    # Guard clause: the Microphone component fires events with None before a
    # recording exists.  Return a single string — the original returned a
    # 2-tuple here, which does not match the single Textbox output wired up
    # in the Blocks UI below.
    if audio is None:
        return "Bitte eine Audio-Datei hochladen."

    text = speech_to_text(audio)["text"]

    # These diagnostics were unreachable in the original (they sat after the
    # return statement); run them before returning so they actually log.
    print(f"User: {text}")
    response_text = f"Du hast gesagt: {text}"
    print(f"Bot: {response_text}")

    return text
| |
|
| | |
| | |
| | |
| |
|
| | |
# Gradio UI: speak into the microphone, see the recognized text.
# (A dead, commented-out gr.Interface variant that was kept here as a
# module-level string literal has been removed.)
with gr.Blocks() as speech:
    with gr.Row():
        sr_outputs = gr.Textbox(label="Antwort")
    with gr.Row():
        sr_inputs = gr.Microphone(type="filepath")
    # Re-run transcription whenever the recording changes.
    sr_inputs.change(audio_to_audio_chatbot, inputs=sr_inputs, outputs=sr_outputs)

# Launch only when executed as a script, not when this module is imported.
if __name__ == "__main__":
    speech.launch()
| |
|
| |
|