import gradio as gr
import torch
from transformers import pipeline
|
| |
|
| | |
# Pick the compute device for inference: prefer a CUDA GPU when present.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Whisper large-v3-turbo ASR pipeline, loaded once at module import time.
speech_to_text = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large-v3-turbo",
    device=device,
)
| |
|
def audio_to_audio_chatbot(audio):
    """Transcribe a recorded audio file and return the recognized text.

    Parameters
    ----------
    audio : str | None
        Filesystem path to the recording (``gr.Microphone(type="filepath")``),
        or ``None`` when no recording exists yet.

    Returns
    -------
    str
        The recognized text, or a German prompt asking for an audio file.
    """
    # Guard clause: the Microphone component fires events with None before a
    # recording exists.  Return a single string — the original returned a
    # 2-tuple here, which does not match the single Textbox output wired up
    # in the Blocks UI below.
    if audio is None:
        return "Bitte eine Audio-Datei hochladen."

    text = speech_to_text(audio)["text"]

    # These diagnostics were unreachable in the original (they sat after the
    # return statement); run them before returning so they actually log.
    print(f"User: {text}")
    response_text = f"Du hast gesagt: {text}"
    print(f"Bot: {response_text}")

    return text
| |
|
| | |
| | |
| | |
| |
|
| | |
# Gradio UI: speak into the microphone, see the recognized text.
# (A dead, commented-out gr.Interface variant that was kept here as a
# module-level string literal has been removed.)
with gr.Blocks() as speech:
    with gr.Row():
        sr_outputs = gr.Textbox(label="Antwort")
    with gr.Row():
        sr_inputs = gr.Microphone(type="filepath")
    # Re-run transcription whenever the recording changes.
    sr_inputs.change(audio_to_audio_chatbot, inputs=sr_inputs, outputs=sr_outputs)

# Launch only when executed as a script, not when this module is imported.
if __name__ == "__main__":
    speech.launch()
| |
|
| |
|