Spaces:

Engineer786
/

Voice-into-Voice_Chatbot

Sleeping

App Files Files Community

Engineer786 committed on Dec 15, 2024

Commit

dd57f2a

verified ·

1 Parent(s): 8dcf6d5

Create app.py

Browse files

Files changed (1) hide show

app.py +56 -0

app.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import gradio as gr
+from gtts import gTTS
+from groq import Groq
+import whisper
+import tempfile
+import os
+# Initialize Groq client
+api="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"
+client = Groq(api_key=api)
+# Load the Whisper model locally
+whisper_model = whisper.load_model("base")  # Options: "tiny", "base", "small", "medium", "large"
+# Function to handle transcription, LLM response, and audio synthesis
+def voice_to_voice(audio_file):
+    try:
+        # 1. Transcribe the audio using the local Whisper model
+        result = whisper_model.transcribe(audio_file)
+        user_input = result["text"]
+        # 2. Interact with the LLM via Groq API
+        chat_completion = client.chat.completions.create(
+            messages=[
+                {
+                    "role": "user",
+                    "content": user_input,
+                }
+            ],
+            model="llama3-8b-8192",
+            stream=False,
+        )
+        response_text = chat_completion.choices[0].message.content
+        # 3. Convert text response to speech using GTTS
+        tts = gTTS(response_text)
+        temp_audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
+        tts.save(temp_audio_file.name)
+        return temp_audio_file.name, response_text
+    except Exception as e:
+        return None, f"Error: {str(e)}"
+# Build Gradio interface
+with gr.Blocks() as demo:
+    gr.Markdown("## Real-Time Voice-to-Voice Chatbot")
+    audio_input = gr.Audio(type="filepath", label="Speak Something")
+    audio_output = gr.Audio(label="Bot Response")
+    text_output = gr.Textbox(label="Transcription & Response")
+    btn = gr.Button("Process")
+    btn.click(voice_to_voice, inputs=audio_input, outputs=[audio_output, text_output])
+# Launch the interface
+demo.launch()