Spaces:

EngrGullu
/

chatbot

Sleeping

App Files Files Community

EngrGullu committed on Dec 20, 2024

Commit

1935cdf

verified ·

1 Parent(s): d01926e

Create app.py

Browse files

Files changed (1) hide show

app.py +54 -0

app.py ADDED Viewed

	@@ -0,0 +1,54 @@

+import os
+import tempfile
+from io import BytesIO
+
+import gradio as gr
+import openai  # Using OpenAI as a replacement for Groq
+import whisper
+from gtts import gTTS
+from pydub import AudioSegment
+ GROQ_API_KEY = "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"
+client = Groq(api_key=GROQ_API_KEY)
+# Initialize models
+whisper_model = whisper.load_model("base")  # Load Whisper model
+# Define the voice-to-voice workflow
+def voice_to_voice(audio):
+    # 1. Transcribe audio using Whisper
+    transcription_result = whisper_model.transcribe(audio, fp16=False)
+    user_input = transcription_result["text"]
+    # 2. Get response from OpenAI's GPT (Replacing Groq's LLM)
+    response = openai.ChatCompletion.create(
+        model="gpt-4",
+        messages=[{"role": "user", "content": user_input}],
+    )
+    response_text = response.choices[0].message["content"]
+    # 3. Convert LLM response to audio using gTTS
+    tts = gTTS(text=response_text, lang="en")
+    audio_fp = BytesIO()
+    tts.write_to_fp(audio_fp)
+    audio_fp.seek(0)
+    # Convert gTTS output to a playable format using pydub
+    audio_segment = AudioSegment.from_file(audio_fp, format="mp3")
+    output_fp = BytesIO()
+    audio_segment.export(output_fp, format="mp3")
+    output_fp.seek(0)
+    return response_text, output_fp
+# Gradio interface
+iface = gr.Interface(
+    fn=voice_to_voice,
+    inputs=gr.Audio(type="filepath"),  # Removed 'source' argument
+    outputs=[gr.Textbox(label="Transcription"), gr.Audio(label="Response Audio")],
+    live=True,
+    title="Real-Time Voice-to-Voice Chatbot",
+    description="Speak into the microphone and get a spoken response from the chatbot.",
+)
+# Launch Gradio app
+iface.launch()