Spaces:

UmerSajid
/

AudioBasedResponse

Sleeping

App Files Files Community

UmerSajid committed on Dec 16, 2024

Commit

9cef7ee

verified ·

1 Parent(s): ae97343

Create app.py

Browse files

Files changed (1) hide show

app.py +51 -0

app.py ADDED Viewed

	@@ -0,0 +1,51 @@

+import os
+import whisper
+import gradio as gr
+from gtts import gTTS
+from groq import Groq
+# Set up Groq client (replace with your API key)
+client = Groq(api_key=os.environ.get("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"))
+# Load Whisper model
+whisper_model = whisper.load_model("base")
+# Function to process audio input
+def process_audio_realtime(audio_file):
+    """
+    Real-time processing of audio.
+    1. Transcribe audio with Whisper.
+    2. Process transcription using Llama.
+    3. Convert Llama output to audio using gTTS.
+    """
+    # Step 1: Transcribe the audio to text using Whisper
+    transcription = whisper_model.transcribe(audio_file)["text"]
+    # Step 2: Process transcription using Llama model via Groq API
+    llama_response = client.chat.completions.create(
+        messages=[{"role": "user", "content": transcription}],
+        model="llama3-8b-8192",  # Replace with your actual Llama model name
+        stream=False
+    ).choices[0].message.content
+    # Step 3: Convert Llama response to audio using gTTS
+    tts = gTTS(text=llama_response, lang="en")
+    audio_output_path = "generated_output.mp3"
+    tts.save(audio_output_path)
+    return llama_response, audio_output_path
+# Create Gradio interface for real-time simulation
+interface = gr.Interface(
+    fn=process_audio_realtime,
+    inputs=gr.Audio(type="filepath", label="Input Audio"),  # Removed `source` argument
+    outputs=[
+        gr.Textbox(label="Processed Text"),  # Display processed text in real-time
+        gr.Audio(type="filepath", label="Generated Audio")  # Output audio
+    ],
+    live=True,  # Enable real-time behavior
+    title="Real-Time Audio-to-Audio Application"
+)
+# Launch Gradio app
+interface.launch()