Spaces:

ibrahim313
/

AudioToAudioWithAi

Build error

App Files Files Community

ibrahim313 committed on Aug 18, 2024

Commit

19ab03c

verified ·

1 Parent(s): 56af164

Create app.py

Browse files

Files changed (1) hide show

app.py +129 -0

app.py ADDED Viewed

	@@ -0,0 +1,129 @@

+import os
+import tempfile
+import whisper
+import gradio as gr
+from gtts import gTTS
+from groq import Groq
+# Set up Groq API key
+os.environ['GROQ_API_KEY'] = 'your_groq_api_key_here'  # Replace with your actual Groq API key
+groq_client = Groq(api_key=os.environ.get('GROQ_API_KEY'))
+# Load Whisper model
+whisper_model = whisper.load_model("base")
+def process_audio(audio_file):
+    try:
+        # Transcribe audio using Whisper
+        result = whisper_model.transcribe(audio_file)
+        user_text = result['text']
+        # Generate response using Llama 8b model with Groq API
+        chat_completion = groq_client.chat.completions.create(
+            messages=[
+                {
+                    "role": "user",
+                    "content": user_text,
+                }
+            ],
+            model="llama3-8b-8192",
+        )
+        response_text = chat_completion.choices[0].message.content
+        # Convert response text to speech using gTTS
+        tts = gTTS(text=response_text, lang='en')
+        audio_file_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3').name
+        tts.save(audio_file_path)
+        return response_text, audio_file_path
+    except Exception as e:
+        return str(e), None
+# Custom stylesheet for the Gradio interface; it is handed to gr.Interface
+# through its `css` argument. The text below is a runtime string, so the
+# /* ... */ notes inside it are CSS comments, not Python comments.
+# NOTE(review): the selectors other than `.gradio-container` are assumed to
+# match class names in the rendered page — TODO confirm against the DOM.
+css = """
+/* General body styling */
+body {
+    background: #f0f2f5; /* Light background for the app */
+    font-family: 'Arial', sans-serif;
+    color: #333;
+}
+/* Container styling */
+.gradio-container {
+    background: linear-gradient(135deg, #6e45e2, #88d3ce);
+    border-radius: 15px;
+    box-shadow: 0px 4px 20px rgba(0, 0, 0, 0.1);
+    padding: 20px;
+    margin: auto; /* Centering container */
+    width: 80%; /* Adjusted width for responsiveness */
+    max-width: 700px; /* Max width for larger screens */
+}
+/* Title styling */
+.gradio-title {
+    color: #0056b3; /* Deep blue color for the title */
+    text-align: center;
+    margin-bottom: 20px;
+    font-size: 28px;
+    font-weight: bold;
+}
+/* Input and output styling */
+.gradio-input, .gradio-output {
+    border-radius: 10px;
+    box-shadow: 0px 4px 10px rgba(0, 0, 0, 0.15);
+}
+/* Textbox styling */
+.gradio-textbox {
+    border: 2px solid #6e45e2;
+    background: #fff;
+    color: #333;
+    padding: 10px;
+    font-size: 16px;
+}
+/* Number component styling */
+.gradio-number {
+    border: 2px solid #6e45e2;
+    background: #fff;
+    color: #333;
+    border-radius: 8px;
+    padding: 10px;
+}
+/* Button styling */
+.gradio-button {
+    background: #6e45e2; /* Deep blue color for button */
+    color: #fff;
+    border: none;
+    border-radius: 8px;
+    padding: 12px 24px;
+    font-size: 18px;
+    cursor: pointer;
+    transition: background 0.3s, transform 0.3s;
+}
+.gradio-button:hover {
+    background: #5a3d9c; /* Darker blue on hover */
+    transform: scale(1.05); /* Slightly enlarge on hover */
+}
+"""
+# Create Gradio interface with custom CSS
+iface = gr.Interface(
+    fn=process_audio,
+    inputs=gr.Audio(type="filepath", label="Upload Audio File"),
+    outputs=[gr.Textbox(label="Response Text"), gr.Audio(label="Response Audio")],
+    live=True,
+    css=css,
+    title="Audio Transcription and Response Generator",
+    description="Upload an audio file to get a transcription and a response generated by the Llama 8b model.",
+    article="<h3>How to Use:</h3><ul><li>Upload an audio file.</li><li>Receive a transcribed text and response.</li></ul>"
+)
+# Launch the Gradio app
+if __name__ == "__main__":
+    iface.launch()