Abbas0786 committed on
Commit
6f18da0
·
verified ·
1 Parent(s): 1b5e193

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -0
app.py CHANGED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import whisper
4
+ from gtts import gTTS
5
+ import io
6
+ from groq import Groq
7
+
8
# Read the Groq API key from the environment instead of hard-coding it:
# credentials must never be committed to source control.
# NOTE(review): the key that was previously committed in this file should be
# treated as compromised and revoked.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Initialize the Groq client with the key read above.
client = Groq(api_key=GROQ_API_KEY)

# Load the Whisper model once at startup so every request reuses it.
model = whisper.load_model("base")  # You can choose other models like "small", "medium", "large"
15
+
16
def process_audio(file_path):
    """Transcribe a recording, get a reply from Groq, and voice that reply.

    Args:
        file_path: Path to the recorded audio file. May be ``None`` or empty
            if the function is invoked without a recording.

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the model's
        reply and ``audio_path`` is the path of an MP3 file containing the
        spoken reply. When there is nothing to process, or any step fails,
        ``text`` is a human-readable message and ``audio_path`` is ``None``.
    """
    import tempfile  # local import: only this function needs it

    # Nothing was recorded/uploaded: answer directly instead of surfacing a
    # low-level error from the audio loader.
    if not file_path:
        return "No audio input received.", None

    try:
        # Load the audio file
        audio = whisper.load_audio(file_path)

        # Transcribe the audio using Whisper
        result = model.transcribe(audio)
        text = result["text"].strip()
        if not text:
            # Do not send an empty prompt to the language model.
            return "No speech was detected in the audio.", None

        # Generate a response using Groq
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": text}],
            model="llama3-8b-8192",  # Replace with the correct model if necessary
        )

        # Access the response using dot notation; tolerate a missing content
        # field so it is reported as an empty reply rather than a crash.
        response_message = (chat_completion.choices[0].message.content or "").strip()
        if not response_message:
            return "The model returned an empty response.", None

        # Convert the response text to speech, buffering in memory first so
        # no file is created unless synthesis succeeds.
        tts = gTTS(response_message)
        response_audio_io = io.BytesIO()
        tts.write_to_fp(response_audio_io)

        # Write to a unique temporary file: a fixed name such as
        # "response.mp3" would be overwritten by concurrent requests.
        # delete=False keeps the file on disk after the handle is closed so
        # the caller can read it from the returned path.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
            audio_file.write(response_audio_io.getvalue())
            response_path = audio_file.name

        # Return the response text and the path to the saved audio file
        return response_message, response_path

    except Exception as e:
        # Top-level boundary for the UI: report the failure as text instead
        # of letting the request crash.
        return f"An error occurred: {e}", None
49
+
50
# Build the UI components first, then wire them to the processing function.
audio_input = gr.Audio(type="filepath")  # the handler receives a file path
text_output = gr.Textbox(label="Response Text")
audio_output = gr.Audio(label="Response Audio")

# live=True is kept as in the original configuration.
iface = gr.Interface(
    fn=process_audio,
    inputs=audio_input,
    outputs=[text_output, audio_output],
    live=True,
)

# Start the Gradio app.
iface.launch()