Abbas133 committed on
Commit
0fdfaad
·
verified ·
1 Parent(s): c25b47c

Update App.py

Browse files
Files changed (1) hide show
  1. App.py +58 -0
App.py CHANGED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import whisper
4
+ from gtts import gTTS
5
+ import io
6
+ from groq import Groq
7
+
8
# Read the Groq API key from the environment instead of embedding it in the
# source: a key committed to the repository is readable by anyone with access
# to it and must be treated as compromised.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    # Fail at startup with an actionable message rather than on the first
    # chat request.
    raise RuntimeError(
        "GROQ_API_KEY is not set. Define it as an environment variable "
        "(or as a secret of the hosting platform) before starting the app."
    )

# Groq client shared by every request handled by this process.
client = Groq(api_key=GROQ_API_KEY)

# Load the Whisper speech-to-text model once, at import time, so individual
# requests do not pay the loading cost.
# Other sizes such as "small", "medium" or "large" can be used instead.
model = whisper.load_model("base")
16
+
17
def process_audio(file_path):
    """Transcribe an audio file, ask the Groq chat model, and voice the reply.

    Args:
        file_path: Path to the audio file to process. May be ``None`` or
            empty when no audio is available, in which case nothing is
            transcribed.

    Returns:
        A ``(text, audio_path)`` tuple. ``text`` is the model's reply, or a
        human-readable status/error message. ``audio_path`` is
        ``"response.mp3"`` when speech was generated, otherwise ``None``.
    """
    # No recording/upload to work with: answer directly instead of letting
    # the audio loader fail on a missing path.
    if not file_path:
        return "No audio was provided.", None

    try:
        # Decode the audio file and transcribe it with Whisper.
        audio = whisper.load_audio(file_path)
        result = model.transcribe(audio)
        text = result["text"]

        # Silence or noise yields an empty transcript; there is nothing
        # meaningful to send to the chat model in that case.
        if not text.strip():
            return "No speech was detected in the audio.", None

        # Generate a response using Groq.
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": text}],
            model="llama3-8b-8192",  # Replace with the correct model if necessary
        )

        # The message content may be missing; normalise it to a string
        # before stripping.
        response_message = (chat_completion.choices[0].message.content or "").strip()

        # gTTS cannot synthesise empty text, so return the (empty) reply
        # without audio rather than failing.
        if not response_message:
            return response_message, None

        # Convert the reply to speech and stream the MP3 straight into the
        # output file (no intermediate in-memory copy needed).
        tts = gTTS(response_message)
        with open("response.mp3", "wb") as audio_file:
            tts.write_to_fp(audio_file)

        return response_message, "response.mp3"

    except Exception as e:
        # UI boundary: report the failure in the text output instead of
        # propagating the exception to the caller.
        return f"An error occurred: {e}", None
50
+
51
# Gradio front end: a single audio input handed to the callback as a file
# path, and two outputs matching the (text, audio_path) tuple returned by
# process_audio. live=True re-runs the callback whenever the input changes.
iface = gr.Interface(
    process_audio,
    gr.Audio(type="filepath"),
    [
        gr.Textbox(label="Response Text"),
        gr.Audio(label="Response Audio"),
    ],
    live=True,
)

# Start the web server for the interface.
iface.launch()