KittyMona committed on
Commit
a84fc4e
·
verified ·
1 Parent(s): dba938e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -0
app.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import whisper
3
+ import scipy.io.wavfile as wav
4
+ from groq import Groq
5
+ from gtts import gTTS
6
+ import gradio as gr
7
+ from pydub import AudioSegment
8
+
9
# Load the Whisper speech-to-text model once at import time.
# "base" balances speed and accuracy; use "tiny" if "base" is too slow, or
# "small"/"medium" for better accuracy at a higher compute cost.
model = whisper.load_model("base")

# Read the Groq API key from the environment. The key must never be
# hard-coded in source control: configure GROQ_API_KEY as a secret or
# environment variable in the deployment instead.
# NOTE(review): a key was previously committed in this file; treat it as
# compromised and revoke it.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise ValueError("❌ ERROR: Groq API key is missing! Set it in your environment.")

# Initialize the Groq client using the API key variable
client = Groq(api_key=GROQ_API_KEY)
22
+ # Function to transcribe audio using Whisper
23
def transcribe_audio(file_path):
    """Transcribe an audio file to text using the module-level Whisper model.

    The input is first re-encoded to WAV via pydub so that any format pydub
    can read is accepted. The intermediate WAV is written to a unique
    temporary file (so overlapping requests cannot overwrite each other's
    audio) and is always removed afterwards.

    Args:
        file_path: Path of the audio file to transcribe.

    Returns:
        The transcribed text on success, or a message starting with
        "❌ ERROR in Transcription:" if any step fails.
    """
    import tempfile

    converted_path = None
    try:
        print(f"📂 Processing File: {file_path}")

        # Convert audio to WAV (if needed)
        audio = AudioSegment.from_file(file_path)
        fd, converted_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)  # pydub reopens the path itself; release our handle
        audio.export(converted_path, format="wav")

        # Run Whisper transcription (FP32, since FP16 is not supported on CPU)
        result = model.transcribe(converted_path, fp16=False)
        return result["text"]

    except Exception as e:
        return f"❌ ERROR in Transcription: {str(e)}"

    finally:
        # Best-effort cleanup of the intermediate WAV file.
        if converted_path is not None:
            try:
                os.remove(converted_path)
            except OSError:
                pass
38
+
39
+ # Function to interact with Groq LLM
40
def chat_with_groq(text):
    """Send `text` as a single user message to the Groq chat API.

    Args:
        text: The user prompt to forward to the LLM.

    Returns:
        The content of the first completion choice, or a message starting
        with "❌ ERROR in LLM Interaction:" if the request fails.
    """
    try:
        conversation = [{"role": "user", "content": text}]
        completion = client.chat.completions.create(
            messages=conversation,
            model="llama-3.3-70b-versatile",
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as err:
        return f"❌ ERROR in LLM Interaction: {err!s}"
50
+
51
+ # Function to convert text to speech
52
def text_to_speech(text):
    """Synthesize English speech for `text` with gTTS and save it as MP3.

    Args:
        text: The text to speak.

    Returns:
        The path of the written MP3 file ("output_audio.mp3"), or a message
        starting with "❌ ERROR in TTS:" if synthesis or saving fails.
    """
    output_path = "output_audio.mp3"
    try:
        gTTS(text=text, lang="en").save(output_path)
    except Exception as err:
        return f"❌ ERROR in TTS: {err!s}"
    return output_path
61
+
62
+ # Main chatbot function (User Uploads Different Files)
63
def _is_error_message(value):
    """Return True if `value` is one of the pipeline helpers' error strings."""
    # The helpers signal failure with a string that starts with this prefix.
    # Matching the prefix (rather than the substring "ERROR" anywhere) keeps
    # legitimate transcripts or replies that mention "ERROR" from being
    # misreported as failures.
    return isinstance(value, str) and value.startswith("❌ ERROR")


def voice_chatbot(audio_file):
    """Run the full pipeline: speech-to-text, LLM reply, text-to-speech.

    Args:
        audio_file: Path of the uploaded audio file; falsy if nothing was
            uploaded.

    Returns:
        A `(text, audio_path)` tuple. On success, `text` is the LLM reply and
        `audio_path` is the synthesized speech file. On failure, `text` is
        the error message and `audio_path` is None.
    """
    if not audio_file:
        return "❌ Please upload an audio file!", None

    # Process Speech-to-Text
    text = transcribe_audio(audio_file)
    if _is_error_message(text):
        return text, None

    # Get AI response
    response_text = chat_with_groq(text)
    if _is_error_message(response_text):
        return response_text, None

    # Convert response to speech
    response_audio = text_to_speech(response_text)
    if _is_error_message(response_audio):
        return response_audio, None

    return response_text, response_audio
83
+
84
+ # Gradio UI for File Upload (No Default File)
85
# Input widget: the uploaded audio is handed to the callback as a file path.
audio_input = gr.Audio(type="filepath", label="Upload an Audio File")

# Wire the pipeline into a Gradio interface with a text and an audio output.
iface = gr.Interface(
    fn=voice_chatbot,
    inputs=audio_input,
    outputs=["text", "audio"],
    title="🎤 Real-Time Voice Chatbot",
    description="Upload an audio file to transcribe and chat with AI.",
)

# Start the Gradio app.
iface.launch()