# app.py — voice chatbot: Whisper (speech-to-text) -> Groq LLM -> gTTS (text-to-speech),
# served through a Gradio audio-in / audio-out interface.
# Setup — run these in a Colab/Jupyter cell, NOT in this .py file:
# the leading "!" is notebook shell magic and is invalid Python syntax here.
#   !pip install --upgrade pip setuptools wheel
#   !pip install git+https://github.com/openai/whisper.git --no-cache-dir
#   !pip install gradio --upgrade
#   !pip install gTTS --upgrade
#   !pip install groq
import whisper
import numpy as np
from groq import Groq
import os
from gtts import gTTS
import gradio as gr
# Initialize Whisper model
# Loaded once at import time and shared by transcribe_audio();
# "base" is the small Whisper checkpoint (faster, less accurate than larger ones).
model = whisper.load_model("base")
# Function to transcribe audio using Whisper
def transcribe_audio(audio_file):
    """Transcribe an audio file to text using the module-level Whisper model.

    Parameters:
        audio_file: Path to the audio file to transcribe.

    Returns:
        The transcribed text as a string.
    """
    transcription = model.transcribe(audio_file)
    return transcription["text"]
# Function to interact with Groq LLM
def generate_response(transcription):
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
chat_completion = client.chat.completions.create(
messages=[{"role": "user", "content": transcription}],
model="llama3-groq-8b-8192-tool-use-preview",
)
response = chat_completion.choices[0].message.content
return response
# Function to convert text into speech using gTTS
def text_to_speech(text):
tts = gTTS(text)
tts.save("output.mp3")
return "output.mp3"
# Main function to handle the chatbot interaction
# Main function to handle the chatbot interaction
def chatbot_interaction(audio):
    """Run the full pipeline: speech -> text -> LLM reply -> speech.

    Parameters:
        audio: Path to the user's recorded or uploaded audio file.

    Returns:
        Path to an MP3 file containing the spoken LLM response.
    """
    # Speech-to-text via Whisper
    user_text = transcribe_audio(audio)
    print(f"Transcription: {user_text}")

    # Ask the Groq-hosted LLM for a reply
    reply = generate_response(user_text)
    print(f"LLM Response: {reply}")

    # Text-to-speech via gTTS; returns the saved MP3 path
    return text_to_speech(reply)
# Gradio interface to deploy the chatbot
def chatbot_ui():
# Input: Audio file upload, Output: Audio file with the response
gr.Interface(fn=chatbot_interaction, inputs="audio", outputs="audio").launch()
# Run the chatbot UI only when executed as a script, so importing this
# module (e.g. for testing) does not launch the web server as a side effect.
if __name__ == "__main__":
    chatbot_ui()