import os

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS

# Groq API key must be provided via the environment (e.g. Hugging Face
# Space Secrets under the name "api_key"). Client construction does not
# validate the key; a missing/invalid key surfaces on the first request.
GROQ_API_KEY = os.getenv("api_key")
client = Groq(api_key=GROQ_API_KEY)

# Whisper "base" model, loaded once at import time so each request pays
# only for transcription, not for model loading.
model = whisper.load_model("base")


def get_llm_response(user_input):
    """Return the LLM's reply to *user_input* via the Groq chat API.

    On any API failure this returns a human-readable error string rather
    than raising, so the Gradio UI can display the problem to the user.
    """
    try:
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": user_input}],
            model="llama3-8b-8192",
        )
        return chat_completion.choices[0].message.content
    except Exception as e:  # UI boundary: surface the error as text.
        return f"Error fetching LLM response: {e}"


def text_to_speech(text, output_audio="output_audio.mp3"):
    """Synthesize *text* to an MP3 file and return its path.

    Returns None on failure so callers can tell a usable audio path from
    an error. (Previously an error *string* was returned, which the
    caller then handed to gr.Audio as if it were a file path.)
    """
    try:
        tts = gTTS(text)
        tts.save(output_audio)
        return output_audio
    except Exception as e:  # UI boundary: degrade to "no audio".
        print(f"Error generating audio: {e}")
        return None


def chatbot(audio_path):
    """Run the full pipeline: audio file -> transcript -> LLM -> speech.

    Returns a (user_text, response_text, output_audio_path) tuple. On
    error, returns placeholder text, the error message, and no audio.
    """
    # Gradio passes None when the user submits without recording/uploading;
    # model.transcribe(None) would raise an opaque error otherwise.
    if not audio_path:
        return "No audio provided", "", None
    try:
        # Step 1: Transcribe the audio using Whisper.
        result = model.transcribe(audio_path)
        user_text = result["text"]

        # Step 2: Get the LLM response from Groq.
        response_text = get_llm_response(user_text)

        # Step 3: Convert the response text to speech (None on TTS failure).
        output_audio = text_to_speech(response_text)

        return user_text, response_text, output_audio
    except Exception as e:  # UI boundary: never crash the app.
        return "Error processing audio", str(e), None


def gradio_interface(audio_file):
    """Thin adapter between the Gradio component and chatbot()."""
    return chatbot(audio_file)


interface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Audio(type="filepath", label="Upload Audio"),
    outputs=[
        gr.Textbox(label="Transcription of User Input"),
        gr.Textbox(label="LLM Response"),
        gr.Audio(label="Generated Audio Response"),
    ],
    title="AI Chatbot with Audio Input and Output",
    description=(
        "Upload an audio file to transcribe it using Whisper, "
        "get a response from an LLM (Groq), and generate an audio response using gTTS."
    ),
)

if __name__ == "__main__":
    interface.launch()