# Hugging Face Spaces page residue (status banner, not code):
# Spaces: Sleeping
# Sleeping
# Standard library
import os

# Third-party
import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS

# Groq API key is read from the environment variable "api_key".
# On Hugging Face Spaces, set it under the Space's Secrets; if unset,
# GROQ_API_KEY is None and Groq requests will fail at call time
# (handled by the try/except in get_llm_response).
GROQ_API_KEY = os.getenv("api_key")
client = Groq(api_key=GROQ_API_KEY)

# Load the Whisper "base" speech-to-text model once at startup so every
# request reuses the same in-memory model.
model = whisper.load_model("base")
# Function to get LLM response from Groq
def get_llm_response(user_input):
    """Send *user_input* as a single-turn chat message to Groq.

    Args:
        user_input: The user's message text (Whisper transcription here).

    Returns:
        The model's reply text on success, or a human-readable error
        string on failure — errors are returned rather than raised so the
        Gradio UI always has something to display.
    """
    try:
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": user_input}],
            model="llama3-8b-8192",  # Replace with your desired model
        )
        # Groq returns a list of choices; we only request/use the first.
        return chat_completion.choices[0].message.content
    except Exception as e:
        return f"Error fetching LLM response: {e}"
# Function to convert text to speech using gTTS
def text_to_speech(text, output_audio="output_audio.mp3"):
    """Synthesize *text* to an MP3 file via Google Text-to-Speech.

    Args:
        text: Text to speak.
        output_audio: Destination file path (overwritten on each call).

    Returns:
        The output file path on success, or an error-description string on
        failure. NOTE(review): callers receive a plain string either way,
        so a failure string will be handed to gr.Audio as if it were a
        path — kept as-is to preserve the original behavior.
    """
    try:
        tts = gTTS(text)
        tts.save(output_audio)
        return output_audio
    except Exception as e:
        return f"Error generating audio: {e}"
# Main function to handle audio input and output
def chatbot(audio_path):
    """Run the full voice-chat pipeline on one uploaded audio file.

    Pipeline: Whisper transcription -> Groq LLM reply -> gTTS audio.

    Args:
        audio_path: Filesystem path to the user's audio file.

    Returns:
        Tuple of (user transcription, LLM reply text, reply-audio path).
        On an unexpected error, returns ("Error processing audio",
        <error message>, None) so the UI still renders all three outputs.
    """
    try:
        # Step 1: Transcribe the audio using Whisper.
        result = model.transcribe(audio_path)
        user_text = result["text"]
        # Step 2: Get the LLM response from Groq.
        response_text = get_llm_response(user_text)
        # Step 3: Convert the response text to speech.
        output_audio = text_to_speech(response_text)
        return user_text, response_text, output_audio
    except Exception as e:
        return "Error processing audio", str(e), None
# Define Gradio interface callback
def gradio_interface(audio_file):
    """Gradio event handler: delegate to chatbot().

    Passes the (transcription, LLM reply, audio path) triple straight
    through to the three output components.
    """
    user_text, response_text, output_audio = chatbot(audio_file)
    return user_text, response_text, output_audio
# Wire the pipeline into a simple one-shot Gradio UI: one audio input,
# three outputs (transcription, LLM reply text, synthesized reply audio).
interface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Audio(type="filepath", label="Upload Audio"),
    outputs=[
        gr.Textbox(label="Transcription of User Input"),
        gr.Textbox(label="LLM Response"),
        gr.Audio(label="Generated Audio Response"),
    ],
    title="AI Chatbot with Audio Input and Output",
    description=(
        "Upload an audio file to transcribe it using Whisper, "
        "get a response from an LLM (Groq), and generate an audio response using gTTS."
    ),
)

# Launch only when run as a script (Spaces also executes the file directly).
if __name__ == "__main__":
    interface.launch()