"""Voice chatbot: transcribe speech, answer via GPT-3.5-turbo, speak the reply."""

import os
import tempfile

import gradio as gr
import openai
import speech_recognition as sr
from dotenv import load_dotenv
from gtts import gTTS

# Load the OpenAI API key from a .env file / the environment.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

# One shared recognizer instance reused across requests.
recognizer = sr.Recognizer()

# Exact prefixes of every error string speech_to_text() can return instead of
# a real transcription. chatbot() matches on these so that error text is never
# forwarded to the language model as a prompt.
_STT_ERROR_PREFIXES = (
    "No audio input",
    "Sorry, I could not understand",
    "Could not request results",
)


def speech_to_text(audio):
    """Transcribe an audio file using Google's speech recognition.

    Args:
        audio: Path to an audio file (or None/empty when nothing was recorded).

    Returns:
        The transcription, or a human-readable error message beginning with
        one of ``_STT_ERROR_PREFIXES`` on failure.
    """
    if not audio:  # Gradio passes None when no recording/upload was made.
        return "No audio input detected. Please provide a valid audio file."
    try:
        with sr.AudioFile(audio) as source:
            recognizer.adjust_for_ambient_noise(source)
            audio_data = recognizer.record(source)
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Sorry, I could not understand the audio."
    except sr.RequestError as e:
        return f"Could not request results; {e}"


def get_gpt_response(prompt):
    """Ask gpt-3.5-turbo for a reply to *prompt*.

    Returns the model's reply text, or a string starting with ``"Error:"``
    when the API call fails (network, auth, quota, ...).

    NOTE(review): uses the legacy ``openai.ChatCompletion`` interface, which
    requires openai<1.0 — confirm the pinned package version before upgrading.
    """
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt},
            ],
        )
        return response['choices'][0]['message']['content']
    except Exception as e:
        return f"Error: {e}"


def text_to_speech(response):
    """Synthesize *response* to an MP3 file with gTTS and return its path.

    A fresh temp file is created per call so concurrent Gradio sessions do
    not overwrite each other's audio (the original always wrote to a single
    shared ``response.mp3``).
    """
    tts = gTTS(response)
    fd, path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)  # gTTS opens the path itself; we only need the name.
    tts.save(path)
    return path


def chatbot(audio):
    """Full pipeline: audio file -> transcription -> GPT reply -> spoken reply.

    Returns:
        (text, audio_path) — the reply text and the path to its MP3, or an
        error message and None when any stage fails.
    """
    user_input = speech_to_text(audio)
    # Bail out on any transcription error — including the RequestError case,
    # which the original never checked and therefore sent to GPT as a prompt.
    if user_input.startswith(_STT_ERROR_PREFIXES):
        return user_input, None
    response = get_gpt_response(user_input)
    # Don't synthesize API error strings to speech; surface them as text only.
    if not response or response.startswith("Error:"):
        return response or "Error generating response.", None
    return response, text_to_speech(response)


# Gradio UI: audio in, transcription text + spoken answer out.
iface = gr.Interface(
    fn=chatbot,
    inputs=gr.Audio(type="filepath"),
    outputs=[gr.Textbox(label="Transcription"), gr.Audio(label="AI Response")],
    live=True,
    title="Conversational Assistant",
    description="Upload an audio file or record your voice to get a response from the AI.",
)

if __name__ == "__main__":
    iface.launch()