import openai
import os
from dotenv import load_dotenv

# Load the OpenAI API key from a .env file
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Check that the API key was loaded correctly
if not OPENAI_API_KEY:
    raise ValueError("❌ OpenAI API Key is missing! Please set it in your .env file.")

# Initialize the OpenAI client
client = openai.OpenAI(api_key=OPENAI_API_KEY)


def process_query(audio=None, text=None):
    """
    Handle audio and/or text input.

    1. If audio is provided, transcribe it with Whisper.
    2. Send the text (typed, or the transcript) to GPT-4o.
    3. Synthesize the GPT-4o reply to speech with TTS.

    Returns:
        (ai_text_response, audio_path): the assistant's text reply and the
        path to the generated MP3 (None if speech synthesis failed).
    """
    transcript = None
    if audio:
        try:
            with open(audio, "rb") as audio_file:
                response = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file,
                )
            transcript = response.text
        except Exception as e:
            transcript = f"Error in transcription: {str(e)}"

    # Use the typed text, or fall back to the transcript, as the GPT-4o query
    query_text = text if text else transcript
    if not query_text:
        return "No valid input provided!", None

    # Get a response from GPT-4o
    try:
        gpt_response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are an AI assistant."},
                {"role": "user", "content": query_text},
            ],
        )
        ai_text_response = gpt_response.choices[0].message.content
    except Exception as e:
        ai_text_response = f"Error in GPT response: {str(e)}"

    # Generate the AI voice response
    try:
        audio_response = client.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=ai_text_response,
        )
        audio_path = "response_audio.mp3"
        with open(audio_path, "wb") as audio_file:
            audio_file.write(audio_response.content)
    except Exception as e:
        audio_path = None
        ai_text_response += f"\nError in generating audio: {str(e)}"

    return ai_text_response, audio_path
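

# Example usage -- a minimal sketch for local testing, not part of the
# pipeline above. The file name "question.wav" is a placeholder (any audio
# format Whisper accepts will do); a valid OPENAI_API_KEY must be present
# in your .env file for the calls to succeed.
if __name__ == "__main__":
    # Text-only query: skips transcription, goes straight to GPT-4o + TTS
    reply, speech_path = process_query(text="What is the capital of France?")
    print("Assistant:", reply)
    if speech_path:
        print("Spoken reply saved to:", speech_path)

    # Audio query: uncomment and point at a real recording on disk
    # reply, speech_path = process_query(audio="question.wav")
    # print("Assistant:", reply)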