Spaces:
Sleeping
Sleeping
File size: 2,214 Bytes
import openai
import os
from dotenv import load_dotenv
# Pull environment variables (including the OpenAI key) in from a .env file.
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Fail fast at import time if the key is absent, rather than erroring on
# the first API call.
if not OPENAI_API_KEY:
    raise ValueError("❌ OpenAI API Key is missing! Please set it in your .env file.")

# Single shared OpenAI client used by every API call in this module.
client = openai.OpenAI(api_key=OPENAI_API_KEY)
def process_query(audio=None, text=None):
    """Handle a voice and/or text query end-to-end.

    1. If ``audio`` (a file path) is provided, transcribe it with Whisper.
    2. Send the explicit ``text`` (preferred) or the transcript to GPT-4o.
    3. Synthesize the GPT reply to speech with TTS and save it to disk.

    Args:
        audio: Optional path to an audio file to transcribe.
        text: Optional text query; takes precedence over the transcript.

    Returns:
        tuple: ``(ai_text_response, audio_path)`` — the assistant's text
        reply and the path of the generated MP3, or ``None`` for the path
        when speech synthesis failed or no valid input was given.
    """
    transcript = None
    if audio:
        try:
            # Transcribe the uploaded audio file with Whisper.
            with open(audio, "rb") as audio_file:
                response = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file
                )
                transcript = response.text
        except Exception as e:
            # NOTE(review): on failure this error string becomes the GPT
            # query when no explicit text was given — kept as-is for
            # backward compatibility with existing callers.
            transcript = f"Error in transcription: {str(e)}"

    # Explicit text input wins; otherwise fall back to the transcript.
    query_text = text if text else transcript
    if not query_text:
        return "No valid input provided!", None

    # Ask GPT-4o for a reply to the query.
    try:
        gpt_response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "system", "content": "You are an AI assistant."},
                      {"role": "user", "content": query_text}]
        )
        ai_text_response = gpt_response.choices[0].message.content
    except Exception as e:
        ai_text_response = f"Error in GPT response: {str(e)}"

    # Generate the spoken version of the reply; a TTS failure degrades
    # gracefully to text-only output with the error appended.
    try:
        audio_response = client.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=ai_text_response
        )
        audio_path = "response_audio.mp3"
        with open(audio_path, "wb") as audio_file:
            audio_file.write(audio_response.content)
    except Exception as e:
        audio_path = None
        ai_text_response += f"\nError in generating audio: {str(e)}"

    return ai_text_response, audio_path