"""Gradio front end for a talking "Johnny" assistant.

Each submission is sent to an OpenAI chat model together with the
accumulated conversation, the reply is converted to speech with OpenAI's
text-to-speech endpoint, and the exchange is appended to a per-user text
file in the current working directory.
"""

import os
import re
from datetime import datetime
from typing import Dict, List, Tuple

import gradio as gr
from openai import OpenAI

# Start-up timestamp. Nothing in this file reads these two names; they are
# kept only so the module keeps exposing them.
dt = datetime.now()
dt_string = str(dt)

# OpenAI API client, authenticated with the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Conversation context sent to the chat model, seeded with the system prompt.
# NOTE(review): this list is module-level, so it is shared by every caller of
# generate_speech and grows with each exchange -- confirm that is intended.
# NOTE(review): PROMPT may be unset, in which case the content is None.
history_openai_format: List[dict] = [
    {"role": "system", "content": os.getenv("PROMPT")}
]

# Text placed between the "I'm <name>" introduction and the user's own words.
# The collapsed source showed a raw line break inside this literal; it is
# written here as an explicit "\n" escape. The same constant is used both to
# build the message and to take it apart again, so the two cannot drift.
_INTRO_SEPARATOR = ". \n"

# Characters that must never reach a filename: whitespace, path separators and
# characters that are reserved on common filesystems.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\s\\/:*?"<>|\x00-\x1f]')

# A leading "Johnny" used as a speaker label, plus any separator after it.
# The look-ahead leaves possessives such as "Johnny's ..." untouched, and the
# word boundary leaves longer words that merely start with "johnny" untouched.
_SPEAKER_LABEL = re.compile(
    r"johnny\b(?!['\u2019])[\s:,\-\u2013\u2014]*", re.IGNORECASE
)

_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"


def _user_history_file(name: str) -> Tuple[str, str]:
    """Return ``(user_id, filename)`` for the history file of *name*.

    The name comes straight from the web form, so everything that could
    steer the path outside the working directory is removed first.
    """
    user_id = _UNSAFE_FILENAME_CHARS.sub("", name.lower()) or "unidentified"
    return user_id, "jcTSS-" + user_id + ".txt"


def _strip_speaker_label(text: str) -> str:
    """Remove a leading "Johnny" speaker label from the assistant's reply."""
    match = _SPEAKER_LABEL.match(text)
    if match is None:
        return text
    remainder = text[match.end():].strip()
    # Never hand an empty string to the text-to-speech call.
    return remainder or text


def _spoken_text(content: str) -> str:
    """Return a user message without its "I'm <name>" introduction."""
    _intro, separator, remainder = content.partition(_INTRO_SEPARATOR)
    return remainder if separator else content


def _build_transcript(history: List[dict], timestamp: str) -> str:
    """Render every non-system message of *history* as display text."""
    parts = ["Date/Time: " + timestamp + "\n\n"]
    for message in history:
        role = message["role"]
        if role == "system":
            continue
        if role == "user":
            parts.append(f"GUEST: {_spoken_text(message['content'])}\n")
        else:
            parts.append(f"JOHNNY: {message['content']}\n\n")
    return "".join(parts)


def _log_exchange(
    path: str, user_id: str, timestamp: str, messages: List[Dict[str, str]]
) -> None:
    """Append *messages* to the history file, creating it on first use."""
    try:
        # Mode "x" fails if the file already exists, so the header line is
        # written exactly once.
        with open(path, "x", encoding="UTF-8") as file:
            file.write("User ID: " + user_id)
    except FileExistsError:
        pass

    entry = ["\n\nDate/Time: " + timestamp]
    entry.extend(
        f"\n{message['role'].title()}: {message['content']}"
        for message in messages
    )
    with open(path, "a", encoding="UTF-8") as file:
        file.write("".join(entry))


def generate_speech(name: str, input_text: str) -> Tuple[bytes, str]:
    """Answer *input_text* as Johnny and return the spoken reply.

    Args:
        name: The visitor's name as typed into the form. It is included in
            the message sent to the model and used (lower-cased and
            sanitised) to pick the history file.
        input_text: The visitor's question or comment.

    Returns:
        A tuple of the audio data returned by the text-to-speech call and
        the transcript of the whole shared conversation.
    """
    name = name or ""
    input_text = input_text or ""
    user_id, user_hist_file = _user_history_file(name)

    user_message = {
        "role": "user",
        "content": f"I'm {name}{_INTRO_SEPARATOR}{input_text}",
    }

    # The shared history is only extended once both API calls have
    # succeeded, so a failed request cannot leave an unanswered user
    # message behind.
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        messages=history_openai_format + [user_message],
    )
    reply = _strip_speaker_label(
        (completion.choices[0].message.content or "").strip()
    )

    response = client.audio.speech.create(
        model="tts-1",
        voice="onyx",
        input=reply,
    )

    assistant_message = {"role": "assistant", "content": reply}
    history_openai_format.extend((user_message, assistant_message))

    timestamp = datetime.now().strftime(_TIMESTAMP_FORMAT)
    transcript = _build_transcript(history_openai_format, timestamp)

    # Log this call's own two messages rather than slicing the shared list.
    _log_exchange(
        user_hist_file, user_id, timestamp, [user_message, assistant_message]
    )

    return response.content, transcript


# Gradio interface: name and question in, audio and transcript out.
iface = gr.Interface(
    fn=generate_speech,
    inputs=[
        gr.Textbox(
            label="Your Name (REQUIRED):",
            placeholder="Enter your FIRST NAME",
        ),
        gr.Textbox(label="Your question or comment for Johnny:"),
    ],
    outputs=[
        gr.Audio(autoplay=True, label="Johnny's response:"),
        gr.Textbox(
            label="Transcript",
            max_lines=12,
            autoscroll=True,
            show_copy_button=True,
        ),
    ],
    live=False,
)

# Launch the interface.
iface.launch(show_api=False)