# JohnnyTTS / app.py
# PhilSpiel's picture
# Update app.py
# 1ae9b86
import gradio as gr
import os
from openai import OpenAI
from datetime import datetime
# Timestamp taken once, when the module is imported, and kept as a string
# (originally meant for building a UserID + timestamp filename). It is not
# refreshed afterwards.
dt = datetime.now()
dt_string = dt.isoformat(sep=" ")
# OpenAI API client; the key is read from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
# Conversation history in the chat-completions message format. It is seeded
# with a single system message whose text comes from the PROMPT environment
# variable (the content is None when that variable is unset).
history_openai_format = [dict(role="system", content=os.environ.get("PROMPT"))]
# Characters that must never reach a filename built from user input.
_PATH_SEPARATORS = frozenset("/\\")


def _history_file_for(name):
    """Return ``(user_id, filename)`` of the per-user history file for *name*."""
    user_id = (name or "unidentified").lower().replace(" ", "")
    # The name comes straight from a web form: drop path separators and
    # non-printable characters so the filename cannot point outside the
    # working directory.
    user_id = "".join(
        ch for ch in user_id if ch not in _PATH_SEPARATORS and ch.isprintable()
    )
    user_id = user_id or "unidentified"
    return user_id, "jcTSS-" + user_id + ".txt"


def _strip_speaker_label(text, label="johnny"):
    """Drop a leading speaker label such as ``Johnny:`` from *text*.

    The label is only removed when it stands alone; ``Johnny's ...`` or
    ``Johnnycake ...`` is part of the sentence and is returned unchanged.
    Separators that followed the label (``:``, ``,``, dashes) are removed too.
    """
    if text[:len(label)].lower() != label:
        return text
    remainder = text[len(label):]
    if remainder[:1].isalnum() or remainder[:1] in ("'", "\u2019"):
        return text
    stripped = remainder.lstrip(" \t\r\n:,-\u2013\u2014").strip()
    # If the reply was nothing but the label, keep it rather than return "".
    return stripped or text


def _user_text(content):
    """Recover what the guest typed from a stored ``"I'm <name>. <text>"`` message."""
    # NOTE(review): a name that itself contains ". " (e.g. "Dr. Smith") is cut
    # at the first ". ", so part of the name leaks into the transcript.
    _, separator, text = content.partition(". ")
    return text if separator else content


def generate_speech(name, input_text):
    """Answer the guest's message and return the reply as speech plus a transcript.

    The message is sent to the chat model together with the accumulated
    history, the reply is converted to audio, and the exchange is appended
    to a per-user history file ``jcTSS-<user_id>.txt``.

    NOTE(review): ``history_openai_format`` is a single module-level list, so
    every caller shares (and sees in the transcript) the same conversation.

    Args:
        name: Name entered by the guest; blank falls back to "unidentified"
            for the history filename.
        input_text: The guest's question or comment.

    Returns:
        A ``(audio, transcript)`` tuple: the ``content`` of the speech
        response and the text transcript of the conversation so far.
    """
    name = (name or "").strip()
    input_text = input_text or ""
    user_id, user_hist_file = _history_file_for(name)

    # The name is included in the message sent to the model.
    user_message = {"role": "user", "content": f"I'm {name}. " + input_text}

    # Send the history plus the new message without touching the shared
    # history yet, so a failed request does not leave a dangling user turn.
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        messages=[*history_openai_format, user_message],
    )
    message_content = _strip_speaker_label(
        completion.choices[0].message.content.strip()
    )
    assistant_message = {"role": "assistant", "content": message_content}

    # Convert the text reply to audio.
    response = client.audio.speech.create(
        model="tts-1",
        voice="onyx",
        input=str(message_content),
    )

    # Both calls succeeded: record the exchange in the shared history.
    history_openai_format.extend((user_message, assistant_message))

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Transcript: skip the system message, show the guest's own words, and
    # leave a blank line after each assistant reply.
    parts = ["Date/Time: " + timestamp + "\n\n"]
    for msg in history_openai_format:
        if msg["role"] == "system":
            continue
        if msg["role"] == "user":
            parts.append(f"GUEST: {_user_text(msg['content'])}\n")
        else:
            parts.append(f"JOHNNY: {msg['content']}\n\n")
    transcript = "".join(parts)

    # Append this exchange to the user's history file; a new file starts
    # with a "User ID:" header line.
    is_new_file = not os.path.exists(user_hist_file)
    with open(user_hist_file, "a", encoding="UTF-8") as file:
        if is_new_file:
            file.write("User ID: " + user_id)
        file.write("\n\nDate/Time: " + timestamp)
        for message in (user_message, assistant_message):
            file.write(f"\n{message['role'].title()}: {message['content']}")

    # Return the binary audio data and the transcript.
    return response.content, transcript
# Gradio interface: two text inputs (guest name and message) feed
# generate_speech; its two return values go to an audio player and a
# transcript box.
iface = gr.Interface(
    fn=generate_speech,
    inputs=[
        gr.Textbox(label="Your Name (REQUIRED):", placeholder="Enter your FIRST NAME"),
        gr.Textbox(label="Your question or comment for Johnny:"),
    ],
    outputs=[
        gr.Audio(autoplay=True, label="Johnny's response:"),
        gr.Textbox(
            label="Transcript",
            max_lines=12,
            # Real booleans rather than the string "True", which only worked
            # because any non-empty string is truthy.
            autoscroll=True,
            show_copy_button=True,
        ),
    ],
    live=False,
)
# Launch the interface
iface.launch(show_api=False)