JohnnyTTS

Sleeping

File size: 4,763 Bytes

3e942ea
fe5ee85
 
aad33d5
 
130da4d
 
 
cf24a85
130da4d
 
bd3f5eb
130da4d
 
1ae9b86
130da4d
444d537
130da4d
aba7eb1
130da4d
444d537
130da4d
bd3f5eb
 
 
130da4d
 
4f91351
 
130da4d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
484e1a8
130da4d
01d333a
130da4d
 
01d333a
a10389f
130da4d
c4e7a85
130da4d
 
14f0967
130da4d
 
14f0967
130da4d
c4e7a85
a10389f
130da4d
b65c921
130da4d
 
 
3d43559
b65c921
1ae9b86
130da4d
 
0387e9d
1ae9b86
0387e9d
 
130da4d
1ae9b86
 
 
 
 
130da4d
1ae9b86
130da4d
 
bd3f5eb
 
130da4d
bd3f5eb
 
130da4d
548bd9c
3d43559
130da4d
 
 
 
 
 
 
80eae57
130da4d

import gradio as gr
import os
from openai import OpenAI
from datetime import datetime

# Timestamp taken once at import time and kept as a string; intended for
# building a filename that combines the user ID with a timestamp.
dt = datetime.now()
dt_string = f"{dt}"

# OpenAI API client, authenticated with the key found in the environment
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Conversation context sent to the model: starts with the system prompt read
# from the PROMPT environment variable
history_openai_format = [dict(role="system", content=os.environ.get("PROMPT"))]

def _normalize_user_id(name):
    """Turn the submitted name into an identifier that is safe inside a filename.

    The name is lower-cased and every character other than letters, digits and
    ``. _ @ + -`` is dropped. That removes spaces (as before) and also path
    separators, so a crafted name cannot point the history file outside the
    working directory. Falls back to ``"unidentified"`` when nothing is left.
    """
    lowered = (name or "").lower()
    safe = "".join(ch for ch in lowered if ch.isalnum() or ch in "._@+-")
    return safe or "unidentified"


def _clean_display_name(name):
    """Return the name as it should appear in the ``I'm <name>. `` intro.

    Trailing periods are removed from each word so the name itself never
    contains ``". "``; the transcript relies on the first ``". "`` marking the
    end of the intro. Falls back to ``"a guest"`` for a missing/blank name.
    """
    words = (word.rstrip(".") for word in (name or "").split())
    return " ".join(word for word in words if word) or "a guest"


def _strip_speaker_label(text, label="johnny"):
    """Remove a leading speaker label such as ``Johnny:`` from *text*.

    The label is only removed when it is a whole word (so ``Johnny's ...`` and
    ``Johnnycakes ...`` are left alone), and any separator punctuation after
    it is removed too. If stripping would leave nothing, *text* is returned
    unchanged so the text-to-speech call never receives an empty string.
    """
    if text[:len(label)].lower() != label:
        return text
    rest = text[len(label):]
    if rest and (rest[0].isalnum() or rest[0] in "'’_"):
        return text
    stripped = rest.lstrip(" \t\r\n:,;.!-–—")
    return stripped or text


def _format_transcript(history, timestamp):
    """Render the non-system messages of *history* for the Transcript textbox.

    User entries are shown without their ``I'm <name>. `` intro; a blank line
    follows every assistant entry to separate exchanges.
    """
    parts = [f"Date/Time: {timestamp}\n\n"]
    for msg in history:
        role = msg["role"]
        content = msg["content"]
        if role == "system":
            continue
        if role == "user":
            # Everything after the first ". " is what the guest actually typed.
            _, separator, typed = content.partition(". ")
            parts.append(f"GUEST: {typed if separator else content}\n")
        else:
            parts.append(f"JOHNNY: {content}\n\n")
    return "".join(parts)


def _append_exchange_to_file(path, user_id, timestamp, messages):
    """Append *messages* to the history file at *path*, creating it if needed.

    A new file starts with a ``User ID: <user_id>`` header line.
    """
    is_new_file = not os.path.exists(path)
    with open(path, "a", encoding="UTF-8") as file:
        if is_new_file:
            file.write("User ID: " + user_id)
        file.write("\n\nDate/Time: " + timestamp)
        for message in messages:
            file.write(f"\n{message['role'].title()}: {message['content']}")


def generate_speech(name, input_text):
    """Answer the guest's message as Johnny and return the spoken reply.

    Sends the accumulated conversation plus the new message to the chat model,
    converts the reply to audio with the text-to-speech API, records the
    exchange in the per-user history file, and builds the on-screen transcript.

    NOTE: ``history_openai_format`` is a module-level list, so the conversation
    context (and therefore the transcript) is shared by every caller of this
    function in the process.

    Args:
        name: The guest's name as typed in the UI; may be empty.
        input_text: The guest's question or comment.

    Returns:
        A ``(audio, transcript)`` tuple: ``response.content`` from the
        text-to-speech call and the transcript string.
    """
    user_id = _normalize_user_id(name)
    user_hist_file = "jcTSS-" + user_id + ".txt"

    # Prefix the message with the guest's name so the model knows who is speaking.
    user_message = {
        "role": "user",
        "content": f"I'm {_clean_display_name(name)}. " + (input_text or ""),
    }

    # The shared history is only extended after the request succeeds, so a
    # failed API call cannot leave a dangling user message behind.
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        messages=[*history_openai_format, user_message],
    )

    message_content = _strip_speaker_label(
        completion.choices[0].message.content.strip()
    )
    assistant_message = {"role": "assistant", "content": message_content}
    history_openai_format.extend((user_message, assistant_message))

    # Use OpenAI's text-to-speech API to convert the text response to audio
    response = client.audio.speech.create(
        model="tts-1",
        voice="onyx",
        input=message_content,
    )

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    transcript = _format_transcript(history_openai_format, timestamp)
    _append_exchange_to_file(
        user_hist_file, user_id, timestamp, (user_message, assistant_message)
    )

    # Return the binary audio data and the transcript
    return response.content, transcript

# Define the Gradio interface: two text inputs (name and message) are passed to
# generate_speech; its audio reply and transcript are shown as outputs.
iface = gr.Interface(
    fn=generate_speech,
    inputs=[
        gr.Textbox(label="Your Name (REQUIRED):", placeholder="Enter your FIRST NAME"),
        gr.Textbox(label="Your question or comment for Johnny:"),
    ],
    outputs=[
        gr.Audio(autoplay=True, label="Johnny's response:"),
        gr.Textbox(
            label="Transcript",
            max_lines=12,
            # Real booleans: the previous "True" strings only worked because
            # any non-empty string is truthy ("False" would have enabled them too).
            autoscroll=True,
            show_copy_button=True,
        ),
    ],
    live=False,
)

# Launch the interface
iface.launch(show_api=False)