Update app.py
Browse files
app.py
CHANGED
|
@@ -10,25 +10,34 @@ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
|
| 10 |
# Set the ElevenLabs API key using an environment variable
|
| 11 |
elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY")
|
| 12 |
|
| 13 |
-
#
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
completion = client.chat.completions.create(
|
| 17 |
model="gpt-3.5-turbo-1106",
|
| 18 |
-
messages=
|
| 19 |
-
{"role": "system",
|
| 20 |
-
"content": "You are Johnny Carson, interviewing a guest (the user) on the Tonight Show in 1978. While chatting with the user, you make occasional jokes using Johnny Carson's characteristic humor. Your knowledge of the world ends in 1978."},
|
| 21 |
-
{"role": "user", "content": input_text}
|
| 22 |
-
]
|
| 23 |
)
|
| 24 |
|
| 25 |
# Extract generated text (response by the assistant) from OpenAI's API response
|
| 26 |
-
message_content = completion.choices[0].message.content.strip()
|
| 27 |
|
| 28 |
-
#
|
| 29 |
-
|
| 30 |
|
| 31 |
-
#
|
|
|
|
| 32 |
url = "https://api.elevenlabs.io/v1/text-to-speech/eIH76iW9yjOWnOKIkd6y/stream"
|
| 33 |
headers = {
|
| 34 |
"Accept": "audio/mpeg",
|
|
@@ -36,18 +45,18 @@ def generate_speech(input_text):
|
|
| 36 |
"xi-api-key": elevenlabs_api_key
|
| 37 |
}
|
| 38 |
data = {
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
}
|
| 52 |
|
| 53 |
response = requests.post(url, json=data, headers=headers)
|
|
@@ -59,11 +68,14 @@ def generate_speech(input_text):
|
|
| 59 |
print("Error with ElevenLabs API:", response.status_code, response.text)
|
| 60 |
raise Exception(f"Failed to generate speech, status code: {response.status_code}, response: {response.text}")
|
| 61 |
|
| 62 |
-
# Define the Gradio interface
|
| 63 |
iface = gr.Interface(
|
| 64 |
fn=generate_speech,
|
| 65 |
-
inputs=
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
| 67 |
live=False
|
| 68 |
)
|
| 69 |
|
|
|
|
| 10 |
# Set the ElevenLabs API key using an environment variable
|
| 11 |
elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY")
|
| 12 |
|
| 13 |
+
# Initialize the conversation history, seeded with the system prompt, to maintain context across turns
|
| 14 |
+
history_openai_format = [
|
| 15 |
+
{"role": "system",
|
| 16 |
+
"content": "You are Johnny Carson, interviewing a guest (the user) on the Tonight Show in 1978. While chatting with the user, you make occasional jokes using Johnny Carson's characteristic humor. Your knowledge of the world ends in 1978."}
|
| 17 |
+
]
|
| 18 |
+
|
| 19 |
+
# Define the function that takes a name and text input to generate the speech
|
| 20 |
+
def generate_speech(name, input_text):
|
| 21 |
+
# Include the user's name in the dialogue if provided
|
| 22 |
+
personalized_greeting = f"Welcome to the Tonight Show, {name}. " if name else ""
|
| 23 |
+
|
| 24 |
+
# Append the user message to the history, prefixed with the personalized greeting when a name was given
|
| 25 |
+
history_openai_format.append({"role": "user", "content": personalized_greeting + input_text})
|
| 26 |
+
|
| 27 |
+
# Build completion with OpenAI using the accumulated history
|
| 28 |
completion = client.chat.completions.create(
|
| 29 |
model="gpt-3.5-turbo-1106",
|
| 30 |
+
messages=history_openai_format
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
)
|
| 32 |
|
| 33 |
# Extract generated text (response by the assistant) from OpenAI's API response
|
| 34 |
+
message_content = completion.choices[0].message.content.strip()
|
| 35 |
|
| 36 |
+
# Append assistant's message to history
|
| 37 |
+
history_openai_format.append({"role": "assistant", "content": message_content})
|
| 38 |
|
| 39 |
+
# Prepare the text for TTS conversion with ElevenLabs API settings
|
| 40 |
+
text_to_speech = message_content
|
| 41 |
url = "https://api.elevenlabs.io/v1/text-to-speech/eIH76iW9yjOWnOKIkd6y/stream"
|
| 42 |
headers = {
|
| 43 |
"Accept": "audio/mpeg",
|
|
|
|
| 45 |
"xi-api-key": elevenlabs_api_key
|
| 46 |
}
|
| 47 |
data = {
|
| 48 |
+
"text": text_to_speech,
|
| 49 |
+
"model_id": "eleven_multilingual_v2",
|
| 50 |
+
"voice_settings": {
|
| 51 |
+
"stability": 1.0,
|
| 52 |
+
"similarity_boost": 1.0,
|
| 53 |
+
"excitement": 0.9,
|
| 54 |
+
"speed": 1.1,
|
| 55 |
+
"volume": 80,
|
| 56 |
+
"pitch": 2.0,
|
| 57 |
+
"breathiness": 0.8,
|
| 58 |
+
"voice_id": "eIH76iW9yjOWnOKIkd6y"
|
| 59 |
+
}
|
| 60 |
}
|
| 61 |
|
| 62 |
response = requests.post(url, json=data, headers=headers)
|
|
|
|
| 68 |
print("Error with ElevenLabs API:", response.status_code, response.text)
|
| 69 |
raise Exception(f"Failed to generate speech, status code: {response.status_code}, response: {response.text}")
|
| 70 |
|
| 71 |
+
# Define the Gradio interface with inputs for name and user text
|
| 72 |
iface = gr.Interface(
|
| 73 |
fn=generate_speech,
|
| 74 |
+
inputs=[
|
| 75 |
+
gr.Textbox(label="Your Name (optional):", placeholder="Enter your name"),
|
| 76 |
+
gr.Textbox(label="Your question or comment:")
|
| 77 |
+
],
|
| 78 |
+
outputs=gr.Audio(autoplay=True),
|
| 79 |
live=False
|
| 80 |
)
|
| 81 |
|