TTS-Arena-JA / utils.py
kamahori's picture
Add eleven labs
87e0f69
import os
import json
import tempfile
from google.cloud import texttospeech
import requests
from pathlib import Path
from openai import OpenAI
def get_openai_tts(text, local_filename):
    """
    Call the OpenAI speech API to generate speech from text.

    The API key is read from the ``OPENAI_KEY`` environment variable and the
    request uses the ``tts-1`` model with the ``alloy`` voice.

    Args:
        text (str): The text to convert to speech
        local_filename (str): Path to save the generated audio file

    Returns:
        str: Path to the generated audio file (``local_filename`` unchanged)
    """
    api_key = os.getenv("OPENAI_KEY")
    client = OpenAI(api_key=api_key)

    # Calling ``stream_to_file`` on the plain ``create()`` result is deprecated
    # in the OpenAI SDK because the body is fully buffered before it is
    # written. The ``with_streaming_response`` variant writes chunks as they
    # arrive and closes the HTTP response when the ``with`` block exits.
    with client.audio.speech.with_streaming_response.create(
        model="tts-1",
        voice="alloy",
        input=text,
    ) as response:
        response.stream_to_file(local_filename)

    return local_filename
def get_google_credentials():
    """
    Write the Google Cloud credentials from the environment to a temp file.

    The JSON document is read as a string from the ``GCP_CREDENTIAL_JSON``
    environment variable and written verbatim to a new ``.json`` temporary
    file. The file is created with ``delete=False``, so it stays on disk after
    this function returns; removing it is the caller's responsibility.

    Returns:
        str: Path to the temporary credentials file

    Raises:
        ValueError: If ``GCP_CREDENTIAL_JSON`` is unset or empty
    """
    creds_json_str = os.getenv("GCP_CREDENTIAL_JSON")

    # Validate before creating the file: otherwise a missing variable leaves
    # an orphaned temp file behind and fails with an opaque TypeError from
    # ``write(None)``.
    if not creds_json_str:
        raise ValueError(
            "GCP_CREDENTIAL_JSON environment variable is not set or is empty"
        )

    # JSON is UTF-8 by convention; do not depend on the locale's default
    # encoding when the credentials contain non-ASCII characters.
    with tempfile.NamedTemporaryFile(
        mode="w+", delete=False, suffix=".json", encoding="utf-8"
    ) as temp:
        temp.write(creds_json_str)
        temp_filename = temp.name

    return temp_filename
def get_google_tts(text, local_filename):
    """
    Call Google Cloud Text-to-Speech to generate speech from text.

    Args:
        text (str): The text to convert to speech
        local_filename (str): Path to save the generated audio file

    Returns:
        str: Path to the generated audio file (``local_filename`` unchanged)
    """
    tts_client = texttospeech.TextToSpeechClient()

    # Assemble the three parts of the synthesis request: the input text, a
    # "ja-JP" voice with a neutral SSML gender, and MP3 as the audio encoding.
    request_parts = {
        "input": texttospeech.SynthesisInput(text=text),
        "voice": texttospeech.VoiceSelectionParams(
            language_code="ja-JP",
            ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
        ),
        "audio_config": texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3
        ),
    }
    result = tts_client.synthesize_speech(**request_parts)

    # ``audio_content`` holds raw bytes, hence the binary write mode.
    with open(local_filename, "wb") as audio_file:
        audio_file.write(result.audio_content)
    print(f'Audio content written to file {local_filename}')

    return local_filename
def get_elevenlabs_tts(
    text,
    local_filename,
    *,
    voice_id="21m00Tcm4TlvDq8ikWAM",
    timeout=60,
):
    """
    Call the Eleven Labs API to generate speech from text.

    The API key is read from the ``ELEVENLABS_API_KEY`` environment variable
    and the request uses the ``eleven_multilingual_v2`` model.

    Args:
        text (str): The text to convert to speech
        local_filename (str): Path to save the generated audio file
        voice_id (str): Eleven Labs voice ID used in the endpoint URL.
            Defaults to the previously hard-coded voice (Rachel).
        timeout (float | tuple): Timeout in seconds passed to
            ``requests.post``; a ``(connect, read)`` tuple is also accepted.

    Returns:
        str: Path to the generated audio file

    Raises:
        requests.HTTPError: If the API responds with an error status
        requests.Timeout: If the server does not respond within ``timeout``
    """
    api_key = os.getenv("ELEVENLABS_API_KEY")

    # API endpoint for text-to-speech with the selected voice
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"

    # Headers with API key
    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": api_key,
    }

    # Request body
    data = {
        "text": text,
        "model_id": "eleven_multilingual_v2",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5,
        },
    }

    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection, which would hang the caller.
    response = requests.post(url, json=data, headers=headers, timeout=timeout)

    # Surface HTTP errors instead of saving an error payload as audio
    response.raise_for_status()

    # Save the audio content to the specified file
    with open(local_filename, "wb") as f:
        f.write(response.content)

    return local_filename