Spaces:

kotoba-tech
/

TTS-Arena-JA

Runtime error

App Files Files Community

TTS-Arena-JA / utils.py

kamahori

Add eleven labs

87e0f69 10 months ago

raw

history blame contribute delete

3.24 kB

	import os
	import json
	import tempfile
	from google.cloud import texttospeech
	import requests

	from pathlib import Path
	from openai import OpenAI

	def get_openai_tts(text, local_filename):
	    """Generate speech for ``text`` with the OpenAI API and save it to a file.

	    The API key is read from the ``OPENAI_KEY`` environment variable. The
	    request uses the ``tts-1`` model with the ``alloy`` voice.

	    Args:
	        text (str): The text to convert to speech.
	        local_filename (str): Path the generated audio is written to.

	    Returns:
	        str: ``local_filename``, unchanged.
	    """
	    client = OpenAI(api_key=os.getenv("OPENAI_KEY"))

	    speech = client.audio.speech.create(model="tts-1", voice="alloy", input=text)
	    speech.stream_to_file(local_filename)

	    return local_filename

	def get_google_credentials():
	    """Write the GCP credentials from the environment to a temporary file.

	    The JSON text is read from the ``GCP_CREDENTIAL_JSON`` environment
	    variable and written, UTF-8 encoded, to a new ``.json`` temporary file.
	    The file is created with ``delete=False``, so it is still on disk after
	    this function returns; removing it is left to the caller.

	    Returns:
	        str: Path of the temporary file holding the credentials.

	    Raises:
	        TypeError: If ``GCP_CREDENTIAL_JSON`` is not set.
	    """
	    creds_json_str = os.getenv("GCP_CREDENTIAL_JSON")
	    if creds_json_str is None:
	        # Check before creating the file so a missing variable does not leave
	        # an empty temp file behind. TypeError is what writing ``None`` to the
	        # file would raise, so callers see the same exception type as before.
	        raise TypeError("GCP_CREDENTIAL_JSON environment variable is not set")

	    # Explicit UTF-8 so the written JSON does not depend on the host locale.
	    with tempfile.NamedTemporaryFile(
	        mode="w+", delete=False, suffix=".json", encoding="utf-8"
	    ) as temp:
	        temp.write(creds_json_str)
	        temp_filename = temp.name

	    return temp_filename

	def get_google_tts(text, local_filename):
	    """Synthesize ``text`` with Google Cloud Text-to-Speech and save the audio.

	    The request asks for a ``ja-JP`` voice with neutral SSML gender and MP3
	    audio encoding. The returned audio bytes are written to
	    ``local_filename`` and a confirmation message is printed.

	    Args:
	        text (str): The text to convert to speech.
	        local_filename (str): Path the generated audio is written to.

	    Returns:
	        str: ``local_filename``, unchanged.
	    """
	    tts_client = texttospeech.TextToSpeechClient()

	    # Build the three request parts inline: the text to speak, the voice
	    # (Japanese, neutral gender) and the output format (MP3).
	    result = tts_client.synthesize_speech(
	        input=texttospeech.SynthesisInput(text=text),
	        voice=texttospeech.VoiceSelectionParams(
	            language_code="ja-JP",
	            ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
	        ),
	        audio_config=texttospeech.AudioConfig(
	            audio_encoding=texttospeech.AudioEncoding.MP3,
	        ),
	    )

	    # audio_content is binary, so the file is opened in binary mode.
	    with open(local_filename, "wb") as audio_file:
	        audio_file.write(result.audio_content)
	        print(f'Audio content written to file {local_filename}')

	    return local_filename

	def get_elevenlabs_tts(text, local_filename, *, timeout=60):
	    """
	    Call the Eleven Labs API to generate speech from text.

	    The API key is read from the ``ELEVENLABS_API_KEY`` environment variable.
	    The request uses the ``eleven_multilingual_v2`` model and a fixed voice ID.

	    Args:
	        text (str): The text to convert to speech
	        local_filename (str): Path to save the generated audio file
	        timeout (float or tuple): Seconds to wait for the server, passed
	            straight to ``requests.post``. Defaults to 60.

	    Returns:
	        str: Path to the generated audio file

	    Raises:
	        requests.exceptions.HTTPError: If the API responds with an error status.
	        requests.exceptions.Timeout: If the server does not respond within
	            ``timeout``.
	    """
	    api_key = os.getenv("ELEVENLABS_API_KEY")

	    # API endpoint for text-to-speech
	    url = "https://api.elevenlabs.io/v1/text-to-speech/21m00Tcm4TlvDq8ikWAM"  # Default voice ID (Rachel)

	    # Headers with API key
	    headers = {
	        "Accept": "audio/mpeg",
	        "Content-Type": "application/json",
	        "xi-api-key": api_key,
	    }

	    # Request body
	    data = {
	        "text": text,
	        "model_id": "eleven_multilingual_v2",
	        "voice_settings": {
	            "stability": 0.5,
	            "similarity_boost": 0.5,
	        },
	    }

	    # requests never times out on its own; without an explicit timeout a
	    # stalled connection would block the caller indefinitely.
	    response = requests.post(url, json=data, headers=headers, timeout=timeout)

	    # Raise for 4xx/5xx so an error body is never saved as an audio file.
	    response.raise_for_status()

	    # Save the audio content to the specified file
	    with open(local_filename, "wb") as f:
	        f.write(response.content)

	    return local_filename