Spaces:

kotoba-tech
/

TTS-Arena-JA

Runtime error

File size: 3,237 Bytes

import os
import json
import tempfile
from google.cloud import texttospeech
import requests

from pathlib import Path
from openai import OpenAI

def get_openai_tts(text, local_filename):
    """
    Generate speech for ``text`` with the OpenAI speech API and save it.

    The API key is read from the ``OPENAI_KEY`` environment variable. The
    request uses the ``tts-1`` model with the ``alloy`` voice.

    Args:
        text (str): The text to convert to speech
        local_filename (str): Path to save the generated audio file

    Returns:
        str: Path to the generated audio file (``local_filename`` unchanged)
    """
    openai_client = OpenAI(api_key=os.getenv("OPENAI_KEY"))

    speech = openai_client.audio.speech.create(model="tts-1", voice="alloy", input=text)

    # Persist the returned audio at the caller-supplied location.
    speech.stream_to_file(local_filename)
    return local_filename

def get_google_credentials():
    """
    Write the Google Cloud credentials from the environment to a temp file.

    The credentials are expected as a JSON string in the
    ``GCP_CREDENTIAL_JSON`` environment variable. They are written verbatim
    to a new ``.json`` temporary file, which is NOT deleted automatically;
    the caller is responsible for removing it when it is no longer needed.

    Returns:
        str: Path to the temporary file containing the credentials

    Raises:
        RuntimeError: If ``GCP_CREDENTIAL_JSON`` is unset or empty
    """
    creds_json_str = os.getenv("GCP_CREDENTIAL_JSON")

    # Validate before creating the file: with delete=False, a failed write
    # would otherwise leave an empty temp file behind on disk.
    if not creds_json_str:
        raise RuntimeError(
            "GCP_CREDENTIAL_JSON environment variable is not set or is empty"
        )

    with tempfile.NamedTemporaryFile(
        mode="w", delete=False, suffix=".json", encoding="utf-8"
    ) as temp:
        temp.write(creds_json_str)
        temp_filename = temp.name

    return temp_filename

def get_google_tts(text, local_filename):
    """
    Generate speech for ``text`` with Google Cloud Text-to-Speech and save it.

    The request asks for a Japanese (``ja-JP``) voice with neutral SSML
    gender and MP3-encoded audio. The client is created with no explicit
    credentials argument.

    Args:
        text (str): The text to convert to speech
        local_filename (str): Path to save the generated audio file

    Returns:
        str: Path to the generated audio file (``local_filename`` unchanged)
    """
    tts_client = texttospeech.TextToSpeechClient()

    # Build the three request parts inline: text input, voice selection,
    # and output audio format.
    result = tts_client.synthesize_speech(
        input=texttospeech.SynthesisInput(text=text),
        voice=texttospeech.VoiceSelectionParams(
            language_code="ja-JP",
            ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
        ),
        audio_config=texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3,
        ),
    )

    # audio_content is binary, so the output file is opened in "wb" mode.
    with open(local_filename, "wb") as audio_file:
        audio_file.write(result.audio_content)
        print(f'Audio content written to file {local_filename}')

    return local_filename

def get_elevenlabs_tts(
    text,
    local_filename,
    voice_id="21m00Tcm4TlvDq8ikWAM",
    model_id="eleven_multilingual_v2",
    timeout=60,
):
    """
    Call the Eleven Labs API to generate speech from text.

    The API key is read from the ``ELEVENLABS_API_KEY`` environment variable.

    Args:
        text (str): The text to convert to speech
        local_filename (str): Path to save the generated audio file
        voice_id (str): Eleven Labs voice ID used in the endpoint URL.
            Defaults to the previously hard-coded voice (Rachel).
        model_id (str): Eleven Labs model ID sent in the request body
        timeout (float | tuple | None): Value passed to ``requests.post`` as
            ``timeout`` (seconds). ``None`` waits indefinitely, which was the
            behavior before this parameter existed.

    Returns:
        str: Path to the generated audio file

    Raises:
        requests.HTTPError: If the API responds with an error status
        requests.Timeout: If the server does not respond within ``timeout``
    """
    api_key = os.getenv("ELEVENLABS_API_KEY")

    # API endpoint for text-to-speech, parameterized by voice
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"

    # Headers with API key; the response is requested as MPEG audio
    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": api_key,
    }

    # Request body
    data = {
        "text": text,
        "model_id": model_id,
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5,
        },
    }

    # A timeout is required: without it a stalled connection would block
    # the caller forever.
    response = requests.post(url, json=data, headers=headers, timeout=timeout)

    # Fail loudly on non-2xx responses rather than saving an error payload
    # as if it were audio.
    response.raise_for_status()

    # Save the audio content to the specified file
    with open(local_filename, "wb") as f:
        f.write(response.content)

    return local_filename