File size: 3,237 Bytes
0414b49
 
 
 
87e0f69
0414b49
c5c2329
 
0414b49
c5c2329
 
 
 
 
 
 
 
 
 
 
 
 
 
0414b49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87e0f69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0414b49
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import os
import json
import tempfile
from google.cloud import texttospeech
import requests

from pathlib import Path
from openai import OpenAI

def get_openai_tts(text, local_filename):
    """
    Call the OpenAI API to generate speech from text.

    Args:
        text (str): The text to convert to speech
        local_filename (str): Path to save the generated audio file

    Returns:
        str: Path to the generated audio file
    """
    # The key is read from OPENAI_KEY and handed to the client explicitly.
    api_key = os.getenv("OPENAI_KEY")
    client = OpenAI(api_key=api_key)

    # stream_to_file() on the plain (non-streaming) response is deprecated in
    # the openai SDK because it does not actually stream the body; the
    # with_streaming_response variant is the supported way to write the
    # audio to disk.
    with client.audio.speech.with_streaming_response.create(
        model="tts-1",
        voice="alloy",
        input=text,
    ) as response:
        response.stream_to_file(local_filename)

    return local_filename

def get_google_credentials():
    """
    Write the Google Cloud credentials held in an environment variable to a
    temporary JSON file.

    The credentials are read as a string from GCP_CREDENTIAL_JSON and written
    unchanged. The file is created with delete=False, so it stays on disk
    after this function returns; removing it is up to the caller.

    Returns:
        str: Path to the temporary ".json" file holding the credentials

    Raises:
        ValueError: If GCP_CREDENTIAL_JSON is unset or empty
    """
    creds_json_str = os.getenv("GCP_CREDENTIAL_JSON")  # json credentials stored as a string

    # Validate before touching the filesystem: writing None would fail with an
    # opaque TypeError and leave an empty temp file behind.
    if not creds_json_str:
        raise ValueError(
            "GCP_CREDENTIAL_JSON environment variable is not set or is empty"
        )

    # Explicit UTF-8 so the written bytes do not depend on the platform locale.
    with tempfile.NamedTemporaryFile(
        mode="w+", delete=False, suffix=".json", encoding="utf-8"
    ) as temp:
        temp.write(creds_json_str)
        temp_filename = temp.name

    return temp_filename

def get_google_tts(text, local_filename, language_code="ja-JP"):
    """
    Call the Google Cloud Text-to-Speech API to generate speech from text.

    Args:
        text (str): The text to convert to speech
        local_filename (str): Path to save the generated audio file
        language_code (str): Language tag of the voice to request.
            Defaults to "ja-JP", the value that used to be hard-coded here.

    Returns:
        str: Path to the generated audio file
    """
    # Instantiates a client. No credentials are passed here, so the client
    # library has to locate them on its own.
    client = texttospeech.TextToSpeechClient()

    # Set the text input to be synthesized
    synthesis_input = texttospeech.SynthesisInput(text=text)

    # Build the voice request: the requested language and a neutral
    # SSML voice gender
    voice = texttospeech.VoiceSelectionParams(
        language_code=language_code,
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
    )

    # Select the type of audio file you want returned
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3
    )

    # Perform the text-to-speech request on the text input with the selected
    # voice parameters and audio file type
    response = client.synthesize_speech(
        input=synthesis_input, voice=voice, audio_config=audio_config
    )

    # The response's audio_content is binary.
    with open(local_filename, "wb") as out:
        out.write(response.audio_content)

    # Report only after the file has been closed.
    print(f'Audio content written to file {local_filename}')

    return local_filename

def get_elevenlabs_tts(
    text,
    local_filename,
    voice_id="21m00Tcm4TlvDq8ikWAM",
    timeout=120,
):
    """
    Call the Eleven Labs API to generate speech from text.

    Args:
        text (str): The text to convert to speech
        local_filename (str): Path to save the generated audio file
        voice_id (str): Eleven Labs voice ID to synthesize with.
            Defaults to the previously hard-coded voice (Rachel).
        timeout (float | tuple): Timeout in seconds passed to requests.post.
            Defaults to 120.

    Returns:
        str: Path to the generated audio file

    Raises:
        requests.HTTPError: If the API responds with an error status
        requests.Timeout: If the server does not respond within `timeout`
    """
    api_key = os.getenv("ELEVENLABS_API_KEY")

    # API endpoint for text-to-speech, addressed by voice ID
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"

    # Headers with API key
    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": api_key,
    }

    # Request body
    data = {
        "text": text,
        "model_id": "eleven_multilingual_v2",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5,
        },
    }

    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.post(url, json=data, headers=headers, timeout=timeout)

    # Fail before writing anything so an error body is never saved as audio
    response.raise_for_status()

    # Save the audio content to the specified file
    with open(local_filename, "wb") as f:
        f.write(response.content)

    return local_filename