File size: 1,208 Bytes
5d6c840
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import riva.client
import riva.client.realtime
from riva.client.argparse_utils import add_asr_config_argparse_parameters, add_connection_argparse_parameters
import os
from dotenv import load_dotenv
from riva.client.proto.riva_audio_pb2 import AudioEncoding

# Read the .env file so the API key looked up below can come from it.
load_dotenv()

# Endpoint the TTS client connects to.
uri = "grpc.nvcf.nvidia.com:443"

# Metadata entries handed to riva.client.Auth: the target function id and a
# bearer token built from the NVIDIA_API environment variable. Indexing
# os.environ raises KeyError at import time when the variable is not set.
_tts_metadata = [
    ["function-id", "877104f7-e885-42b9-8de8-f6e4c6303969"],
    ["authorization", f"Bearer {os.environ['NVIDIA_API']}"],
]

tts_auth = riva.client.Auth(uri=uri, use_ssl=True, metadata_args=_tts_metadata)

def riva_tts_service(inp_text: str, sample_rate_hz: int = 44100) -> bytes:
    """Synthesize speech for ``inp_text`` through the Riva TTS service.

    Args:
        inp_text: Text to convert to speech.
        sample_rate_hz: Sample rate requested for the generated audio.
            Defaults to 44100.

    Returns:
        The ``audio`` field of the synthesis response; the request asks for
        LINEAR_PCM encoding at ``sample_rate_hz``.
    """
    # The client is created on every call and published as a module-level
    # global, exactly as before, so code reading ``tts_service`` keeps working.
    global tts_service
    tts_service = riva.client.SpeechSynthesisService(tts_auth)
    resp = tts_service.synthesize(
        inp_text,
        "Magpie-Multilingual.EN-US.Mia",
        "en-US",
        # Forward the caller's rate; it used to be hard-coded to 44100, which
        # silently ignored the ``sample_rate_hz`` argument.
        sample_rate_hz=sample_rate_hz,
        encoding=AudioEncoding.LINEAR_PCM,
        # Optional arguments intentionally not passed here:
        # zero_shot_audio_prompt_file, zero_shot_quality, custom_dictionary,
        # zero_shot_transcript.
    )
    return resp.audio