File size: 2,159 Bytes
5fd2b7a
f5d5c69
5fd2b7a
 
4666ab5
 
 
 
 
5fd2b7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28618b2
 
 
 
 
 
 
 
5fd2b7a
 
 
 
 
 
 
28618b2
5fd2b7a
 
 
 
 
 
 
 
 
51c9eb3
f5d5c69
 
51c9eb3
f5d5c69
 
 
5fd2b7a
 
 
4666ab5
 
 
5fd2b7a
4666ab5
 
 
5fd2b7a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import riva.client
import riva.client.realtime
from riva.client.argparse_utils import add_asr_config_argparse_parameters, add_connection_argparse_parameters
import os
from dotenv import load_dotenv


# Load environment variables from .env file
load_dotenv()

# NVIDIA Cloud Functions gRPC endpoint for hosted Riva ASR.
uri = "grpc.nvcf.nvidia.com:443"

# Fail fast with a clear message instead of an opaque KeyError if the
# API key was not provided via the environment / .env file.
api_key = os.environ.get("NVIDIA_API")
if api_key is None:
    raise RuntimeError(
        "NVIDIA_API environment variable is not set; "
        "add it to your environment or .env file"
    )

auth = riva.client.Auth(
    uri=uri,
    use_ssl=True,
    metadata_args=[
        # function-id selects the hosted Riva ASR function on NVCF.
        ["function-id", "b702f636-f60c-4a3d-a6f4-f3568c13bd7d"],
        ["authorization", f"Bearer {api_key}"],
    ],
)

asr_service = riva.client.ASRService(auth)

# Spelling/pronunciation variants of the target name, passed to the
# recognizer as boosted phrases so it is transcribed more reliably.
name_variants = [
    "deepak",        # standard spelling
    "dee-pak",       # phonetic-like spelling
    "deepuck",       # alternative spelling to match pronunciation
    "D IY P AH K",   # ARPAbet phonetic
]

# Offline (batch) recognition configuration; speech_contexts boosts the
# likelihood of the name variants appearing in the transcript.
offline_config = riva.client.RecognitionConfig(
    language_code="en-US",
    max_alternatives=1,
    enable_automatic_punctuation=True,
    verbatim_transcripts=False,
    speech_contexts=[{"phrases": name_variants, "boost": 20.0}],
)



# Load the bundled sample audio into memory once at import time;
# foo() transcribes these bytes.
with open("./en-US_sample.wav", "rb") as audio_file:
    data = audio_file.read()


def asr_transcribe(audio: bytes) -> str:
    """Run offline (batch) speech recognition on raw audio bytes.

    Args:
        audio: Raw audio content, e.g. the bytes of a WAV file.

    Returns:
        The transcripts of all recognized segments, joined with spaces.
    """
    # Note: `global` is unnecessary for read-only access to module
    # globals, so the original declaration was dropped.
    response = asr_service.offline_recognize(audio, offline_config)
    return " ".join(
        result.alternatives[0].transcript for result in response.results
    )

def foo() -> str:
    """Transcribe the bundled sample audio (module-level ``data``).

    Returns:
        The transcript produced by :func:`asr_transcribe`.
    """
    # Delegate instead of duplicating asr_transcribe's body verbatim.
    return asr_transcribe(data)

if __name__ == "__main__":
    # Transcribe the sample file and print the result.
    print(foo())