# voiceBot / test1.py
# Author: Deepak Sahu
# Last change: cosmetic updates (commit 28618b2)
import os

from dotenv import load_dotenv

import riva.client
import riva.client.realtime
from riva.client.argparse_utils import add_asr_config_argparse_parameters, add_connection_argparse_parameters

# Load environment variables from .env file
load_dotenv()

# NVIDIA Riva ASR endpoint hosted on NVIDIA Cloud Functions (NVCF).
uri = "grpc.nvcf.nvidia.com:443"

# Fail fast with a clear message instead of an opaque KeyError when the
# API key is missing from the environment / .env file.
api_key = os.environ.get("NVIDIA_API")
if api_key is None:
    raise RuntimeError("NVIDIA_API is not set; add it to your .env file or environment.")

auth = riva.client.Auth(
    uri=uri,
    use_ssl=True,
    metadata_args=[
        ["function-id", "b702f636-f60c-4a3d-a6f4-f3568c13bd7d"],
        ["authorization", f"Bearer {api_key}"],
    ],
)
asr_service = riva.client.ASRService(auth)
# offline_config = riva.client.RecognitionConfig(
# encoding=riva.client.AudioEncoding.LINEAR_PCM,
# max_alternatives=1,
# enable_automatic_punctuation=True,
# verbatim_transcripts=False,
# )
# Spelling variants of the name "Deepak", used to bias the recognizer
# toward this name via speech_contexts below.
name_variants = [
    "deepak",       # canonical spelling
    "dee-pak",      # hyphenated, phonetic-like spelling
    "deepuck",      # alternate spelling matching pronunciation
    "D IY P AH K",  # ARPAbet phoneme sequence
]
# Offline (batch) recognition settings. speech_contexts boosts the
# name variants so the recognizer favors them during decoding.
_recognition_kwargs = dict(
    language_code="en-US",
    max_alternatives=1,
    enable_automatic_punctuation=True,
    verbatim_transcripts=False,
    speech_contexts=[{"phrases": name_variants, "boost": 20.0}],
)
offline_config = riva.client.RecognitionConfig(**_recognition_kwargs)
# NOTE(review): the sample WAV is read at import time, so importing this
# module fails if ./en-US_sample.wav is absent from the working directory.
with open("./en-US_sample.wav", 'rb') as fh:
    data = fh.read()
def asr_transcribe(audio: bytes) -> str:
    """Transcribe audio bytes with the module-level Riva ASR service.

    Args:
        audio: Raw contents of an audio file accepted by the service
            (the sample used elsewhere in this module is a WAV file).

    Returns:
        The top-hypothesis transcripts of all result segments joined with
        spaces; an empty string when the service returns no results.
    """
    # No `global` needed: asr_service and offline_config are only read,
    # never rebound, so plain module-level name lookup suffices.
    response = asr_service.offline_recognize(audio, offline_config)
    return " ".join(result.alternatives[0].transcript for result in response.results)
def foo():
    """Smoke test: transcribe the sample WAV loaded at import time.

    Returns:
        The transcript string for ./en-US_sample.wav.
    """
    # Delegate to asr_transcribe instead of duplicating its body verbatim.
    return asr_transcribe(data)
if __name__ == "__main__":
    # Run the bundled sample through ASR and show the result.
    print(foo())