Spaces:
Sleeping
Sleeping
File size: 2,159 Bytes
5fd2b7a f5d5c69 5fd2b7a 4666ab5 5fd2b7a 28618b2 5fd2b7a 28618b2 5fd2b7a 51c9eb3 f5d5c69 51c9eb3 f5d5c69 5fd2b7a 4666ab5 5fd2b7a 4666ab5 5fd2b7a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
import riva.client
import riva.client.realtime
from riva.client.argparse_utils import add_asr_config_argparse_parameters, add_connection_argparse_parameters
import os
from dotenv import load_dotenv
# Read key/value pairs from a local .env file into the process environment
# before any of the settings below are looked up.
load_dotenv()

# gRPC endpoint (host:port) the client connects to.
uri = "grpc.nvcf.nvidia.com:443"

# Connection/auth settings. The bearer token comes from the NVIDIA_API
# environment variable; indexing os.environ raises KeyError when it is unset,
# so a missing key fails here, while the module is being imported.
# NOTE(review): "function-id" presumably selects the hosted ASR function on
# the endpoint - confirm against the service documentation.
auth = riva.client.Auth(
    use_ssl=True,
    uri=uri,
    metadata_args=[
        ["function-id", "b702f636-f60c-4a3d-a6f4-f3568c13bd7d"],
        ["authorization", f"Bearer {os.environ['NVIDIA_API']}"],
    ],
)

# Single shared ASR client used by every transcription call in this module.
asr_service = riva.client.ASRService(auth)
# Spellings of the name that should be boosted during recognition.
# NOTE(review): the last entry is an ARPAbet-style phoneme string; confirm the
# service interprets boost phrases that way rather than as literal words.
name_variants = [
    "deepak",  # standard spelling
    "dee-pak",  # phonetic-like spelling
    "deepuck",  # alternative spelling to match pronunciation
    "D IY P AH K",  # ARPAbet phonetic
]

# Recognition settings shared by every request made from this module.
# Audio encoding, model, profanity_filter and enable_word_time_offsets are
# deliberately not set here, so whatever defaults the library/service applies
# are used for them.
offline_config = riva.client.RecognitionConfig(
    language_code="en-US",
    # A single speech context applies the same boost score to every variant.
    speech_contexts=[{"phrases": name_variants, "boost": 20.0}],
    enable_automatic_punctuation=True,
    verbatim_transcripts=False,
    max_alternatives=1,
)
# Raw bytes of the bundled sample recording, used by foo().
# NOTE: this read happens at import time, so importing the module raises
# FileNotFoundError when the sample file is not in the working directory.
with open("./en-US_sample.wav", 'rb') as sample_file:
    data = sample_file.read()
def asr_transcribe(audio: bytes) -> str:
    """Transcribe ``audio`` with a single offline recognition request.

    The bytes are sent to the module-level ``asr_service`` together with
    ``offline_config``.

    Args:
        audio: Audio payload passed unchanged to ``offline_recognize``.

    Returns:
        The transcript of the first alternative of every result, joined with
        single spaces. An empty string when the response has no results.

    Raises:
        IndexError: If a result carries no alternatives.
    """
    reply = asr_service.offline_recognize(audio, offline_config)

    pieces = []
    for segment in reply.results:
        # Only the first alternative of each result is used.
        pieces.append(segment.alternatives[0].transcript)
    return " ".join(pieces)
def foo() -> str:
    """Transcribe the bundled sample recording.

    Delegates to ``asr_transcribe`` with the module-level ``data`` bytes so
    the request/response handling lives in exactly one place instead of being
    duplicated here.

    Returns:
        The transcript text produced by ``asr_transcribe`` for the sample.
    """
    # ``data`` is only read, never rebound, so no ``global`` statement is needed.
    return asr_transcribe(data)
if __name__ == "__main__":
    # Script entry point: transcribe the bundled sample and print the text.
    print(foo())
|