hassanaliemon committed on
Commit
d205faa
·
verified ·
1 Parent(s): 5773748

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +33 -1
README.md CHANGED
@@ -1 +1,33 @@
1
- Please [click here](https://github.com/hassanaliemon/BanglaASR/blob/main/asr_colab_infer.ipynb) to start inference on Colab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ```python
2
+ import librosa
3
+ import torch
4
+ import torchaudio
5
+ import numpy as np
6
+
7
+ from transformers import WhisperTokenizer
8
+ from transformers import WhisperProcessor
9
+ from transformers import WhisperFeatureExtractor
10
+ from transformers import WhisperForConditionalGeneration
11
+
12
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
13
+
14
+ audio_path = "https://huggingface.co/hassanaliemon/BanglaASR/resolve/main/test_audio/common_voice_bn_31255511.mp3"
15
+ model_path = "hassanaliemon/BanglaASR"
16
+
17
+
18
+ feature_extractor = WhisperFeatureExtractor.from_pretrained(model_path)
19
+ tokenizer = WhisperTokenizer.from_pretrained(model_path)
20
+ processor = WhisperProcessor.from_pretrained(model_path)
21
+ model = WhisperForConditionalGeneration.from_pretrained(model_path).to(device)
22
+
23
+
24
+ speech_array, sampling_rate = torchaudio.load(audio_path, format="mp3")
25
+ speech_array = speech_array[0].numpy()
26
+ speech_array = librosa.resample(np.asarray(speech_array), orig_sr=sampling_rate, target_sr=16000)
27
+ input_features = feature_extractor(speech_array, sampling_rate=16000, return_tensors="pt").input_features
28
+
29
+ predicted_ids = model.generate(inputs=input_features.to(device))[0]
30
+ transcription = processor.decode(predicted_ids, skip_special_tokens=True)
31
+
32
+ print(transcription)
33
+ ```