|
|
|
|
|
import torch |
|
|
from transformers import WhisperForConditionalGeneration, WhisperTokenizer, WhisperProcessor |
|
|
import librosa |
|
|
|
|
|
|
|
|
# Hugging Face repo with the fine-tuned Bengali Whisper weights.
DEFAULT_MODEL_ID = "Noobbbbb/whisper-small-bn"
# NOTE(review): the processor is loaded from the *base* checkpoint while the
# model/tokenizer come from the fine-tuned repo — presumably because the
# fine-tuned repo lacks preprocessor files. Confirm the feature-extractor
# settings match what the model was fine-tuned with.
BASE_PROCESSOR_ID = "openai/whisper-small"
# Whisper's feature extractor expects 16 kHz mono audio.
SAMPLE_RATE = 16000


def transcribe(
    audio_path: str = "test_audio.wav",
    model_id: str = DEFAULT_MODEL_ID,
    max_length: int = 448,
) -> str:
    """Transcribe a single audio file with a fine-tuned Whisper model.

    Args:
        audio_path: Path to the audio file; resampled to 16 kHz by librosa.
        model_id: Hugging Face repo id for the model and tokenizer weights.
        max_length: Maximum number of generated tokens (448 is Whisper's
            decoder context limit).

    Returns:
        The decoded transcription with special tokens stripped.
    """
    model = WhisperForConditionalGeneration.from_pretrained(model_id)
    tokenizer = WhisperTokenizer.from_pretrained(model_id)
    processor = WhisperProcessor.from_pretrained(BASE_PROCESSOR_ID)

    # librosa resamples to the requested rate regardless of the file's native rate.
    audio, _sr = librosa.load(audio_path, sr=SAMPLE_RATE)

    input_features = processor.feature_extractor(
        audio, sampling_rate=SAMPLE_RATE, return_tensors="pt"
    ).input_features

    # Inference only — disable autograd to avoid building a graph.
    with torch.no_grad():
        generated_ids = model.generate(input_features, max_length=max_length)

    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)


if __name__ == "__main__":
    # Guarded entry point: importing this module no longer triggers model
    # downloads or file reads.
    transcription = transcribe()
    print(f"Transcription: {transcription}")