# (removed non-Python artifacts copied from a file viewer: file size, commit hash, line-number gutter)
import torch
import librosa
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
# ----------------------------
# CONFIG
# ----------------------------
# Directory containing the fine-tuned wav2vec2 checkpoint (processor + model).
CHECKPOINT_PATH = "./wav2vec2-xlsr-khmer-300m/checkpoint-1800"
# BUGFIX: the original "data\wavs\00000.wav" contained "\000" — a Python octal
# escape that embeds a NUL byte in the path — so the file could never be found.
# Forward slashes are portable (Windows accepts them too) and escape-safe.
AUDIO_PATH = "data/wavs/00000.wav"  # <-- change this to your test file
# ----------------------------
# LOAD MODEL AND PROCESSOR
# ----------------------------
print("Loading model and processor...")
processor = Wav2Vec2Processor.from_pretrained(CHECKPOINT_PATH)
model = Wav2Vec2ForCTC.from_pretrained(CHECKPOINT_PATH)
model.eval()  # disable dropout etc. for deterministic inference
# ----------------------------
# LOAD AUDIO
# ----------------------------
print("Loading audio:", AUDIO_PATH)
# Resample to 16 kHz — the sampling rate wav2vec2 models expect.
speech, sr = librosa.load(AUDIO_PATH, sr=16000)
inputs = processor(
    speech,
    sampling_rate=16000,
    return_tensors="pt",
    padding=True,
)
with torch.no_grad():
    # FIX: pass every tensor the processor produced (input_values AND, when
    # present, attention_mask) — the original forwarded only input_values,
    # so any padding requested above would have been attended to.
    logits = model(**inputs).logits
# Greedy CTC decoding: argmax over the vocab per frame; batch_decode
# collapses repeats/blank tokens into the final transcription string.
predicted_ids = torch.argmax(logits, dim=-1)
transcription = processor.batch_decode(predicted_ids)[0]
print("\n===============================")
print("🔊 Transcription Result:")
print(transcription)
print("===============================")