# whisper-tiny-khm / test.py
# dynann's picture
# Training in progress, step 2000
# cd49044 verified
"""Transcribe one audio file with a fine-tuned Wav2Vec2-XLSR Khmer CTC checkpoint.

Loads the processor and model from a local checkpoint directory, reads a WAV
file resampled to 16 kHz, and prints the greedy (argmax) CTC transcription.
"""
import torch
import librosa
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

# ----------------------------
# CONFIG
# ----------------------------
CHECKPOINT_PATH = "./wav2vec2-xlsr-khmer-300m/checkpoint-1800"
# BUG FIX: the original non-raw string "data\wavs\00000.wav" let Python
# interpret "\00" as an octal escape (a NUL byte) and "\w" as an invalid
# escape, silently corrupting the path. A raw string keeps the backslashes.
AUDIO_PATH = r"data\wavs\00000.wav"  # <-- change this to your test file

# ----------------------------
# LOAD MODEL AND PROCESSOR
# ----------------------------
print("Loading model and processor...")
processor = Wav2Vec2Processor.from_pretrained(CHECKPOINT_PATH)
model = Wav2Vec2ForCTC.from_pretrained(CHECKPOINT_PATH)
model.eval()  # disable dropout etc. for deterministic inference

# ----------------------------
# LOAD AUDIO
# ----------------------------
print("Loading audio:", AUDIO_PATH)
# sr=16000 forces resampling to the 16 kHz rate Wav2Vec2 was trained on.
speech, sr = librosa.load(AUDIO_PATH, sr=16000)
inputs = processor(
    speech,
    sampling_rate=16000,
    return_tensors="pt",
    padding=True,
)

# ----------------------------
# INFERENCE (greedy CTC decoding)
# ----------------------------
with torch.no_grad():
    logits = model(inputs.input_values).logits
predicted_ids = torch.argmax(logits, dim=-1)
transcription = processor.batch_decode(predicted_ids)[0]

print("\n===============================")
# BUG FIX: original printed mojibake ("๐Ÿ”Š" — UTF-8 bytes of the emoji
# mis-decoded as Thai); restore the intended speaker emoji.
print("🔊 Transcription Result:")
print(transcription)
print("===============================")