dynann commited on
Commit
cd49044
·
verified ·
1 Parent(s): 47cd528

Training in progress, step 2000

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc39930d550260ed3caa68556ac30b3cd86001344701142b2c6cde0de254b78b
3
  size 151061672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79aac629fb37b9df3448fb1e38f2bb8318af505c54923a20e139d8585a27518f
3
  size 151061672
runs/Oct20_21-52-43_MSI/events.out.tfevents.1760971968.MSI.20964.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03937d4f749070503d8d4ef4f74618479fb96d0b95ba6fde9ce7dad2ca6c2f77
3
+ size 7497
runs/Oct20_22-31-22_MSI/events.out.tfevents.1760974284.MSI.21632.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7e65879cead6274643d3b4b88cb6de13c22406b19b90fd7ef55e5264df39172
3
+ size 7814
test.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Smoke test: transcribe a single audio file with a fine-tuned
Wav2Vec2-XLSR (Khmer) CTC checkpoint and print the greedy-decoded text."""

import torch
import librosa
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

# ----------------------------
# CONFIG
# ----------------------------
CHECKPOINT_PATH = "./wav2vec2-xlsr-khmer-300m/checkpoint-1800"
# BUGFIX: the original "data\wavs\00000.wav" was a non-raw string, so
# "\0" was interpreted as a NUL escape (and "\w" is an invalid escape) —
# the resulting path could never be opened on any OS. Forward slashes
# work on both Windows and POSIX.
AUDIO_PATH = "data/wavs/00000.wav"  # <-- change this to your test file

# ----------------------------
# LOAD MODEL AND PROCESSOR
# ----------------------------
print("Loading model and processor...")
processor = Wav2Vec2Processor.from_pretrained(CHECKPOINT_PATH)
model = Wav2Vec2ForCTC.from_pretrained(CHECKPOINT_PATH)
model.eval()  # disable dropout etc. for deterministic inference

# ----------------------------
# LOAD AUDIO
# ----------------------------
print("Loading audio:", AUDIO_PATH)
# Wav2Vec2 expects 16 kHz mono input; librosa resamples on load.
speech, sr = librosa.load(AUDIO_PATH, sr=16000)
inputs = processor(
    speech,
    sampling_rate=16000,
    return_tensors="pt",
    padding=True,
)

# Inference only — no gradient tracking needed.
with torch.no_grad():
    logits = model(inputs.input_values).logits

# Greedy CTC decode: per-frame argmax, then batch_decode collapses
# repeats and blank tokens into the final transcription string.
predicted_ids = torch.argmax(logits, dim=-1)
transcription = processor.batch_decode(predicted_ids)[0]
print("\n===============================")
print("🔊 Transcription Result:")
print(transcription)
print("===============================")
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c640c80817fc43b61eec874b7a1d7aa65b4c3e117ac173e261392ff7ede642bc
3
  size 5969
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a226b0a6cbb4db927178bbdbe3080a7ef6031ba804aa943211ce2e8160456958
3
  size 5969