AshishNoel14 committed on
Commit
6b18a5f
·
verified ·
1 Parent(s): d213ced

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. wvmos/wv_mos.py +6 -5
wvmos/wv_mos.py CHANGED
@@ -109,11 +109,12 @@ class Wav2Vec2MOS(nn.Module):
109
  # 1. Load Audio (Original 16k)
110
  signal = librosa.load(path, sr=16_000)[0]
111
 
112
- # 2. Sliding Window (10-minute window, 5-minute overlap)
113
- # BENCHMARK RESULT: Win=600s yields 0.00 deviation from Ground Truth!
114
- # This fits in 16GB RAM (~4GB peak) and solves the score inflation issue.
115
- window_size = 16000 * 600 # 10 minutes
116
- stride = 16000 * 300 # 5 minutes
 
117
 
118
  # Prepare windows
119
  chunks = []
 
109
  # 1. Load Audio (Original 16k)
110
  signal = librosa.load(path, sr=16_000)[0]
111
 
112
+ # 2. Sliding Window (5-minute window, 2.5-minute overlap)
113
+ # 600s (10-min) caused runtime failures/OOM on Hugging Face.
114
+ # 300s (5-min) was verified to have 0.09 deviation (within 0.1 tolerance)
115
+ # and is much safer for memory.
116
+ window_size = 16000 * 300 # 5 minutes
117
+ stride = 16000 * 150 # 2.5 minutes
118
 
119
  # Prepare windows
120
  chunks = []