krislette committed
Commit 61f21af · 1 Parent(s): 6530321

Auto-deploy from GitHub: 7c591156b27da3e33cf2a35fbb1d3fdf593c7e3f

Dockerfile CHANGED
@@ -51,6 +51,7 @@ ENV NUMBA_CACHE_DIR="/tmp/numba_cache"
 ENV NUMBA_DISABLE_JIT=0
 ENV MUSICLIME_NUM_SAMPLES=1000
 ENV MUSICLIME_NUM_FEATURES=10
+ENV PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
 
 # Hugging Face Spaces specific, expose port 7860
 EXPOSE 7860
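
The added ENV line opts the PyTorch CUDA caching allocator into expandable segments, which can reduce out-of-memory errors caused by fragmentation when allocation sizes vary between batches. The setting only takes effect if it is present before CUDA is first initialized, which the Dockerfile guarantees. A minimal sketch of applying and inspecting the same setting from Python; the check below is illustrative and not part of this commit:

import os

# Must be set before the first CUDA allocation; mirrors the Dockerfile ENV line above.
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")

import torch

if torch.cuda.is_available():
    _ = torch.zeros(1, device="cuda")  # first allocation initializes the allocator
    # memory_summary reports reserved/allocated segments, useful for spotting fragmentation
    print(torch.cuda.memory_summary(abbreviated=True))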
src/musiclime/wrapper.py CHANGED
@@ -1,6 +1,7 @@
 import time
 import joblib
 import numpy as np
+import torch
 
 from src.preprocessing.preprocessor import single_preprocessing
 from src.spectttra.spectttra_trainer import spectttra_train
@@ -86,7 +87,12 @@ class MusicLIMEPredictor:
         # Step 2: Batch feature extraction
         start_time = time.time()
         print("[MusicLIME] Extracting audio features (batch)...")
-        audio_features_batch = spectttra_train(processed_audios)  # (batch, 384)
+        audio_features_batch = spectttra_train(processed_audios)
+
+        # Clear GPU cache after audio processing
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+
         audio_time = time.time() - start_time
         print(
             green_bold(
@@ -99,6 +105,11 @@ class MusicLIMEPredictor:
         lyrics_features_batch = l2vec_train(
             self.llm2vec_model, processed_lyrics
         )  # (batch, 2048)
+
+        # Clear GPU cache after lyrics processing
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+
         lyrics_time = time.time() - start_time
         print(
             green_bold(
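
Both empty_cache() calls follow the same pattern: once a memory-heavy stage finishes and its intermediate tensors go out of scope, cached blocks are handed back to the driver so the next stage can allocate them. A minimal sketch of that pattern in isolation, with a placeholder stage_fn that is not a function from this repository:

import torch

def run_stage_then_release(stage_fn, *args):
    """Run a GPU-heavy stage, then return unused cached memory to the driver."""
    result = stage_fn(*args)
    if torch.cuda.is_available():
        # Frees cached blocks only; tensors still referenced (e.g. `result`) are untouched.
        torch.cuda.empty_cache()
    return result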
src/spectttra/spectttra_trainer.py CHANGED
@@ -166,35 +166,50 @@ def spectttra_train(audio_tensors):
     model = _MODEL
     device = _DEVICE
 
-    # Refactors the loop to be a much faster single-batch operation
-    try:
-        waveforms_batch = torch.cat(audio_tensors, dim=0).to(
-            device, dtype=torch.float32
-        )
-    except Exception as e:
+    # Chunk processing: Process in smaller batches
+    chunk_size = 50
+    all_embeddings = []
+
+    for i in range(0, len(audio_tensors), chunk_size):
+        chunk = audio_tensors[i : i + chunk_size]
         print(
-            f"[INFO] Error during tensor concatenation, falling back to loop. Fix preprocessing for speed. Error: {e}"
+            f"[INFO] Processing chunk {i//chunk_size + 1}/{(len(audio_tensors)-1)//chunk_size + 1} ({len(chunk)} samples)"
         )
-        batch_list = [spectttra_predict(w) for w in audio_tensors]
-        return np.array(batch_list)
 
-    with torch.no_grad():
-        melspec = feat_ext(waveforms_batch)
+        try:
+            waveforms_batch = torch.cat(chunk, dim=0).to(device).float()
+        except Exception as e:
+            print(
+                f"[INFO] Error during tensor concatenation, falling back to loop. Error: {e}"
+            )
+            batch_list = [spectttra_predict(w) for w in chunk]
+            all_embeddings.extend(batch_list)
+            continue
+
+        with torch.no_grad():
+            melspec = feat_ext(waveforms_batch)
+
+            # Ensure melspec shape matches model's expectation
+            expected_frames = model.input_temp_dim
+            if melspec.shape[2] > expected_frames:
+                melspec = melspec[:, :, :expected_frames]
+            elif melspec.shape[2] < expected_frames:
+                padding = expected_frames - melspec.shape[2]
+                melspec = torch.nn.functional.pad(melspec, (0, padding))
+
+            if device.type == "cuda":
+                with torch.cuda.amp.autocast(enabled=True):
+                    tokens = model(melspec)
+                    pooled = tokens.mean(dim=1)
+            else:
+                tokens = model(melspec)
+                pooled = tokens.mean(dim=1)
 
-        # Ensure melspec shape matches model's expectation
-        expected_frames = model.input_temp_dim  # expected_frames is 3744
-        if melspec.shape[2] > expected_frames:
-            melspec = melspec[:, :, :expected_frames]
-        elif melspec.shape[2] < expected_frames:
-            padding = expected_frames - melspec.shape[2]
-            melspec = torch.nn.functional.pad(melspec, (0, padding))
+        chunk_embeddings = pooled.cpu().numpy()
+        all_embeddings.append(chunk_embeddings)
 
+        # Clear GPU cache after each chunk
         if device.type == "cuda":
-            with torch.cuda.amp.autocast(enabled=True):
-                tokens = model(melspec)
-                pooled = tokens.mean(dim=1)
-        else:
-            tokens = model(melspec)
-            pooled = tokens.mean(dim=1)
+            torch.cuda.empty_cache()
 
-    return pooled.cpu().numpy()
+    return np.vstack(all_embeddings)
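
The rewrite replaces one large concatenated batch with fixed-size chunks of 50 tensors, so peak GPU memory is bounded by the chunk size rather than the total number of input samples, and the cache is cleared between chunks. A generic sketch of that chunked-inference pattern, using illustrative names (embed_batch, inputs) rather than the repository's own functions:

import numpy as np
import torch

def embed_in_chunks(inputs, embed_batch, chunk_size=50):
    """Embed a long list of tensors in fixed-size chunks to bound peak GPU memory."""
    outputs = []
    for start in range(0, len(inputs), chunk_size):
        chunk = inputs[start : start + chunk_size]
        with torch.no_grad():
            outputs.append(embed_batch(chunk))  # assumed to return a (len(chunk), dim) array
        if torch.cuda.is_available():
            torch.cuda.empty_cache()  # hand cached blocks back between chunks
    return np.vstack(outputs)  # (len(inputs), dim)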