krislette committed on
Commit
9444dd6
·
1 Parent(s): 11a030b

Auto-deploy from GitHub: c58d63fae21b59bebcd6268e0b9ecb36714b289a

Browse files
Files changed (2) hide show
  1. src/musiclime/wrapper.py +7 -5
  2. src/utils/dataset.py +9 -9
src/musiclime/wrapper.py CHANGED
@@ -141,12 +141,14 @@ class MusicLIMEPredictor:
141
  pca_model = joblib.load("models/fusion/pca.pkl")
142
  reduced_lyrics_batch = pca_model.transform(scaled_lyrics_batch) # (batch, 512)
143
 
 
 
144
  # Step 5: Apply scaler to PCA-scaled lyrics batch
145
- print("[MusicLIME] Reapplying scaler to PCA-scaled batch")
146
- pca_scaler = joblib.load("models/fusion/pca_scaler.pkl")
147
- reduced_lyrics_batch = pca_scaler.transform(
148
- reduced_lyrics_batch
149
- ) # (batch, 512)
150
 
151
  # Step 6: Concatenate features
152
  combined_features_batch = np.concatenate(
 
141
  pca_model = joblib.load("models/fusion/pca.pkl")
142
  reduced_lyrics_batch = pca_model.transform(scaled_lyrics_batch) # (batch, 512)
143
 
144
+ # NOTE: Scaling after PCA produces underperforming models compared to non-scaling.
145
+ # One can toggle it on for experimentation/testing purposes.
146
  # Step 5: Apply scaler to PCA-scaled lyrics batch
147
+ # print("[MusicLIME] Reapplying scaler to PCA-scaled batch")
148
+ # pca_scaler = joblib.load("models/fusion/pca_scaler.pkl")
149
+ # reduced_lyrics_batch = pca_scaler.transform(
150
+ # reduced_lyrics_batch
151
+ # ) # (batch, 512)
152
 
153
  # Step 6: Concatenate features
154
  combined_features_batch = np.concatenate(
src/utils/dataset.py CHANGED
@@ -1,13 +1,12 @@
1
- from sklearn.preprocessing import StandardScaler, MinMaxScaler
2
  from sklearn.model_selection import train_test_split
3
- from src.utils.config_loader import AUDIO_SCALER, LYRICS_SCALER, PCA_SCALER
4
  from sklearn.decomposition import IncrementalPCA
5
  from src.utils.config_loader import PCA_MODEL
6
 
7
  import joblib
8
  import numpy as np
9
  import logging
10
- import pandas as pd
11
 
12
  logging.basicConfig(
13
  level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
@@ -115,12 +114,13 @@ def scale_pca(data: dict):
115
  X_test_lyrics = ipca.transform(X_test_lyrics)
116
  X_val_lyrics = ipca.transform(X_val_lyrics)
117
 
118
- # NOTE: Scaling after PCA produces underperforming models compared to non-scaling. One can toggle it on for experimentation/testing purposes.
119
- #pca_lyric_scaler = StandardScaler().fit(X_train_lyrics)
 
120
 
121
- #X_train_lyrics = pca_lyric_scaler.transform(X_train_lyrics)
122
- #X_test_lyrics = pca_lyric_scaler.transform(X_test_lyrics)
123
- #X_val_lyrics = pca_lyric_scaler.transform(X_val_lyrics)
124
 
125
  # Concatenate them back to their original form, but scaled
126
  X_train = np.concatenate([X_train_audio, X_train_lyrics], axis=1)
@@ -129,7 +129,7 @@ def scale_pca(data: dict):
129
 
130
  joblib.dump(ipca, PCA_MODEL)
131
  # Save the trained scalers for prediction
132
- joblib.dump(pca_lyric_scaler, PCA_SCALER)
133
 
134
  data = {
135
  "train": (X_train, y_train),
 
1
+ from sklearn.preprocessing import StandardScaler
2
  from sklearn.model_selection import train_test_split
3
+ from src.utils.config_loader import AUDIO_SCALER, LYRICS_SCALER #, PCA_SCALER
4
  from sklearn.decomposition import IncrementalPCA
5
  from src.utils.config_loader import PCA_MODEL
6
 
7
  import joblib
8
  import numpy as np
9
  import logging
 
10
 
11
  logging.basicConfig(
12
  level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
 
114
  X_test_lyrics = ipca.transform(X_test_lyrics)
115
  X_val_lyrics = ipca.transform(X_val_lyrics)
116
 
117
+ # NOTE: Scaling after PCA produces underperforming models compared to non-scaling.
118
+ # One can toggle it on for experimentation/testing purposes.
119
+ # pca_lyric_scaler = StandardScaler().fit(X_train_lyrics)
120
 
121
+ # X_train_lyrics = pca_lyric_scaler.transform(X_train_lyrics)
122
+ # X_test_lyrics = pca_lyric_scaler.transform(X_test_lyrics)
123
+ # X_val_lyrics = pca_lyric_scaler.transform(X_val_lyrics)
124
 
125
  # Concatenate them back to their original form, but scaled
126
  X_train = np.concatenate([X_train_audio, X_train_lyrics], axis=1)
 
129
 
130
  joblib.dump(ipca, PCA_MODEL)
131
  # Save the trained scalers for prediction
132
+ # joblib.dump(pca_lyric_scaler, PCA_SCALER)
133
 
134
  data = {
135
  "train": (X_train, y_train),