Spaces:
Running
Running
Auto-deploy from GitHub: c58d63fae21b59bebcd6268e0b9ecb36714b289a
Browse files
- src/musiclime/wrapper.py +7 -5
- src/utils/dataset.py +9 -9
src/musiclime/wrapper.py
CHANGED
|
@@ -141,12 +141,14 @@ class MusicLIMEPredictor:
|
|
| 141 |
pca_model = joblib.load("models/fusion/pca.pkl")
|
| 142 |
reduced_lyrics_batch = pca_model.transform(scaled_lyrics_batch) # (batch, 512)
|
| 143 |
|
| 144 |
# Step 5: Apply scaler to PCA-scaled lyrics batch
|
| 145 |
-
print("[MusicLIME] Reapplying scaler to PCA-scaled batch")
|
| 146 |
-
pca_scaler = joblib.load("models/fusion/pca_scaler.pkl")
|
| 147 |
-
reduced_lyrics_batch = pca_scaler.transform(
|
| 148 |
-
reduced_lyrics_batch
|
| 149 |
-
) # (batch, 512)
|
| 150 |
|
| 151 |
# Step 6: Concatenate features
|
| 152 |
combined_features_batch = np.concatenate(
|
|
|
|
| 141 |
pca_model = joblib.load("models/fusion/pca.pkl")
|
| 142 |
reduced_lyrics_batch = pca_model.transform(scaled_lyrics_batch) # (batch, 512)
|
| 143 |
|
| 144 |
+
# NOTE: Scaling after PCA produces underperforming models compared to non-scaling.
|
| 145 |
+
# One can toggle it on for experimentation/testing purposes.
|
| 146 |
# Step 5: Apply scaler to PCA-scaled lyrics batch
|
| 147 |
+
# print("[MusicLIME] Reapplying scaler to PCA-scaled batch")
|
| 148 |
+
# pca_scaler = joblib.load("models/fusion/pca_scaler.pkl")
|
| 149 |
+
# reduced_lyrics_batch = pca_scaler.transform(
|
| 150 |
+
# reduced_lyrics_batch
|
| 151 |
+
# ) # (batch, 512)
|
| 152 |
|
| 153 |
# Step 6: Concatenate features
|
| 154 |
combined_features_batch = np.concatenate(
|
src/utils/dataset.py
CHANGED
|
@@ -1,13 +1,12 @@
|
|
| 1 |
-
from sklearn.preprocessing import StandardScaler
|
| 2 |
from sklearn.model_selection import train_test_split
|
| 3 |
-
from src.utils.config_loader import AUDIO_SCALER, LYRICS_SCALER
|
| 4 |
from sklearn.decomposition import IncrementalPCA
|
| 5 |
from src.utils.config_loader import PCA_MODEL
|
| 6 |
|
| 7 |
import joblib
|
| 8 |
import numpy as np
|
| 9 |
import logging
|
| 10 |
-
import pandas as pd
|
| 11 |
|
| 12 |
logging.basicConfig(
|
| 13 |
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
|
|
@@ -115,12 +114,13 @@ def scale_pca(data: dict):
|
|
| 115 |
X_test_lyrics = ipca.transform(X_test_lyrics)
|
| 116 |
X_val_lyrics = ipca.transform(X_val_lyrics)
|
| 117 |
|
| 118 |
-
# NOTE: Scaling after PCA produces underperforming models compared to non-scaling.
|
| 119 |
-
#
|
|
|
|
| 120 |
|
| 121 |
-
#X_train_lyrics = pca_lyric_scaler.transform(X_train_lyrics)
|
| 122 |
-
#X_test_lyrics = pca_lyric_scaler.transform(X_test_lyrics)
|
| 123 |
-
#X_val_lyrics = pca_lyric_scaler.transform(X_val_lyrics)
|
| 124 |
|
| 125 |
# Concatenate them back to their original form, but scaled
|
| 126 |
X_train = np.concatenate([X_train_audio, X_train_lyrics], axis=1)
|
|
@@ -129,7 +129,7 @@ def scale_pca(data: dict):
|
|
| 129 |
|
| 130 |
joblib.dump(ipca, PCA_MODEL)
|
| 131 |
# Save the trained scalers for prediction
|
| 132 |
-
joblib.dump(pca_lyric_scaler, PCA_SCALER)
|
| 133 |
|
| 134 |
data = {
|
| 135 |
"train": (X_train, y_train),
|
|
|
|
| 1 |
+
from sklearn.preprocessing import StandardScaler
|
| 2 |
from sklearn.model_selection import train_test_split
|
| 3 |
+
from src.utils.config_loader import AUDIO_SCALER, LYRICS_SCALER #, PCA_SCALER
|
| 4 |
from sklearn.decomposition import IncrementalPCA
|
| 5 |
from src.utils.config_loader import PCA_MODEL
|
| 6 |
|
| 7 |
import joblib
|
| 8 |
import numpy as np
|
| 9 |
import logging
|
|
|
|
| 10 |
|
| 11 |
logging.basicConfig(
|
| 12 |
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
|
|
|
|
| 114 |
X_test_lyrics = ipca.transform(X_test_lyrics)
|
| 115 |
X_val_lyrics = ipca.transform(X_val_lyrics)
|
| 116 |
|
| 117 |
+
# NOTE: Scaling after PCA produces underperforming models compared to non-scaling.
|
| 118 |
+
# One can toggle it on for experimentation/testing purposes.
|
| 119 |
+
# pca_lyric_scaler = StandardScaler().fit(X_train_lyrics)
|
| 120 |
|
| 121 |
+
# X_train_lyrics = pca_lyric_scaler.transform(X_train_lyrics)
|
| 122 |
+
# X_test_lyrics = pca_lyric_scaler.transform(X_test_lyrics)
|
| 123 |
+
# X_val_lyrics = pca_lyric_scaler.transform(X_val_lyrics)
|
| 124 |
|
| 125 |
# Concatenate them back to their original form, but scaled
|
| 126 |
X_train = np.concatenate([X_train_audio, X_train_lyrics], axis=1)
|
|
|
|
| 129 |
|
| 130 |
joblib.dump(ipca, PCA_MODEL)
|
| 131 |
# Save the trained scalers for prediction
|
| 132 |
+
# joblib.dump(pca_lyric_scaler, PCA_SCALER)
|
| 133 |
|
| 134 |
data = {
|
| 135 |
"train": (X_train, y_train),
|