krislette committed
Commit 91f3c16 · 1 Parent(s): 2b37a16

Auto-deploy from GitHub: f3f5c5a060663701fed0a46fe5870df177796716

scripts/explain.py CHANGED
@@ -26,12 +26,6 @@ def musiclime(audio_data, lyrics_text):
     explainer = MusicLIMEExplainer()
     predictor = MusicLIMEPredictor()
 
-    # Truncate raw audio to 2 minutes before any processing
-    target_samples = int(2 * 60 * 22050)
-    if len(audio_data) > target_samples:
-        # Keep first 2 minutes
-        audio_data = audio_data[:target_samples]
-
     # Then generate explanations
     explanation = explainer.explain_instance(
         audio=audio_data,
scripts/explain_runner.py ADDED
@@ -0,0 +1,30 @@
+import librosa
+from scripts.explain import musiclime
+
+# Load test audio and lyrics
+audio_path = "data/external/sample_1.mp3"
+lyrics_path = "data/external/sample_1.txt"
+
+# Load audio
+audio_data, sr = librosa.load(audio_path)
+
+# Load lyrics
+with open(lyrics_path, "r", encoding="utf-8") as f:
+    lyrics_text = f.read()
+
+print("Running MusicLIME explanation...")
+result = musiclime(audio_data, lyrics_text)
+
+print("\n=== EXPLANATION RESULTS ===")
+print(
+    f"Prediction: {result['prediction']['class_name']} ({result['prediction']['confidence']:.3f})"
+)
+print(f"Runtime: {result['summary']['runtime_seconds']:.2f}s")
+
+print("\n=== TOP FEATURES (by absolute importance) ===")
+for feature in result["explanations"]:
+    print(
+        f"Rank {feature['rank']}: {feature['modality']} | Weight: {feature['weight']:.4f} | Importance: {feature['importance']:.4f}"
+    )
+    print(f" Feature: {feature['feature_text'][:80]}...")
+    print()
scripts/predict.py CHANGED
@@ -37,13 +37,9 @@ def predict_pipeline(audio_file, lyrics):
     # 2.) Preprocess both audio and lyrics
     audio, lyrics = single_preprocessing(audio_file, lyrics)
 
-    # Truncate to 2 minutes to match explain pipeline
-    target_samples = int(2 * 60 * 22050)
-    if len(audio) > target_samples:
-        audio = audio[:target_samples]
-
     # 3.) Call the train method for both models
     audio_features = spectttra_predict(audio)
+    audio_features = audio_features.reshape(1, -1)
     lyrics_features = l2vec_single_train(llm2vec_model, lyrics)
 
     # 4.) Scale the vectors using Z-Score
@@ -52,10 +48,6 @@ def predict_pipeline(audio_file, lyrics):
     # 5.) Reduce the lyrics using saved PCA model
     reduced_lyrics = load_pca_model(lyrics_features)
 
-    # 6.) Apply PCA scaler to PCA-reduced lyrics
-    pca_scaler = joblib.load("models/fusion/pca_scaler.pkl")
-    reduced_lyrics = pca_scaler.transform(reduced_lyrics)
-
     # 6.) Concatenate the vectors of audio_features + lyrics_features
     results = np.concatenate([audio_features, reduced_lyrics], axis=1)
 
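Note on the new reshape: spectttra_predict returns a 1-D vector for a single song, while the PCA-reduced lyrics come back as a 2-D array, so the added reshape(1, -1) is what lets the axis=1 concatenation work. A minimal standalone sketch of that shape alignment (the 384/512 widths are assumptions taken from the slicing and comments elsewhere in this commit):

import numpy as np

# Hypothetical single-sample shapes; 384 (audio) and 512 (PCA-reduced lyrics)
# are assumptions based on X_train[:, :384] in src/utils/dataset.py and the
# "(batch, 512)" comments in src/musiclime/wrapper.py.
audio_features = np.random.rand(384)             # 1-D output for one song
reduced_lyrics = np.random.rand(1, 512)          # 2-D output of the PCA transform

audio_features = audio_features.reshape(1, -1)   # (1, 384), as added in this diff
results = np.concatenate([audio_features, reduced_lyrics], axis=1)
print(results.shape)                             # (1, 896)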
scripts/predict_runner.py ADDED
@@ -0,0 +1,19 @@
+import librosa
+from scripts.predict import predict_pipeline
+
+# Load test audio and lyrics
+audio_path = "data/external/sample_1.mp3"
+lyrics_path = "data/external/sample_1.txt"
+
+# Load audio
+audio_data, sr = librosa.load(audio_path)
+
+# Load lyrics
+with open(lyrics_path, "r", encoding="utf-8") as f:
+    lyrics_text = f.read()
+
+print("Running prediction pipeline...")
+prediction = predict_pipeline(audio_data, lyrics_text)
+
+print(f"\n=== PREDICTION RESULT ===")
+print(f"Prediction: {prediction}")
src/models/mlp.py CHANGED
@@ -438,13 +438,11 @@ class MLPClassifier:
 
         probabilities = np.array(probabilities).flatten()
         # Threshold at 0.5
-        predictions = (probabilities > 0.5).astype(int)
+        predictions = (probabilities >= 0.5).astype(int)
 
         return probabilities, predictions
 
-    def predict_single(
-        self, features: np.ndarray, temperature: float = 2.5
-    ) -> Tuple[float, int, str]:
+    def predict_single(self, features: np.ndarray) -> Tuple[float, int, str]:
         """
         Predict whether a single song is AI-generated or human-composed.
 
@@ -488,17 +486,17 @@
         self.model.eval()
         with torch.no_grad():
            features_tensor = torch.FloatTensor(features).to(self.device)
-           outputs = self.model(features_tensor)
-           probabilities = outputs.item()  # Just use raw output
+           probability = self.model(features_tensor).item()
+
+        probability = np.clip(probability, 0.0, 1.0)
 
        # Extract single results
-       prediction = int(probabilities >= 0.5)
+       prediction = int(probability >= 0.5)
        label = "Human-Composed" if prediction == 1 else "AI-Generated"
-       probability = (
-           probabilities * 100 if prediction == 1 else (1 - probabilities) * 100
-       )
 
-       return probability, prediction, label
+       confidence = probability * 100 if prediction == 1 else (1 - probability) * 100
+
+       return confidence, prediction, label
 
     def predict_batch(self, features: np.ndarray, return_details: bool = False) -> Dict:
         """
src/musiclime/explainer.py CHANGED
@@ -84,7 +84,7 @@ class MusicLIMEExplainer:
         # These are for debugging only I have to see THAT progress
         print("[MusicLIME] Starting MusicLIME explanation...")
         print(
-            f"[MusicLIME] Audio length: {len(audio)/44100:.1f}s, Temporal segments: {temporal_segments}"
+            f"[MusicLIME] Audio length: {len(audio)/22050:.1f}s, Temporal segments: {temporal_segments}"
         )
         print(f"[MusicLIME] Lyrics lines: {len(lyrics.split(chr(10)))}")
 
@@ -214,7 +214,6 @@ class MusicLIMEExplainer:
         # Get predictions
         print(f"[MusicLIME] Getting predictions for {len(texts)} samples...")
         predictions = predict_fn(texts, audios)
-        prediction_time = time.time() - start_time
 
         # Show the original prediction (first row is always the unperturbed original)
         original_prediction = predictions[0]
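The 44100 → 22050 change lines the debug print up with librosa's default sample rate (librosa.load resamples to 22,050 Hz unless sr= is overridden, which is how the new runner scripts load audio). A small sketch of the arithmetic with a hypothetical clip:

import numpy as np

sr = 22050                   # librosa.load default sample rate
audio = np.zeros(sr * 90)    # hypothetical 90-second clip

print(f"Audio length: {len(audio) / 22050:.1f}s")   # 90.0s (new constant)
print(f"Audio length: {len(audio) / 44100:.1f}s")   # 45.0s (old constant, half the real duration)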
src/musiclime/wrapper.py CHANGED
@@ -71,7 +71,7 @@ class MusicLIMEPredictor:
         processed_audios = []
         processed_lyrics = []
 
-        for i, (text, audio) in enumerate(zip(texts, audios)):
+        for _, (text, audio) in enumerate(zip(texts, audios)):
             processed_audio, processed_lyric = single_preprocessing(audio, text)
             processed_audios.append(processed_audio)
             processed_lyrics.append(processed_lyric)
@@ -127,21 +127,14 @@ class MusicLIMEPredictor:
         pca_model = joblib.load("models/fusion/pca.pkl")
         reduced_lyrics_batch = pca_model.transform(scaled_lyrics_batch)  # (batch, 512)
 
-        # Step 5: Apply scaler to PCA-scaled lyrics batch
-        print("[MusicLIME] Reapplying scaler to PCA-scaled batch")
-        pca_scaler = joblib.load("models/fusion/pca_scaler.pkl")
-        reduced_lyrics_batch = pca_scaler.transform(
-            reduced_lyrics_batch
-        )  # (batch, 512)
-
-        # Step 6: Concatenate features
+        # Step 5: Concatenate features
         combined_features_batch = np.concatenate(
             [scaled_audio_batch, reduced_lyrics_batch], axis=1
         )  # (batch, sum of lyrics & audio vector dims)
         scaling_time = time.time() - start_time
         print(green_bold(f"[MusicLIME] Scaling completed in {scaling_time:.2f}s"))
 
-        # Step 7: Batch MLP prediction
+        # Step 6: Batch MLP prediction
         start_time = time.time()
         print("[MusicLIME] Running MLP predictions (batch)...")
         if self.classifier is None:
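With the pca_scaler re-scaling dropped, the batch path now concatenates the z-scored audio batch with the PCA-reduced lyrics batch directly, matching the single-song path in scripts/predict.py. A rough shape sketch under the same assumed 384/512 widths:

import numpy as np

batch = 8                                           # hypothetical number of LIME perturbations
scaled_audio_batch = np.random.rand(batch, 384)     # assumed audio embedding width
reduced_lyrics_batch = np.random.rand(batch, 512)   # (batch, 512) after models/fusion/pca.pkl

combined_features_batch = np.concatenate(
    [scaled_audio_batch, reduced_lyrics_batch], axis=1
)
print(combined_features_batch.shape)                # (8, 896)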
src/utils/dataset.py CHANGED
@@ -9,7 +9,9 @@ import numpy as np
 import logging
 import pandas as pd
 
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
 logger = logging.getLogger(__name__)
 
 
@@ -51,18 +53,20 @@ def dataset_splitter(X: np.ndarray, Y: np.ndarray, ids: np.ndarray = None):
         X_train, y_train, test_size=0.2222, random_state=42, stratify=y_train
     )
 
-    logger.info(f"Train: {X_train.shape}, Validation: {X_val.shape}, Test: {X_test.shape}")
+    logger.info(
+        f"Train: {X_train.shape}, Validation: {X_val.shape}, Test: {X_test.shape}"
+    )
 
     data = {
         "train": (X_train, y_train),
-        "val": (X_val, y_val),
-        "test": (X_test, y_test),
+        "val": (X_val, y_val),
+        "test": (X_test, y_test),
     }
 
     return data
 
 
-def scale_pca(data : dict):
+def scale_pca(data: dict):
     """
     Script that scales the splits, and applies PCA to the lyrics vector.
 
@@ -79,8 +83,8 @@ def scale_pca(data : dict):
 
     # Destructure the dictionary to get data split
    X_train, y_train = data["train"]
-   X_val, y_val = data["val"]
-   X_test, y_test = data["test"]
+   X_val, y_val = data["val"]
+   X_test, y_test = data["test"]
 
    # Segment the concatenated embedding to audio and lyrics
    X_train_audio, X_train_lyrics = X_train[:, :384], X_train[:, 384:]
@@ -104,7 +108,7 @@ def scale_pca(data : dict):
    batch_size = 1000
 
    for i in range(0, X_train_lyrics.shape[0], batch_size):
-       ipca.partial_fit(X_train_lyrics[i:i + batch_size])
+       ipca.partial_fit(X_train_lyrics[i : i + batch_size])
 
    # Transform in batches
    X_train_lyrics = ipca.transform(X_train_lyrics)
@@ -136,7 +140,7 @@ def scale_pca(data : dict):
    return data
 
 
-def scale_pca_lyrics(data : dict):
+def scale_pca_lyrics(data: dict):
    """
    Script that scales the splits, and applies PCA to the lyrics vector.
 
@@ -153,8 +157,8 @@ def scale_pca_lyrics(data : dict):
 
    # Destructure the dictionary to get data split
    X_train, y_train = data["train"]
-   X_val, y_val = data["val"]
-   X_test, y_test = data["test"]
+   X_val, y_val = data["val"]
+   X_test, y_test = data["test"]
 
    lyric_scaler = StandardScaler().fit(X_train)
    joblib.dump(lyric_scaler, LYRICS_SCALER)
@@ -168,7 +172,7 @@ def scale_pca_lyrics(data : dict):
    batch_size = 1000
 
    for i in range(0, X_train.shape[0], batch_size):
-       ipca.partial_fit(X_train[i:i + batch_size])
+       ipca.partial_fit(X_train[i : i + batch_size])
 
    # Transform in batches
    X_train = ipca.transform(X_train)
@@ -186,7 +190,7 @@ def scale_pca_lyrics(data : dict):
    return data
 
 
-def scale(data : dict):
+def scale(data: dict):
    """
    Script that scales the splits, and applies PCA to the lyrics vector.
 
@@ -203,8 +207,8 @@ def scale(data : dict):
 
    # Destructure the dictionary to get data split
    X_train, y_train = data["train"]
-   X_val, y_val = data["val"]
-   X_test, y_test = data["test"]
+   X_val, y_val = data["val"]
+   X_test, y_test = data["test"]
 
    audio_scaler = StandardScaler(with_mean=False).fit(X_train)
    joblib.dump(audio_scaler, AUDIO_SCALER)
@@ -222,6 +226,7 @@ def scale(data : dict):
 
    return data
 
+
 def dataset_scaler(audio: np.ndarray, lyrics: np.ndarray):
    """
    Method to scale both audio and lyric vectors using Z-Score.
@@ -279,7 +284,7 @@ def instance_scaler(audio: np.ndarray, lyrics: np.ndarray):
    audio_scaler = joblib.load(AUDIO_SCALER)
    lyric_scaler = joblib.load(LYRICS_SCALER)
 
-   scaled_audio = audio_scaler.transform([audio])
+   scaled_audio = audio_scaler.transform(audio)
    scaled_lyric = lyric_scaler.transform(lyrics)
 
-   return scaled_audio, scaled_lyric
+   return scaled_audio, scaled_lyric
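The last hunk makes instance_scaler call audio_scaler.transform(audio) instead of transform([audio]), so callers are now expected to hand in an already 2-D (1, n_features) array, which is exactly what the reshape added in scripts/predict.py produces. A hedged sketch of the calling convention (the stand-in scalers and the 384/512 widths are assumptions, not the saved AUDIO_SCALER / LYRICS_SCALER artifacts):

import numpy as np
from sklearn.preprocessing import StandardScaler

# Stand-ins for the fitted scalers that instance_scaler normally loads via joblib
audio_scaler = StandardScaler(with_mean=False).fit(np.random.rand(100, 384))
lyric_scaler = StandardScaler().fit(np.random.rand(100, 512))

audio = np.random.rand(1, 384)    # already (1, n_audio_features); no [audio] wrapping needed
lyrics = np.random.rand(1, 512)   # already (1, n_lyric_features)

scaled_audio = audio_scaler.transform(audio)
scaled_lyric = lyric_scaler.transform(lyrics)
print(scaled_audio.shape, scaled_lyric.shape)   # (1, 384) (1, 512)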