krislette committed on
Commit
c84f2c4
·
1 Parent(s): c51ad28

Auto-deploy from GitHub: cb4a769f21149a39309d7602af027f4cc33f773b

Browse files
app/server.py CHANGED
@@ -14,7 +14,7 @@ from app.utils import load_server_config, load_model_config, download_youtube_au
14
 
15
  # Model/XAI-related imports
16
  from scripts.explain import musiclime
17
- from scripts.predict import predict_pipeline
18
 
19
  # Other imports
20
  import io
@@ -148,7 +148,7 @@ async def predict_music(
148
  raise HTTPException(status_code=400, detail=f"Invalid audio file: {str(e)}")
149
 
150
  # Call MLP predict runner script
151
- results = predict_pipeline(audio_data, lyrics)
152
 
153
  return PredictionResponse(
154
  status="success",
 
14
 
15
  # Model/XAI-related imports
16
  from scripts.explain import musiclime
17
+ from scripts.predict import predict_multimodal
18
 
19
  # Other imports
20
  import io
 
148
  raise HTTPException(status_code=400, detail=f"Invalid audio file: {str(e)}")
149
 
150
  # Call MLP predict runner script
151
+ results = predict_multimodal(audio_data, lyrics)
152
 
153
  return PredictionResponse(
154
  status="success",
scripts/predict.py CHANGED
@@ -1,16 +1,15 @@
1
- from src.preprocessing.preprocessor import single_preprocessing
2
  from src.spectttra.spectttra_trainer import spectttra_predict
3
  from src.llm2vectrain.model import load_llm2vec_model
4
  from src.llm2vectrain.llm2vec_trainer import l2vec_single_train, load_pca_model
5
  from src.models.mlp import build_mlp, load_config
6
- from src.utils.dataset import instance_scaler
7
 
8
- import joblib
9
  import numpy as np
10
  import pandas as pd
11
 
12
 
13
- def predict_pipeline(audio_file, lyrics):
14
  """
15
  Predict script which includes preprocessing, feature extraction, and
16
  training the MLP model for a single data sample.
@@ -73,6 +72,57 @@ def predict_pipeline(audio_file, lyrics):
73
  }
74
 
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  if __name__ == "__main__":
77
  # Example usage (replace with real inputs, place song inside data/raw.)
78
  data = pd.read_csv("data/raw/predict_data_final.csv")
@@ -80,7 +130,7 @@ if __name__ == "__main__":
80
  result = []
81
  label = []
82
  for row in data.itertuples():
83
- prediction = predict_pipeline(row.song, row.lyrics)
84
  result.append(
85
  {
86
  "song": row.song,
 
1
+ from src.preprocessing.preprocessor import single_preprocessing, single_audio_preprocessing
2
  from src.spectttra.spectttra_trainer import spectttra_predict
3
  from src.llm2vectrain.model import load_llm2vec_model
4
  from src.llm2vectrain.llm2vec_trainer import l2vec_single_train, load_pca_model
5
  from src.models.mlp import build_mlp, load_config
6
+ from src.utils.dataset import instance_scaler, audio_instance_scaler
7
 
 
8
  import numpy as np
9
  import pandas as pd
10
 
11
 
12
+ def predict_multimodal(audio_file, lyrics):
13
  """
14
  Predict script which includes preprocessing, feature extraction, and
15
  training the MLP model for a single data sample.
 
72
  }
73
 
74
 
75
def predict_unimodal(audio_file):
    """
    Run the audio-only prediction pipeline for a single data sample.

    Preprocesses the audio, extracts SpecTTTra features, scales them,
    and classifies the sample with the trained MLP. No lyrics are used.

    Parameters
    ----------
    audio_file : audio_object
        Audio object file

    Returns
    -------
    dict
        Keys: "confidence", "prediction" (string result of the
        prediction), "label" (numerical representation of the
        prediction), and "probability", as produced by
        ``classifier.predict_single``.
    """

    # 1.) Preprocess the audio
    audio = single_audio_preprocessing(audio_file)

    # 2.) Extract features via SpecTTTra inference, then reshape to a
    #     single-row matrix so the scaler sees one instance
    audio_features = spectttra_predict(audio)
    audio_features = audio_features.reshape(1, -1)

    # 3.) Scale the vector using Z-Score
    audio_features = audio_instance_scaler(audio_features)

    # 4.) Build the MLP classifier from the model config
    config = load_config("config/model_config.yml")
    classifier = build_mlp(input_dim=audio_features.shape[1], config=config)

    # 5.) Load trained weights and switch to inference mode
    model_path = "models/spectttra/mlp_best.pth"
    classifier.load_model(model_path)
    classifier.model.eval()

    # 6.) Run prediction on the flattened feature vector
    confidence, prediction, label, probability = classifier.predict_single(
        audio_features.flatten()
    )

    return {
        "confidence": confidence,
        "prediction": prediction,
        "label": label,
        "probability": probability,
    }
124
+
125
+
126
  if __name__ == "__main__":
127
  # Example usage (replace with real inputs, place song inside data/raw.)
128
  data = pd.read_csv("data/raw/predict_data_final.csv")
 
130
  result = []
131
  label = []
132
  for row in data.itertuples():
133
+ prediction = predict_multimodal(row.song, row.lyrics)
134
  result.append(
135
  {
136
  "song": row.song,
scripts/predict_runner.py CHANGED
@@ -1,19 +1,41 @@
1
  import librosa
2
- from scripts.predict import predict_pipeline
3
 
4
- # Load test audio and lyrics
5
- audio_path = "data/external/sample_1.mp3"
6
- lyrics_path = "data/external/sample_1.txt"
7
 
8
- # Load audio
9
- audio_data, sr = librosa.load(audio_path)
 
 
10
 
11
- # Load lyrics
12
- with open(lyrics_path, "r", encoding="utf-8") as f:
13
- lyrics_text = f.read()
14
 
15
- print("Running prediction pipeline...")
16
- prediction = predict_pipeline(audio_data, lyrics_text)
 
17
 
18
- print(f"\n=== PREDICTION RESULT ===")
19
- print(f"Prediction: {prediction}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import librosa
2
+ from scripts.predict import predict_multimodal, predict_unimodal
3
 
 
 
 
4
 
5
def predict_multimodal_runner(sample: str):
    """Run the multimodal (audio + lyrics) pipeline on one external sample.

    Loads ``data/external/<sample>.mp3`` and ``data/external/<sample>.txt``,
    predicts, and prints the result.
    """
    # Resolve the audio/lyrics file pair for this sample name
    audio_path = f"data/external/{sample}.mp3"
    lyrics_path = f"data/external/{sample}.txt"

    # librosa returns (waveform, sample_rate); the rate is unused here
    waveform, _sr = librosa.load(audio_path)

    # Read the full lyrics text
    with open(lyrics_path, "r", encoding="utf-8") as handle:
        lyric_text = handle.read()

    print("Running prediction pipeline...")
    outcome = predict_multimodal(waveform, lyric_text)

    print(f"\n=== PREDICTION RESULT ===")
    print(f"Prediction: {outcome}")
22
+
23
+
24
def predict_unimodal_runner(sample: str):
    """Run the audio-only pipeline on one raw sample and print the result.

    Loads ``data/raw/<sample>.mp3``; no lyrics file is read.
    """
    # Resolve the audio file for this sample name
    audio_path = f"data/raw/{sample}.mp3"

    # librosa returns (waveform, sample_rate); the rate is unused here
    waveform, _sr = librosa.load(audio_path)

    print("Running prediction pipeline...")
    outcome = predict_unimodal(waveform)

    print(f"\n=== PREDICTION RESULT ===")
    print(f"Prediction: {outcome}")
36
+
37
+
38
if __name__ == "__main__":
    # Demo entry point: audio-only prediction on the default sample
    predict_unimodal_runner("fake_sunshine")
src/preprocessing/preprocessor.py CHANGED
@@ -120,6 +120,30 @@ def single_preprocessing(audio, lyric: str):
120
  return processed_song, processed_lyric
121
 
122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  def dataset_read(batch_size=20):
124
  """
125
  Reads the main dataset, splits it into the train/test/valid split, and computes
 
120
  return processed_song, processed_lyric
121
 
122
 
123
def single_audio_preprocessing(audio):
    """
    Preprocesses a single record of audio.

    Parameters
    ----------
    audio : audio_object
        Audio object file

    Returns
    -------
    processed_song : tensor
        Tensor version of the audio
    """
    # Instantiate the audio preprocessor in predict mode
    audio_preprocessor = AudioPreprocessor(script="predict")

    # Preprocess the song only — this is the audio-only (unimodal) path,
    # so no lyrics are handled here
    processed_song = audio_preprocessor(file=audio)

    return processed_song
145
+
146
+
147
  def dataset_read(batch_size=20):
148
  """
149
  Reads the main dataset, splits it into the train/test/valid split, and computes
src/utils/dataset.py CHANGED
@@ -288,3 +288,26 @@ def instance_scaler(audio: np.ndarray, lyrics: np.ndarray):
288
  scaled_lyric = lyric_scaler.transform(lyrics)
289
 
290
  return scaled_audio, scaled_lyric
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
  scaled_lyric = lyric_scaler.transform(lyrics)
289
 
290
  return scaled_audio, scaled_lyric
291
+
292
+
293
def audio_instance_scaler(audio: np.ndarray):
    """
    Scale a single audio feature instance with the persisted scaler.

    Parameters
    ----------
    audio : np.array
        Instance of an audio feature

    Returns
    -------
    scaled_audio : np.array
        Array of scaled audio feature
    """
    # The scaler was fitted at training time; here we only apply the
    # saved transform to the single instance
    fitted_scaler = joblib.load(AUDIO_SCALER)
    scaled_audio = fitted_scaler.transform(audio)
    return scaled_audio