krislette committed on
Commit
c84f2c4
·
1 Parent(s): c51ad28

Auto-deploy from GitHub: cb4a769f21149a39309d7602af027f4cc33f773b

Browse files
app/server.py CHANGED
@@ -14,7 +14,7 @@ from app.utils import load_server_config, load_model_config, download_youtube_au
14
 
15
  # Model/XAI-related imports
16
  from scripts.explain import musiclime
17
- from scripts.predict import predict_pipeline
18
 
19
  # Other imports
20
  import io
@@ -148,7 +148,7 @@ async def predict_music(
148
  raise HTTPException(status_code=400, detail=f"Invalid audio file: {str(e)}")
149
 
150
  # Call MLP predict runner script
151
- results = predict_pipeline(audio_data, lyrics)
152
 
153
  return PredictionResponse(
154
  status="success",
 
14
 
15
  # Model/XAI-related imports
16
  from scripts.explain import musiclime
17
+ from scripts.predict import predict_multimodal
18
 
19
  # Other imports
20
  import io
 
148
  raise HTTPException(status_code=400, detail=f"Invalid audio file: {str(e)}")
149
 
150
  # Call MLP predict runner script
151
+ results = predict_multimodal(audio_data, lyrics)
152
 
153
  return PredictionResponse(
154
  status="success",
scripts/predict.py CHANGED
@@ -1,16 +1,15 @@
1
- from src.preprocessing.preprocessor import single_preprocessing
2
  from src.spectttra.spectttra_trainer import spectttra_predict
3
  from src.llm2vectrain.model import load_llm2vec_model
4
  from src.llm2vectrain.llm2vec_trainer import l2vec_single_train, load_pca_model
5
  from src.models.mlp import build_mlp, load_config
6
- from src.utils.dataset import instance_scaler
7
 
8
- import joblib
9
  import numpy as np
10
  import pandas as pd
11
 
12
 
13
- def predict_pipeline(audio_file, lyrics):
14
  """
15
  Predict script which includes preprocessing, feature extraction, and
16
  training the MLP model for a single data sample.
@@ -73,6 +72,57 @@ def predict_pipeline(audio_file, lyrics):
73
  }
74
 
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  if __name__ == "__main__":
77
  # Example usage (replace with real inputs, place song inside data/raw.)
78
  data = pd.read_csv("data/raw/predict_data_final.csv")
@@ -80,7 +130,7 @@ if __name__ == "__main__":
80
  result = []
81
  label = []
82
  for row in data.itertuples():
83
- prediction = predict_pipeline(row.song, row.lyrics)
84
  result.append(
85
  {
86
  "song": row.song,
 
1
+ from src.preprocessing.preprocessor import single_preprocessing, single_audio_preprocessing
2
  from src.spectttra.spectttra_trainer import spectttra_predict
3
  from src.llm2vectrain.model import load_llm2vec_model
4
  from src.llm2vectrain.llm2vec_trainer import l2vec_single_train, load_pca_model
5
  from src.models.mlp import build_mlp, load_config
6
+ from src.utils.dataset import instance_scaler, audio_instance_scaler
7
 
 
8
  import numpy as np
9
  import pandas as pd
10
 
11
 
12
+ def predict_multimodal(audio_file, lyrics):
13
  """
14
  Predict script which includes preprocessing, feature extraction, and
15
  training the MLP model for a single data sample.
 
72
  }
73
 
74
 
75
def predict_unimodal(audio_file):
    """
    Run the audio-only prediction pipeline for a single data sample.

    Preprocesses the audio, extracts SpecTTTra features, scales them,
    and classifies the sample with the trained MLP. No lyrics are used.

    Parameters
    ----------
    audio_file : audio_object
        Audio object file

    Returns
    -------
    dict
        Keys: "confidence", "prediction" (string result of the
        prediction), "label" (numerical representation of the
        prediction), and "probability", as produced by
        ``classifier.predict_single``.
    """

    # 1.) Preprocess the audio
    audio = single_audio_preprocessing(audio_file)

    # 2.) Extract features via SpecTTTra inference, then reshape to a
    #     single-row matrix so the scaler sees one instance
    audio_features = spectttra_predict(audio)
    audio_features = audio_features.reshape(1, -1)

    # 3.) Scale the vector using Z-Score
    audio_features = audio_instance_scaler(audio_features)

    # 4.) Build the MLP classifier from the model config
    config = load_config("config/model_config.yml")
    classifier = build_mlp(input_dim=audio_features.shape[1], config=config)

    # 5.) Load trained weights and switch to inference mode
    model_path = "models/spectttra/mlp_best.pth"
    classifier.load_model(model_path)
    classifier.model.eval()

    # 6.) Run prediction on the flattened feature vector
    confidence, prediction, label, probability = classifier.predict_single(
        audio_features.flatten()
    )

    return {
        "confidence": confidence,
        "prediction": prediction,
        "label": label,
        "probability": probability,
    }
124
+
125
+
126
  if __name__ == "__main__":
127
  # Example usage (replace with real inputs, place song inside data/raw.)
128
  data = pd.read_csv("data/raw/predict_data_final.csv")
 
130
  result = []
131
  label = []
132
  for row in data.itertuples():
133
+ prediction = predict_multimodal(row.song, row.lyrics)
134
  result.append(
135
  {
136
  "song": row.song,
scripts/predict_runner.py CHANGED
@@ -1,19 +1,41 @@
1
  import librosa
2
- from scripts.predict import predict_pipeline
3
 
4
- # Load test audio and lyrics
5
- audio_path = "data/external/sample_1.mp3"
6
- lyrics_path = "data/external/sample_1.txt"
7
 
8
- # Load audio
9
- audio_data, sr = librosa.load(audio_path)
 
 
10
 
11
- # Load lyrics
12
- with open(lyrics_path, "r", encoding="utf-8") as f:
13
- lyrics_text = f.read()
14
 
15
- print("Running prediction pipeline...")
16
- prediction = predict_pipeline(audio_data, lyrics_text)
 
17
 
18
- print(f"\n=== PREDICTION RESULT ===")
19
- print(f"Prediction: {prediction}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import librosa
2
+ from scripts.predict import predict_multimodal, predict_unimodal
3
 
 
 
 
4
 
5
def predict_multimodal_runner(sample: str):
    """Run the multimodal (audio + lyrics) pipeline on one external sample.

    Loads ``data/external/<sample>.mp3`` and ``data/external/<sample>.txt``,
    predicts, and prints the result.
    """
    # Resolve the audio/lyrics file pair for this sample name
    audio_path = f"data/external/{sample}.mp3"
    lyrics_path = f"data/external/{sample}.txt"

    # librosa returns (waveform, sample_rate); the rate is unused here
    waveform, _sr = librosa.load(audio_path)

    # Read the full lyrics text
    with open(lyrics_path, "r", encoding="utf-8") as handle:
        lyric_text = handle.read()

    print("Running prediction pipeline...")
    outcome = predict_multimodal(waveform, lyric_text)

    print(f"\n=== PREDICTION RESULT ===")
    print(f"Prediction: {outcome}")
22
+
23
+
24
def predict_unimodal_runner(sample: str):
    """Run the audio-only pipeline on one raw sample and print the result.

    Loads ``data/raw/<sample>.mp3``; no lyrics file is read.
    """
    # Resolve the audio file for this sample name
    audio_path = f"data/raw/{sample}.mp3"

    # librosa returns (waveform, sample_rate); the rate is unused here
    waveform, _sr = librosa.load(audio_path)

    print("Running prediction pipeline...")
    outcome = predict_unimodal(waveform)

    print(f"\n=== PREDICTION RESULT ===")
    print(f"Prediction: {outcome}")
36
+
37
+
38
if __name__ == "__main__":
    # Demo entry point: audio-only prediction on the default sample
    predict_unimodal_runner("fake_sunshine")
src/preprocessing/preprocessor.py CHANGED
@@ -120,6 +120,30 @@ def single_preprocessing(audio, lyric: str):
120
  return processed_song, processed_lyric
121
 
122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  def dataset_read(batch_size=20):
124
  """
125
  Reads the main dataset, splits it into the train/test/valid split, and computes
 
120
  return processed_song, processed_lyric
121
 
122
 
123
def single_audio_preprocessing(audio):
    """
    Preprocesses a single record of audio.

    Parameters
    ----------
    audio : audio_object
        Audio object file

    Returns
    -------
    processed_song : tensor
        Tensor version of the audio
    """
    # Instantiate the audio preprocessor in predict mode
    audio_preprocessor = AudioPreprocessor(script="predict")

    # Preprocess the song only — this is the audio-only (unimodal) path,
    # so no lyrics are handled here
    processed_song = audio_preprocessor(file=audio)

    return processed_song
145
+
146
+
147
  def dataset_read(batch_size=20):
148
  """
149
  Reads the main dataset, splits it into the train/test/valid split, and computes
src/utils/dataset.py CHANGED
@@ -288,3 +288,26 @@ def instance_scaler(audio: np.ndarray, lyrics: np.ndarray):
288
  scaled_lyric = lyric_scaler.transform(lyrics)
289
 
290
  return scaled_audio, scaled_lyric
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
  scaled_lyric = lyric_scaler.transform(lyrics)
289
 
290
  return scaled_audio, scaled_lyric
291
+
292
+
293
def audio_instance_scaler(audio: np.ndarray):
    """
    Scale a single audio feature instance with the persisted scaler.

    Parameters
    ----------
    audio : np.array
        Instance of an audio feature

    Returns
    -------
    scaled_audio : np.array
        Array of scaled audio feature
    """
    # The scaler was fitted at training time; here we only apply the
    # saved transform to the single instance
    fitted_scaler = joblib.load(AUDIO_SCALER)
    scaled_audio = fitted_scaler.transform(audio)
    return scaled_audio