Spaces:

krislette
/

bach-or-bot

Sleeping

App Files Files Community

krislette committed on Oct 11, 2025

Commit

7633e2f

1 Parent(s): 75d43d2

Auto-deploy from GitHub: 7bbe4e79d2cd5e035a2fc8cda464b3cd867300d5

Browse files

Files changed (7) hide show

scripts/predict.py +4 -2
src/musiclime/explainer.py +144 -2
src/musiclime/factorization.py +95 -0
src/musiclime/print_utils.py +13 -0
src/musiclime/text_utils.py +78 -0
src/musiclime/wrapper.py +42 -8
src/preprocessing/lyrics_preprocessor.py +14 -62

scripts/predict.py CHANGED Viewed

@@ -5,6 +5,7 @@ from src.llm2vectrain.llm2vec_trainer import l2vec_single_train, load_pca_model
 from src.models.mlp import build_mlp, load_config
 from src.utils.dataset import instance_scaler
 import numpy as np
 import pandas as pd
@@ -47,8 +48,9 @@ def predict_pipeline(audio_file, lyrics):
     # 5.) Reduce the lyrics using saved PCA model
     reduced_lyrics = load_pca_model(lyrics_features)
-    # Scale the vectors using Z-Score again
-    audio_features, reduced_lyrics = instance_scaler(audio_features, reduced_lyrics)
     # 6.) Concatenate the vectors of audio_features + lyrics_features
     results = np.concatenate([audio_features, reduced_lyrics], axis=1)

 from src.models.mlp import build_mlp, load_config
 from src.utils.dataset import instance_scaler
+import joblib
 import numpy as np
 import pandas as pd
     # 5.) Reduce the lyrics using saved PCA model
     reduced_lyrics = load_pca_model(lyrics_features)
+    # 5b.) Apply the saved PCA scaler to the PCA-reduced lyrics
+    pca_scaler = joblib.load("models/fusion/pca_scaler.pkl")
+    reduced_lyrics = pca_scaler.transform(reduced_lyrics)
     # 6.) Concatenate the vectors of audio_features + lyrics_features
     results = np.concatenate([audio_features, reduced_lyrics], axis=1)

src/musiclime/explainer.py CHANGED Viewed

@@ -15,7 +15,32 @@ from src.musiclime.print_utils import green_bold
 class MusicLIMEExplainer:
     def __init__(self, kernel_width=25, random_state=None):
         self.random_state = check_random_state(random_state)
         def kernel(d, kernel_width):
@@ -33,6 +58,29 @@ class MusicLIMEExplainer:
         labels=(1,),
         temporal_segments=10,
     ):
         # Debug-only progress output so the explanation run can be followed
         print("[MusicLIME] Starting MusicLIME explanation...")
         print(
@@ -93,6 +141,29 @@ class MusicLIMEExplainer:
         return explanation
     def _generate_neighborhood(self, audio_fact, text_fact, predict_fn, num_samples):
         n_audio = audio_fact.get_number_components()
         n_text = text_fact.num_words()
         total_features = n_audio + n_text
@@ -192,7 +263,48 @@ class MusicLIMEExplainer:
 class MusicLIMEExplanation:
     def __init__(self, audio_factorization, text_factorization, data, predictions):
         self.audio_factorization = audio_factorization
         self.text_factorization = text_factorization
         self.data = data
@@ -203,7 +315,21 @@ class MusicLIMEExplanation:
         self.local_pred = {}
     def get_explanation(self, label, num_features=10):
-        """Get top features for explanation"""
         if label not in self.local_exp:
             return []
@@ -231,7 +357,23 @@ class MusicLIMEExplanation:
         return explanations
     def save_to_json(self, filepath, song_info=None, num_features=10):
-        """Save explanation results to JSON file"""
         results_dir = Path("results")
         results_dir.mkdir(exist_ok=True)

 class MusicLIMEExplainer:
+    """
+    LIME-based explainer for multimodal music classification models.
+    Generates local explanations for AI vs Human music classification by
+    perturbing audio (source separation) and lyrics (line removal) components
+    and analyzing their impact on model predictions.
+    Attributes
+    ----------
+    random_state : RandomState
+        Random number generator for reproducible perturbations
+    base : LimeBase
+        Core LIME explanation engine with exponential kernel
+    """
     def __init__(self, kernel_width=25, random_state=None):
+        """
+        Initialize MusicLIME explainer with kernel parameters.
+        Parameters
+        ----------
+        kernel_width : int, default=25
+            Width parameter for the exponential kernel function
+        random_state : int or RandomState, optional
+            Random seed for reproducible perturbations
+        """
         self.random_state = check_random_state(random_state)
         def kernel(d, kernel_width):
         labels=(1,),
         temporal_segments=10,
     ):
+        """
+        Generate LIME explanations for a music instance using audio and lyrics.
+        Parameters
+        ----------
+        audio : array-like
+            Raw audio waveform data
+        lyrics : str
+            Song lyrics as text string
+        predict_fn : callable
+            Prediction function that takes (texts, audios) and returns probabilities (wrapper)
+        num_samples : int, default=1000
+            Number of perturbed samples to generate for LIME
+        labels : tuple, default=(1,)
+            Target labels to explain (0=AI-Generated, 1=Human-Composed)
+        temporal_segments : int, default=10
+            Number of temporal segments for audio factorization
+        Returns
+        -------
+        MusicLIMEExplanation
+            Explanation object containing feature importance weights
+        """
         # Debug-only progress output so the explanation run can be followed
         print("[MusicLIME] Starting MusicLIME explanation...")
         print(
         return explanation
     def _generate_neighborhood(self, audio_fact, text_fact, predict_fn, num_samples):
+        """
+        Generate perturbed samples and predictions for LIME explanation.
+        Parameters
+        ----------
+        audio_fact : OpenUnmixFactorization
+            Audio factorization object for source separation
+        text_fact : LineIndexedString
+            Text factorization object for line-based perturbations
+        predict_fn : callable
+            Model prediction function
+        num_samples : int
+            Number of perturbations to generate
+        Returns
+        -------
+        data : ndarray
+            Binary perturbation masks (num_samples, total_features)
+        predictions : ndarray
+            Model predictions for perturbed instances
+        distances : ndarray
+            Cosine distances from original instance
+        """
         n_audio = audio_fact.get_number_components()
         n_text = text_fact.num_words()
         total_features = n_audio + n_text
 class MusicLIMEExplanation:
+    """
+    Container for MusicLIME explanation results and analysis methods.
+    Stores factorizations, perturbation data, and LIME-fitted explanations
+    for a single music instance. Provides methods to extract top features
+    and export results to structured formats.
+    Attributes
+    ----------
+    audio_factorization : OpenUnmixFactorization
+        Audio source separation components
+    text_factorization : LineIndexedString
+        Lyrics line segmentation components
+    data : ndarray
+        Binary perturbation masks used for explanation
+    predictions : ndarray
+        Model predictions for all perturbations
+    intercept : dict
+        LIME model intercepts by label
+    local_exp : dict
+        Feature importance weights by label
+    score : dict
+        LIME model R² scores by label
+    local_pred : dict
+        Local model predictions by label
+    """
     def __init__(self, audio_factorization, text_factorization, data, predictions):
+        """
+        Initialize explanation object with factorizations and prediction data.
+        Parameters
+        ----------
+        audio_factorization : OpenUnmixFactorization
+            Audio source separation components
+        text_factorization : LineIndexedString
+            Text line segmentation components
+        data : ndarray
+            Binary perturbation masks used for explanation
+        predictions : ndarray
+            Model predictions for all perturbations
+        """
         self.audio_factorization = audio_factorization
         self.text_factorization = text_factorization
         self.data = data
         self.local_pred = {}
     def get_explanation(self, label, num_features=10):
+        """
+        Extract top feature explanations for a specific label.
+        Parameters
+        ----------
+        label : int
+            Target label to explain (0=AI-Generated, 1=Human-Composed)
+        num_features : int, default=10
+            Number of top features to return
+        Returns
+        -------
+        list of dict
+            Feature explanations with type, feature description, and weight
+        """
         if label not in self.local_exp:
             return []
         return explanations
     def save_to_json(self, filepath, song_info=None, num_features=10):
+        """
+        Save explanation results to structured JSON file.
+        Parameters
+        ----------
+        filepath : str
+            Output filename for JSON results
+        song_info : dict, optional
+            Additional metadata about the song
+        num_features : int, default=10
+            Number of top features to include in output
+        Returns
+        -------
+        Path
+            Path to the saved JSON file
+        """
         results_dir = Path("results")
         results_dir.mkdir(exist_ok=True)

src/musiclime/factorization.py CHANGED Viewed

@@ -6,7 +6,42 @@ from src.musiclime.print_utils import green_bold
 class OpenUnmixFactorization:
     def __init__(self, audio, temporal_segmentation_params=10, composition_fn=None):
         print("[MusicLIME] Initializing OpenUnmix factorization...")
         self.audio = audio
         self.target_sr = 44100
@@ -49,6 +84,21 @@ class OpenUnmixFactorization:
         )
     def _compute_segments(self, signal, n_segments):
         audio_length = len(signal)
         samples_per_segment = audio_length // n_segments
@@ -60,6 +110,16 @@ class OpenUnmixFactorization:
         return segments
     def _separate_sources(self):
         waveform = np.expand_dims(self.audio, axis=1)
         # Load openunmix .pth files from local dir
@@ -81,6 +141,12 @@ class OpenUnmixFactorization:
         return components, names
     def _prepare_temporal_components(self):
         # Create temporal-source combinations
         self.components = []
         self.final_component_names = []
@@ -93,12 +159,41 @@ class OpenUnmixFactorization:
                 self.final_component_names.append(f"{self.component_names[c]}_T{s}")
     def get_number_components(self):
         return len(self.components)
     def get_ordered_component_names(self):
         return self.final_component_names
     def compose_model_input(self, component_indices):
         if len(component_indices) == 0:
             return np.zeros_like(self.audio)

 class OpenUnmixFactorization:
+    """
+    Audio factorization using OpenUnmix source separation with temporal segmentation.
+    Decomposes audio into interpretable components by separating sources
+    (vocals, bass, drums, other) and segmenting each across time windows.
+    Creates temporal-source combinations for fine-grained audio explanations.
+    Attributes
+    ----------
+    audio : ndarray
+        Original audio waveform
+    temporal_segments : list of tuple
+        Time window boundaries for segmentation
+    original_components : list of ndarray
+        Raw separated audio sources
+    component_names : list of str
+        Names of separated sources
+    components : list of ndarray
+        Final temporal-source component combinations
+    final_component_names : list of str
+        Names of temporal-source combinations
+    """
     def __init__(self, audio, temporal_segmentation_params=10, composition_fn=None):
+        """
+        Initialize audio factorization using OpenUnmix source separation with temporal segmentation.
+        Parameters
+        ----------
+        audio : array-like
+            Raw audio waveform data at 44.1kHz sample rate
+        temporal_segmentation_params : int, default=10
+            Number of temporal segments to divide the audio into
+        composition_fn : callable, optional
+            Custom function for composing separated sources (unused for now)
+        """
         print("[MusicLIME] Initializing OpenUnmix factorization...")
         self.audio = audio
         self.target_sr = 44100
         )
     def _compute_segments(self, signal, n_segments):
+        """
+        Divide audio signal into equal temporal segments for factorization.
+        Parameters
+        ----------
+        signal : array-like
+            Input audio waveform
+        n_segments : int
+            Number of temporal segments to create
+        Returns
+        -------
+        list of tuple
+            List of (start, end) sample indices for each segment
+        """
         audio_length = len(signal)
         samples_per_segment = audio_length // n_segments
         return segments
     def _separate_sources(self):
+        """
+        Perform source separation using OpenUnmix to extract instrument components.
+        Returns
+        -------
+        components : list of ndarray
+            Separated audio sources (vocals, bass, drums, other)
+        names : list of str
+            Names of the separated source components
+        """
         waveform = np.expand_dims(self.audio, axis=1)
         # Load openunmix .pth files from local dir
         return components, names
     def _prepare_temporal_components(self):
+        """
+        Create temporal-source combinations by applying each source to each time segment.
+        Creates components like 'vocals_T0', 'drums_T5' representing specific
+        instruments active only in specific temporal windows.
+        """
         # Create temporal-source combinations
         self.components = []
         self.final_component_names = []
                 self.final_component_names.append(f"{self.component_names[c]}_T{s}")
     def get_number_components(self):
+        """
+        Get total number of factorized components (sources x temporal segments).
+        Returns
+        -------
+        int
+            Total number of temporal-source component combinations
+        """
         return len(self.components)
     def get_ordered_component_names(self):
+        """
+        Get ordered list of component names for explanation display.
+        Returns
+        -------
+        list of str
+            Component names in format '{source}_T{segment}' (e.g., 'vocals_T3')
+        """
         return self.final_component_names
     def compose_model_input(self, component_indices):
+        """
+        Reconstruct audio by summing selected temporal-source components.
+        Parameters
+        ----------
+        component_indices : array-like
+            Indices of components to include in reconstruction
+        Returns
+        -------
+        ndarray
+            Reconstructed audio waveform from selected components
+        """
         if len(component_indices) == 0:
             return np.zeros_like(self.audio)

src/musiclime/print_utils.py CHANGED Viewed

@@ -1,2 +1,15 @@
 def green_bold(text):
     return f"\033[1;32m{text}\033[0m"

 def green_bold(text):
+    """
+    Format text with green bold ANSI color codes for terminal output.
+    Parameters
+    ----------
+    text : str
+        Text string to format
+    Returns
+    -------
+    str
+        Text wrapped with ANSI escape codes for green bold formatting
+    """
     return f"\033[1;32m{text}\033[0m"

src/musiclime/text_utils.py CHANGED Viewed

@@ -4,7 +4,38 @@ from lime.lime_text import IndexedString
 class LineIndexedString(IndexedString):
     def __init__(self, raw_string, bow=True, mask_string=None):
         self.raw = raw_string
         self.mask_string = mask_string
         self.bow = bow
@@ -18,6 +49,19 @@ class LineIndexedString(IndexedString):
         self.string_start = [0] * len(self.as_list)
     def _split_by_lines(self, text):
         lines = text.split("\n")
         processed_lines = []
@@ -31,6 +75,19 @@ class LineIndexedString(IndexedString):
         return processed_lines
     def inverse_removing(self, words_to_remove):
         # Keep lines not in words_to_remove
         kept_lines = [
             self.as_list[i]
@@ -40,7 +97,28 @@ class LineIndexedString(IndexedString):
         return "\n".join(kept_lines)
     def num_words(self):
         return len(self.as_list)
     def word(self, id_):
         return self.as_list[id_]

 class LineIndexedString(IndexedString):
+    """
+    Line-based text indexing for lyrics perturbation in MusicLIME.
+    Extends LIME's IndexedString to work with lyrics lines instead of words,
+    to enable more meaningful perturbations for song lyrics. Filters out
+    metadata and focuses on actual lyrical content.
+    Attributes
+    ----------
+    raw : str
+        Original raw lyrics text
+    as_list : list of str
+        Processed lyrics lines without metadata
+    as_np : ndarray
+        NumPy array of lyrics lines
+    positions : list of int
+        Line position indices for LIME compatibility
+    """
     def __init__(self, raw_string, bow=True, mask_string=None):
+        """
+        Initialize line-based text indexing for lyrics perturbation in MusicLIME.
+        Parameters
+        ----------
+        raw_string : str
+            Raw lyrics text to be processed
+        bow : bool, default=True
+            Bag-of-words flag (maintained for LIME compatibility)
+        mask_string : str, optional
+            String to use for masking removed lines
+        """
         self.raw = raw_string
         self.mask_string = mask_string
         self.bow = bow
         self.string_start = [0] * len(self.as_list)
     def _split_by_lines(self, text):
+        """
+        Split lyrics text into meaningful lines, filtering out metadata.
+        Parameters
+        ----------
+        text : str
+            Raw lyrics text with potential metadata
+        Returns
+        -------
+        list of str
+            Processed lyrics lines with metadata removed
+        """
         lines = text.split("\n")
         processed_lines = []
         return processed_lines
     def inverse_removing(self, words_to_remove):
+        """
+        Reconstruct lyrics text by removing specified line indices.
+        Parameters
+        ----------
+        words_to_remove : array-like
+            Indices of lyrics lines to remove from reconstruction
+        Returns
+        -------
+        str
+            Reconstructed lyrics text with specified lines removed
+        """
         # Keep lines not in words_to_remove
         kept_lines = [
             self.as_list[i]
         return "\n".join(kept_lines)
     def num_words(self):
+        """
+        Get total number of lyrics lines (called 'words' for LIME compatibility).
+        Returns
+        -------
+        int
+            Number of lyrics lines available for perturbation
+        """
         return len(self.as_list)
     def word(self, id_):
+        """
+        Get lyrics line content by index.
+        Parameters
+        ----------
+        id_ : int
+            Index of the lyrics line to retrieve
+        Returns
+        -------
+        str
+            Content of the specified lyrics line
+        """
         return self.as_list[id_]

src/musiclime/wrapper.py CHANGED Viewed

@@ -11,7 +11,31 @@ from src.musiclime.print_utils import green_bold
 class MusicLIMEPredictor:
     def __init__(self):
         print("[MusicLIME] Loading models for MusicLIME...")
         self.llm2vec_model = load_llm2vec_model()
         config = load_config("config/model_config.yml")
@@ -20,14 +44,24 @@ class MusicLIMEPredictor:
     def __call__(self, texts, audios):
         """
-        Predict function for MusicLIME
-        Args:
-            texts: List of lyric strings
-            audios: Array of audio waveforms
-        Returns:
-            Array of prediction probabilities
         """
         print(f"[MusicLIME] Processing {len(texts)} samples with batch functions...")

 class MusicLIMEPredictor:
+    """
+    Batch prediction wrapper for MusicLIME explanations.
+    Integrates the complete Bach or Bot pipeline (SpecTTTra + LLM2Vec + MLP)
+    into a single callable for LIME perturbation processing. Optimized for
+    batch processing of multiple perturbed audio-lyrics pairs with detailed
+    timing analysis.
+    Attributes
+    ----------
+    llm2vec_model : LLM2Vec
+        Pre-loaded LLM2Vec model for lyrics feature extraction
+    classifier : MLPClassifier
+        Lazy-loaded MLP classifier for final predictions
+    config : dict
+        Model configuration parameters
+    """
     def __init__(self):
+        """
+        Initialize MusicLIME prediction wrapper with pre-trained models.
+        Loads LLM2Vec model and MLP configuration for batch processing
+        of perturbed audio-lyrics pairs during LIME explanation.
+        """
         print("[MusicLIME] Loading models for MusicLIME...")
         self.llm2vec_model = load_llm2vec_model()
         config = load_config("config/model_config.yml")
     def __call__(self, texts, audios):
         """
+        Batch prediction function for MusicLIME perturbations.
+        Processes multiple perturbed audio-lyrics pairs through the complete
+        pipeline: preprocessing -> feature extraction -> scaling -> MLP prediction.
+        Optimized for batch processing of LIME perturbations.
+        Parameters
+        ----------
+        texts : list of str
+            List of perturbed lyrics strings from LIME
+        audios : list of array-like
+            List of perturbed audio waveforms from LIME
+        Returns
+        -------
+        ndarray
+            Prediction probabilities in format [[P(AI), P(Human)], ...]
+            for each input pair, shape (n_samples, 2)
         """
         print(f"[MusicLIME] Processing {len(texts)} samples with batch functions...")

src/preprocessing/lyrics_preprocessor.py CHANGED Viewed

@@ -1,19 +1,19 @@
 import re
 class LyricsPreprocessor:
     """
-    A preprocessing class for cleaning and preparing song lyrics
     for LLM2Vec.
     Parameters
     ----------
     keep_case : bool, optional (default=True)
         If False, converts all lyrics to lowercase.
     keep_punctuation : bool, optional (default=True)
         If False, removes all punctuation from lyrics.
     Usage
     -----
     >>> preprocessor = LyricsPreprocessor(keep_case=False, keep_punctuation=False)
@@ -21,9 +21,10 @@ class LyricsPreprocessor:
     >>> print(processed)
     hello world sing along
     """
     def __init__(self, keep_case=True, keep_punctuation=True):
         self.keep_case = keep_case
-        self.keep_punctuation= keep_punctuation
     def __call__(self, lyrics: str):
         """
@@ -42,83 +43,34 @@ class LyricsPreprocessor:
         Returns
         -------
         str
         a cleaned lyric string
         """
         lyrics_cleaned = ""
         # Split lyrics by lines
-        lyric_array = lyrics.split('\n')
         for line in lyric_array:
             line = line.strip()
             # Skip unimportant lines like [Chorus] or (Verse)
-            if not line or re.match(r'^\[.*\]$', line) or re.match(r'^\(.*\)$', line):
                 continue
             # Case handling
             if not self.keep_case:
                 line = line.lower()
             # Punctuation handling
             if not self.keep_punctuation:
-                line = re.sub(r'[^\w\s]', '', line)
             # Split the line into words (also collapses repeated whitespace)
             words = line.split()
-            lyrics_cleaned += ' '.join(words) + ' '
         lyrics_cleaned = lyrics_cleaned.strip()
         return lyrics_cleaned
-    def musiclime_lyrics_extractor(self, lyrics: str):
-        """
-        Preprocess the input lyrics text.
-        Steps:
-        1. Removes empty lines or lines with metadata (e.g., [Chorus], (Verse)).
-        2. Applies case handling and punctuation removal based on settings.
-        3. Segments the lyrics into multiple lines.
-        3. Builds a list of lines from the lyrics
-        Parameters
-        ----------
-        lyrics : str
-            Raw lyrics text.
-        Returns
-        -------
-        line_segmented_lyrics : list
-            List of lines from the lyrics, processed using the class.
-        """
-        # Instantiate line lyrics list
-        line_segmented_lyrics = []
-        # Split lyrics by lines
-        lyric_array = lyrics.split('\n')
-        for line in lyric_array:
-            line = line.strip()
-            # Skip unimportant lines like [Chorus] or (Verse)
-            if not line or re.match(r'^\[.*\]$', line) or re.match(r'^\(.*\)$', line):
-                continue
-            # Case handling
-            if not self.keep_case:
-                line = line.lower()
-            # Punctuation handling
-            if not self.keep_punctuation:
-                line = re.sub(r'[^\w\s]', '', line)
-            # Append line to line segmented lyrics list
-            line_segmented_lyrics.append(line)
-        return line_segmented_lyrics

 import re
 class LyricsPreprocessor:
     """
+    A preprocessing class for cleaning and preparing song lyrics
     for LLM2Vec.
     Parameters
     ----------
     keep_case : bool, optional (default=True)
         If False, converts all lyrics to lowercase.
     keep_punctuation : bool, optional (default=True)
         If False, removes all punctuation from lyrics.
     Usage
     -----
     >>> preprocessor = LyricsPreprocessor(keep_case=False, keep_punctuation=False)
     >>> print(processed)
     hello world sing along
     """
     def __init__(self, keep_case=True, keep_punctuation=True):
         self.keep_case = keep_case
+        self.keep_punctuation = keep_punctuation
     def __call__(self, lyrics: str):
         """
         Returns
         -------
         str
         a cleaned lyric string
         """
         lyrics_cleaned = ""
         # Split lyrics by lines
+        lyric_array = lyrics.split("\n")
         for line in lyric_array:
             line = line.strip()
             # Skip unimportant lines like [Chorus] or (Verse)
+            if not line or re.match(r"^\[.*\]$", line) or re.match(r"^\(.*\)$", line):
                 continue
             # Case handling
             if not self.keep_case:
                 line = line.lower()
             # Punctuation handling
             if not self.keep_punctuation:
+                line = re.sub(r"[^\w\s]", "", line)
             # Split the line into words (also collapses repeated whitespace)
             words = line.split()
+            lyrics_cleaned += " ".join(words) + " "
         lyrics_cleaned = lyrics_cleaned.strip()
         return lyrics_cleaned