tlmk22
/

QuefrencyGuardian

@@ -1,3 +1,5 @@
 import torchaudio
 from datasets import load_dataset
 from sklearn.metrics import accuracy_score
@@ -6,17 +8,10 @@ from fast_model import FastModelHuggingFace
 repo_id = "tlmk22/QuefrencyGuardian"
 fast_model = FastModelHuggingFace.from_pretrained(repo_id)
-# Example: predicting on a single WAV file
-wav_path = "wave_example/chainsaw.wav"
-waveform, sampling_rate = torchaudio.load(wav_path)  # Charger le fichier audio
-if sampling_rate != 12000:
-    resampler = torchaudio.transforms.Resample(orig_freq=sampling_rate, new_freq=12000)
-    waveform = resampler(waveform)
 # Perform predictions for a single WAV file
 map_labels = {0: "chainsaw", 1: "environment"}
-wav_prediction = fast_model.predict(waveform)
-print(f"Prediction : {map_labels[wav_prediction]}")
 # Example: predicting on a Hugging Face dataset
 dataset = load_dataset("rfcx/frugalai")

+from pathlib import Path
 import torchaudio
 from datasets import load_dataset
 from sklearn.metrics import accuracy_score
 repo_id = "tlmk22/QuefrencyGuardian"
 fast_model = FastModelHuggingFace.from_pretrained(repo_id)
 # Perform predictions for a single WAV file
 map_labels = {0: "chainsaw", 1: "environment"}
+wav_prediction = fast_model.predict("wav_example/chainsaw.wav", device="cpu")
+print(f"Prediction : {map_labels[wav_prediction[0]]}")
 # Example: predicting on a Hugging Face dataset
 dataset = load_dataset("rfcx/frugalai")

fast_model.py CHANGED Viewed

@@ -117,12 +117,9 @@ class FastModel:
             If the dataset is empty or invalid.
         """
         features, labels = [], []
-        for audio, label in batch_audio_loader(
             dataset,
-            waveform_duration=self.audio_processing_params["duration"],
             batch_size=batch_size,
-            padding_method=self.audio_processing_params["padding_method"],
-            device=self.device,
         ):
             feature = self.get_features(audio)
             features.append(feature)
@@ -157,12 +154,9 @@ class FastModel:
         if not self.model:
             raise NotFittedError("LGBM model is not fitted yet.")
         features = []
-        for audio, _ in batch_audio_loader(
             dataset,
-            waveform_duration=self.audio_processing_params["duration"],
             batch_size=batch_size,
-            padding_method=self.audio_processing_params["padding_method"],
-            device=self.device,
         ):
             feature = self.get_features(audio)
             features.append(feature)
@@ -207,115 +201,118 @@ class FastModel:
             dim=1,
         )
-def batch_audio_loader(
-    dataset: Dataset,
-    waveform_duration: int = 3,
-    batch_size: int = 1,
-    sr: int = 12000,
-    device: Literal["cpu", "cuda"] = "cpu",
-    padding_method: None | Literal["zero", "reflect", "replicate", "circular"] = None,
-    offset: int = 0,
-):
-    """Optimized loader for audio data from a dataset for training or inference in batches.
-    Parameters
-    ----------
-    dataset : Dataset
-        The dataset containing audio samples and labels.
-    waveform_duration : int, optional
-        Desired duration of the audio waveforms in seconds (default is 3).
-    batch_size : int, optional
-        Number of audio samples per batch (default is 1).
-    sr : int, optional
-        Target sampling rate for audio processing (default is 12000).
-    device : str, optional
-        Device for processing ("cpu" or "cuda") (default is "cpu").
-    padding_method : str, optional
-        Method to pad audio waveforms smaller than the desired size (e.g., "zero", "reflect").
-    offset : int, optional
-        Number of samples to skip before processing the first audio sample (default is 0).
-    Yields
-    ------
-    tuple (Tensor, Tensor)
-        A tuple (batch_audios, batch_labels), where:
-        - batch_audios is a torch.tensor of processed audio waveforms.
-        - batch_labels is a torch.tensor of corresponding audio labels.
-    Raises
-    ------
-    ValueError
-        If an unsupported sampling rate is encountered in the dataset.
-    """
-    def process_resampling(resample_buffer, resample_indices, batch_audios, sr, target_sr):
-        if resample_buffer:
-            resampler = torchaudio.transforms.Resample(
-                orig_freq=sr, new_freq=target_sr, lowpass_filter_width=6
-            )
-            resampled = resampler(torch.stack(resample_buffer))
-            for idx, original_idx in enumerate(resample_indices):
-                batch_audios[original_idx] = resampled[idx]
-    device = torch.device("cuda" if device == "cuda" and torch.cuda.is_available() else "cpu")
-    batch_audios, batch_labels = [], []
-    resample_24000, resample_24000_indices = [], []
-    for i in range(len(dataset)):
-        pa_subtable = query_table(dataset._data, i, indices=dataset._indices)
-        wav_bytes = pa_subtable[0][0][0].as_py()
-        sampling_rate = struct.unpack("<I", wav_bytes[24:28])[0]
-        if sampling_rate not in [sr, sr * 2]:
-            raise ValueError(
-                f"Unsupported sampling rate: {sampling_rate}Hz. Only {sr}Hz and {sr * 2}Hz are allowed."
-            )
-        data_size = struct.unpack("<I", wav_bytes[40:44])[0] // 2
-        if data_size == 0:
-            batch_audios.append(torch.zeros(int(waveform_duration * SR)))
-        else:
-            try:
-                waveform = (
-                    torch.frombuffer(wav_bytes[44:], dtype=torch.int16, offset=offset)[
-                        : int(waveform_duration * sampling_rate)
-                    ].float()
-                    / 32767
                 )
-            except Exception as e:
-                continue  # May append during fit for small audios. offset is set to 0 during predict.
-            waveform = apply_padding(
-                waveform, int(waveform_duration * sampling_rate), padding_method
-            )
-            if sampling_rate == sr:
-                batch_audios.append(waveform)
-            elif sampling_rate == 2 * sr:
-                resample_24000.append(waveform)
-                resample_24000_indices.append(len(batch_audios))
-                batch_audios.append(None)
-        batch_labels.append(pa_subtable[1][0].as_py())
-        if len(batch_audios) == batch_size:
-            # Perform resampling once and take advantage of Torch's vectorization capabilities.
-            process_resampling(resample_24000, resample_24000_indices, batch_audios, sr * 2, SR)
-            batch_audios_on_device = torch.stack(batch_audios).to(device)
-            batch_labels_on_device = torch.tensor(batch_labels).to(device)
-            yield batch_audios_on_device, batch_labels_on_device
-            batch_audios, batch_labels = [], []
-            resample_24000, resample_24000_indices = [], []
-    if batch_audios:
-        process_resampling(resample_24000, resample_24000_indices, batch_audios, sr * 2, SR)
-        batch_audios_on_device = torch.stack(batch_audios).to(device)
-        batch_labels_on_device = torch.tensor(batch_labels).to(device)
-        yield batch_audios_on_device, batch_labels_on_device
 def apply_padding(

             If the dataset is empty or invalid.
         """
         features, labels = [], []
+        for audio, label in self.batch_audio_loader(
             dataset,
             batch_size=batch_size,
         ):
             feature = self.get_features(audio)
             features.append(feature)
         if not self.model:
             raise NotFittedError("LGBM model is not fitted yet.")
         features = []
+        for audio, _ in self.batch_audio_loader(
             dataset,
             batch_size=batch_size,
         ):
             feature = self.get_features(audio)
             features.append(feature)
             dim=1,
         )
+    def batch_audio_loader(
+        self,
+        dataset: Dataset,
+        batch_size: int = 1,
+        offset: int = 0,
+    ):
+        """Optimized loader for audio data from a dataset for training or inference in batches.
+        The target sampling rate, the waveform duration and the padding method are not
+        arguments: they are read from ``self.audio_processing_params`` (keys
+        ``"sample_rate"``, ``"duration"`` and ``"padding_method"``). The device is
+        derived from ``self.device``.
+        Parameters
+        ----------
+        dataset : Dataset
+            The dataset containing audio samples and labels.
+        batch_size : int, optional
+            Number of audio samples per batch (default is 1).
+        offset : int, optional
+            Forwarded as ``offset`` to ``torch.frombuffer`` for every sample, i.e. how much
+            of the PCM payload is skipped before reading (default is 0).
+            NOTE(review): PyTorch documents this argument as a number of bytes, not
+            a number of int16 samples -- confirm callers pass a byte count.
+        Yields
+        ------
+        tuple (Tensor, Tensor)
+            A tuple (batch_audios, batch_labels), where:
+            - batch_audios is a torch.tensor of processed audio waveforms.
+            - batch_labels is a torch.tensor of corresponding audio labels.
+            The last batch may hold fewer than ``batch_size`` samples.
+        Raises
+        ------
+        ValueError
+            If a sample's sampling rate is neither the target rate nor twice the target rate.
+        """
+        # Resamples every buffered waveform in a single call, then writes each result
+        # back into the placeholder slot recorded for it in `batch_audios`.
+        def process_resampling(resample_buffer, resample_indices, batch_audios, sr, target_sr):
+            if resample_buffer:
+                resampler = torchaudio.transforms.Resample(
+                    orig_freq=sr, new_freq=target_sr, lowpass_filter_width=6
+                )
+                resampled = resampler(torch.stack(resample_buffer))
+                for idx, original_idx in enumerate(resample_indices):
+                    batch_audios[original_idx] = resampled[idx]
+        # For readability
+        sr = self.audio_processing_params["sample_rate"]
+        waveform_duration = self.audio_processing_params["duration"]
+        padding_method = self.audio_processing_params["padding_method"]
+        # Falls back to the CPU when CUDA is requested but not available.
+        device = torch.device(
+            "cuda" if self.device == "cuda" and torch.cuda.is_available() else "cpu"
+        )
+        batch_audios, batch_labels = [], []
+        # Waveforms recorded at 2 * sr, with the positions they occupy in `batch_audios`.
+        resample_24000, resample_24000_indices = [], []
+        for i in range(len(dataset)):
+            # Reads row i through the dataset's private `_data` / `_indices` attributes.
+            # NOTE(review): presumably this avoids the dataset's own audio decoding -- confirm.
+            pa_subtable = query_table(dataset._data, i, indices=dataset._indices)
+            # Assumes the first column's first field holds the raw WAV file bytes -- TODO confirm schema.
+            wav_bytes = pa_subtable[0][0][0].as_py()
+            # Little-endian uint32 at bytes 24-27; assumes a canonical 44-byte PCM WAV header,
+            # where this field is the sample rate.
+            sampling_rate = struct.unpack("<I", wav_bytes[24:28])[0]
+            if sampling_rate not in [sr, sr * 2]:
+                raise ValueError(
+                    f"Unsupported sampling rate: {sampling_rate}Hz. Only {sr}Hz and {sr * 2}Hz are allowed."
                 )
+            # Little-endian uint32 at bytes 40-43 (data chunk size under the same header
+            # assumption); halved to match the 2-byte int16 samples read below.
+            data_size = struct.unpack("<I", wav_bytes[40:44])[0] // 2
+            if data_size == 0:
+                # Empty recording: substitute silence.
+                # NOTE(review): the length uses the module-level `SR`, not the local `sr`
+                # read from audio_processing_params -- confirm the two always agree.
+                batch_audios.append(torch.zeros(int(waveform_duration * SR)))
+            else:
+                try:
+                    # Interpret everything after byte 44 as int16 PCM, keep at most
+                    # `waveform_duration` seconds, and scale by 1 / 32767.
+                    waveform = (
+                        torch.frombuffer(wav_bytes[44:], dtype=torch.int16, offset=offset)[
+                            : int(waveform_duration * sampling_rate)
+                        ].float()
+                        / 32767
+                    )
+                except Exception as e:
+                    # The sample is skipped entirely: `continue` also bypasses the label append below,
+                    # so audios and labels stay aligned.
+                    continue  # May happen during fit for small audios. offset is set to 0 during predict.
+                waveform = apply_padding(
+                    waveform, int(waveform_duration * sampling_rate), padding_method
+                )
+                if sampling_rate == sr:
+                    batch_audios.append(waveform)
+                elif sampling_rate == 2 * sr:
+                    # Defer resampling: keep a None placeholder so the sample's position in the
+                    # batch is preserved until process_resampling fills it in.
+                    resample_24000.append(waveform)
+                    resample_24000_indices.append(len(batch_audios))
+                    batch_audios.append(None)
+            batch_labels.append(pa_subtable[1][0].as_py())
+            if len(batch_audios) == batch_size:
+                # Perform resampling once and take advantage of Torch's vectorization capabilities.
+                # NOTE(review): the target rate passed here is the module-level `SR`, not `sr`.
+                process_resampling(resample_24000, resample_24000_indices, batch_audios, sr * 2, SR)
+                batch_audios_on_device = torch.stack(batch_audios).to(device)
+                batch_labels_on_device = torch.tensor(batch_labels).to(device)
+                yield batch_audios_on_device, batch_labels_on_device
+                # Start a fresh batch.
+                batch_audios, batch_labels = [], []
+                resample_24000, resample_24000_indices = [], []
+        # Flush the final, possibly smaller, batch.
+        if batch_audios:
+            process_resampling(resample_24000, resample_24000_indices, batch_audios, sr * 2, SR)
+            batch_audios_on_device = torch.stack(batch_audios).to(device)
+            batch_labels_on_device = torch.tensor(batch_labels).to(device)
+            yield batch_audios_on_device, batch_labels_on_device
 def apply_padding(

pipeline.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3243c0fd7f6cafa8492132711b0376da91838029cfe1362e2fc19ee6bf847894
-size 834063

 version https://git-lfs.github.com/spec/v1
+oid sha256:37040a799b897c6902c1b520cf902223f145c5d61831f0c316317a9d999d8d61
+size 834075