tlemagueresse
committed on
Commit
·
1583464
1
Parent(s):
280e76e
Define model_type in the config
Browse files
- config.json +1 -1
- example_usage_fastmodel_hf.py +5 -5
- fast_model.py +2 -7
config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"
|
| 3 |
"framework": "PyTorch + LightGBM",
|
| 4 |
"audio_processing_params": {
|
| 5 |
"sample_rate": 12000,
|
|
|
|
| 1 |
{
|
| 2 |
+
"model_type": "audio-spectrogram-transformer",
|
| 3 |
"framework": "PyTorch + LightGBM",
|
| 4 |
"audio_processing_params": {
|
| 5 |
"sample_rate": 12000,
|
example_usage_fastmodel_hf.py
CHANGED
|
@@ -1,12 +1,12 @@
|
|
| 1 |
-
from pathlib import Path
|
| 2 |
-
|
| 3 |
-
import torchaudio
|
| 4 |
from datasets import load_dataset
|
| 5 |
from sklearn.metrics import accuracy_score
|
|
|
|
|
|
|
| 6 |
from fast_model import FastModelHuggingFace
|
| 7 |
|
| 8 |
-
|
| 9 |
-
fast_model =
|
|
|
|
| 10 |
|
| 11 |
# Perform predictions for a single WAV file
|
| 12 |
map_labels = {0: "chainsaw", 1: "environment"}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from datasets import load_dataset
|
| 2 |
from sklearn.metrics import accuracy_score
|
| 3 |
+
from transformers import AutoModel
|
| 4 |
+
|
| 5 |
from fast_model import FastModelHuggingFace
|
| 6 |
|
| 7 |
+
model_name = "tlmk22/QuefrencyGuardian"
|
| 8 |
+
fast_model = AutoModel.from_pretrained(model_name)
|
| 9 |
+
# fast_model = FastModelHuggingFace.from_pretrained(repo_id)
|
| 10 |
|
| 11 |
# Perform predictions for a single WAV file
|
| 12 |
map_labels = {0: "chainsaw", 1: "environment"}
|
fast_model.py
CHANGED
|
@@ -202,10 +202,7 @@ class FastModel:
|
|
| 202 |
)
|
| 203 |
|
| 204 |
def batch_audio_loader(
|
| 205 |
-
self,
|
| 206 |
-
dataset: Dataset,
|
| 207 |
-
batch_size: int = 1,
|
| 208 |
-
offset: int = 0,
|
| 209 |
):
|
| 210 |
"""Optimized loader for audio data from a dataset for training or inference in batches.
|
| 211 |
|
|
@@ -254,7 +251,7 @@ class FastModel:
|
|
| 254 |
padding_method = self.audio_processing_params["padding_method"]
|
| 255 |
|
| 256 |
device = torch.device(
|
| 257 |
-
"cuda" if
|
| 258 |
)
|
| 259 |
batch_audios, batch_labels = [], []
|
| 260 |
resample_24000, resample_24000_indices = [], []
|
|
@@ -485,9 +482,7 @@ class FastModelHuggingFace:
|
|
| 485 |
features = []
|
| 486 |
for batch_audios, _ in self.pipeline.batch_audio_loader(
|
| 487 |
input_data,
|
| 488 |
-
waveform_duration=self.pipeline.audio_processing_params["duration"],
|
| 489 |
batch_size=batch_size,
|
| 490 |
-
padding_method=self.pipeline.audio_processing_params["padding_method"],
|
| 491 |
device=device,
|
| 492 |
):
|
| 493 |
batch_features = self.pipeline.get_features(batch_audios)
|
|
|
|
| 202 |
)
|
| 203 |
|
| 204 |
def batch_audio_loader(
|
| 205 |
+
self, dataset: Dataset, batch_size: int = 1, offset: int = 0, device="cpu"
|
|
|
|
|
|
|
|
|
|
| 206 |
):
|
| 207 |
"""Optimized loader for audio data from a dataset for training or inference in batches.
|
| 208 |
|
|
|
|
| 251 |
padding_method = self.audio_processing_params["padding_method"]
|
| 252 |
|
| 253 |
device = torch.device(
|
| 254 |
+
"cuda" if device == "cuda" and torch.cuda.is_available() else "cpu"
|
| 255 |
)
|
| 256 |
batch_audios, batch_labels = [], []
|
| 257 |
resample_24000, resample_24000_indices = [], []
|
|
|
|
| 482 |
features = []
|
| 483 |
for batch_audios, _ in self.pipeline.batch_audio_loader(
|
| 484 |
input_data,
|
|
|
|
| 485 |
batch_size=batch_size,
|
|
|
|
| 486 |
device=device,
|
| 487 |
):
|
| 488 |
batch_features = self.pipeline.get_features(batch_audios)
|