tlemagueresse
committed on
Commit
·
1583464
1
Parent(s):
280e76e
Define model_type in the config
Browse files
- config.json +1 -1
- example_usage_fastmodel_hf.py +5 -5
- fast_model.py +2 -7
config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"
|
| 3 |
"framework": "PyTorch + LightGBM",
|
| 4 |
"audio_processing_params": {
|
| 5 |
"sample_rate": 12000,
|
|
|
|
| 1 |
{
|
| 2 |
+
"model_type": "audio-spectrogram-transformer",
|
| 3 |
"framework": "PyTorch + LightGBM",
|
| 4 |
"audio_processing_params": {
|
| 5 |
"sample_rate": 12000,
|
example_usage_fastmodel_hf.py
CHANGED
|
@@ -1,12 +1,12 @@
|
|
| 1 |
-
from pathlib import Path
|
| 2 |
-
|
| 3 |
-
import torchaudio
|
| 4 |
from datasets import load_dataset
|
| 5 |
from sklearn.metrics import accuracy_score
|
|
|
|
|
|
|
| 6 |
from fast_model import FastModelHuggingFace
|
| 7 |
|
| 8 |
-
|
| 9 |
-
fast_model =
|
|
|
|
| 10 |
|
| 11 |
# Perform predictions for a single WAV file
|
| 12 |
map_labels = {0: "chainsaw", 1: "environment"}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from datasets import load_dataset
|
| 2 |
from sklearn.metrics import accuracy_score
|
| 3 |
+
from transformers import AutoModel
|
| 4 |
+
|
| 5 |
from fast_model import FastModelHuggingFace
|
| 6 |
|
| 7 |
+
model_name = "tlmk22/QuefrencyGuardian"
|
| 8 |
+
fast_model = AutoModel.from_pretrained(model_name)
|
| 9 |
+
# fast_model = FastModelHuggingFace.from_pretrained(repo_id)
|
| 10 |
|
| 11 |
# Perform predictions for a single WAV file
|
| 12 |
map_labels = {0: "chainsaw", 1: "environment"}
|
fast_model.py
CHANGED
|
@@ -202,10 +202,7 @@ class FastModel:
|
|
| 202 |
)
|
| 203 |
|
| 204 |
def batch_audio_loader(
|
| 205 |
-
self,
|
| 206 |
-
dataset: Dataset,
|
| 207 |
-
batch_size: int = 1,
|
| 208 |
-
offset: int = 0,
|
| 209 |
):
|
| 210 |
"""Optimized loader for audio data from a dataset for training or inference in batches.
|
| 211 |
|
|
@@ -254,7 +251,7 @@ class FastModel:
|
|
| 254 |
padding_method = self.audio_processing_params["padding_method"]
|
| 255 |
|
| 256 |
device = torch.device(
|
| 257 |
-
"cuda" if
|
| 258 |
)
|
| 259 |
batch_audios, batch_labels = [], []
|
| 260 |
resample_24000, resample_24000_indices = [], []
|
|
@@ -485,9 +482,7 @@ class FastModelHuggingFace:
|
|
| 485 |
features = []
|
| 486 |
for batch_audios, _ in self.pipeline.batch_audio_loader(
|
| 487 |
input_data,
|
| 488 |
-
waveform_duration=self.pipeline.audio_processing_params["duration"],
|
| 489 |
batch_size=batch_size,
|
| 490 |
-
padding_method=self.pipeline.audio_processing_params["padding_method"],
|
| 491 |
device=device,
|
| 492 |
):
|
| 493 |
batch_features = self.pipeline.get_features(batch_audios)
|
|
|
|
| 202 |
)
|
| 203 |
|
| 204 |
def batch_audio_loader(
|
| 205 |
+
self, dataset: Dataset, batch_size: int = 1, offset: int = 0, device="cpu"
|
|
|
|
|
|
|
|
|
|
| 206 |
):
|
| 207 |
"""Optimized loader for audio data from a dataset for training or inference in batches.
|
| 208 |
|
|
|
|
| 251 |
padding_method = self.audio_processing_params["padding_method"]
|
| 252 |
|
| 253 |
device = torch.device(
|
| 254 |
+
"cuda" if device == "cuda" and torch.cuda.is_available() else "cpu"
|
| 255 |
)
|
| 256 |
batch_audios, batch_labels = [], []
|
| 257 |
resample_24000, resample_24000_indices = [], []
|
|
|
|
| 482 |
features = []
|
| 483 |
for batch_audios, _ in self.pipeline.batch_audio_loader(
|
| 484 |
input_data,
|
|
|
|
| 485 |
batch_size=batch_size,
|
|
|
|
| 486 |
device=device,
|
| 487 |
):
|
| 488 |
batch_features = self.pipeline.get_features(batch_audios)
|