MaHaWo committed on
Commit
1582553
·
1 Parent(s): f93c7f1

add model and preprocessor code

Browse files
birdnet_custom_v2.4/model.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import numpy as np
3
+
4
+ try:
5
+ import tflite_runtime.interpreter as tflite
6
+ except Exception:
7
+ from tensorflow import lite as tflite
8
+
9
+ from birdnetlib.analyzer import AnalyzerConfigurationError
10
+
11
+ from iSparrow.sparrow_model_base import ModelBase
12
+ from iSparrow import utils
13
+
14
+
15
class Model(ModelBase):
    """
    Model class wrapping the default birdnet tflite model (used as a feature
    extractor) together with a custom tflite classifier that maps the
    resulting feature embeddings to species.
    """

    def _check_classifier_path_integrity(
        self, classifier_model_path: str, classifier_labels_path: str
    ):
        """Check that custom classifier/labels are both given if one is present and that the files they point to exist.

        Args:
            classifier_model_path (str): path to the custom classifier model file
            classifier_labels_path (str): path to the custom classifier labels file

        Raises:
            AnalyzerConfigurationError: if only one of the two paths is given,
                or a given path does not point to an existing file.
        """
        # either both paths are given or neither
        if (classifier_model_path is None) != (classifier_labels_path is None):
            raise AnalyzerConfigurationError(
                "Model and label file paths must be specified to use a custom classifier"
            )

        if (
            classifier_model_path is not None
            and not Path(classifier_model_path).exists()
        ):
            raise AnalyzerConfigurationError(
                f"Custom classifier model could not be found at the provided path {classifier_model_path}"
            )

        # consistency fix: guard on the labels path itself (the original
        # guarded on the model path; equivalent after the both-or-neither
        # check above, but fragile if that check ever changes)
        if (
            classifier_labels_path is not None
            and not Path(classifier_labels_path).exists()
        ):
            raise AnalyzerConfigurationError(
                f"Custom classifier labels could not be found at the provided path {classifier_labels_path}"
            )

    def __init__(
        self,
        default_model_path: str = None,
        model_path: str = None,
        sigmoid_sensitivity: float = 1.0,
        num_threads: int = 1,
    ):
        """
        __init__ Create a new model instance that uses the default birdnet model for feature extraction and a custom classifier for species classification.

        Args:
            default_model_path (str): folder containing the default birdnet 'model.tflite' and 'labels.txt'
            model_path (str): folder containing the custom classifier 'model.tflite' and 'labels.txt'
            sigmoid_sensitivity (float, optional): parameter of the sigmoid used to map logits to probabilities. Defaults to 1.0.
            num_threads (int, optional): number of threads used for inference. Defaults to 1.
        """
        self.default_model_path = str(Path(default_model_path) / "model.tflite")
        self.default_labels_path = str(Path(default_model_path) / "labels.txt")

        classifier_model_path = str(Path(model_path) / "model.tflite")
        classifier_labels_path = str(Path(model_path) / "labels.txt")

        self.sensitivity = sigmoid_sensitivity

        # check custom classifier paths through function due to higher complexity
        self._check_classifier_path_integrity(
            classifier_model_path, classifier_labels_path
        )

        # need to call this custom because the super class has no prefix..
        self.custom_classifier = None
        self.custom_input_layer_index = None
        self.custom_output_layer_index = None

        self.input_layer_index = None
        self.output_layer_index = None

        # use the super class for handling the default models and load the custom ones in this one
        super().__init__(
            "birdnet_custom",
            model_path=classifier_model_path,
            labels_path=classifier_labels_path,
            num_threads=num_threads,
            sensitivity=sigmoid_sensitivity,
        )

    def load_model(self):
        """
        load_model Load the default model for making feature embeddings and the custom classifier for classifying them into species.
        """
        # this overrides the base method because we need to load the default
        # model to provide the feature embeddings and the custom classifier
        # to apply to them to get the actual classification

        # load the default model
        self.model = utils.load_model_from_file_tflite(
            self.default_model_path, num_threads=self.num_threads
        )
        self.model.allocate_tensors()

        # Get input and output tensors.
        input_details = self.model.get_input_details()
        output_details = self.model.get_output_details()

        # Get input tensor index
        self.input_layer_index = input_details[0]["index"]

        # index - 1 addresses the layer before the final classification
        # output, i.e. the feature embeddings
        self.output_layer_index = output_details[0]["index"] - 1
        print("Default classifier loaded")

        # now load the custom classifier
        self.custom_classifier = tflite.Interpreter(
            model_path=str(self.model_path), num_threads=self.num_threads
        )
        self.custom_classifier.allocate_tensors()

        # Get input and output tensors.
        custom_input_details = self.custom_classifier.get_input_details()
        custom_output_details = self.custom_classifier.get_output_details()

        self.custom_input_layer_index = custom_input_details[0]["index"]
        self.custom_output_layer_index = custom_output_details[0]["index"]

        print("Custom classifier loaded")

    def load_species_list(self):
        # TODO: species-list filtering is not implemented yet
        pass

    def get_embeddings(self, data: np.array) -> np.array:
        """
        get_embeddings Extract feature embedding from audio file without immediately classifying the species.
        These can in a second step be used with a custom classifier to find species not
        included in the default training data.

        Args:
            data (np.array): Preprocessed audio snippet to extract features from

        Returns:
            np.array: Feature embedding produced by the default birdnet CNN.
        """
        print(" get embeddings")

        # make the input tensor accept the whole batch at once
        self.model.resize_tensor_input(
            self.input_layer_index, [len(data), *data[0].shape]
        )
        self.model.allocate_tensors()

        # Extract feature embeddings
        self.model.set_tensor(self.input_layer_index, np.array(data, dtype="float32"))
        self.model.invoke()

        features = self.model.get_tensor(self.output_layer_index)

        return features

    def predict(self, sample: np.array) -> np.array:
        """
        predict Make inference about the bird species for the preprocessed data passed to this function as arguments.

        Args:
            sample (np.array): a single preprocessed data chunk

        Returns:
            np.array: per-species confidence values
        """
        data = np.array([sample], dtype="float32")

        input_details = self.custom_classifier.get_input_details()

        input_size = input_details[0]["shape"][-1]

        # if the custom classifier expects raw audio (144000 samples, i.e.
        # 3 s at 48 kHz — TODO confirm this holds for all supported custom
        # classifiers) feed the audio directly, otherwise feed the feature
        # embeddings computed by the default model
        feature_vector = self.get_embeddings(data) if input_size != 144000 else data

        self.custom_classifier.resize_tensor_input(
            self.custom_input_layer_index,
            [len(feature_vector), *feature_vector[0].shape],
        )
        self.custom_classifier.allocate_tensors()

        # Make a prediction
        self.custom_classifier.set_tensor(
            self.custom_input_layer_index, np.array(feature_vector, dtype="float32")
        )
        self.custom_classifier.invoke()

        prediction = self.custom_classifier.get_tensor(self.custom_output_layer_index)

        # map to probabilities
        confidence = self._sigmoid(np.array(prediction), -self.sensitivity)

        return confidence

    @classmethod
    def from_cfg(cls, sparrow_dir: str, cfg: dict):
        """
        from_cfg Create a new instance from a dictionary containing keyword arguments. Usually loaded from a config file.

        Args:
            sparrow_dir (str): Installation directory of the Sparrow package
            cfg (dict): Dictionary containing the keyword arguments

        Returns:
            Model: New model instance created with the supplied kwargs.
        """
        # preprocess config because we need two models here
        cfg["default_model_path"] = str(
            Path(sparrow_dir) / Path("models") / Path("birdnet_default")
        )
        cfg["model_path"] = str(
            Path(sparrow_dir) / Path("models") / Path(cfg["model_path"])
        )
        return cls(**cfg)
birdnet_custom_v2.4/preprocessor.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import iSparrow.preprocessor_base as ppb
3
+
4
+
5
class Preprocessor(ppb.PreprocessorBase):
    """
    Preprocessor Turn raw audio into resampled, fixed-length chunks ready for analysis.
    """

    def __init__(
        self,
        sample_rate: int = 48000,
        overlap: float = 0.0,
        sample_secs: int = 3.0,
        resample_type: str = "kaiser_fast",
    ):
        """
        __init__ Build a preprocessor for custom birdnet classifiers; parameters not supplied fall back to defaults.

        Args:
            sample_rate (int, optional): Target rate used to resample the read audio file. Defaults to 48000.
            overlap (float, optional): Overlap between consecutive chunks. Defaults to 0.0.
            sample_secs (int, optional): Length of each chunk to be analyzed at once. Defaults to 3.0.
            resample_type (str, optional): Resampling method applied when reading from file. Defaults to "kaiser_fast".
        """
        super().__init__(
            "birdnet_custom",
            sample_rate=sample_rate,
            overlap=overlap,
            sample_secs=sample_secs,
            resample_type=resample_type,
        )

    def process_audio_data(self, rawdata: np.ndarray) -> list:
        """
        process_audio_data Split raw, resampled audio into chunks that can then be analyzed.

        Args:
            rawdata (np.ndarray): raw, resampled audio data as returned from 'read_audio'

        Returns:
            list: chunked audio data
        """
        print("process audio data custom")

        chunk_secs = self.sample_secs
        min_secs = 1.5

        # loop invariants, hoisted. NOTE(review): the step uses
        # self.sample_rate while the chunk lengths use
        # self.actual_sampling_rate, mirroring the original — confirm this
        # mix is intended.
        step = int((chunk_secs - self.overlap) * self.sample_rate)
        full_len = int(chunk_secs * self.actual_sampling_rate)
        min_len = int(min_secs * self.actual_sampling_rate)

        self.chunks = []

        for start in range(0, len(rawdata), step):

            piece = rawdata[start : start + full_len]

            # remaining signal too short to analyze -> done
            if len(piece) < min_len:
                break

            # trailing chunk shorter than a full window -> zero-pad it
            if len(piece) < full_len:
                padded = np.zeros((full_len))
                padded[: len(piece)] = piece
                piece = padded

            self.chunks.append(piece)

        print(
            "process audio data custom: complete, read ",
            str(len(self.chunks)),
            "chunks.",
        )

        return self.chunks

    @classmethod
    def from_cfg(cls, cfg: dict):
        """
        from_cfg Build a preprocessor from a config dictionary, typically a node read from a YAML file.

        Args:
            cfg (dict): Config node read from a YAML file

        Returns: new preprocessor instance
        """
        allowed = [
            "sample_rate",
            "overlap",
            "sample_secs",
            "resample_type",
            "duration",
            "actual_sampling_rate",
        ]

        # reject any unexpected keyword argument
        if any(key not in allowed for key in cfg):
            raise RuntimeError("Erroneous keyword arguments in preprocessor config")

        return cls(**cfg)
birdnet_default_v2.4/model.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import numpy as np
3
+
4
+ from iSparrow.sparrow_model_base import ModelBase
5
+
6
+ # from iSparrow import utils
7
+
8
+
9
class Model(ModelBase):
    """
    Model class for the stock birdnet tflite model. Custom species lists and
    location-based species prediction are currently not supported.
    """

    def __init__(
        self,
        model_path: str,
        num_threads: int = 1,
        sigmoid_sensitivity: float = 1.0,
        species_list_file: str = None,
    ):
        """
        __init__ Build a model instance around the birdnet-analyzer tflite model.

        Args:
            model_path (str): Folder that holds 'model.tflite' and 'labels.txt'
            num_threads (int, optional): Threads used for inference. Defaults to 1.
            sigmoid_sensitivity (float, optional): Sigmoid parameter used when turning logits into probabilities. Defaults to 1.0.
            species_list_file (str, optional): Currently unused. Defaults to None.

        Raises:
            AnalyzerConfigurationError: 'model.tflite' is missing at the given path.
            AnalyzerConfigurationError: 'labels.txt' is missing at the given path.
        """
        folder = Path(model_path)
        labels_path = str(folder / "labels.txt")
        model_path = str(folder / "model.tflite")

        # the base class takes care of loading both model and labels
        super().__init__(
            "birdnet_default",
            model_path,
            labels_path,
            num_threads=num_threads,
            sensitivity=sigmoid_sensitivity,
        )

        # cache the tensor indices so inference does not look them up on
        # every call
        in_details = self.model.get_input_details()
        out_details = self.model.get_output_details()

        self.input_layer_index = in_details[0]["index"]
        self.output_layer_index = out_details[0]["index"]

    def load_species_list(self):
        # TODO
        pass

    def predict(self, sample: np.array) -> np.array:
        """
        predict Run species inference on one preprocessed chunk of audio.

        Args:
            sample (np.array): a single preprocessed data chunk

        Returns:
            np.array: per-species confidence values
        """
        batch = np.array([sample], dtype="float32")

        # adapt the input tensor to the batch shape before running inference
        self.model.resize_tensor_input(
            self.input_layer_index, [len(batch), *batch[0].shape]
        )
        self.model.allocate_tensors()

        # run the forward pass (audio only for now)
        self.model.set_tensor(self.input_layer_index, np.array(batch, dtype="float32"))
        self.model.invoke()

        logits = self.model.get_tensor(self.output_layer_index)

        # squash logits into confidences via the parameterized sigmoid
        return self._sigmoid(np.array(logits), sensitivity=-self.sensitivity)

    @classmethod
    def from_cfg(cls, sparrow_folder: str, cfg: dict):
        """
        from_cfg Create a new instance from a dictionary containing keyword arguments, usually loaded from a config file.

        Args:
            sparrow_folder (str): Installation directory of the Sparrow package
            cfg (dict): Dictionary containing the keyword arguments

        Returns:
            Model: New model instance created with the supplied kwargs.
        """
        models_root = Path(sparrow_folder) / Path("models")
        cfg["model_path"] = str(models_root / cfg["model_path"])

        return cls(**cfg)
birdnet_default_v2.4/preprocessor.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import iSparrow.preprocessor_base as ppb
3
+
4
+
5
class Preprocessor(ppb.PreprocessorBase):
    """
    Preprocessor Turn raw audio into resampled, fixed-length chunks ready for analysis.
    """

    def __init__(
        self,
        sample_rate: int = 48000,
        overlap: float = 0.0,
        sample_secs: int = 3.0,
        resample_type: str = "kaiser_fast",
    ):
        """
        __init__ Build a preprocessor for the default birdnet classifier; parameters not supplied fall back to defaults.

        Args:
            sample_rate (int, optional): Target rate used to resample the read audio file. Defaults to 48000.
            overlap (float, optional): Overlap between consecutive chunks. Defaults to 0.0.
            sample_secs (int, optional): Length of each chunk to be analyzed at once. Defaults to 3.0.
            resample_type (str, optional): Resampling method applied when reading from file. Defaults to "kaiser_fast".
        """
        super().__init__(
            "birdnet_default",
            sample_rate=sample_rate,
            overlap=overlap,
            sample_secs=sample_secs,
            resample_type=resample_type,
        )

    def process_audio_data(self, rawdata: np.ndarray) -> list:
        """
        process_audio_data Split raw, resampled audio into chunks that can then be analyzed.

        Args:
            rawdata (np.ndarray): raw, resampled audio data as returned from 'read_audio'

        Returns:
            list: chunked audio data
        """
        print("process audio data default")

        chunk_secs = self.sample_secs
        min_secs = 1.5

        # loop invariants, hoisted. NOTE(review): the step uses
        # self.sample_rate while the chunk lengths use
        # self.actual_sampling_rate, mirroring the original — confirm this
        # mix is intended.
        step = int((chunk_secs - self.overlap) * self.sample_rate)
        full_len = int(chunk_secs * self.actual_sampling_rate)
        min_len = int(min_secs * self.actual_sampling_rate)

        self.chunks = []

        for start in range(0, len(rawdata), step):

            piece = rawdata[start : start + full_len]

            # remaining signal too short to analyze -> done
            if len(piece) < min_len:
                break

            # trailing chunk shorter than a full window -> zero-pad it
            if len(piece) < full_len:
                padded = np.zeros((full_len))
                padded[: len(piece)] = piece
                piece = padded

            self.chunks.append(piece)

        print(
            "process audio data default: complete, read ",
            str(len(self.chunks)),
            "chunks.",
        )

        return self.chunks

    @classmethod
    def from_cfg(cls, cfg: dict):
        """
        from_cfg Build a preprocessor from a config dictionary, typically a node read from a YAML file.

        Args:
            cfg (dict): Config node read from a YAML file

        Returns: new preprocessor instance
        """
        allowed = [
            "sample_rate",
            "overlap",
            "sample_secs",
            "resample_type",
            "duration",
            "actual_sampling_rate",
        ]

        # reject any unexpected keyword argument
        if any(key not in allowed for key in cfg):
            raise RuntimeError("Erroneous keyword arguments in preprocessor config")

        return cls(**cfg)
google_bird_classification/model.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import numpy as np
3
+ import tensorflow as tf
4
+ from iSparrow.sparrow_model_base import ModelBase
5
+ import pandas as pd
6
+
7
+
8
class Model(ModelBase):
    """
    Model class that wraps the google perch saved-model for bird species
    classification.
    """

    def __init__(self, model_path: str, num_threads: int = 1, species_list_file=None):
        """
        __init__ Create a new Model instance using the google perch model.

        Args:
            model_path (str): Path to the folder containing the model file to load from disk
            num_threads (int): The number of threads used for inference. Currently not used for this model.
            species_list_file (str, optional): Currently unused. Defaults to None.
        """
        labels_path = str(Path(model_path) / "labels.txt")

        model_path = str(Path(model_path) / "saved_model.pb")

        self.class_mask = None  # used later

        super().__init__(
            "google_perch",
            model_path,
            labels_path,
            num_threads=num_threads,
            # sensitivity kwarg doesn't exist here
        )  # num_threads doesn't do anything here.

    def predict(self, data: np.array):
        """
        predict Make inference about the bird species for the preprocessed data passed to this function as arguments.

        Args:
            data (np.array): a single preprocessed data chunk

        Returns:
            np.array: softmax probabilities over the model's classes
        """
        # FIX: dropped the dead `results = self.labels.copy()` assignment —
        # it was unconditionally overwritten by the softmax result below.

        # README: this should be parallelized??
        # the model returns (logits, embeddings); the embeddings are unused here
        logits, _embeddings = self.model.infer_tf(
            np.array(
                [
                    data,
                ]
            )
        )

        results = tf.nn.softmax(logits).numpy()
        return results

    @classmethod
    def from_cfg(cls, sparrow_folder: str, cfg: dict):
        """
        from_cfg Create a new instance from a dictionary containing keyword arguments. Usually loaded from a config file.

        Args:
            sparrow_folder (str): Installation directory of the Sparrow package
            cfg (dict): Dictionary containing the keyword arguments

        Returns:
            Model: New model instance created with the supplied kwargs.
        """
        cfg["model_path"] = str(
            Path(sparrow_folder) / Path("models") / Path(cfg["model_path"])
        )

        return cls(**cfg)
google_bird_classification/preprocessor.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import numpy as np
3
+
4
+ from tensorflow.signal import frame as tf_split_signal_into_chunks
5
+
6
+ from iSparrow import preprocessor_base as ppb
7
+
8
+
9
+ # README: work in progress - will be completed in separate issue
10
class Preprocessor(ppb.PreprocessorBase):
    """
    Preprocessor Preprocess audio data into resampled chunks for analysis.

    NOTE(review): work in progress - will be completed in a separate issue.
    """

    def __init__(
        self,
        sample_rate: int = 32000,
        sample_secs: float = 5.0,
        resample_type: str = "kaiser_fast",
    ):
        """
        __init__ Construct a new preprocessor for the google perch model from given parameters, and use defaults for the ones not present.

        Args:
            sample_rate (int, optional): The sample rate used to resample the read audio file. Defaults to 32000.
            sample_secs (float, optional): Length of chunks to be analyzed at once. Defaults to 5.0.
            resample_type (str, optional): Resampling method used when reading from file. Defaults to "kaiser_fast".
        """
        # README: this class does not define an overlap attribute of its own
        # because the model it works with does not want one; `self.overlap`
        # used below is presumably supplied by the base class — TODO confirm
        super().__init__(
            "google_perch",
            sample_rate=sample_rate,
            sample_secs=sample_secs,
            resample_type=resample_type,
        )

    def process_audio_data(self, rawdata: np.array) -> np.array:
        """
        process_audio_data Process raw, resampled audio data into chunks that then can be analyzed

        Args:
            rawdata (np.array): raw, resampled audio data as returned from 'read_audio'

        Returns:
            np.array: chunked audio data

        Raises:
            RuntimeError: when the file's actual sampling rate differs from the configured one
        """
        print("process audio data custom ")

        self.chunks = []

        # raise when sampling rate is unequal.
        if self.actual_sampling_rate != self.sample_rate:
            # BUG FIX: this message was a plain string, so the {placeholders}
            # were never interpolated; it is an f-string now
            raise RuntimeError(
                f"Sampling rate is not the desired one. Desired sampling rate: {self.sample_rate}, actual sampling rate: {self.actual_sampling_rate}"
            )

        frame_length = int(self.sample_secs * self.sample_rate)
        # BUG FIX: the original computed `int(secs - overlap) * rate`, which
        # truncates BEFORE multiplying and is wrong for fractional overlaps;
        # truncate the product instead, as the other preprocessors do
        step_length = int((self.sample_secs - self.overlap) * self.sample_rate)

        self.chunks = tf_split_signal_into_chunks(
            rawdata, frame_length, step_length, pad_end=True
        ).numpy()

        print(
            "process audio data google: complete, read ",
            str(len(self.chunks)),
            "chunks.",
        )

        return self.chunks

    @classmethod
    def from_cfg(cls, cfg: dict):
        """
        from_cfg Construct a new preprocessor from a given dictionary, typically a config node read from a YAML file.

        Args:
            cfg (dict): Config node read from a YAML file

        Returns: new preprocessor instance
        """
        # make sure there are no more than the allowed keyword arguments in the cfg
        allowed = [
            "sample_rate",
            "sample_secs",
            "resample_type",
            "duration",
            "actual_sampling_rate",
        ]

        if any(key not in allowed for key in cfg):
            raise RuntimeError("Erroneous keyword arguments in preprocessor config")

        return cls(**cfg)