Spaces:
Sleeping
Sleeping
Commit ·
bab1cc1
1
Parent(s): 13cbb3a
Upload 4 files
Browse files
audio.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Module containing audio helper functions.
|
| 2 |
+
"""
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
import config as cfg
|
| 6 |
+
|
| 7 |
+
RANDOM = np.random.RandomState(cfg.RANDOM_SEED)
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def openAudioFile(path: str, sample_rate=48000, offset=0.0, duration=None):
    """Load an audio file as a mono time series.

    Decoding is delegated to librosa (which in turn uses ffmpeg or libav),
    and the recording is resampled to the requested rate.

    Args:
        path: Path to the audio file.
        sample_rate: Target sample rate for resampling.
        offset: Position (in seconds) at which reading starts.
        duration: Maximum number of seconds to load, or None for everything.

    Returns:
        Tuple of (audio time series, sampling rate).
    """
    # Imported lazily so the module loads even when librosa is absent.
    import librosa

    signal, rate_out = librosa.load(
        path,
        sr=sample_rate,
        offset=offset,
        duration=duration,
        mono=True,
        res_type="kaiser_fast",
    )

    return signal, rate_out
| 32 |
+
def get_sample_rate(path: str):
    """Return the native sample rate of an audio file (no full decode)."""
    import librosa

    native_rate = librosa.get_samplerate(path)
    return native_rate
| 37 |
+
def saveSignal(sig, fname: str, rate=48000):
    """Saves a signal to file as 16-bit PCM.

    Args:
        sig: The signal to be saved.
        fname: The file path.
        rate: Sample rate to write with. Defaults to 48000 (the model rate),
            which preserves the previous hard-coded behavior; callers that
            resampled to a different rate can now pass it explicitly.
    """
    import soundfile as sf

    sf.write(fname, sig, rate, "PCM_16")
| 49 |
+
def noise(sig, shape, amount=None):
|
| 50 |
+
"""Creates noise.
|
| 51 |
+
|
| 52 |
+
Creates a noise vector with the given shape.
|
| 53 |
+
|
| 54 |
+
Args:
|
| 55 |
+
sig: The original audio signal.
|
| 56 |
+
shape: Shape of the noise.
|
| 57 |
+
amount: The noise intensity.
|
| 58 |
+
|
| 59 |
+
Returns:
|
| 60 |
+
An numpy array of noise with the given shape.
|
| 61 |
+
"""
|
| 62 |
+
# Random noise intensity
|
| 63 |
+
if amount == None:
|
| 64 |
+
amount = RANDOM.uniform(0.1, 0.5)
|
| 65 |
+
|
| 66 |
+
# Create Gaussian noise
|
| 67 |
+
try:
|
| 68 |
+
noise = RANDOM.normal(min(sig) * amount, max(sig) * amount, shape)
|
| 69 |
+
except:
|
| 70 |
+
noise = np.zeros(shape)
|
| 71 |
+
|
| 72 |
+
return noise.astype("float32")
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def splitSignal(sig, rate, seconds, overlap, minlen):
    """Split signal into (possibly overlapping) chunks.

    Args:
        sig: The original signal to be split.
        rate: The sampling rate.
        seconds: The duration of a segment.
        overlap: The overlapping seconds of segments; must be < seconds.
        minlen: Minimum length (in seconds) of the final split.

    Returns:
        A list of splits, each padded with noise to the full segment length.

    Raises:
        ValueError: If overlap >= seconds. Previously a zero step crashed
            with a cryptic range() error and a negative step silently
            returned an empty list.
    """
    step = int((seconds - overlap) * rate)
    if step <= 0:
        raise ValueError("overlap must be smaller than seconds")

    chunk_len = int(seconds * rate)
    sig_splits = []

    for i in range(0, len(sig), step):
        split = sig[i : i + chunk_len]

        # End of signal? Drop a trailing remainder that is shorter than minlen.
        if len(split) < int(minlen * rate) and len(sig_splits) > 0:
            break

        # Signal chunk too short? Pad it with noise up to the full length.
        if len(split) < chunk_len:
            split = np.hstack((split, noise(split, (chunk_len - len(split)), 0.5)))

        sig_splits.append(split)

    return sig_splits
| 106 |
+
def cropCenter(sig, rate, seconds):
    """Crop or pad a signal to exactly `seconds` around its center.

    Args:
        sig: The original signal.
        rate: The sampling rate.
        seconds: The target length in seconds.

    Returns:
        The center-cropped signal, or the signal padded with noise when it
        is shorter than the target length.
    """
    target = int(seconds * rate)

    if len(sig) > target:
        # Too long: keep the centered window of `target` samples.
        begin = (len(sig) - target) // 2
        sig = sig[begin : begin + target]
    elif len(sig) < target:
        # Too short: pad the tail with noise up to the target length.
        sig = np.hstack((sig, noise(sig, (target - len(sig)), 0.5)))

    return sig
|
config.py
ADDED
|
@@ -0,0 +1,269 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#################
# Misc settings #
#################

# Random seed for gaussian noise
RANDOM_SEED = 42

##########################
# Model paths and config #
##########################

# NOTE(review): the "VESION" typo is preserved on purpose — other modules
# read cfg.MODEL_VESION, so renaming it would break them.
MODEL_VESION = 'V2.4'
PB_MODEL = 'checkpoints/V2.4/BirdNET_GLOBAL_6K_V2.4_Model'
# MODEL_PATH = PB_MODEL # This will load the protobuf model
MODEL_PATH = 'checkpoints/V2.4/BirdNET_GLOBAL_6K_V2.4_Model_FP32.tflite'
MDATA_MODEL_PATH = 'checkpoints/V2.4/BirdNET_GLOBAL_6K_V2.4_MData_Model_FP16.tflite'
LABELS_FILE = 'checkpoints/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels.txt'
TRANSLATED_LABELS_PATH = 'labels/V2.4'

# Path to custom trained classifier
# If None, no custom classifier will be used
# Make sure to set the LABELS_FILE above accordingly
CUSTOM_CLASSIFIER = None

##################
# Audio settings #
##################

# We use a sample rate of 48kHz, so the model input size is
# (batch size, 48000 kHz * 3 seconds) = (1, 144000)
# Recordings will be resampled automatically.
SAMPLE_RATE: int = 48000

# We're using 3-second chunks
SIG_LENGTH: float = 3.0

# Define overlap between consecutive chunks <3.0; 0 = no overlap
SIG_OVERLAP: float = 0

# Define minimum length of audio chunk for prediction;
# shorter chunks are padded up to SIG_LENGTH (the audio helpers pad with
# low-level noise, not zeros).
SIG_MINLEN: float = 1.0

# Frequency range. This is model specific and should not be changed.
SIG_FMIN = 0
SIG_FMAX = 15000

#####################
# Metadata settings #
#####################

# Location and week-of-year fed to the species range model; -1 = unset.
LATITUDE = -1
LONGITUDE = -1
WEEK = -1
LOCATION_FILTER_THRESHOLD = 0.03

######################
# Inference settings #
######################

# If None or empty file, no custom species list will be used
# Note: Entries in this list have to match entries from the LABELS_FILE
# We use the 2021 eBird taxonomy for species names (Clements list)
CODES_FILE = 'eBird_taxonomy_codes_2021E.json'
SPECIES_LIST_FILE = 'example/species_list.txt'

# File input path and output path for selection tables
INPUT_PATH: str = 'example/'
OUTPUT_PATH: str = 'example/'

# File extensions accepted when scanning input folders.
ALLOWED_FILETYPES = ['wav', 'flac', 'mp3', 'ogg', 'm4a']

# Number of threads to use for inference.
# Can be as high as number of CPUs in your system
CPU_THREADS: int = 8
TFLITE_THREADS: int = 1

# False will output logits, True will convert to sigmoid activations
APPLY_SIGMOID: bool = True
SIGMOID_SENSITIVITY: float = 1.0

# Minimum confidence score to include in selection table
# (be aware: if APPLY_SIGMOID = False, this no longer represents
# probabilities and needs to be adjusted)
MIN_CONFIDENCE: float = 0.1

# Number of samples to process at the same time. Higher values can increase
# processing speed, but will also increase memory usage.
# Might only be useful for GPU inference.
BATCH_SIZE: int = 1

# Specifies the output format. 'table' denotes a Raven selection table,
# 'audacity' denotes a TXT file with the same format as Audacity timeline labels
# 'csv' denotes a CSV file with start, end, species and confidence.
RESULT_TYPE = 'table'

#####################
# Training settings #
#####################

# Training data path
TRAIN_DATA_PATH = 'train_data/'

# Sample crop mode
SAMPLE_CROP_MODE = 'center'

# List of non-event classes
NON_EVENT_CLASSES = ["noise", "other", "background", "silence"]

# Upsampling settings
UPSAMPLING_RATIO = 0.0
UPSAMPLING_MODE = 'repeat'

# Number of epochs to train for
TRAIN_EPOCHS: int = 100

# Batch size for training
TRAIN_BATCH_SIZE: int = 32

# Validation split (fraction of the training data held out)
TRAIN_VAL_SPLIT: float = 0.2

# Learning rate for training
TRAIN_LEARNING_RATE: float = 0.01

# Number of hidden units in custom classifier
# If >0, a two-layer classifier will be trained
TRAIN_HIDDEN_UNITS: int = 0

# Dropout rate for training
TRAIN_DROPOUT: float = 0.0

# Whether to use mixup for training
TRAIN_WITH_MIXUP: bool = False

# Whether to apply label smoothing for training
TRAIN_WITH_LABEL_SMOOTHING: bool = False

# Model output format
TRAINED_MODEL_OUTPUT_FORMAT = 'tflite'

# Cache settings
TRAIN_CACHE_MODE = 'none'
TRAIN_CACHE_FILE = 'train_cache.npz'

#####################
# Misc runtime vars #
#####################
# Mutable state populated at runtime by other modules.
CODES = {}
LABELS: list[str] = []
TRANSLATED_LABELS: list[str] = []
SPECIES_LIST: list[str] = []
ERROR_LOG_FILE: str = 'error_log.txt'
FILE_LIST = []
FILE_STORAGE_PATH = ''

######################
# Get and set config #
######################
| 161 |
+
def getConfig():
    """Return a snapshot of the tunable configuration values as a dict.

    The dict uses the module-level variable names as keys and can be fed
    back into setConfig() to restore the state (e.g. in worker processes).
    """
    # Same keys, same order as the original hand-written dict literal.
    keys = (
        'RANDOM_SEED',
        'MODEL_PATH',
        'MDATA_MODEL_PATH',
        'LABELS_FILE',
        'CUSTOM_CLASSIFIER',
        'SAMPLE_RATE',
        'SIG_LENGTH',
        'SIG_OVERLAP',
        'SIG_MINLEN',
        'LATITUDE',
        'LONGITUDE',
        'WEEK',
        'LOCATION_FILTER_THRESHOLD',
        'CODES_FILE',
        'SPECIES_LIST_FILE',
        'INPUT_PATH',
        'OUTPUT_PATH',
        'CPU_THREADS',
        'TFLITE_THREADS',
        'APPLY_SIGMOID',
        'SIGMOID_SENSITIVITY',
        'MIN_CONFIDENCE',
        'BATCH_SIZE',
        'RESULT_TYPE',
        'TRAIN_DATA_PATH',
        'TRAIN_EPOCHS',
        'TRAIN_BATCH_SIZE',
        'TRAIN_LEARNING_RATE',
        'TRAIN_HIDDEN_UNITS',
        'CODES',
        'LABELS',
        'TRANSLATED_LABELS',
        'SPECIES_LIST',
        'ERROR_LOG_FILE',
    )
    return {key: globals()[key] for key in keys}
|
| 199 |
+
def setConfig(c):
    """Overwrite the module-level configuration from a dict.

    Args:
        c: Mapping produced by getConfig(); must contain every key below.

    Raises:
        KeyError: If any expected key is missing from `c`.
    """
    # Writing through globals() replaces the long chain of `global`
    # declarations; the same module attributes are assigned in the same order.
    for key in (
        'RANDOM_SEED',
        'MODEL_PATH',
        'MDATA_MODEL_PATH',
        'LABELS_FILE',
        'CUSTOM_CLASSIFIER',
        'SAMPLE_RATE',
        'SIG_LENGTH',
        'SIG_OVERLAP',
        'SIG_MINLEN',
        'LATITUDE',
        'LONGITUDE',
        'WEEK',
        'LOCATION_FILTER_THRESHOLD',
        'CODES_FILE',
        'SPECIES_LIST_FILE',
        'INPUT_PATH',
        'OUTPUT_PATH',
        'CPU_THREADS',
        'TFLITE_THREADS',
        'APPLY_SIGMOID',
        'SIGMOID_SENSITIVITY',
        'MIN_CONFIDENCE',
        'BATCH_SIZE',
        'RESULT_TYPE',
        'TRAIN_DATA_PATH',
        'TRAIN_EPOCHS',
        'TRAIN_BATCH_SIZE',
        'TRAIN_LEARNING_RATE',
        'TRAIN_HIDDEN_UNITS',
        'CODES',
        'LABELS',
        'TRANSLATED_LABELS',
        'SPECIES_LIST',
        'ERROR_LOG_FILE',
    ):
        globals()[key] = c[key]
|
model.py
ADDED
|
@@ -0,0 +1,505 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Contains functions to use the BirdNET models.
|
| 2 |
+
"""
|
| 3 |
+
import os
|
| 4 |
+
import warnings
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
|
| 8 |
+
import config as cfg
|
| 9 |
+
import utils
|
| 10 |
+
|
| 11 |
+
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
| 12 |
+
os.environ["CUDA_VISIBLE_DEVICES"] = ""
|
| 13 |
+
|
| 14 |
+
warnings.filterwarnings("ignore")
|
| 15 |
+
|
| 16 |
+
# Import TFLite from runtime or Tensorflow;
|
| 17 |
+
# import Keras if protobuf model;
|
| 18 |
+
# NOTE: we have to use TFLite if we want to use
|
| 19 |
+
# the metadata model or want to extract embeddings
|
| 20 |
+
try:
|
| 21 |
+
import tflite_runtime.interpreter as tflite
|
| 22 |
+
except ModuleNotFoundError:
|
| 23 |
+
from tensorflow import lite as tflite
|
| 24 |
+
if not cfg.MODEL_PATH.endswith(".tflite"):
|
| 25 |
+
from tensorflow import keras
|
| 26 |
+
|
| 27 |
+
INTERPRETER: tflite.Interpreter = None
|
| 28 |
+
C_INTERPRETER: tflite.Interpreter = None
|
| 29 |
+
M_INTERPRETER: tflite.Interpreter = None
|
| 30 |
+
PBMODEL = None
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def loadModel(class_output=True):
    """Initializes the BirdNET Model.

    Loads either the TFLite interpreter or the protobuf model referenced by
    cfg.MODEL_PATH and caches it in module globals.

    Args:
        class_output: Omits the last layer when False, so inference yields
            feature embeddings instead of class scores (TFLite path only).
    """
    global PBMODEL
    global INTERPRETER
    global INPUT_LAYER_INDEX
    global OUTPUT_LAYER_INDEX

    # Do we have to load the tflite or protobuf model?
    if cfg.MODEL_PATH.endswith(".tflite"):
        # Load TFLite model and allocate tensors.
        INTERPRETER = tflite.Interpreter(model_path=cfg.MODEL_PATH, num_threads=cfg.TFLITE_THREADS)
        INTERPRETER.allocate_tensors()

        # Get input and output tensors.
        input_details = INTERPRETER.get_input_details()
        output_details = INTERPRETER.get_output_details()

        # Get input tensor index
        INPUT_LAYER_INDEX = input_details[0]["index"]

        # Get classification output or feature embeddings
        if class_output:
            OUTPUT_LAYER_INDEX = output_details[0]["index"]
        else:
            # NOTE(review): assumes the embeddings tensor directly precedes
            # the classification output in the TFLite graph — confirm when
            # swapping in a different model file.
            OUTPUT_LAYER_INDEX = output_details[0]["index"] - 1

    else:
        # Import locally so this branch also works when cfg.MODEL_PATH was
        # switched to the protobuf model *after* module import (the
        # module-level keras import only happens when the configured model
        # is not TFLite, so `keras` could otherwise be undefined here).
        from tensorflow import keras

        # Load protobuf model
        # Note: This will throw a bunch of warnings about custom gradients
        # which we will ignore until TF lets us block them
        PBMODEL = keras.models.load_model(cfg.MODEL_PATH, compile=False)
|
| 70 |
+
def loadCustomClassifier():
    """Load the user-trained classifier referenced by cfg.CUSTOM_CLASSIFIER."""
    global C_INTERPRETER
    global C_INPUT_LAYER_INDEX
    global C_OUTPUT_LAYER_INDEX
    global C_INPUT_SIZE

    # Create the interpreter and reserve its tensor buffers.
    interpreter = tflite.Interpreter(model_path=cfg.CUSTOM_CLASSIFIER, num_threads=cfg.TFLITE_THREADS)
    interpreter.allocate_tensors()
    C_INTERPRETER = interpreter

    # Inspect the graph's input/output tensor metadata.
    in_details = interpreter.get_input_details()
    out_details = interpreter.get_output_details()

    # Remember where to feed samples and how wide the input vector is.
    C_INPUT_LAYER_INDEX = in_details[0]["index"]
    C_INPUT_SIZE = in_details[0]["shape"][-1]

    # Remember where the class scores come out.
    C_OUTPUT_LAYER_INDEX = out_details[0]["index"]
+
|
| 94 |
+
def loadMetaModel():
    """Loads the model for species prediction.

    Initializes the TFLite model that predicts a species list from
    coordinates and the week of the year.
    """
    global M_INTERPRETER
    global M_INPUT_LAYER_INDEX
    global M_OUTPUT_LAYER_INDEX

    # Create the metadata-model interpreter and reserve its tensor buffers.
    meta = tflite.Interpreter(model_path=cfg.MDATA_MODEL_PATH, num_threads=cfg.TFLITE_THREADS)
    meta.allocate_tensors()
    M_INTERPRETER = meta

    # Look up where inputs go in and predictions come out.
    in_details = meta.get_input_details()
    out_details = meta.get_output_details()

    M_INPUT_LAYER_INDEX = in_details[0]["index"]
    M_OUTPUT_LAYER_INDEX = out_details[0]["index"]
+
|
| 116 |
+
def buildLinearClassifier(num_labels, input_size, hidden_units=0, dropout=0.0):
    """Build a one- or two-layer linear classifier on top of embeddings.

    Args:
        num_labels: Output size (number of classes).
        input_size: Width of the embedding vector fed into the classifier.
        hidden_units: If > 0, adds a hidden dense (relu) layer with that
            many units, turning this into a two-layer classifier.
        dropout: If > 0, inserts a dropout layer with this rate before each
            dense layer.

    Returns:
        An uncompiled keras.Sequential model ending in sigmoid activations.
    """
    from tensorflow import keras

    # Assemble the layer stack first, then feed it into Sequential.
    stack = [keras.layers.InputLayer(input_shape=(input_size,))]

    # Optional hidden layer (preceded by dropout when requested).
    if hidden_units > 0:
        if dropout > 0:
            stack.append(keras.layers.Dropout(dropout))
        stack.append(keras.layers.Dense(hidden_units, activation="relu"))

    # Dropout in front of the classification head when requested.
    if dropout > 0:
        stack.append(keras.layers.Dropout(dropout))

    # Classification head with per-class sigmoid activations.
    stack.append(keras.layers.Dense(num_labels))
    stack.append(keras.layers.Activation("sigmoid"))

    model = keras.Sequential()
    for layer in stack:
        model.add(layer)

    return model
| 155 |
+
|
| 156 |
+
def trainLinearClassifier(classifier,
                          x_train,
                          y_train,
                          epochs,
                          batch_size,
                          learning_rate,
                          val_split,
                          upsampling_ratio,
                          upsampling_mode,
                          train_with_mixup,
                          train_with_label_smoothing,
                          on_epoch_end=None):
    """Trains a custom classifier.

    Trains a new classifier for BirdNET based on the given data: the data is
    shuffled, split into train/validation sets, optionally upsampled and
    augmented, then fitted with early stopping and a cosine-decay
    learning-rate schedule.

    Args:
        classifier: The classifier to be trained.
        x_train: Samples.
        y_train: Labels.
        epochs: Number of epochs to train.
        batch_size: Batch size.
        learning_rate: The learning rate during training.
        val_split: Fraction of the data held out for validation.
        upsampling_ratio: If > 0, training data is upsampled via
            utils.upsampling towards this ratio.
        upsampling_mode: Upsampling strategy forwarded to utils.upsampling.
        train_with_mixup: Apply mixup augmentation (utils.mixup) when True.
        train_with_label_smoothing: Smooth the labels
            (utils.label_smoothing) when True.
        on_epoch_end: Optional callback `function(epoch, logs)`.

    Returns:
        (classifier, history)
    """
    # import keras
    from tensorflow import keras

    # Adapter that forwards keras epoch-end events to the plain callable.
    class FunctionCallback(keras.callbacks.Callback):
        def __init__(self, on_epoch_end=None) -> None:
            super().__init__()
            self.on_epoch_end_fn = on_epoch_end

        def on_epoch_end(self, epoch, logs=None):
            if self.on_epoch_end_fn:
                self.on_epoch_end_fn(epoch, logs)

    # Set random seed so the shuffle/split below is reproducible.
    np.random.seed(cfg.RANDOM_SEED)

    # Shuffle data
    idx = np.arange(x_train.shape[0])
    np.random.shuffle(idx)
    x_train = x_train[idx]
    y_train = y_train[idx]

    # Random val split
    x_train, y_train, x_val, y_val = utils.random_split(x_train, y_train, val_split)
    print(f"Training on {x_train.shape[0]} samples, validating on {x_val.shape[0]} samples.", flush=True)

    # Upsample training data
    if upsampling_ratio > 0:
        x_train, y_train = utils.upsampling(x_train, y_train, upsampling_ratio, upsampling_mode)
        print(f"Upsampled training data to {x_train.shape[0]} samples.", flush=True)

    # Apply mixup to training data
    if train_with_mixup:
        x_train, y_train = utils.mixup(x_train, y_train)

    # Apply label smoothing
    if train_with_label_smoothing:
        y_train = utils.label_smoothing(y_train)

    # Early stopping (only armed after the first quarter of the epochs).
    callbacks = [
        keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=5, verbose=1, start_from_epoch=epochs // 4, restore_best_weights=True
        ),
        FunctionCallback(on_epoch_end=on_epoch_end),
    ]

    # Cosine annealing lr schedule over the total number of training steps.
    lr_schedule = keras.experimental.CosineDecay(learning_rate, epochs * x_train.shape[0] / batch_size)

    # Compile model
    classifier.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr_schedule),
        loss="binary_crossentropy",
        metrics=[keras.metrics.AUC(curve="PR", multi_label=False, name="AUPRC")],
    )

    # Train model
    history = classifier.fit(
        x_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(x_val, y_val), callbacks=callbacks
    )

    return classifier, history
| 247 |
+
|
| 248 |
+
def saveLinearClassifier(classifier, model_path, labels):
    """Saves a custom classifier on the hard drive.

    Saves the classifier as a tflite model, as well as the used labels in a
    .txt file next to it.

    Args:
        classifier: The custom classifier.
        model_path: Path the model will be saved at.
        labels: List of labels used for the classifier.
    """
    import tensorflow as tf

    # Reuse the already-loaded protobuf model when available.
    saved_model = PBMODEL if PBMODEL else tf.keras.models.load_model(cfg.PB_MODEL, compile=False)

    # Remove activation layer
    classifier.pop()

    # Stack the trained head on top of the BirdNET embeddings model.
    combined_model = tf.keras.Sequential([saved_model.embeddings_model, classifier], "basic")

    # Append .tflite if necessary
    if not model_path.endswith(".tflite"):
        model_path += ".tflite"

    # Make folders
    os.makedirs(os.path.dirname(model_path), exist_ok=True)

    # Save model as tflite. Use a context manager so the file handle is
    # always closed (the original left this to the garbage collector).
    converter = tflite.TFLiteConverter.from_keras_model(combined_model)
    tflite_model = converter.convert()

    with open(model_path, "wb") as model_file:
        model_file.write(tflite_model)

    # Save labels
    with open(model_path.replace(".tflite", "_Labels.txt"), "w") as f:
        for label in labels:
            f.write(label + "\n")
| 284 |
+
|
| 285 |
+
def save_raven_model(classifier, model_path, labels):
    """Saves the custom classifier in Raven (SavedModel) format.

    Combines the BirdNET embeddings model with the custom classifier head,
    exports it as a TensorFlow SavedModel with a "basic" serving signature,
    and writes the Raven-specific metadata next to it: a label-name CSV, a
    class-definition CSV and a model_config.json.

    Args:
        classifier: The custom classifier (keras model head).
        model_path: Path the model will be saved at. A trailing ".tflite"
            suffix is stripped, since a SavedModel directory is written.
        labels: List of label strings used for the classifier.
    """
    import tensorflow as tf
    import csv
    import json

    # Reuse the already-loaded protobuf model if available, otherwise load it.
    saved_model = PBMODEL if PBMODEL else tf.keras.models.load_model(cfg.PB_MODEL, compile=False)
    combined_model = tf.keras.Sequential([saved_model.embeddings_model, classifier], "basic")

    # Make signatures: wrap the keras model in a tf.Module so we can export a
    # named serving signature with a fixed input spec (mono audio, 144000
    # samples = 3 s at 48 kHz per chunk).
    class SignatureModule(tf.Module):
        def __init__(self, keras_model):
            super().__init__()
            self.model = keras_model

        @tf.function(input_signature=[tf.TensorSpec(shape=[None, 144000], dtype=tf.float32)])
        def basic(self, inputs):
            return {"scores": self.model(inputs)}

    smodel = SignatureModule(combined_model)
    signatures = {
        "basic": smodel.basic,
    }

    # Save signature model (strip a ".tflite" suffix — len(".tflite") == 7).
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    model_path = model_path[:-7] if model_path.endswith(".tflite") else model_path
    tf.saved_model.save(smodel, model_path, signatures=signatures)

    # Save label file: short IDs built from the first 4 non-space characters
    # of each label plus a 1-based index.
    labelIds = [label[:4].replace(" ", "") + str(i) for i, label in enumerate(labels, 1)]
    labels_dir = os.path.join(model_path, "labels")

    os.makedirs(labels_dir, exist_ok=True)

    with open(os.path.join(labels_dir, "label_names.csv"), "w", newline="") as labelsfile:
        labelwriter = csv.writer(labelsfile)
        labelwriter.writerows(zip(labelIds, labels))

    # Save class names file: one row per class with a default threshold of
    # 0.25 and the configured frequency band.
    classes_dir = os.path.join(model_path, "classes")

    os.makedirs(classes_dir, exist_ok=True)

    with open(os.path.join(classes_dir, "classes.csv"), "w", newline="") as classesfile:
        classeswriter = csv.writer(classesfile)
        for labelId in labelIds:
            classeswriter.writerow((labelId, 0.25, cfg.SIG_FMIN, cfg.SIG_FMAX, False))

    # Save model config describing the exported signature for Raven.
    model_config = os.path.join(model_path, "model_config.json")
    with open(model_config, "w") as modelconfigfile:
        modelconfig = {
            "specVersion": 1,
            # NOTE(review): "MODEL_VESION" (sic) must match the attribute name
            # declared in config — confirm before renaming.
            "modelDescription": "Custom classifier trained with BirdNET "
            + cfg.MODEL_VESION
            + " embeddings.\nBirdNET was developed by the K. Lisa Yang Center for Conservation Bioacoustics at the Cornell Lab of Ornithology in collaboration with Chemnitz University of Technology.\n\nhttps://birdnet.cornell.edu",
            "modelTypeConfig": {"modelType": "RECOGNITION"},
            "signatures": [
                {
                    "signatureName": "basic",
                    "modelInputs": [{"inputName": "inputs", "sampleRate": 48000.0, "inputConfig": ["batch", "samples"]}],
                    "modelOutputs": [{"outputName": "scores", "outputType": "SCORES"}],
                }
            ],
            "globalSemanticKeys": labelIds,
        }
        json.dump(modelconfig, modelconfigfile, indent=2)
def predictFilter(lat, lon, week):
    """Predicts the probability for each species.

    Runs the metadata (location/week) model on the given coordinates.

    Args:
        lat: The latitude.
        lon: The longitude.
        week: The week of the year [1-48]. Use -1 for yearlong.

    Returns:
        A list of probabilities for all species.
    """
    global M_INTERPRETER

    # Lazily load the metadata model on first use.
    # (Fixed: identity comparison `is None` instead of `== None`, per PEP 8.)
    if M_INTERPRETER is None:
        loadMetaModel()

    # Prepare mdata as a single-row batch: [lat, lon, week].
    sample = np.expand_dims(np.array([lat, lon, week], dtype="float32"), 0)

    # Run inference
    M_INTERPRETER.set_tensor(M_INPUT_LAYER_INDEX, sample)
    M_INTERPRETER.invoke()

    # Return the scores for the single input row.
    return M_INTERPRETER.get_tensor(M_OUTPUT_LAYER_INDEX)[0]
def explore(lat: float, lon: float, week: int):
    """Predicts the species list for a location and time of year.

    Runs the location filter model and returns all species whose score
    passes the configured threshold, paired with their label.

    Args:
        lat: The latitude.
        lon: The longitude.
        week: The week of the year [1-48]. Use -1 for yearlong.

    Returns:
        A list of (score, species) tuples sorted by score, descending.
    """
    # Score every species for this location/week.
    scores = predictFilter(lat, lon, week)

    # Zero out everything below the configured threshold.
    scores = np.where(scores >= cfg.LOCATION_FILTER_THRESHOLD, scores, 0)

    # Pair each score with its label and order by score, highest first.
    pairs = list(zip(scores, cfg.LABELS))
    pairs.sort(key=lambda pair: pair[0], reverse=True)

    return pairs
def flat_sigmoid(x, sensitivity=-1):
    """Applies a sigmoid with clipped input.

    The input is clipped to [-15, 15] before the sigmoid so extreme logits
    saturate instead of overflowing in the exponential.

    Args:
        x: Input value(s), scalar or array-like.
        sensitivity: Scale factor on the (negated) input; -1 gives the
            standard sigmoid.

    Returns:
        The sigmoid of the clipped input.
    """
    clipped = np.clip(x, -15, 15)
    return 1 / (1.0 + np.exp(sensitivity * clipped))
def predict(sample):
    """Uses the main net to predict a sample.

    Args:
        sample: Audio sample (a batch of equally-shaped signal chunks).

    Returns:
        The prediction scores for the sample.
    """
    # Route to the custom classifier when one is configured.
    # (Fixed: identity comparisons `is/is not None` instead of `==/!= None`.)
    if cfg.CUSTOM_CLASSIFIER is not None:
        return predictWithCustomClassifier(sample)

    global INTERPRETER

    # Lazily load the default model (TFLite interpreter or keras protobuf).
    if INTERPRETER is None and PBMODEL is None:
        loadModel()

    if PBMODEL is None:
        # TFLite path: resize the input tensor to the batch size first.
        INTERPRETER.resize_tensor_input(INPUT_LAYER_INDEX, [len(sample), *sample[0].shape])
        INTERPRETER.allocate_tensors()

        # Make a prediction (Audio only for now)
        INTERPRETER.set_tensor(INPUT_LAYER_INDEX, np.array(sample, dtype="float32"))
        INTERPRETER.invoke()

        return INTERPRETER.get_tensor(OUTPUT_LAYER_INDEX)

    # Keras protobuf path.
    # NOTE(review): this calls PBMODEL.embeddings_model, which by its name
    # yields embeddings rather than class scores — confirm this is intended.
    return PBMODEL.embeddings_model.predict(sample)
def predictWithCustomClassifier(sample):
    """Uses the custom classifier to make a prediction.

    Args:
        sample: Audio sample.

    Returns:
        The prediction scores for the sample.
    """
    global C_INTERPRETER
    global C_INPUT_SIZE

    # Lazily load the custom classifier on first use.
    # (Fixed: identity comparison `is None` instead of `== None`, per PEP 8.)
    if C_INTERPRETER is None:
        loadCustomClassifier()

    # Classifiers whose input size is not 144000 raw samples operate on
    # feature embeddings instead of raw audio.
    vector = embeddings(sample) if C_INPUT_SIZE != 144000 else sample

    # Reshape input tensor to match the batch size.
    C_INTERPRETER.resize_tensor_input(C_INPUT_LAYER_INDEX, [len(vector), *vector[0].shape])
    C_INTERPRETER.allocate_tensors()

    # Make a prediction
    C_INTERPRETER.set_tensor(C_INPUT_LAYER_INDEX, np.array(vector, dtype="float32"))
    C_INTERPRETER.invoke()

    return C_INTERPRETER.get_tensor(C_OUTPUT_LAYER_INDEX)
def embeddings(sample):
    """Extracts the feature embeddings for a sample.

    Args:
        sample: Audio samples.

    Returns:
        The embeddings.
    """
    global INTERPRETER

    # Lazily load the model without the final classification head.
    # (Fixed: identity comparison `is None` instead of `== None`, per PEP 8.)
    if INTERPRETER is None:
        loadModel(False)

    # Reshape input tensor to match the batch size.
    INTERPRETER.resize_tensor_input(INPUT_LAYER_INDEX, [len(sample), *sample[0].shape])
    INTERPRETER.allocate_tensors()

    # Extract feature embeddings
    INTERPRETER.set_tensor(INPUT_LAYER_INDEX, np.array(sample, dtype="float32"))
    INTERPRETER.invoke()

    return INTERPRETER.get_tensor(OUTPUT_LAYER_INDEX)
utils.py
ADDED
|
@@ -0,0 +1,335 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Module containing common function.
|
| 2 |
+
"""
|
| 3 |
+
import os
|
| 4 |
+
import traceback
|
| 5 |
+
import numpy as np
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
import config as cfg
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def collect_audio_files(path: str):
    """Collects all audio files in the given directory.

    Walks the directory tree and gathers every non-hidden file whose
    extension is in cfg.ALLOWED_FILETYPES.

    Args:
        path: The directory to be searched.

    Returns:
        A sorted list of all audio files in the directory.
    """
    found = [
        os.path.join(root, fname)
        for root, _, fnames in os.walk(path)
        for fname in fnames
        # Skip hidden files and anything without an allowed extension.
        if not fname.startswith(".") and fname.rsplit(".", 1)[-1].lower() in cfg.ALLOWED_FILETYPES
    ]

    return sorted(found)
def readLines(path: str):
    """Reads the lines of a file into a list.

    Opens the file and reads its contents into a list, one entry per line.
    It is expected to have one line for each species or label.

    Args:
        path: Absolute path to the species file. An empty/falsy path yields
            an empty list.

    Returns:
        A list of all lines (species) inside the file.
    """
    if not path:
        return []
    return Path(path).read_text(encoding="utf-8").splitlines()
def list_subdirectories(path: str):
    """Lists all directories inside a path.

    Retrieves all the subdirectories in a given path without recursion.

    Args:
        path: Directory to be searched.

    Returns:
        A filter sequence of the names of all direct subdirectories.
    """
    def _is_subdir(entry):
        # Entries from os.listdir are relative, so join with the parent.
        return os.path.isdir(os.path.join(path, entry))

    return filter(_is_subdir, os.listdir(path))
def random_split(x, y, val_ratio=0.2):
    """Splits the data into training and validation data.

    Makes sure that each class is represented in both sets.

    Args:
        x: Samples.
        y: One-hot labels.
        val_ratio: The ratio of validation data.

    Returns:
        A tuple of (x_train, y_train, x_val, y_val).
    """

    # Set numpy random seed so the split is reproducible.
    np.random.seed(cfg.RANDOM_SEED)

    # Get number of classes (one column per class).
    num_classes = y.shape[1]

    # Initialize training and validation data
    x_train, y_train, x_val, y_val = [], [], [], []

    # Split data per class so every class appears in the training set.
    for i in range(num_classes):

        # Get indices of current class
        indices = np.where(y[:, i] == 1)[0]

        # Get number of samples for each set.
        # A class with a single sample goes entirely to training
        # (num_samples_train is at least 1), leaving its validation set empty.
        num_samples = len(indices)
        num_samples_train = max(1, int(num_samples * (1 - val_ratio)))
        num_samples_val = max(0, num_samples - num_samples_train)

        # Randomly choose samples for training and validation
        np.random.shuffle(indices)
        train_indices = indices[:num_samples_train]
        val_indices = indices[num_samples_train:num_samples_train + num_samples_val]

        # Append samples to training and validation data.
        # NOTE(review): a row with 1 in several columns would be collected once
        # per positive class and therefore duplicated — assumes strictly
        # single-label one-hot input; confirm with callers.
        x_train.append(x[train_indices])
        y_train.append(y[train_indices])
        x_val.append(x[val_indices])
        y_val.append(y[val_indices])

    # Concatenate per-class chunks into flat arrays.
    x_train = np.concatenate(x_train)
    y_train = np.concatenate(y_train)
    x_val = np.concatenate(x_val)
    y_val = np.concatenate(y_val)

    # Shuffle data so classes are no longer grouped together.
    indices = np.arange(len(x_train))
    np.random.shuffle(indices)
    x_train = x_train[indices]
    y_train = y_train[indices]

    indices = np.arange(len(x_val))
    np.random.shuffle(indices)
    x_val = x_val[indices]
    y_val = y_val[indices]

    return x_train, y_train, x_val, y_val
def mixup(x, y, augmentation_ratio=0.25, alpha=0.2):
    """Apply mixup to the given data.

    Mixup is a data augmentation technique that generates new samples by
    mixing two samples and their labels. Mixed samples replace the first
    sample of each chosen pair in place.

    Args:
        x: Samples (modified in place).
        y: One-hot labels (modified in place).
        augmentation_ratio: The ratio of augmented samples.
        alpha: The beta distribution parameter.

    Returns:
        Augmented data (the same arrays as *x* and *y*).
    """
    # Fixed: with fewer than two samples the partner-search loop below could
    # never terminate, so bail out early in that degenerate case.
    if len(x) < 2:
        return x, y

    # Calculate the number of samples to augment based on the ratio
    num_samples_to_augment = int(len(x) * augmentation_ratio)

    for _ in range(num_samples_to_augment):
        # Randomly choose one instance from the dataset
        index = np.random.choice(len(x))
        x1, y1 = x[index], y[index]

        # Randomly choose a different instance from the dataset
        second_index = np.random.choice(len(x))
        while second_index == index:
            second_index = np.random.choice(len(x))
        x2, y2 = x[second_index], y[second_index]

        # Generate a random mixing coefficient (lambda)
        lambda_ = np.random.beta(alpha, alpha)

        # Mix the embeddings and labels
        mixed_x = lambda_ * x1 + (1 - lambda_) * x2
        mixed_y = lambda_ * y1 + (1 - lambda_) * y2

        # Replace one of the original samples and labels with the augmented pair
        x[index] = mixed_x
        y[index] = mixed_y

    return x, y
def label_smoothing(y, alpha=0.1):
    """Applies label smoothing to one-hot labels in place.

    Args:
        y: One-hot labels (modified in place).
        alpha: The smoothing strength.

    Returns:
        The smoothed labels (the same array as *y*).
    """

    # Subtract alpha from correct label when it is >0
    y[y > 0] -= alpha

    # Assigned alpha to all other labels.
    # NOTE(review): the divisor is y.shape[0] (number of rows), not the number
    # of classes (y.shape[1]) as in the usual label-smoothing formulation —
    # confirm this is intended before changing.
    y[y == 0] = alpha / y.shape[0]

    return y
def upsampling(x, y, ratio=0.5, mode="repeat"):
    """Balance data through upsampling.

    We upsample minority classes to have at least ``ratio`` times the sample
    count of the majority class (e.g. ratio=0.1 means at least 10%).

    Args:
        x: Samples.
        y: One-hot labels.
        ratio: The minimum ratio of minority to majority samples.
        mode: The upsampling mode. Either 'repeat', 'mean' or 'smote'.

    Returns:
        Upsampled data as (x, y).
    """

    # Set numpy random seed so upsampling is reproducible.
    np.random.seed(cfg.RANDOM_SEED)

    # Determine min number of samples each class should reach.
    min_samples = int(np.max(y.sum(axis=0)) * ratio)

    # Accumulates the synthesized samples across ALL classes.
    x_temp = []
    y_temp = []

    for i in range(y.shape[1]):
        # Fixed: samples synthesized for the current class are counted
        # separately. Previously 'repeat' let one class's additions count
        # towards other classes' quotas, while 'mean' and 'smote' reset the
        # shared lists per class and discarded earlier classes' samples.
        added_x = []
        added_y = []

        if mode == "repeat":
            # Randomly repeat existing samples of the minority class.
            while y[:, i].sum() + len(added_y) < min_samples:
                random_index = np.random.choice(np.where(y[:, i] == 1)[0])
                added_x.append(x[random_index])
                added_y.append(y[random_index])

        elif mode == "mean":
            # Select two random samples of the class and use their mean.
            while y[:, i].sum() + len(added_y) < min_samples:
                random_indices = np.random.choice(np.where(y[:, i] == 1)[0], 2)
                mean = np.mean(x[random_indices], axis=0)
                added_x.append(mean)
                added_y.append(y[random_indices[0]])

        elif mode == "smote":
            # SMOTE: interpolate between a sample and one of its k nearest
            # neighbors (nearest by Euclidean distance over the whole set,
            # excluding the sample itself).
            while y[:, i].sum() + len(added_y) < min_samples:
                random_index = np.random.choice(np.where(y[:, i] == 1)[0])

                k = 5
                distances = np.sqrt(np.sum((x - x[random_index]) ** 2, axis=1))
                indices = np.argsort(distances)[1:k + 1]

                random_neighbor = np.random.choice(indices)
                diff = x[random_neighbor] - x[random_index]
                weight = np.random.uniform(0, 1)

                added_x.append(x[random_index] + weight * diff)
                added_y.append(y[random_index])

        x_temp.extend(added_x)
        y_temp.extend(added_y)

    # Append the synthesized samples to the original data.
    if len(x_temp) > 0:
        x = np.vstack((x, np.array(x_temp)))
        y = np.vstack((y, np.array(y_temp)))

    # Shuffle data
    indices = np.arange(len(x))
    np.random.shuffle(indices)
    x = x[indices]
    y = y[indices]

    return x, y
def saveToCache(cache_file: str, x_train: np.ndarray, y_train: np.ndarray, labels: list[str]):
    """Saves the training data to a compressed cache file.

    Args:
        cache_file: The path to the cache file.
        x_train: The training samples.
        y_train: The training labels.
        labels: The list of labels.
    """
    # Make sure the cache directory exists before writing.
    cache_dir = os.path.dirname(cache_file)
    os.makedirs(cache_dir, exist_ok=True)

    # Store everything in one compressed .npz archive.
    np.savez_compressed(cache_file, x_train=x_train, y_train=y_train, labels=labels)
def loadFromCache(cache_file: str):
    """Loads the training data from a cache file.

    Args:
        cache_file: The path to the cache file.

    Returns:
        A tuple of (x_train, y_train, labels).
    """
    # allow_pickle is required because the labels array holds Python strings.
    cache = np.load(cache_file, allow_pickle=True)

    return cache["x_train"], cache["y_train"], cache["labels"]
def clearErrorLog():
    """Removes the configured error log file if it exists.

    For debugging purposes.
    """
    log_path = cfg.ERROR_LOG_FILE
    # Only remove regular files; a missing path is fine.
    if os.path.isfile(log_path):
        os.remove(log_path)
def writeErrorLog(ex: Exception):
    """Writes an exception to the error log.

    Formats the stacktrace and appends it to the error log file configured
    in the config.

    Args:
        ex: An exception that occurred.
    """
    # Render the full traceback of the exception as text.
    formatted = "".join(traceback.TracebackException.from_exception(ex).format())

    # Append to the log so earlier errors are preserved.
    with open(cfg.ERROR_LOG_FILE, "a") as elog:
        elog.write(formatted + "\n")