Spaces:
Sleeping
Sleeping
Commit ·
bab1cc1
1
Parent(s): 13cbb3a
Upload 4 files
Browse files
audio.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Module containing audio helper functions.
|
| 2 |
+
"""
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
import config as cfg
|
| 6 |
+
|
| 7 |
+
RANDOM = np.random.RandomState(cfg.RANDOM_SEED)
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def openAudioFile(path: str, sample_rate=48000, offset=0.0, duration=None):
    """Load an audio file as a mono time series.

    Decoding is delegated to librosa (which in turn uses ffmpeg or libav),
    and the recording is resampled to the requested rate.

    Args:
        path: Path to the audio file.
        sample_rate: Target sample rate for resampling.
        offset: Position (in seconds) at which reading starts.
        duration: Maximum number of seconds to load, or None for everything.

    Returns:
        Tuple of (audio time series, sampling rate).
    """
    # Imported lazily so the module loads even when librosa is absent.
    import librosa

    signal, rate_out = librosa.load(
        path,
        sr=sample_rate,
        offset=offset,
        duration=duration,
        mono=True,
        res_type="kaiser_fast",
    )

    return signal, rate_out
| 32 |
+
def get_sample_rate(path: str):
    """Return the native sample rate of an audio file (no full decode)."""
    import librosa

    native_rate = librosa.get_samplerate(path)
    return native_rate
| 37 |
+
def saveSignal(sig, fname: str, rate=48000):
    """Saves a signal to file as 16-bit PCM.

    Args:
        sig: The signal to be saved.
        fname: The file path.
        rate: Sample rate to write with. Defaults to 48000 (the model rate),
            which preserves the previous hard-coded behavior; callers that
            resampled to a different rate can now pass it explicitly.
    """
    import soundfile as sf

    sf.write(fname, sig, rate, "PCM_16")
| 49 |
+
def noise(sig, shape, amount=None):
|
| 50 |
+
"""Creates noise.
|
| 51 |
+
|
| 52 |
+
Creates a noise vector with the given shape.
|
| 53 |
+
|
| 54 |
+
Args:
|
| 55 |
+
sig: The original audio signal.
|
| 56 |
+
shape: Shape of the noise.
|
| 57 |
+
amount: The noise intensity.
|
| 58 |
+
|
| 59 |
+
Returns:
|
| 60 |
+
An numpy array of noise with the given shape.
|
| 61 |
+
"""
|
| 62 |
+
# Random noise intensity
|
| 63 |
+
if amount == None:
|
| 64 |
+
amount = RANDOM.uniform(0.1, 0.5)
|
| 65 |
+
|
| 66 |
+
# Create Gaussian noise
|
| 67 |
+
try:
|
| 68 |
+
noise = RANDOM.normal(min(sig) * amount, max(sig) * amount, shape)
|
| 69 |
+
except:
|
| 70 |
+
noise = np.zeros(shape)
|
| 71 |
+
|
| 72 |
+
return noise.astype("float32")
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def splitSignal(sig, rate, seconds, overlap, minlen):
    """Split signal into (possibly overlapping) chunks.

    Args:
        sig: The original signal to be split.
        rate: The sampling rate.
        seconds: The duration of a segment.
        overlap: The overlapping seconds of segments; must be < seconds.
        minlen: Minimum length (in seconds) of the final split.

    Returns:
        A list of splits, each padded with noise to the full segment length.

    Raises:
        ValueError: If overlap >= seconds. Previously a zero step crashed
            with a cryptic range() error and a negative step silently
            returned an empty list.
    """
    step = int((seconds - overlap) * rate)
    if step <= 0:
        raise ValueError("overlap must be smaller than seconds")

    chunk_len = int(seconds * rate)
    sig_splits = []

    for i in range(0, len(sig), step):
        split = sig[i : i + chunk_len]

        # End of signal? Drop a trailing remainder that is shorter than minlen.
        if len(split) < int(minlen * rate) and len(sig_splits) > 0:
            break

        # Signal chunk too short? Pad it with noise up to the full length.
        if len(split) < chunk_len:
            split = np.hstack((split, noise(split, (chunk_len - len(split)), 0.5)))

        sig_splits.append(split)

    return sig_splits
| 106 |
+
def cropCenter(sig, rate, seconds):
    """Crop or pad a signal to exactly `seconds` around its center.

    Args:
        sig: The original signal.
        rate: The sampling rate.
        seconds: The target length in seconds.

    Returns:
        The center-cropped signal, or the signal padded with noise when it
        is shorter than the target length.
    """
    target = int(seconds * rate)

    if len(sig) > target:
        # Too long: keep the centered window of `target` samples.
        begin = (len(sig) - target) // 2
        sig = sig[begin : begin + target]
    elif len(sig) < target:
        # Too short: pad the tail with noise up to the target length.
        sig = np.hstack((sig, noise(sig, (target - len(sig)), 0.5)))

    return sig
|
config.py
ADDED
|
@@ -0,0 +1,269 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#################
# Misc settings #
#################

# Random seed for gaussian noise
RANDOM_SEED = 42

##########################
# Model paths and config #
##########################

# NOTE(review): the "VESION" typo is preserved on purpose — other modules
# read cfg.MODEL_VESION, so renaming it would break them.
MODEL_VESION = 'V2.4'
PB_MODEL = 'checkpoints/V2.4/BirdNET_GLOBAL_6K_V2.4_Model'
# MODEL_PATH = PB_MODEL # This will load the protobuf model
MODEL_PATH = 'checkpoints/V2.4/BirdNET_GLOBAL_6K_V2.4_Model_FP32.tflite'
MDATA_MODEL_PATH = 'checkpoints/V2.4/BirdNET_GLOBAL_6K_V2.4_MData_Model_FP16.tflite'
LABELS_FILE = 'checkpoints/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels.txt'
TRANSLATED_LABELS_PATH = 'labels/V2.4'

# Path to custom trained classifier
# If None, no custom classifier will be used
# Make sure to set the LABELS_FILE above accordingly
CUSTOM_CLASSIFIER = None

##################
# Audio settings #
##################

# We use a sample rate of 48kHz, so the model input size is
# (batch size, 48000 kHz * 3 seconds) = (1, 144000)
# Recordings will be resampled automatically.
SAMPLE_RATE: int = 48000

# We're using 3-second chunks
SIG_LENGTH: float = 3.0

# Define overlap between consecutive chunks <3.0; 0 = no overlap
SIG_OVERLAP: float = 0

# Define minimum length of audio chunk for prediction;
# shorter chunks are padded up to SIG_LENGTH (the audio helpers pad with
# low-level noise, not zeros).
SIG_MINLEN: float = 1.0

# Frequency range. This is model specific and should not be changed.
SIG_FMIN = 0
SIG_FMAX = 15000

#####################
# Metadata settings #
#####################

# Location and week-of-year fed to the species range model; -1 = unset.
LATITUDE = -1
LONGITUDE = -1
WEEK = -1
LOCATION_FILTER_THRESHOLD = 0.03

######################
# Inference settings #
######################

# If None or empty file, no custom species list will be used
# Note: Entries in this list have to match entries from the LABELS_FILE
# We use the 2021 eBird taxonomy for species names (Clements list)
CODES_FILE = 'eBird_taxonomy_codes_2021E.json'
SPECIES_LIST_FILE = 'example/species_list.txt'

# File input path and output path for selection tables
INPUT_PATH: str = 'example/'
OUTPUT_PATH: str = 'example/'

# File extensions accepted when scanning input folders.
ALLOWED_FILETYPES = ['wav', 'flac', 'mp3', 'ogg', 'm4a']

# Number of threads to use for inference.
# Can be as high as number of CPUs in your system
CPU_THREADS: int = 8
TFLITE_THREADS: int = 1

# False will output logits, True will convert to sigmoid activations
APPLY_SIGMOID: bool = True
SIGMOID_SENSITIVITY: float = 1.0

# Minimum confidence score to include in selection table
# (be aware: if APPLY_SIGMOID = False, this no longer represents
# probabilities and needs to be adjusted)
MIN_CONFIDENCE: float = 0.1

# Number of samples to process at the same time. Higher values can increase
# processing speed, but will also increase memory usage.
# Might only be useful for GPU inference.
BATCH_SIZE: int = 1

# Specifies the output format. 'table' denotes a Raven selection table,
# 'audacity' denotes a TXT file with the same format as Audacity timeline labels
# 'csv' denotes a CSV file with start, end, species and confidence.
RESULT_TYPE = 'table'

#####################
# Training settings #
#####################

# Training data path
TRAIN_DATA_PATH = 'train_data/'

# Sample crop mode
SAMPLE_CROP_MODE = 'center'

# List of non-event classes
NON_EVENT_CLASSES = ["noise", "other", "background", "silence"]

# Upsampling settings
UPSAMPLING_RATIO = 0.0
UPSAMPLING_MODE = 'repeat'

# Number of epochs to train for
TRAIN_EPOCHS: int = 100

# Batch size for training
TRAIN_BATCH_SIZE: int = 32

# Validation split (fraction of the training data held out)
TRAIN_VAL_SPLIT: float = 0.2

# Learning rate for training
TRAIN_LEARNING_RATE: float = 0.01

# Number of hidden units in custom classifier
# If >0, a two-layer classifier will be trained
TRAIN_HIDDEN_UNITS: int = 0

# Dropout rate for training
TRAIN_DROPOUT: float = 0.0

# Whether to use mixup for training
TRAIN_WITH_MIXUP: bool = False

# Whether to apply label smoothing for training
TRAIN_WITH_LABEL_SMOOTHING: bool = False

# Model output format
TRAINED_MODEL_OUTPUT_FORMAT = 'tflite'

# Cache settings
TRAIN_CACHE_MODE = 'none'
TRAIN_CACHE_FILE = 'train_cache.npz'

#####################
# Misc runtime vars #
#####################
# Mutable state populated at runtime by other modules.
CODES = {}
LABELS: list[str] = []
TRANSLATED_LABELS: list[str] = []
SPECIES_LIST: list[str] = []
ERROR_LOG_FILE: str = 'error_log.txt'
FILE_LIST = []
FILE_STORAGE_PATH = ''

######################
# Get and set config #
######################
| 161 |
+
def getConfig():
    """Return a snapshot of the tunable configuration values as a dict.

    The dict uses the module-level variable names as keys and can be fed
    back into setConfig() to restore the state (e.g. in worker processes).
    """
    # Same keys, same order as the original hand-written dict literal.
    keys = (
        'RANDOM_SEED',
        'MODEL_PATH',
        'MDATA_MODEL_PATH',
        'LABELS_FILE',
        'CUSTOM_CLASSIFIER',
        'SAMPLE_RATE',
        'SIG_LENGTH',
        'SIG_OVERLAP',
        'SIG_MINLEN',
        'LATITUDE',
        'LONGITUDE',
        'WEEK',
        'LOCATION_FILTER_THRESHOLD',
        'CODES_FILE',
        'SPECIES_LIST_FILE',
        'INPUT_PATH',
        'OUTPUT_PATH',
        'CPU_THREADS',
        'TFLITE_THREADS',
        'APPLY_SIGMOID',
        'SIGMOID_SENSITIVITY',
        'MIN_CONFIDENCE',
        'BATCH_SIZE',
        'RESULT_TYPE',
        'TRAIN_DATA_PATH',
        'TRAIN_EPOCHS',
        'TRAIN_BATCH_SIZE',
        'TRAIN_LEARNING_RATE',
        'TRAIN_HIDDEN_UNITS',
        'CODES',
        'LABELS',
        'TRANSLATED_LABELS',
        'SPECIES_LIST',
        'ERROR_LOG_FILE',
    )
    return {key: globals()[key] for key in keys}
|
| 199 |
+
def setConfig(c):
    """Overwrite the module-level configuration from a dict.

    Args:
        c: Mapping produced by getConfig(); must contain every key below.

    Raises:
        KeyError: If any expected key is missing from `c`.
    """
    # Writing through globals() replaces the long chain of `global`
    # declarations; the same module attributes are assigned in the same order.
    for key in (
        'RANDOM_SEED',
        'MODEL_PATH',
        'MDATA_MODEL_PATH',
        'LABELS_FILE',
        'CUSTOM_CLASSIFIER',
        'SAMPLE_RATE',
        'SIG_LENGTH',
        'SIG_OVERLAP',
        'SIG_MINLEN',
        'LATITUDE',
        'LONGITUDE',
        'WEEK',
        'LOCATION_FILTER_THRESHOLD',
        'CODES_FILE',
        'SPECIES_LIST_FILE',
        'INPUT_PATH',
        'OUTPUT_PATH',
        'CPU_THREADS',
        'TFLITE_THREADS',
        'APPLY_SIGMOID',
        'SIGMOID_SENSITIVITY',
        'MIN_CONFIDENCE',
        'BATCH_SIZE',
        'RESULT_TYPE',
        'TRAIN_DATA_PATH',
        'TRAIN_EPOCHS',
        'TRAIN_BATCH_SIZE',
        'TRAIN_LEARNING_RATE',
        'TRAIN_HIDDEN_UNITS',
        'CODES',
        'LABELS',
        'TRANSLATED_LABELS',
        'SPECIES_LIST',
        'ERROR_LOG_FILE',
    ):
        globals()[key] = c[key]
|
model.py
ADDED
|
@@ -0,0 +1,505 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Contains functions to use the BirdNET models.
|
| 2 |
+
"""
|
| 3 |
+
import os
|
| 4 |
+
import warnings
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
|
| 8 |
+
import config as cfg
|
| 9 |
+
import utils
|
| 10 |
+
|
| 11 |
+
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
| 12 |
+
os.environ["CUDA_VISIBLE_DEVICES"] = ""
|
| 13 |
+
|
| 14 |
+
warnings.filterwarnings("ignore")
|
| 15 |
+
|
| 16 |
+
# Import TFLite from runtime or Tensorflow;
|
| 17 |
+
# import Keras if protobuf model;
|
| 18 |
+
# NOTE: we have to use TFLite if we want to use
|
| 19 |
+
# the metadata model or want to extract embeddings
|
| 20 |
+
try:
|
| 21 |
+
import tflite_runtime.interpreter as tflite
|
| 22 |
+
except ModuleNotFoundError:
|
| 23 |
+
from tensorflow import lite as tflite
|
| 24 |
+
if not cfg.MODEL_PATH.endswith(".tflite"):
|
| 25 |
+
from tensorflow import keras
|
| 26 |
+
|
| 27 |
+
INTERPRETER: tflite.Interpreter = None
|
| 28 |
+
C_INTERPRETER: tflite.Interpreter = None
|
| 29 |
+
M_INTERPRETER: tflite.Interpreter = None
|
| 30 |
+
PBMODEL = None
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def loadModel(class_output=True):
    """Initializes the BirdNET Model.

    Loads either the TFLite interpreter or the protobuf model referenced by
    cfg.MODEL_PATH and caches it in module globals.

    Args:
        class_output: Omits the last layer when False, so inference yields
            feature embeddings instead of class scores (TFLite path only).
    """
    global PBMODEL
    global INTERPRETER
    global INPUT_LAYER_INDEX
    global OUTPUT_LAYER_INDEX

    # Do we have to load the tflite or protobuf model?
    if cfg.MODEL_PATH.endswith(".tflite"):
        # Load TFLite model and allocate tensors.
        INTERPRETER = tflite.Interpreter(model_path=cfg.MODEL_PATH, num_threads=cfg.TFLITE_THREADS)
        INTERPRETER.allocate_tensors()

        # Get input and output tensors.
        input_details = INTERPRETER.get_input_details()
        output_details = INTERPRETER.get_output_details()

        # Get input tensor index
        INPUT_LAYER_INDEX = input_details[0]["index"]

        # Get classification output or feature embeddings
        if class_output:
            OUTPUT_LAYER_INDEX = output_details[0]["index"]
        else:
            # NOTE(review): assumes the embeddings tensor directly precedes
            # the classification output in the TFLite graph — confirm when
            # swapping in a different model file.
            OUTPUT_LAYER_INDEX = output_details[0]["index"] - 1

    else:
        # Import locally so this branch also works when cfg.MODEL_PATH was
        # switched to the protobuf model *after* module import (the
        # module-level keras import only happens when the configured model
        # is not TFLite, so `keras` could otherwise be undefined here).
        from tensorflow import keras

        # Load protobuf model
        # Note: This will throw a bunch of warnings about custom gradients
        # which we will ignore until TF lets us block them
        PBMODEL = keras.models.load_model(cfg.MODEL_PATH, compile=False)
|
| 70 |
+
def loadCustomClassifier():
    """Load the user-trained classifier referenced by cfg.CUSTOM_CLASSIFIER."""
    global C_INTERPRETER
    global C_INPUT_LAYER_INDEX
    global C_OUTPUT_LAYER_INDEX
    global C_INPUT_SIZE

    # Create the interpreter and reserve its tensor buffers.
    interpreter = tflite.Interpreter(model_path=cfg.CUSTOM_CLASSIFIER, num_threads=cfg.TFLITE_THREADS)
    interpreter.allocate_tensors()
    C_INTERPRETER = interpreter

    # Inspect the graph's input/output tensor metadata.
    in_details = interpreter.get_input_details()
    out_details = interpreter.get_output_details()

    # Remember where to feed samples and how wide the input vector is.
    C_INPUT_LAYER_INDEX = in_details[0]["index"]
    C_INPUT_SIZE = in_details[0]["shape"][-1]

    # Remember where the class scores come out.
    C_OUTPUT_LAYER_INDEX = out_details[0]["index"]
+
|
| 94 |
+
def loadMetaModel():
    """Loads the model for species prediction.

    Initializes the TFLite model that predicts a species list from
    coordinates and the week of the year.
    """
    global M_INTERPRETER
    global M_INPUT_LAYER_INDEX
    global M_OUTPUT_LAYER_INDEX

    # Create the metadata-model interpreter and reserve its tensor buffers.
    meta = tflite.Interpreter(model_path=cfg.MDATA_MODEL_PATH, num_threads=cfg.TFLITE_THREADS)
    meta.allocate_tensors()
    M_INTERPRETER = meta

    # Look up where inputs go in and predictions come out.
    in_details = meta.get_input_details()
    out_details = meta.get_output_details()

    M_INPUT_LAYER_INDEX = in_details[0]["index"]
    M_OUTPUT_LAYER_INDEX = out_details[0]["index"]
+
|
| 116 |
+
def buildLinearClassifier(num_labels, input_size, hidden_units=0, dropout=0.0):
    """Build a one- or two-layer linear classifier on top of embeddings.

    Args:
        num_labels: Output size (number of classes).
        input_size: Width of the embedding vector fed into the classifier.
        hidden_units: If > 0, adds a hidden dense (relu) layer with that
            many units, turning this into a two-layer classifier.
        dropout: If > 0, inserts a dropout layer with this rate before each
            dense layer.

    Returns:
        An uncompiled keras.Sequential model ending in sigmoid activations.
    """
    from tensorflow import keras

    # Assemble the layer stack first, then feed it into Sequential.
    stack = [keras.layers.InputLayer(input_shape=(input_size,))]

    # Optional hidden layer (preceded by dropout when requested).
    if hidden_units > 0:
        if dropout > 0:
            stack.append(keras.layers.Dropout(dropout))
        stack.append(keras.layers.Dense(hidden_units, activation="relu"))

    # Dropout in front of the classification head when requested.
    if dropout > 0:
        stack.append(keras.layers.Dropout(dropout))

    # Classification head with per-class sigmoid activations.
    stack.append(keras.layers.Dense(num_labels))
    stack.append(keras.layers.Activation("sigmoid"))

    model = keras.Sequential()
    for layer in stack:
        model.add(layer)

    return model
| 155 |
+
|
| 156 |
+
def trainLinearClassifier(classifier,
                          x_train,
                          y_train,
                          epochs,
                          batch_size,
                          learning_rate,
                          val_split,
                          upsampling_ratio,
                          upsampling_mode,
                          train_with_mixup,
                          train_with_label_smoothing,
                          on_epoch_end=None):
    """Trains a custom classifier.

    Trains a new classifier for BirdNET based on the given data: the data is
    shuffled, split into train/validation sets, optionally upsampled and
    augmented, then fitted with early stopping and a cosine-decay
    learning-rate schedule.

    Args:
        classifier: The classifier to be trained.
        x_train: Samples.
        y_train: Labels.
        epochs: Number of epochs to train.
        batch_size: Batch size.
        learning_rate: The learning rate during training.
        val_split: Fraction of the data held out for validation.
        upsampling_ratio: If > 0, training data is upsampled via
            utils.upsampling towards this ratio.
        upsampling_mode: Upsampling strategy forwarded to utils.upsampling.
        train_with_mixup: Apply mixup augmentation (utils.mixup) when True.
        train_with_label_smoothing: Smooth the labels
            (utils.label_smoothing) when True.
        on_epoch_end: Optional callback `function(epoch, logs)`.

    Returns:
        (classifier, history)
    """
    # import keras
    from tensorflow import keras

    # Adapter that forwards keras epoch-end events to the plain callable.
    class FunctionCallback(keras.callbacks.Callback):
        def __init__(self, on_epoch_end=None) -> None:
            super().__init__()
            self.on_epoch_end_fn = on_epoch_end

        def on_epoch_end(self, epoch, logs=None):
            if self.on_epoch_end_fn:
                self.on_epoch_end_fn(epoch, logs)

    # Set random seed so the shuffle/split below is reproducible.
    np.random.seed(cfg.RANDOM_SEED)

    # Shuffle data
    idx = np.arange(x_train.shape[0])
    np.random.shuffle(idx)
    x_train = x_train[idx]
    y_train = y_train[idx]

    # Random val split
    x_train, y_train, x_val, y_val = utils.random_split(x_train, y_train, val_split)
    print(f"Training on {x_train.shape[0]} samples, validating on {x_val.shape[0]} samples.", flush=True)

    # Upsample training data
    if upsampling_ratio > 0:
        x_train, y_train = utils.upsampling(x_train, y_train, upsampling_ratio, upsampling_mode)
        print(f"Upsampled training data to {x_train.shape[0]} samples.", flush=True)

    # Apply mixup to training data
    if train_with_mixup:
        x_train, y_train = utils.mixup(x_train, y_train)

    # Apply label smoothing
    if train_with_label_smoothing:
        y_train = utils.label_smoothing(y_train)

    # Early stopping (only armed after the first quarter of the epochs).
    callbacks = [
        keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=5, verbose=1, start_from_epoch=epochs // 4, restore_best_weights=True
        ),
        FunctionCallback(on_epoch_end=on_epoch_end),
    ]

    # Cosine annealing lr schedule over the total number of training steps.
    lr_schedule = keras.experimental.CosineDecay(learning_rate, epochs * x_train.shape[0] / batch_size)

    # Compile model
    classifier.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr_schedule),
        loss="binary_crossentropy",
        metrics=[keras.metrics.AUC(curve="PR", multi_label=False, name="AUPRC")],
    )

    # Train model
    history = classifier.fit(
        x_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(x_val, y_val), callbacks=callbacks
    )

    return classifier, history
| 247 |
+
|
| 248 |
+
def saveLinearClassifier(classifier, model_path, labels):
    """Saves a custom classifier on the hard drive.

    Saves the classifier as a tflite model, as well as the used labels in a
    .txt file next to it.

    Args:
        classifier: The custom classifier.
        model_path: Path the model will be saved at.
        labels: List of labels used for the classifier.
    """
    import tensorflow as tf

    # Reuse the already-loaded protobuf model when available.
    saved_model = PBMODEL if PBMODEL else tf.keras.models.load_model(cfg.PB_MODEL, compile=False)

    # Remove activation layer
    classifier.pop()

    # Stack the trained head on top of the BirdNET embeddings model.
    combined_model = tf.keras.Sequential([saved_model.embeddings_model, classifier], "basic")

    # Append .tflite if necessary
    if not model_path.endswith(".tflite"):
        model_path += ".tflite"

    # Make folders
    os.makedirs(os.path.dirname(model_path), exist_ok=True)

    # Save model as tflite. Use a context manager so the file handle is
    # always closed (the original left this to the garbage collector).
    converter = tflite.TFLiteConverter.from_keras_model(combined_model)
    tflite_model = converter.convert()

    with open(model_path, "wb") as model_file:
        model_file.write(tflite_model)

    # Save labels
    with open(model_path.replace(".tflite", "_Labels.txt"), "w") as f:
        for label in labels:
            f.write(label + "\n")
| 284 |
+
|
| 285 |
+
def save_raven_model(classifier, model_path, labels):
    """Saves the custom classifier in Raven (SavedModel) format.

    Combines the BirdNET embeddings model with the custom classifier head,
    exports it as a TensorFlow SavedModel with a "basic" serving signature,
    and writes the Raven-specific metadata next to it: a label-name CSV, a
    class-definition CSV and a model_config.json.

    Args:
        classifier: The custom classifier (keras model head).
        model_path: Path the model will be saved at. A trailing ".tflite"
            suffix is stripped, since a SavedModel directory is written.
        labels: List of label strings used for the classifier.
    """
    import tensorflow as tf
    import csv
    import json

    # Reuse the already-loaded protobuf model if available, otherwise load it.
    saved_model = PBMODEL if PBMODEL else tf.keras.models.load_model(cfg.PB_MODEL, compile=False)
    combined_model = tf.keras.Sequential([saved_model.embeddings_model, classifier], "basic")

    # Make signatures: wrap the keras model in a tf.Module so we can export a
    # named serving signature with a fixed input spec (mono audio, 144000
    # samples = 3 s at 48 kHz per chunk).
    class SignatureModule(tf.Module):
        def __init__(self, keras_model):
            super().__init__()
            self.model = keras_model

        @tf.function(input_signature=[tf.TensorSpec(shape=[None, 144000], dtype=tf.float32)])
        def basic(self, inputs):
            return {"scores": self.model(inputs)}

    smodel = SignatureModule(combined_model)
    signatures = {
        "basic": smodel.basic,
    }

    # Save signature model (strip a ".tflite" suffix — len(".tflite") == 7).
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    model_path = model_path[:-7] if model_path.endswith(".tflite") else model_path
    tf.saved_model.save(smodel, model_path, signatures=signatures)

    # Save label file: short IDs built from the first 4 non-space characters
    # of each label plus a 1-based index.
    labelIds = [label[:4].replace(" ", "") + str(i) for i, label in enumerate(labels, 1)]
    labels_dir = os.path.join(model_path, "labels")

    os.makedirs(labels_dir, exist_ok=True)

    with open(os.path.join(labels_dir, "label_names.csv"), "w", newline="") as labelsfile:
        labelwriter = csv.writer(labelsfile)
        labelwriter.writerows(zip(labelIds, labels))

    # Save class names file: one row per class with a default threshold of
    # 0.25 and the configured frequency band.
    classes_dir = os.path.join(model_path, "classes")

    os.makedirs(classes_dir, exist_ok=True)

    with open(os.path.join(classes_dir, "classes.csv"), "w", newline="") as classesfile:
        classeswriter = csv.writer(classesfile)
        for labelId in labelIds:
            classeswriter.writerow((labelId, 0.25, cfg.SIG_FMIN, cfg.SIG_FMAX, False))

    # Save model config describing the exported signature for Raven.
    model_config = os.path.join(model_path, "model_config.json")
    with open(model_config, "w") as modelconfigfile:
        modelconfig = {
            "specVersion": 1,
            # NOTE(review): "MODEL_VESION" (sic) must match the attribute name
            # declared in config — confirm before renaming.
            "modelDescription": "Custom classifier trained with BirdNET "
            + cfg.MODEL_VESION
            + " embeddings.\nBirdNET was developed by the K. Lisa Yang Center for Conservation Bioacoustics at the Cornell Lab of Ornithology in collaboration with Chemnitz University of Technology.\n\nhttps://birdnet.cornell.edu",
            "modelTypeConfig": {"modelType": "RECOGNITION"},
            "signatures": [
                {
                    "signatureName": "basic",
                    "modelInputs": [{"inputName": "inputs", "sampleRate": 48000.0, "inputConfig": ["batch", "samples"]}],
                    "modelOutputs": [{"outputName": "scores", "outputType": "SCORES"}],
                }
            ],
            "globalSemanticKeys": labelIds,
        }
        json.dump(modelconfig, modelconfigfile, indent=2)
def predictFilter(lat, lon, week):
    """Predicts the probability for each species.

    Runs the metadata (location/week) model on the given coordinates.

    Args:
        lat: The latitude.
        lon: The longitude.
        week: The week of the year [1-48]. Use -1 for yearlong.

    Returns:
        A list of probabilities for all species.
    """
    global M_INTERPRETER

    # Lazily load the metadata model on first use.
    # (Fixed: identity comparison `is None` instead of `== None`, per PEP 8.)
    if M_INTERPRETER is None:
        loadMetaModel()

    # Prepare mdata as a single-row batch: [lat, lon, week].
    sample = np.expand_dims(np.array([lat, lon, week], dtype="float32"), 0)

    # Run inference
    M_INTERPRETER.set_tensor(M_INPUT_LAYER_INDEX, sample)
    M_INTERPRETER.invoke()

    # Return the scores for the single input row.
    return M_INTERPRETER.get_tensor(M_OUTPUT_LAYER_INDEX)[0]
def explore(lat: float, lon: float, week: int):
    """Predicts the species list for a location and time of year.

    Runs the location filter model and returns all species whose score
    passes the configured threshold, paired with their label.

    Args:
        lat: The latitude.
        lon: The longitude.
        week: The week of the year [1-48]. Use -1 for yearlong.

    Returns:
        A list of (score, species) tuples sorted by score, descending.
    """
    # Score every species for this location/week.
    scores = predictFilter(lat, lon, week)

    # Zero out everything below the configured threshold.
    scores = np.where(scores >= cfg.LOCATION_FILTER_THRESHOLD, scores, 0)

    # Pair each score with its label and order by score, highest first.
    pairs = list(zip(scores, cfg.LABELS))
    pairs.sort(key=lambda pair: pair[0], reverse=True)

    return pairs
def flat_sigmoid(x, sensitivity=-1):
    """Applies a sigmoid with clipped input.

    The input is clipped to [-15, 15] before the sigmoid so extreme logits
    saturate instead of overflowing in the exponential.

    Args:
        x: Input value(s), scalar or array-like.
        sensitivity: Scale factor on the (negated) input; -1 gives the
            standard sigmoid.

    Returns:
        The sigmoid of the clipped input.
    """
    clipped = np.clip(x, -15, 15)
    return 1 / (1.0 + np.exp(sensitivity * clipped))
def predict(sample):
    """Uses the main net to predict a sample.

    Args:
        sample: Audio sample (a batch of equally-shaped signal chunks).

    Returns:
        The prediction scores for the sample.
    """
    # Route to the custom classifier when one is configured.
    # (Fixed: identity comparisons `is/is not None` instead of `==/!= None`.)
    if cfg.CUSTOM_CLASSIFIER is not None:
        return predictWithCustomClassifier(sample)

    global INTERPRETER

    # Lazily load the default model (TFLite interpreter or keras protobuf).
    if INTERPRETER is None and PBMODEL is None:
        loadModel()

    if PBMODEL is None:
        # TFLite path: resize the input tensor to the batch size first.
        INTERPRETER.resize_tensor_input(INPUT_LAYER_INDEX, [len(sample), *sample[0].shape])
        INTERPRETER.allocate_tensors()

        # Make a prediction (Audio only for now)
        INTERPRETER.set_tensor(INPUT_LAYER_INDEX, np.array(sample, dtype="float32"))
        INTERPRETER.invoke()

        return INTERPRETER.get_tensor(OUTPUT_LAYER_INDEX)

    # Keras protobuf path.
    # NOTE(review): this calls PBMODEL.embeddings_model, which by its name
    # yields embeddings rather than class scores — confirm this is intended.
    return PBMODEL.embeddings_model.predict(sample)
def predictWithCustomClassifier(sample):
    """Uses the custom classifier to make a prediction.

    Args:
        sample: Audio sample.

    Returns:
        The prediction scores for the sample.
    """
    global C_INTERPRETER
    global C_INPUT_SIZE

    # Lazily load the custom classifier on first use.
    # (Fixed: identity comparison `is None` instead of `== None`, per PEP 8.)
    if C_INTERPRETER is None:
        loadCustomClassifier()

    # Classifiers whose input size is not 144000 raw samples operate on
    # feature embeddings instead of raw audio.
    vector = embeddings(sample) if C_INPUT_SIZE != 144000 else sample

    # Reshape input tensor to match the batch size.
    C_INTERPRETER.resize_tensor_input(C_INPUT_LAYER_INDEX, [len(vector), *vector[0].shape])
    C_INTERPRETER.allocate_tensors()

    # Make a prediction
    C_INTERPRETER.set_tensor(C_INPUT_LAYER_INDEX, np.array(vector, dtype="float32"))
    C_INTERPRETER.invoke()

    return C_INTERPRETER.get_tensor(C_OUTPUT_LAYER_INDEX)
def embeddings(sample):
    """Extracts the feature embeddings for a sample.

    Args:
        sample: Audio samples.

    Returns:
        The embeddings.
    """
    global INTERPRETER

    # Lazily load the model without the final classification head.
    # (Fixed: identity comparison `is None` instead of `== None`, per PEP 8.)
    if INTERPRETER is None:
        loadModel(False)

    # Reshape input tensor to match the batch size.
    INTERPRETER.resize_tensor_input(INPUT_LAYER_INDEX, [len(sample), *sample[0].shape])
    INTERPRETER.allocate_tensors()

    # Extract feature embeddings
    INTERPRETER.set_tensor(INPUT_LAYER_INDEX, np.array(sample, dtype="float32"))
    INTERPRETER.invoke()

    return INTERPRETER.get_tensor(OUTPUT_LAYER_INDEX)
utils.py
ADDED
|
@@ -0,0 +1,335 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Module containing common function.
|
| 2 |
+
"""
|
| 3 |
+
import os
|
| 4 |
+
import traceback
|
| 5 |
+
import numpy as np
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
import config as cfg
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def collect_audio_files(path: str):
    """Collects all audio files in the given directory.

    Walks the directory tree and gathers every non-hidden file whose
    extension is in cfg.ALLOWED_FILETYPES.

    Args:
        path: The directory to be searched.

    Returns:
        A sorted list of all audio files in the directory.
    """
    found = [
        os.path.join(root, fname)
        for root, _, fnames in os.walk(path)
        for fname in fnames
        # Skip hidden files and anything without an allowed extension.
        if not fname.startswith(".") and fname.rsplit(".", 1)[-1].lower() in cfg.ALLOWED_FILETYPES
    ]

    return sorted(found)
def readLines(path: str):
    """Reads the lines of a file into a list.

    Opens the file and reads its contents into a list, one entry per line.
    It is expected to have one line for each species or label.

    Args:
        path: Absolute path to the species file. An empty/falsy path yields
            an empty list.

    Returns:
        A list of all lines (species) inside the file.
    """
    if not path:
        return []
    return Path(path).read_text(encoding="utf-8").splitlines()
def list_subdirectories(path: str):
    """Lists all directories inside a path.

    Retrieves all the subdirectories in a given path without recursion.

    Args:
        path: Directory to be searched.

    Returns:
        A filter sequence of the names of all direct subdirectories.
    """
    def _is_subdir(entry):
        # Entries from os.listdir are relative, so join with the parent.
        return os.path.isdir(os.path.join(path, entry))

    return filter(_is_subdir, os.listdir(path))
def random_split(x, y, val_ratio=0.2):
    """Splits the data into training and validation data.

    Makes sure that each class is represented in both sets.

    Args:
        x: Samples.
        y: One-hot labels.
        val_ratio: The ratio of validation data.

    Returns:
        A tuple of (x_train, y_train, x_val, y_val).
    """

    # Set numpy random seed so the split is reproducible.
    np.random.seed(cfg.RANDOM_SEED)

    # Get number of classes (one column per class).
    num_classes = y.shape[1]

    # Initialize training and validation data
    x_train, y_train, x_val, y_val = [], [], [], []

    # Split data per class so every class appears in the training set.
    for i in range(num_classes):

        # Get indices of current class
        indices = np.where(y[:, i] == 1)[0]

        # Get number of samples for each set.
        # A class with a single sample goes entirely to training
        # (num_samples_train is at least 1), leaving its validation set empty.
        num_samples = len(indices)
        num_samples_train = max(1, int(num_samples * (1 - val_ratio)))
        num_samples_val = max(0, num_samples - num_samples_train)

        # Randomly choose samples for training and validation
        np.random.shuffle(indices)
        train_indices = indices[:num_samples_train]
        val_indices = indices[num_samples_train:num_samples_train + num_samples_val]

        # Append samples to training and validation data.
        # NOTE(review): a row with 1 in several columns would be collected once
        # per positive class and therefore duplicated — assumes strictly
        # single-label one-hot input; confirm with callers.
        x_train.append(x[train_indices])
        y_train.append(y[train_indices])
        x_val.append(x[val_indices])
        y_val.append(y[val_indices])

    # Concatenate per-class chunks into flat arrays.
    x_train = np.concatenate(x_train)
    y_train = np.concatenate(y_train)
    x_val = np.concatenate(x_val)
    y_val = np.concatenate(y_val)

    # Shuffle data so classes are no longer grouped together.
    indices = np.arange(len(x_train))
    np.random.shuffle(indices)
    x_train = x_train[indices]
    y_train = y_train[indices]

    indices = np.arange(len(x_val))
    np.random.shuffle(indices)
    x_val = x_val[indices]
    y_val = y_val[indices]

    return x_train, y_train, x_val, y_val
def mixup(x, y, augmentation_ratio=0.25, alpha=0.2):
    """Apply mixup to the given data.

    Mixup is a data augmentation technique that generates new samples by
    mixing two samples and their labels. Mixed samples replace the first
    sample of each chosen pair in place.

    Args:
        x: Samples (modified in place).
        y: One-hot labels (modified in place).
        augmentation_ratio: The ratio of augmented samples.
        alpha: The beta distribution parameter.

    Returns:
        Augmented data (the same arrays as *x* and *y*).
    """
    # Fixed: with fewer than two samples the partner-search loop below could
    # never terminate, so bail out early in that degenerate case.
    if len(x) < 2:
        return x, y

    # Calculate the number of samples to augment based on the ratio
    num_samples_to_augment = int(len(x) * augmentation_ratio)

    for _ in range(num_samples_to_augment):
        # Randomly choose one instance from the dataset
        index = np.random.choice(len(x))
        x1, y1 = x[index], y[index]

        # Randomly choose a different instance from the dataset
        second_index = np.random.choice(len(x))
        while second_index == index:
            second_index = np.random.choice(len(x))
        x2, y2 = x[second_index], y[second_index]

        # Generate a random mixing coefficient (lambda)
        lambda_ = np.random.beta(alpha, alpha)

        # Mix the embeddings and labels
        mixed_x = lambda_ * x1 + (1 - lambda_) * x2
        mixed_y = lambda_ * y1 + (1 - lambda_) * y2

        # Replace one of the original samples and labels with the augmented pair
        x[index] = mixed_x
        y[index] = mixed_y

    return x, y
def label_smoothing(y, alpha=0.1):
    """Applies label smoothing to one-hot labels in place.

    Args:
        y: One-hot labels (modified in place).
        alpha: The smoothing strength.

    Returns:
        The smoothed labels (the same array as *y*).
    """

    # Subtract alpha from correct label when it is >0
    y[y > 0] -= alpha

    # Assigned alpha to all other labels.
    # NOTE(review): the divisor is y.shape[0] (number of rows), not the number
    # of classes (y.shape[1]) as in the usual label-smoothing formulation —
    # confirm this is intended before changing.
    y[y == 0] = alpha / y.shape[0]

    return y
def upsampling(x, y, ratio=0.5, mode="repeat"):
    """Balance data through upsampling.

    We upsample minority classes to have at least ``ratio`` times the sample
    count of the majority class (e.g. ratio=0.1 means at least 10%).

    Args:
        x: Samples.
        y: One-hot labels.
        ratio: The minimum ratio of minority to majority samples.
        mode: The upsampling mode. Either 'repeat', 'mean' or 'smote'.

    Returns:
        Upsampled data as (x, y).
    """

    # Set numpy random seed so upsampling is reproducible.
    np.random.seed(cfg.RANDOM_SEED)

    # Determine min number of samples each class should reach.
    min_samples = int(np.max(y.sum(axis=0)) * ratio)

    # Accumulates the synthesized samples across ALL classes.
    x_temp = []
    y_temp = []

    for i in range(y.shape[1]):
        # Fixed: samples synthesized for the current class are counted
        # separately. Previously 'repeat' let one class's additions count
        # towards other classes' quotas, while 'mean' and 'smote' reset the
        # shared lists per class and discarded earlier classes' samples.
        added_x = []
        added_y = []

        if mode == "repeat":
            # Randomly repeat existing samples of the minority class.
            while y[:, i].sum() + len(added_y) < min_samples:
                random_index = np.random.choice(np.where(y[:, i] == 1)[0])
                added_x.append(x[random_index])
                added_y.append(y[random_index])

        elif mode == "mean":
            # Select two random samples of the class and use their mean.
            while y[:, i].sum() + len(added_y) < min_samples:
                random_indices = np.random.choice(np.where(y[:, i] == 1)[0], 2)
                mean = np.mean(x[random_indices], axis=0)
                added_x.append(mean)
                added_y.append(y[random_indices[0]])

        elif mode == "smote":
            # SMOTE: interpolate between a sample and one of its k nearest
            # neighbors (nearest by Euclidean distance over the whole set,
            # excluding the sample itself).
            while y[:, i].sum() + len(added_y) < min_samples:
                random_index = np.random.choice(np.where(y[:, i] == 1)[0])

                k = 5
                distances = np.sqrt(np.sum((x - x[random_index]) ** 2, axis=1))
                indices = np.argsort(distances)[1:k + 1]

                random_neighbor = np.random.choice(indices)
                diff = x[random_neighbor] - x[random_index]
                weight = np.random.uniform(0, 1)

                added_x.append(x[random_index] + weight * diff)
                added_y.append(y[random_index])

        x_temp.extend(added_x)
        y_temp.extend(added_y)

    # Append the synthesized samples to the original data.
    if len(x_temp) > 0:
        x = np.vstack((x, np.array(x_temp)))
        y = np.vstack((y, np.array(y_temp)))

    # Shuffle data
    indices = np.arange(len(x))
    np.random.shuffle(indices)
    x = x[indices]
    y = y[indices]

    return x, y
def saveToCache(cache_file: str, x_train: np.ndarray, y_train: np.ndarray, labels: list[str]):
    """Saves the training data to a compressed cache file.

    Args:
        cache_file: The path to the cache file.
        x_train: The training samples.
        y_train: The training labels.
        labels: The list of labels.
    """
    # Make sure the cache directory exists before writing.
    cache_dir = os.path.dirname(cache_file)
    os.makedirs(cache_dir, exist_ok=True)

    # Store everything in one compressed .npz archive.
    np.savez_compressed(cache_file, x_train=x_train, y_train=y_train, labels=labels)
def loadFromCache(cache_file: str):
    """Loads the training data from a cache file.

    Args:
        cache_file: The path to the cache file.

    Returns:
        A tuple of (x_train, y_train, labels).
    """
    # allow_pickle is required because the labels array holds Python strings.
    cache = np.load(cache_file, allow_pickle=True)

    return cache["x_train"], cache["y_train"], cache["labels"]
def clearErrorLog():
    """Removes the configured error log file if it exists.

    For debugging purposes.
    """
    log_path = cfg.ERROR_LOG_FILE
    # Only remove regular files; a missing path is fine.
    if os.path.isfile(log_path):
        os.remove(log_path)
def writeErrorLog(ex: Exception):
    """Writes an exception to the error log.

    Formats the stacktrace and appends it to the error log file configured
    in the config.

    Args:
        ex: An exception that occurred.
    """
    # Render the full traceback of the exception as text.
    formatted = "".join(traceback.TracebackException.from_exception(ex).format())

    # Append to the log so earlier errors are preserved.
    with open(cfg.ERROR_LOG_FILE, "a") as elog:
        elog.write(formatted + "\n")