import os
import pickle
import time
from operator import itemgetter

import h5py
import librosa
import numpy as np
import pandas as pd

from utils import *
|
|
|
|
| ornithos = { |
| 'NidalIssa': { |
| 'extra_label': '' |
| }, |
| 'KevinLeveque': { |
| 'extra_label': '' |
| }, |
| 'HerveRenaudineau': { |
| 'extra_label': '' |
| }, |
| 'GuillaumeBigayon': { |
| 'extra_label': '' |
| }, |
| 'GhislainRiou': { |
| 'extra_label': '' |
| }, |
| 'GaëtanMineau': { |
| 'extra_label': '' |
| }, |
| 'FredericCazaban': { |
| 'extra_label': '' |
| }, |
| 'ChristopheMercier': { |
| 'extra_label': '' |
| }, |
| 'AymericMousseau': { |
| 'extra_label': 'amousseau_' |
| }, |
| 'AdrienPajot': { |
| 'extra_label': '' |
| }, |
| 'WillyRaitiere': { |
| 'extra_label': 'willyraitiere_' |
| }, |
| 'MaxencePajot': { |
| 'extra_label': 'Piste de marqueur' |
| }, |
| 'MathurinAubry': { |
| 'extra_label': '' |
| }, |
| 'LionelManceau': { |
| 'extra_label': '' |
| } |
| } |
|
|
|
|
| def prepare_dataset(parent_directory, test_files, export_negative=False, pcen=False, freq_accuracy=33.3, dt=0.003, overlap_spectro=0.4, w_pix=512, extra_label='', |
| split=None, normalize='', suppress_others=True, new_data=False): |
| """ |
| Loops over birder directories, calling process_birder_directory at turns. This creates a spectrogram img dataset of 256*512 img size, along with |
| a list of annotations under the form of bounding box coordinates around each bird calls in the images. Additional info is saved along bb coordinates, |
| such as birder and file name. |
| """ |
| if split is not None: |
| if split == 1: |
| ornithos = { |
| 'NidalIssa': { |
| 'extra_label': '' |
| }, |
| 'KevinLeveque': { |
| 'extra_label': '' |
| }, |
| 'HerveRenaudineau': { |
| 'extra_label': '' |
| }, |
| 'GuillaumeBigayon': { |
| 'extra_label': '' |
| }, |
| 'GhislainRiou': { |
| 'extra_label': '' |
| }, |
| 'GaëtanMineau': { |
| 'extra_label': '' |
| } |
| } |
|
|
| extra_label += '_one' |
| |
| elif split == 2: |
|
|
| ornithos = { |
| 'WillyRaitiere': { |
| 'extra_label': 'willyraitiere_' |
| } |
| } |
|
|
| extra_label += '_two' |
|
|
| else: |
|
|
| ornithos = { |
| 'AdrienPajot': { |
| 'extra_label': '' |
| }, |
| 'MaxencePajot': { |
| 'extra_label': 'Piste de marqueur' |
| }, |
| 'MathurinAubry': { |
| 'extra_label': '' |
| }, |
| 'LionelManceau': { |
| 'extra_label': '' |
| }, |
| 'FredericCazaban': { |
| 'extra_label': '' |
| }, |
| 'ChristopheMercier': { |
| 'extra_label': '' |
| }, |
| 'AymericMousseau': { |
| 'extra_label': 'amousseau_' |
| } |
| } |
|
|
| extra_label += '_three' |
|
|
| if new_data: |
| ornithos = { |
| 'mediae': {'extra_label': ''} |
| } |
| extra_label += '_2' |
|
|
| t = time.time() |
|
|
| img_db = [] |
| annotations = pd.DataFrame() |
|
|
| for idx, dic in ornithos.items(): |
|
|
| directory = os.path.join(parent_directory, idx) |
| extra_str_label = dic['extra_label'] |
|
|
| birder_img_db, birder_annotations, increment_birder = _process_birder_directory(directory, test_files, extra_str_label, pcen, freq_accuracy, dt, overlap_spectro, |
| w_pix, normalize, suppress_others, new_data) |
| img_db += birder_img_db |
| annotations = pd.concat([annotations, birder_annotations]) |
|
|
| print(f'\n Ornitho: {idx}, time: {time.time() - t}, n_files: {increment_birder}') |
|
|
| print('*** Loading large files info ***') |
|
|
| for f in os.listdir(parent_directory): |
| if 'temp_large_file' in f: |
| with open(os.path.join(parent_directory, f), 'rb') as file: |
| large_file_dict = pickle.load(file) |
| img_db += large_file_dict['img_db'] |
| annotations = pd.concat([annotations, large_file_dict['labels']]) |
| os.remove(os.path.join(parent_directory, f)) |
|
|
| print('*** Extracting positive and negative samples ***') |
|
|
| annotations['index'] = range(len(annotations)) |
|
|
| |
|
|
| |
| if len(test_files) > 0: |
| test_annotations = annotations.loc[(annotations['coord'].notnull()) & (annotations['filename'].isin(test_files))].sort_values('index').copy() |
| test_idx = test_annotations['index'].values |
| test_img = np.stack(itemgetter(*test_idx)(img_db)) |
| train_annotations = annotations.loc[~annotations['filename'].isin(test_files)].sort_values('index').copy() |
| train_idx = train_annotations['index'].values |
| train_img = np.stack(itemgetter(*train_idx)(img_db)) |
|
|
| print(f'Extraction done in {time.time() - t}, serializing') |
|
|
| |
| dict_to_serialize = { |
| 'train':{ |
| 'img_db': train_img, |
| 'labels': train_annotations |
| } |
| } |
| |
| if len(test_files) > 0: |
| dict_to_serialize.update({ |
| 'test':{ |
| 'img_db': test_img, |
| 'labels': test_annotations |
| } |
| }) |
|
|
| |
|
|
| for key, value in dict_to_serialize.items(): |
|
|
| with h5py.File(os.path.join(parent_directory, f'{key}_img_db{extra_label}.hdf5'), 'w') as f: |
|
|
| f.create_dataset('img_db', data=value['img_db']) |
|
|
| with h5py.File(os.path.join(parent_directory, f'{key}_annotations{extra_label}.hdf5'), 'w') as f: |
|
|
| for idx in range(len(value['labels'])): |
|
|
| grp = f.create_group(str(idx)) |
| subds = value['labels'].iloc[idx] |
| bb_coord = np.vstack(subds.coord) |
| grp.create_dataset('bb_coord', data=bb_coord) |
|
|
| for key in ['bird_id', 'filename', 'birder']: |
| grp.create_dataset(key, data=subds[key]) |
|
|
| print(f'** done serializing positive samples, now mb turn of negative samples: {time.time() - t}') |
|
|
| |
| if len(test_files) > 0: |
|
|
| negative_samples = annotations.loc[(annotations['coord'].isnull()) & (annotations['filename'].isin(test_files))].sort_values('index').copy() |
| negative_idx = negative_samples['index'].values |
|
|
| negative_img = np.stack(itemgetter(*negative_idx)(img_db)) |
|
|
| with h5py.File(os.path.join(parent_directory, f'test_negative_img_db{extra_label}.hdf5'), 'w') as f: |
|
|
| f.create_dataset('img_db', data=negative_img) |
|
|
| print(f'** done serializing negative test samples: {time.time() - t}') |
| |
| |
| if export_negative: |
|
|
| negative_samples = annotations.loc[annotations['coord'].isnull()].copy() |
| negative_idx = negative_samples['index'].values |
|
|
| negative_img = np.stack(itemgetter(*negative_idx)(img_db)) |
|
|
| with h5py.File(os.path.join(parent_directory, 'negative_img_db.hdf5'), 'w') as f: |
|
|
| f.create_dataset('img_db', data=negative_img) |
|
|
| print(f'** done serializing negative_samples samples: {time.time() - t}') |
|
|
| print(f'Done in {time.time() - t}') |
|
|
|
|
| def process_data_directory(directory, pcen, freq_accuracy, dt, overlap_spectro, w_pix, normalize=''): |
|
|
| img_db = [] |
|
|
| |
|
|
| increment = 0 |
| failed = 0 |
|
|
| for f in os.listdir(directory): |
| if (f.endswith('.wav')) or (f.endswith('.WAV')): |
| |
| fp = File_Processor(directory, f) |
| new_img_db = fp.process_file(pcen=pcen, freq_accuracy=freq_accuracy, dt=dt, overlap_spectro=overlap_spectro, w_pix=w_pix, normalize=normalize) |
| if len(new_img_db) == 0: |
| failed += 1 |
| img_db += new_img_db |
| |
| increment += 1 |
| |
| return img_db, increment, failed |
|
|
|
|
| def _process_birder_directory(directory, test_files, extra_str_label, pcen, freq_accuracy, dt, overlap_spectro, w_pix, normalize='', suppress_others=True, |
| new_data=False): |
|
|
| parent_dir = os.path.dirname(directory) |
| basename = os.path.basename(directory) |
|
|
| img_db = [] |
| labels_ = pd.DataFrame() |
|
|
| labels = create_label_dataset(directory, extra_str_label=extra_str_label, suppress_others=suppress_others, suppress_noise=False, is_csv=new_data) |
| |
| labels = labels.loc[(labels['filename'].map(lambda x: x not in test_files)) | (labels['bird_id'] != -1)].copy() |
|
|
| |
|
|
| increment_birder = 0 |
|
|
| for j, f in enumerate(os.listdir(directory)): |
| if (f.endswith('.wav')) or (f.endswith('.WAV')): |
|
|
| is_test = f.split('.')[0] in test_files |
| |
| fp = File_Processor(directory, f, labels) |
| try: |
| new_img_db, new_labels_ = fp.process_file(extra_str_label=extra_str_label, freq_accuracy=freq_accuracy, dt=dt, overlap_spectro=overlap_spectro, pcen=pcen, |
| w_pix=w_pix, normalize=normalize, is_test=is_test) |
| except: |
| print(f'File {f} failed, skipping file') |
| continue |
|
|
| |
| if fp.large_file: |
| save_path = os.path.join(parent_dir, f'temp_large_file_{basename}_{j}') |
| with open(save_path, 'wb') as f: |
| pickle.dump({'img_db': new_img_db, 'labels': new_labels_}, f) |
| |
| else: |
| img_db += new_img_db |
| labels_ = pd.concat([labels_, new_labels_]) |
| |
| increment_birder += 1 |
| |
| return img_db, labels_, increment_birder |
|
|
|
|
| class File_Processor: |
| |
| |
| |
| H_PIX = 256 |
| LOW_FREQ = 500 |
| |
| def __init__(self, directory, file, labels=None): |
| |
| self.directory = directory |
| self.file = file |
| self.labels = labels |
| |
| |
| def process_file(self, extra_str_label='', freq_accuracy=33.3, dt=0.003, overlap_spectro=0.4, pcen=False, w_pix=512, normalize='', limit_points=757576, is_test=False): |
| ''' |
| Generates and split spectrogram into images of chosen width, and associate labels to each image under the form of bounding box coordinates |
| ''' |
|
|
| |
| self.FREQ_ACCURACY = freq_accuracy |
| self.DT = dt |
|
|
| wav_path = os.path.join(self.directory, self.file) |
| self.filename = self.file.split('.')[0].replace(extra_str_label, '') |
| self.is_test = is_test |
|
|
| |
| self.W_PIX = w_pix |
| self.HOP_SPECTRO = int((1 - overlap_spectro) * self.W_PIX) |
|
|
| |
| librosa_data, FREQ = self._load(wav_path) |
| |
| if (FREQ != 44100) and (FREQ != 48000): |
| print(f'Could not process file sampled at {FREQ} hz') |
| return [] |
| self.FREQ = FREQ |
| self.WIN_LENGTH = int(self.FREQ / self.FREQ_ACCURACY) |
| self.HOP_LENGTH = int(self.FREQ * self.DT) |
| overlap_fft = np.round(1 - self.HOP_LENGTH / self.WIN_LENGTH, 3) |
|
|
| |
| self.FREQ_ACCURACY = self.FREQ / self.WIN_LENGTH |
| self.DT = int((1 - overlap_fft) * self.WIN_LENGTH) / self.FREQ |
|
|
| |
| self.LOW_IDX = 1 + int(self.LOW_FREQ / self.FREQ_ACCURACY) |
| self.HIGH_IDX = self.LOW_IDX + self.H_PIX |
|
|
| self.LOW_FREQ = (self.LOW_IDX - 1) * self.FREQ_ACCURACY |
| self.HIGH_FREQ = (self.HIGH_IDX - 1) * self.FREQ_ACCURACY |
|
|
| if (len(librosa_data) <= limit_points * self.HOP_LENGTH): |
| self.large_file = False |
| spectro = self._spectrogram(librosa_data) |
| power_spec = self._process_spectro(spectro, pcen=pcen, normalize=normalize) |
| else: |
| |
| self.large_file = True |
| start_idx, cut_idx, n_overlaps = self._block_spectrogram_idx(librosa_data, limit_points) |
|
|
| t = time.time() |
| temp_dir = os.getcwd() |
|
|
| for i, (start, end) in enumerate(zip(start_idx, cut_idx)): |
|
|
| temp_file_path = os.path.join(temp_dir, f'power_spec_temp_{i}') |
| data_cut = librosa_data[start:end] |
| spectro_cut = self._spectrogram(data_cut) |
| if i == 0: |
| spectro_cut = spectro_cut[:, :-n_overlaps] |
| elif i == len(cut_idx) - 1: |
| spectro_cut = spectro_cut[:, n_overlaps:] |
| else: |
| spectro_cut = spectro_cut[:, n_overlaps:-n_overlaps] |
|
|
| new_power_spec = self._process_spectro(spectro_cut, pcen=pcen, normalize=normalize) |
|
|
| |
| with open(temp_file_path, 'wb') as f: |
| pickle.dump(new_power_spec, f) |
| |
| print(f'Block {i} processed in {time.time() - t}') |
| |
| |
| power_spec = np.array([]) |
|
|
| for i in range(len(start_idx)): |
| |
| temp_file_path = os.path.join(temp_dir, f'power_spec_temp_{i}') |
| with open(temp_file_path, 'rb') as f: |
| new_power_spec = pickle.load(f) |
|
|
| if len(power_spec) == 0: |
| power_spec = new_power_spec |
| else: |
| power_spec = np.concatenate([power_spec, new_power_spec], axis=-1) |
| |
| |
| remove = [os.remove(os.path.join(temp_dir, f)) for f in os.listdir(temp_dir) if 'power_spec_temp_' in f] |
|
|
| |
| self.spectrogram_length = power_spec.shape[-1] |
|
|
| |
| img_db = self._split_power_spec(power_spec, normalize=normalize) |
|
|
| |
| if self.labels is not None: |
| labels_ = self._merge_and_filter_labels(img_db) |
| |
| if len(labels_) == 0: |
| return [], labels_ |
| |
| positive_idx = labels_['index'].values |
| positive_img_db = list(itemgetter(*positive_idx)(img_db)) |
| if len(positive_img_db) == 1: |
| positive_img_db[0] = positive_img_db[0][np.newaxis, :] |
| return positive_img_db, labels_ |
| else: |
| return img_db |
|
|
| |
| def _process_spectro(self, spectrogram, pcen, normalize): |
| |
| spectrogram = spectrogram[self.LOW_IDX:self.HIGH_IDX, :] |
|
|
| |
| power_spec = self._frontend(spectrogram, pcen=pcen) |
|
|
| return power_spec |
|
|
| |
| def _load(self, wav_path): |
| |
| librosa_data, freq = librosa.load(wav_path, sr=None) |
|
|
| return librosa_data, freq |
| |
| |
| def _spectrogram(self, data): |
| |
| spectrogram = librosa.stft(data, win_length=self.WIN_LENGTH, hop_length=self.HOP_LENGTH, n_fft=self.WIN_LENGTH) |
|
|
| return spectrogram |
|
|
| |
| def _block_spectrogram_idx(self, data, limit_points): |
| """ |
| Splits librosa data into chunks of defined width. Attention is carried to preserve fft integrity by carefully overlapping successive chunks |
| (and considering that subsequent librosa stft is centered). |
| """ |
| |
| data_len = len(data) |
| cut_every = limit_points * self.HOP_LENGTH |
| cut_idx = [int(i * cut_every) for i in np.arange(1, data_len / cut_every)] |
| |
| n_overlaps = int((self.WIN_LENGTH - 1) / self.HOP_LENGTH) |
| start_idx = [0] + [cut - (2 * n_overlaps + 1) * self.HOP_LENGTH for cut in cut_idx] |
| cut_idx = cut_idx + [data_len] |
|
|
| return start_idx, cut_idx, n_overlaps |
| |
| |
| def _frontend(self, spectrogram, pcen=False, chunk=600, eps=1e-8): |
| """ |
| Computes modulus of spectrogram amplitudes. Spectrogram is first split into 1 hours chunks, and further into smaller |
| chunks whose log power are concatenated. This appeared to run faster on my pc but can ofc be challenged (esp. the smaller |
| chunk division). |
| If pcen is activated, concatenate it to the standard preprocessing as an additional channel |
| """ |
|
|
| chunk_size = int(chunk / self.DT) |
| square_power_specs = [] |
| log_power_specs = [] |
|
|
| |
| splits = np.array_split(spectrogram, max(1, int(spectrogram.shape[1] * self.DT / 3600)), axis=1) |
|
|
| for i, split in enumerate(splits): |
|
|
| power_spec = np.abs(split[:, :chunk_size]) |
| log_power_spec = np.log(eps + power_spec) |
| if pcen: |
| square_power_spec = power_spec**2 |
|
|
| for i in range(1, 1 + split.shape[1] // chunk_size): |
| |
| power_spec = np.abs(split[:, i * chunk_size:(i + 1) * chunk_size]) |
| log_power_spec = np.hstack((log_power_spec, np.log(eps + power_spec))) |
| if pcen: |
| square_power_spec = np.hstack((square_power_spec, power_spec**2)) |
| |
| log_power_specs.append(log_power_spec) |
| if pcen: |
| square_power_specs.append(square_power_spec) |
|
|
| log_power_spec = np.hstack(log_power_specs) |
| output = log_power_spec.reshape((1,) + log_power_spec.shape) |
|
|
| |
|
|
| if pcen: |
| for square_power_spec in square_power_specs: |
| square_power_spec = librosa.pcen(square_power_spec, sr=self.FREQ, hop_length=self.HOP_LENGTH, time_constant=0.4, |
| gain=0.8, power=0.25, bias=10) |
| square_power_spec = np.hstack(square_power_specs) |
| square_power_spec = square_power_spec.reshape((1,) + square_power_spec.shape) |
| output = np.vstack((output, square_power_spec)) |
|
|
| return output |
| |
| |
| def _split_power_spec(self, log_power_spec, normalize=''): |
| """ |
| Splits a spectrogram 2D array along axis=1 given hop size and img width. |
| """ |
|
|
| if normalize == 'global': |
| maxi = log_power_spec.max() |
| log_power_spec = log_power_spec - maxi |
|
|
| |
| img_db = [log_power_spec[..., k * self.HOP_SPECTRO: k * self.HOP_SPECTRO + self.W_PIX] for k in range(max(1, |
| int(1 + np.ceil((log_power_spec.shape[-1] - self.W_PIX) / self.HOP_SPECTRO))))] |
|
|
| if normalize == 'local': |
| img_db = [subspec - subspec.max() for subspec in img_db] |
|
|
| if img_db[-1].shape[-1] < self.W_PIX: |
|
|
| while img_db[-1].shape[-1] < self.W_PIX: |
| img = np.concatenate((img_db[-1], img_db[-1][..., -(self.W_PIX - img_db[-1].shape[-1]):]), axis=2) |
| img_db[-1] = img |
|
|
| return img_db |
| |
| |
| def _merge_and_filter_labels(self, img_db): |
| """ |
| Computes and return a dataframe containing img indexes and a list of bb coordinates for each images in a given file |
| """ |
|
|
| |
| img_coord = [(i * self.HOP_SPECTRO, i * self.HOP_SPECTRO + self.W_PIX - 1) for i in range(len(img_db))] |
| img_coord = pd.DataFrame(img_coord).rename(columns={0: 'start', 1:'end'}) |
|
|
| |
| labels_ = self.labels.loc[self.labels['filename'] == self.filename].copy() |
| |
| if len(labels_) == 0: |
| labels_ = pd.DataFrame({key: [] for key in ['index', 'coord', 'bird_id', 'filename', 'birder']}) |
| return labels_ |
|
|
| |
| for ex_label, new_label in zip(['t_start', 't_end'], ['x_1', 'x_2']): |
| labels_[new_label] = (labels_[ex_label].astype(float) / self.DT).astype(int) |
|
|
| |
| for ex_label, new_label in zip(['f_start', 'f_end'], ['y_1', 'y_2']): |
| labels_[new_label] = ((labels_[ex_label].clip(lower=self.LOW_FREQ, upper=self.HIGH_FREQ) - self.LOW_FREQ) / self.FREQ_ACCURACY).astype(int) |
|
|
| labels_ = labels_.loc[labels_['y_1'] != labels_['y_2']] |
| labels_.index = range(len(labels_)) |
|
|
| labels_['w'] = labels_['x_2'] - labels_['x_1'] + 1 |
| labels_['h'] = labels_['y_2'] - labels_['y_1'] + 1 |
|
|
| for size in ['w', 'h']: |
| labels_ = labels_.loc[labels_[size] > 0] |
|
|
| labels_['joint'] = 1 |
| img_coord['joint'] = 1 |
| img_coord.reset_index(inplace=True) |
|
|
| coord = ['x_1', 'y_1', 'x_2', 'y_2'] |
| labels_ = labels_[coord + ['w', 'h', 'joint', 'bird_id']].merge(img_coord, on='joint') |
| labels_ = labels_.loc[(labels_['x_1'].between(labels_['start'], labels_['end'])) | (labels_['x_2'].between(labels_['start'], labels_['end']))] |
|
|
| |
| labels_['inside'] = labels_[['x_2', 'end']].min(axis=1) - labels_[['x_1', 'start']].max(axis=1) + 1 |
|
|
| cond_1 = (labels_['inside'] < 0.5 * labels_['w']) & (labels_['inside'] < 15) |
| cond_2 = (labels_['inside'] < 0.1 * labels_['w']) |
|
|
| labels_ = labels_.loc[~(cond_1 | cond_2)] |
|
|
| |
| labels_['x_1'] = (labels_['x_1'] - labels_['start'] - (labels_['w'] * 0.1).astype(int).clip(lower=3, upper=6)).clip(lower=0) |
| labels_['x_2'] = (labels_['x_2'] - labels_['start'] + (labels_['w'] * 0.1).astype(int).clip(lower=3, upper=6)).clip(upper=self.W_PIX - 1) |
| labels_['y_1'] = (labels_['y_1'] - (labels_['h'] * 0.1).astype(int).clip(lower=3, upper=6)).clip(lower=0) |
| labels_['y_2'] = (labels_['y_2'] + (labels_['h'] * 0.1).astype(int).clip(lower=3, upper=6)).clip(upper=self.H_PIX - 1) |
|
|
| labels_['w'] = labels_['x_2'] - labels_['x_1'] |
| labels_['h'] = labels_['y_2'] - labels_['y_1'] |
|
|
| labels_['coord'] = [(x_1, y_1, x_2, y_2) for (x_1, y_1, x_2, y_2) in zip(labels_['x_1'], labels_['y_1'], |
| labels_['x_2'], labels_['y_2'])] |
|
|
| |
| labels_ = labels_.merge(labels_.loc[labels_['bird_id'] != -1].groupby('index').size().reset_index().rename(columns={0: 'count'}), on='index') |
| labels_ = labels_.loc[(labels_['bird_id'] != -1) | (labels_['count'] == 0)] |
|
|
| |
| labels_ = labels_.groupby('index', as_index=False).agg({'coord': lambda x: x.tolist(), 'bird_id': lambda x: x.tolist()}) |
|
|
| |
| labels_ = pd.merge( |
| pd.DataFrame({'index': range(len(img_db))}, index=range(len(img_db))), |
| labels_, |
| how='outer', |
| on='index' |
| ) |
|
|
| if not self.is_test: |
|
|
| |
| positive_count = labels_['coord'].isnull().value_counts() |
| if True in positive_count.index: |
| n_negative = positive_count[True] |
| else: |
| n_negative = 0 |
| if False in positive_count.index: |
| n_positive = positive_count[False] |
| else: |
| n_positive = 0 |
|
|
| negative_idx = labels_.loc[labels_['coord'].isnull(), 'index'].values |
| if len(negative_idx) > 0: |
| negative_idx = sorted(np.random.choice(negative_idx, min(int(n_positive * 0.1), 20))) |
| |
| labels_.loc[negative_idx, 'coord'] = labels_.loc[negative_idx, 'coord'].map(lambda x: [(-1, -1, -1, -1)]) |
| |
| labels_.loc[negative_idx, 'bird_id'] = labels_.loc[negative_idx, 'bird_id'].map(lambda x: [-1]) |
| labels_ = labels_.loc[labels_['coord'].notnull()].copy() |
| |
| noise_idx = labels_['bird_id'].map(lambda x: -1 in x) |
| labels_.loc[noise_idx, 'coord'] = labels_.loc[noise_idx, 'coord'].map(lambda x: [(-1, -1, -1, -1)]) |
| |
| labels_['filename'] = self.filename |
| labels_['birder'] = self.directory.split('\\')[-1] |
|
|
| return labels_ |