Spaces:

vskode
/

acodet

Sleeping

acodet / tests /test.py

vskode

initial commit without binaries or large files for huggingface

c96678c over 1 year ago

4.23 kB

	import os, sys
	import unittest
	from pathlib import Path
	import pandas as pd

	# Put the current working directory first on the import path so that the
	# `acodet` package is resolved from the directory the tests are started in.
	sys.path.insert(0, os.path.abspath("."))

	########### MODIFY SESSION SETTINGS BEFORE GLOBAL CONFIG IS IMPORTED #########
	from acodet.create_session_file import create_session_file

	# NOTE(review): presumably this (re)creates the session file that is read
	# back right below -- confirm against acodet.create_session_file.
	create_session_file()
	import json

	_SESSION_FILE = "acodet/src/tmp_session.json"

	# Session keys redirected to the fixture folders under tests/test_files.
	_TEST_PATHS = {
	    "sound_files_source": "tests/test_files/test_audio_files",
	    "generated_annotation_source": "tests/test_files/test_generated_annotations",
	    "annotation_destination": "tests/test_files/test_combined_annotations",
	    "generated_annotations_folder": "tests/test_files/test_generated_annotations",
	    "reviewed_annotation_source": "tests/test_files/test_generated_annotations",
	    "tfrecords_destination_folder": "tests/test_files/test_tfrecords",
	}

	# Load the session, apply the overrides, and write it back to the same file.
	with open(_SESSION_FILE, "r") as f:
	    session = json.load(f)

	session.update(_TEST_PATHS)

	with open(_SESSION_FILE, "w") as f:
	    json.dump(session, f)
	##############################################################################


	from acodet.annotate import run_annotation, filter_annots_by_thresh
	from acodet.funcs import return_windowed_file, get_train_set_size
	from acodet.models import GoogleMod
	from acodet.combine_annotations import generate_final_annotations
	from acodet.tfrec import write_tfrec_dataset
	from acodet.train import run_training
	from acodet import global_config as conf



	class TestDetection(unittest.TestCase):
	    """Checks the annotation run and the subsequent threshold filtering."""

	    def test_annotation(self):
	        """Run annotation and filtering and compare counts to reference values.

	        First verifies the prediction count reported in ``stats.csv`` of the
	        new run, then filters the annotations by threshold and verifies the
	        number of rows in the first filtered ``.txt`` file.
	        """
	        self.time_stamp = run_annotation()
	        stats_path = Path(conf.GEN_ANNOTS_DIR).joinpath(
	            self.time_stamp, "stats.csv"
	        )
	        df = pd.read_csv(stats_path)
	        self.assertEqual(
	            df["number of predictions with thresh>0.8"][0],
	            326,
	            "Number of predictions is not what it should be.",
	        )

	        filter_annots_by_thresh(self.time_stamp)
	        thresh_dir = Path(conf.GEN_ANNOT_SRC).joinpath(
	            self.time_stamp, f"thresh_{conf.THRESH}"
	        )
	        # "**/*.txt" searches the threshold folder recursively for .txt
	        # files. The previous pattern "*/.txt" could only match files that
	        # are literally named ".txt". Sorting makes the picked file
	        # deterministic, since glob yields results in no particular order.
	        filtered_files = sorted(thresh_dir.glob("**/*.txt"))
	        self.assertTrue(
	            filtered_files,
	            f"No filtered annotation files found in {thresh_dir}.",
	        )
	        df = pd.read_csv(filtered_files[0])
	        self.assertEqual(
	            len(df),
	            309,
	            "Number of predictions from filtered thresholds is incorrect.",
	        )


	class TestTraining(unittest.TestCase):
	    """Sanity check on model construction."""

	    def test_model_load(self):
	        """A model built with ``load_g_ckpt=False`` has more than 15 layers."""
	        layer_count = len(GoogleMod(load_g_ckpt=False).model.layers)
	        self.assertGreater(layer_count, 15)

	    # Disabled test, kept for reference:
	    # def test_tfrecord_loading(self):
	    #     data_dir = list(Path(conf.TFREC_DESTINATION).iterdir())
	    #     n_train, n_noise = get_train_set_size(data_dir)
	    #     self.assertEqual(n_train, 517)
	    #     self.assertEqual(n_noise, 42)

	class TestTFRecordCreation(unittest.TestCase):
	    """Checks combined-annotation generation and TFRecord dataset writing.

	    NOTE(review): ``test_tfrecord`` reads from ``conf.ANNOT_DEST``, which
	    ``test_combined_annotation`` appears to populate; unittest runs methods
	    in alphabetical order, so the combined-annotation test runs first.
	    """

	    def test_tfrecord(self):
	        """Write the TFRecord dataset and verify its metadata file."""
	        # iterdir() yields entries in arbitrary order, so sort before taking
	        # the last one. Assumes run folders have names that sort
	        # chronologically (timestamps) -- TODO confirm.
	        annot_dirs = sorted(Path(conf.ANNOT_DEST).iterdir())
	        self.assertTrue(
	            annot_dirs, f"No annotation folders found in {conf.ANNOT_DEST}."
	        )
	        time_stamp = annot_dirs[-1]
	        write_tfrec_dataset(annot_dir=time_stamp, active_learning=False)
	        metadata_file_path = Path(conf.TFREC_DESTINATION).joinpath(
	            "dataset_meta_train.json"
	        )
	        self.assertTrue(
	            metadata_file_path.exists(),
	            "TFRecords metadata file was not created.",
	        )

	        with open(metadata_file_path, "r") as f:
	            data = json.load(f)
	        self.assertEqual(
	            data["dataset"]["size"]["train"],
	            517,
	            "TFRecords files has wrong number of datapoints.",
	        )

	    def test_combined_annotation(self):
	        """Generate the combined annotations and verify the resulting csv."""
	        generate_final_annotations(active_learning=False)
	        # Sorted for the same reason as in test_tfrecord: directory listing
	        # order is not guaranteed.
	        generated_dirs = sorted(Path(conf.GEN_ANNOTS_DIR).iterdir())
	        self.assertTrue(
	            generated_dirs,
	            f"No generated annotation folders found in {conf.GEN_ANNOTS_DIR}.",
	        )
	        time_stamp = generated_dirs[-1].stem
	        combined_annots_path = Path(conf.ANNOT_DEST).joinpath(
	            time_stamp, "combined_annotations.csv"
	        )
	        self.assertTrue(
	            combined_annots_path.exists(),
	            "csv file containing combined_annotations does not exist.",
	        )
	        df = pd.read_csv(combined_annots_path)
	        # Compare floats with a tolerance matching the precision of the
	        # reference value instead of exact equality.
	        self.assertAlmostEqual(
	            df.start.iloc[-1],
	            1795.2825,
	            places=4,
	            msg="The annotations in combined_annotations.csv don't seem to be identical",
	        )


	# Run every test case defined in this module when the file is executed
	# directly as a script.
	if __name__ == "__main__":
	unittest.main()