Spaces:

ashandilgith
/

piranaware_version3

Sleeping

piranaware_version3 / src /preprocess.py

pushing tested revised files for HF compatibility

ab65fad about 2 months ago

1.85 kB

	# This version was created for Hugging Face. If issues persist, revert to preprocess_local.


	import librosa
	import numpy as np
	import os

	# --- CONSTANTS ---
	# Audio settings shared by the preprocessing code in this module.
	SAMPLE_RATE = 22050  # Sampling rate, in Hz
	N_MELS = 128  # Number of mel bands (frequency resolution)
	DURATION = 1.0  # Length of one slice, in seconds
	SAMPLES_PER_SLICE = int(DURATION * SAMPLE_RATE)  # Samples in one slice

	def audio_to_spectrograms(file_path):
	    """
	    Converts audio into a batch of 1-second spectrogram slices.

	    The file is loaded at SAMPLE_RATE and cut into consecutive,
	    non-overlapping chunks of SAMPLES_PER_SLICE samples. Samples left over
	    after the last full chunk are dropped. Each chunk is turned into a mel
	    spectrogram, converted to decibels and scaled into the range [0, 1].

	    Args:
	        file_path: Path of the audio file to process.

	    Returns:
	        A NumPy array holding one spectrogram per slice, with a trailing
	        channel axis. Returns shape: (Num_Slices, 128, 44, 1).
	        Returns None (after printing a message) when the file does not
	        exist, when the audio holds less than one full slice, or when any
	        preprocessing step raises an exception.
	    """
	    try:
	        # 1. Safety Check: File Existence
	        if not os.path.exists(file_path):
	            print(f"❌ Error: File not found at {file_path}")
	            return None

	        # 2. Load Audio
	        # We enforce sr=SAMPLE_RATE so every file yields same-sized slices
	        y, sr = librosa.load(file_path, sr=SAMPLE_RATE)

	        # 3. Calculate Slices
	        # Floor division: only whole slices count, the remainder is ignored
	        num_slices = len(y) // SAMPLES_PER_SLICE

	        # Safety: If audio is too short (< 1 slice), fail gracefully
	        if num_slices < 1:
	            print("❌ Audio too short")
	            return None

	        # 4. Create Spectrograms
	        spectrograms = []
	        for i in range(num_slices):
	            # Extract the i-th chunk of SAMPLES_PER_SLICE samples
	            start = i * SAMPLES_PER_SLICE
	            y_slice = y[start : start + SAMPLES_PER_SLICE]

	            # Generate Mel Spectrogram
	            spec = librosa.feature.melspectrogram(y=y_slice, sr=sr, n_mels=N_MELS)

	            # Convert to Decibels (Log Scale) relative to this slice's own
	            # peak (ref=np.max), then map [-80, 0] dB onto [0, 1]; the clip
	            # guards against values outside that range.
	            log_spec = librosa.power_to_db(spec, ref=np.max)
	            norm_spec = np.clip((log_spec + 80) / 80, 0, 1)

	            # Add Channel Dimension (Required for CNNs)
	            spectrograms.append(norm_spec[..., np.newaxis])

	        return np.array(spectrograms)

	    except Exception as e:
	        # Deliberate best-effort boundary: any failure is reported and
	        # surfaced to the caller as None instead of propagating.
	        print(f"❌ Preprocessing Error: {e}")
	        return None