piranaware_version3 / src /preprocess.py
ashandilgith's picture
pushing tested revised files for HF compatibility
ab65fad
# This version was created for Hugging Face. If issues persist, revert to preprocess_local.
import librosa
import numpy as np
import os
# --- CONSTANTS ---
# Base audio parameters.
SAMPLE_RATE = 22050  # Sampling rate (Hz) that audio is loaded at
DURATION = 1.0  # Length of one slice (seconds)
N_MELS = 128  # Number of mel bands (frequency resolution)

# Derived: number of audio samples contained in a single slice.
SAMPLES_PER_SLICE = int(DURATION * SAMPLE_RATE)
def audio_to_spectrograms(file_path):
    """
    Convert an audio file into a batch of normalized mel-spectrogram slices.

    The audio is loaded at SAMPLE_RATE and cut into consecutive,
    non-overlapping slices of SAMPLES_PER_SLICE samples; any trailing
    partial slice is discarded. Each slice is turned into a mel
    spectrogram with N_MELS bands, converted to decibels relative to
    that slice's own peak, and rescaled to the range [0, 1].

    Args:
        file_path: Path of the audio file to load.

    Returns:
        A NumPy array of shape (num_slices, N_MELS, frames, 1), or None
        when the file does not exist, the audio is shorter than one
        slice, or any error occurs during processing. With the module
        constants the intended shape is (num_slices, 128, 44, 1); the
        frame count comes from librosa's default framing (TODO confirm
        if the librosa version changes).

    Errors are reported via print() and never raised to the caller.
    """
    # Dynamic range (dB) kept below each slice's peak. It is passed to
    # power_to_db explicitly so the 0-1 rescaling below cannot silently
    # drift from the range librosa actually clips to.
    top_db = 80.0

    try:
        # 1. Safety check: file existence
        if not os.path.exists(file_path):
            print(f"❌ Error: File not found at {file_path}")
            return None

        # 2. Load audio; sr=SAMPLE_RATE is enforced for consistency
        y, sr = librosa.load(file_path, sr=SAMPLE_RATE)

        # 3. Count the whole slices available (the remainder is dropped)
        num_slices = len(y) // SAMPLES_PER_SLICE

        # Safety: if audio is shorter than one slice, fail gracefully
        if num_slices < 1:
            print("❌ Audio too short")
            return None

        # 4. Create one spectrogram per slice
        spectrograms = []
        for i in range(num_slices):
            start = i * SAMPLES_PER_SLICE
            y_slice = y[start:start + SAMPLES_PER_SLICE]

            # Mel power spectrogram of this slice
            spec = librosa.feature.melspectrogram(y=y_slice, sr=sr, n_mels=N_MELS)

            # Decibels relative to this slice's own maximum.
            # NOTE(review): because ref=np.max is evaluated per slice,
            # loudness differences between slices are not preserved.
            log_spec = librosa.power_to_db(spec, ref=np.max, top_db=top_db)

            # Map [-top_db, 0] dB onto [0, 1]; clip guards any stray values
            norm_spec = np.clip((log_spec + top_db) / top_db, 0, 1)

            # Add trailing channel dimension (required for CNNs)
            spectrograms.append(norm_spec[..., np.newaxis])

        return np.array(spectrograms)
    except Exception as e:
        # Boundary handler: callers rely on None instead of an exception.
        print(f"❌ Preprocessing Error: {e}")
        return None