# This version was created for Hugging Face. If issues persist, revert to
# preprocess_local.
import os
from typing import Optional

import librosa
import numpy as np

# --- CONSTANTS ---
SAMPLE_RATE = 22050  # Every file is resampled to this rate on load
DURATION = 1.0  # Slice length (seconds)
SAMPLES_PER_SLICE = int(SAMPLE_RATE * DURATION)
N_MELS = 128  # Frequency resolution (number of mel bands)
# Dynamic range in dB kept below each slice's peak. The same value is used as
# the power_to_db floor and as the divisor of the 0-1 rescale, so the two
# cannot drift apart.
TOP_DB = 80.0


def _slice_to_spectrogram(y_slice: np.ndarray) -> np.ndarray:
    """Turn one audio slice into a normalized mel spectrogram with a channel axis."""
    spec = librosa.feature.melspectrogram(y=y_slice, sr=SAMPLE_RATE, n_mels=N_MELS)

    # Log scale relative to the slice's own peak: values land in [-TOP_DB, 0].
    # NOTE(review): because ref=np.max is evaluated per slice, absolute
    # loudness differences between slices are not preserved.
    log_spec = librosa.power_to_db(spec, ref=np.max, top_db=TOP_DB)

    # Shift/scale [-TOP_DB, 0] dB onto [0, 1]; the clip guards the bounds.
    norm_spec = np.clip((log_spec + TOP_DB) / TOP_DB, 0, 1)

    # Trailing channel dimension (required for CNN input).
    return norm_spec[..., np.newaxis]


def audio_to_spectrograms(file_path: str) -> Optional[np.ndarray]:
    """Convert an audio file into a batch of 1-second mel-spectrogram slices.

    The audio is loaded at ``SAMPLE_RATE``, cut into consecutive
    non-overlapping slices of ``SAMPLES_PER_SLICE`` samples (any trailing
    partial slice is dropped), and each slice is converted to a mel
    spectrogram in decibels, rescaled to the range 0-1.

    Args:
        file_path: Path of the audio file to process.

    Returns:
        An array of shape ``(Num_Slices, 128, 44, 1)``, or ``None`` when the
        file does not exist, the audio is shorter than one slice, or any
        step of the preprocessing raises. Failures are reported with
        ``print`` rather than raised.
    """
    # Broad catch is deliberate: this is the boundary callers rely on to get
    # ``None`` instead of an exception for any unreadable or malformed input.
    try:
        # 1. Safety check: file existence
        if not os.path.exists(file_path):
            print(f"❌ Error: File not found at {file_path}")
            return None

        # 2. Load audio, enforcing SAMPLE_RATE for consistency
        y, _ = librosa.load(file_path, sr=SAMPLE_RATE)

        # 3. Number of whole slices available
        num_slices = len(y) // SAMPLES_PER_SLICE

        # Safety: if the audio is too short (< 1 slice), fail gracefully
        if num_slices < 1:
            print("❌ Audio too short")
            return None

        # 4. One spectrogram per whole slice
        usable_samples = num_slices * SAMPLES_PER_SLICE
        spectrograms = [
            _slice_to_spectrogram(y[start : start + SAMPLES_PER_SLICE])
            for start in range(0, usable_samples, SAMPLES_PER_SLICE)
        ]
        return np.stack(spectrograms)

    except Exception as e:
        print(f"❌ Preprocessing Error: {e}")
        return None