deepscanAPI / scripts /extract_audio_features.py
kautilya286's picture
first commit
b5b2f19
import os
import librosa
import numpy as np
DATA_DIR = "data"
CATEGORIES = ["real_audio", "fake_audio"]
OUTPUT_DIR = "features_audio"
os.makedirs(OUTPUT_DIR, exist_ok=True)
X, y = [], []
# Data augmentation techniques
def augment_audio(audio, sr):
# Example: Shift pitch randomly within a range
pitch_shift = np.random.randint(-5, 5) # Random pitch shift between -5 and 5 semitones
audio = librosa.effects.pitch_shift(audio, sr, n_steps=pitch_shift)
# Example: Time stretch randomly between 0.8x and 1.2x speed
rate = np.random.uniform(0.8, 1.2)
audio = librosa.effects.time_stretch(audio, rate)
# Example: Add random noise
noise_factor = np.random.uniform(0.001, 0.005) # Random noise factor
noise = np.random.randn(len(audio)) * noise_factor
audio = audio + noise
return audio
def extract_mfcc(path):
try:
audio, sr = librosa.load(path, sr=16000) # Load the audio with 16kHz sampling rate
# Apply audio augmentation
audio = augment_audio(audio, sr)
# Extract MFCCs from the augmented audio
mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
mfcc_mean = np.mean(mfcc.T, axis=0) # Average MFCC features
return mfcc_mean
except Exception as e:
print(f"[ERROR] Failed to process {path}: {e}")
return None
# Loop through each category and process audio files
for label, cat in enumerate(CATEGORIES):
folder = os.path.join(DATA_DIR, cat)
for fname in os.listdir(folder):
if not fname.endswith(".wav"): # Only process .wav files
continue
fpath = os.path.join(folder, fname)
features = extract_mfcc(fpath) # Extract MFCC features
if features is not None:
X.append(features)
y.append(label)
# Save the extracted features and labels
np.save(os.path.join(OUTPUT_DIR, "embeddings.npy"), np.array(X))
np.save(os.path.join(OUTPUT_DIR, "labels.npy"), np.array(y))
print(f"✅ Extracted MFCC features for {len(X)} audio samples.")