Spaces:
Configuration error
Configuration error
File size: 2,055 Bytes
b5b2f19 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import os
import librosa
import numpy as np
DATA_DIR = "data"
CATEGORIES = ["real_audio", "fake_audio"]
OUTPUT_DIR = "features_audio"
os.makedirs(OUTPUT_DIR, exist_ok=True)
X, y = [], []
# Data augmentation techniques
def augment_audio(audio, sr):
# Example: Shift pitch randomly within a range
pitch_shift = np.random.randint(-5, 5) # Random pitch shift between -5 and 5 semitones
audio = librosa.effects.pitch_shift(audio, sr, n_steps=pitch_shift)
# Example: Time stretch randomly between 0.8x and 1.2x speed
rate = np.random.uniform(0.8, 1.2)
audio = librosa.effects.time_stretch(audio, rate)
# Example: Add random noise
noise_factor = np.random.uniform(0.001, 0.005) # Random noise factor
noise = np.random.randn(len(audio)) * noise_factor
audio = audio + noise
return audio
def extract_mfcc(path):
try:
audio, sr = librosa.load(path, sr=16000) # Load the audio with 16kHz sampling rate
# Apply audio augmentation
audio = augment_audio(audio, sr)
# Extract MFCCs from the augmented audio
mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
mfcc_mean = np.mean(mfcc.T, axis=0) # Average MFCC features
return mfcc_mean
except Exception as e:
print(f"[ERROR] Failed to process {path}: {e}")
return None
# Loop through each category and process audio files
for label, cat in enumerate(CATEGORIES):
folder = os.path.join(DATA_DIR, cat)
for fname in os.listdir(folder):
if not fname.endswith(".wav"): # Only process .wav files
continue
fpath = os.path.join(folder, fname)
features = extract_mfcc(fpath) # Extract MFCC features
if features is not None:
X.append(features)
y.append(label)
# Save the extracted features and labels
np.save(os.path.join(OUTPUT_DIR, "embeddings.npy"), np.array(X))
np.save(os.path.join(OUTPUT_DIR, "labels.npy"), np.array(y))
print(f"✅ Extracted MFCC features for {len(X)} audio samples.")
|