Spaces:

kautilya286
/

deepscanAPI

Configuration error

File size: 2,055 Bytes

b5b2f19

import os
import librosa
import numpy as np

DATA_DIR = "data"
CATEGORIES = ["real_audio", "fake_audio"]
OUTPUT_DIR = "features_audio"
os.makedirs(OUTPUT_DIR, exist_ok=True)

X, y = [], []

# Data augmentation techniques
def augment_audio(audio, sr):
    # Example: Shift pitch randomly within a range
    pitch_shift = np.random.randint(-5, 5)  # Random pitch shift between -5 and 5 semitones
    audio = librosa.effects.pitch_shift(audio, sr, n_steps=pitch_shift)

    # Example: Time stretch randomly between 0.8x and 1.2x speed
    rate = np.random.uniform(0.8, 1.2)
    audio = librosa.effects.time_stretch(audio, rate)

    # Example: Add random noise
    noise_factor = np.random.uniform(0.001, 0.005)  # Random noise factor
    noise = np.random.randn(len(audio)) * noise_factor
    audio = audio + noise

    return audio

def extract_mfcc(path):
    try:
        audio, sr = librosa.load(path, sr=16000)  # Load the audio with 16kHz sampling rate
        
        # Apply audio augmentation
        audio = augment_audio(audio, sr)
        
        # Extract MFCCs from the augmented audio
        mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
        mfcc_mean = np.mean(mfcc.T, axis=0)  # Average MFCC features
        return mfcc_mean
    except Exception as e:
        print(f"[ERROR] Failed to process {path}: {e}")
        return None

# Loop through each category and process audio files
for label, cat in enumerate(CATEGORIES):
    folder = os.path.join(DATA_DIR, cat)
    for fname in os.listdir(folder):
        if not fname.endswith(".wav"):  # Only process .wav files
            continue
        fpath = os.path.join(folder, fname)
        features = extract_mfcc(fpath)  # Extract MFCC features
        if features is not None:
            X.append(features)
            y.append(label)

# Save the extracted features and labels
np.save(os.path.join(OUTPUT_DIR, "embeddings.npy"), np.array(X))
np.save(os.path.join(OUTPUT_DIR, "labels.npy"), np.array(y))

print(f"✅ Extracted MFCC features for {len(X)} audio samples.")