Spaces:

kautilya286
/

deepscanAPI

Configuration error

App Files Files Community

deepscanAPI / scripts /extract_audio_features.py

kautilya286

first commit

b5b2f19 9 months ago

raw

history blame contribute delete

2.06 kB

	import os
	import librosa
	import numpy as np

	DATA_DIR = "data"
	CATEGORIES = ["real_audio", "fake_audio"]
	OUTPUT_DIR = "features_audio"
	os.makedirs(OUTPUT_DIR, exist_ok=True)

	X, y = [], []

	# Data augmentation techniques
	def augment_audio(audio, sr):
	# Example: Shift pitch randomly within a range
	pitch_shift = np.random.randint(-5, 5) # Random pitch shift between -5 and 5 semitones
	audio = librosa.effects.pitch_shift(audio, sr, n_steps=pitch_shift)

	# Example: Time stretch randomly between 0.8x and 1.2x speed
	rate = np.random.uniform(0.8, 1.2)
	audio = librosa.effects.time_stretch(audio, rate)

	# Example: Add random noise
	noise_factor = np.random.uniform(0.001, 0.005) # Random noise factor
	noise = np.random.randn(len(audio)) * noise_factor
	audio = audio + noise

	return audio

	def extract_mfcc(path):
	try:
	audio, sr = librosa.load(path, sr=16000) # Load the audio with 16kHz sampling rate

	# Apply audio augmentation
	audio = augment_audio(audio, sr)

	# Extract MFCCs from the augmented audio
	mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
	mfcc_mean = np.mean(mfcc.T, axis=0) # Average MFCC features
	return mfcc_mean
	except Exception as e:
	print(f"[ERROR] Failed to process {path}: {e}")
	return None

	# Loop through each category and process audio files
	for label, cat in enumerate(CATEGORIES):
	folder = os.path.join(DATA_DIR, cat)
	for fname in os.listdir(folder):
	if not fname.endswith(".wav"): # Only process .wav files
	continue
	fpath = os.path.join(folder, fname)
	features = extract_mfcc(fpath) # Extract MFCC features
	if features is not None:
	X.append(features)
	y.append(label)

	# Save the extracted features and labels
	np.save(os.path.join(OUTPUT_DIR, "embeddings.npy"), np.array(X))
	np.save(os.path.join(OUTPUT_DIR, "labels.npy"), np.array(y))

	print(f"✅ Extracted MFCC features for {len(X)} audio samples.")