Vitalis_Devcore / src /audio_ear /feature_extractor.py

FerrellSyntheticIntelligence

feat: audio ear, cognition modules, dream engine, vitalis IDE, test encoder

63dd1f4 1 day ago

1.15 kB

	import librosa
	import numpy as np
	from typing import Tuple, Dict
	from pathlib import Path

	def extract_features(
	    wav_path: Path,
	    *,
	    sample_rate: int = 16000,
	    n_mfcc: int = 13,
	    silence_threshold: float = 0.01,
	) -> Tuple[np.ndarray, Dict[str, float]]:
	    """Extract MFCCs and heuristic prosody markers from an audio file.

	    The file is loaded through ``librosa.load`` at ``sample_rate`` and the
	    resulting signal is summarised in two ways: an MFCC matrix and a small
	    dictionary of coarse prosody statistics.

	    Args:
	        wav_path: Path of the audio file to analyse.
	        sample_rate: Target sampling rate passed to ``librosa.load``.
	        n_mfcc: Number of Mel-frequency cepstral coefficients to compute.
	        silence_threshold: Absolute amplitude below which a sample counts
	            as silence when computing ``pause_ratio``.

	    Returns:
	        A ``(mfcc, prosody)`` tuple. ``mfcc`` is the array returned by
	        ``librosa.feature.mfcc``. ``prosody`` maps:

	        * ``"pitch"``: mean of the ``piptrack`` pitch bins whose magnitude
	          exceeds the median magnitude (``0.0`` if none qualify). This is
	          a rough heuristic, not a fundamental-frequency tracker.
	        * ``"energy"``: mean RMS value of the signal.
	        * ``"tempo"``: tempo estimate from ``librosa.beat.beat_track``.
	        * ``"pause_ratio"``: fraction of samples whose absolute amplitude
	          is below ``silence_threshold`` (``0.0`` for an empty signal).
	    """
	    signal, sr = librosa.load(str(wav_path), sr=sample_rate)

	    # Spectral envelope features.
	    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc)

	    # Heuristic pitch: average the pitch candidates that are louder than
	    # the median magnitude across all time/frequency bins.
	    pitches, magnitudes = librosa.piptrack(y=signal, sr=sr)
	    loud_pitches = pitches[magnitudes > np.median(magnitudes)]
	    pitch = float(np.mean(loud_pitches)) if loud_pitches.size > 0 else 0.0

	    energy = float(np.mean(librosa.feature.rms(y=signal)))

	    # beat_track's tempo is a plain float in some librosa releases and an
	    # ndarray in others. Flattening first means scalars, 0-d arrays and
	    # 1-element arrays are all handled, where indexing a 0-d array with
	    # [0] would raise IndexError.
	    tempo, _ = librosa.beat.beat_track(y=signal, sr=sr)
	    tempo_values = np.asarray(tempo, dtype=float).reshape(-1)
	    tempo_bpm = float(tempo_values[0]) if tempo_values.size > 0 else 0.0

	    # Share of samples quieter than the silence threshold; guard against
	    # division by zero on an empty signal.
	    if len(signal) > 0:
	        quiet_samples = np.sum(np.abs(signal) < silence_threshold)
	        pause_ratio = float(quiet_samples / len(signal))
	    else:
	        pause_ratio = 0.0

	    prosody = {
	        "pitch": pitch,
	        "energy": energy,
	        "tempo": tempo_bpm,
	        "pause_ratio": pause_ratio,
	    }

	    return mfcc, prosody