Spaces:

DhruvB1906
/

StrokeMitra-API

Sleeping

App Files Files Community

StrokeMitra-API / src /features /formant_extractor.py

DhruvB1906

Upload folder using huggingface_hub

4e9a3bc verified 2 months ago

raw

history blame contribute delete

4.87 kB

	"""Formant feature extraction using Praat (parselmouth)."""

	import logging
	import numpy as np
	import parselmouth

	from src.features.schemas import FormantFeatures

	# Module-level logger, named after this module via __name__.
	logger = logging.getLogger(__name__)


	class FormantExtractor:
	    """Extract formant features (F1, F2, F3) using Praat via parselmouth.

	    The extractor runs Praat's Burg formant analysis on a waveform, samples
	    the first three formant tracks on a regular time grid, and summarises
	    them (mean, standard deviation, a simplified vowel-space-area figure and
	    a dispersion figure) into a ``FormantFeatures`` object.
	    """

	    def __init__(
	        self,
	        max_num_formants: int = 5,
	        ceiling_hz: float = 5500,
	        window_length: float = 0.025,
	        pre_emphasis: float = 0.97,
	        time_step: float = 0.01,
	        pre_emphasis_from: float = 50.0,
	    ):
	        """Initialize formant extractor.

	        Args:
	            max_num_formants: Passed to Praat as ``max_number_of_formants``.
	            ceiling_hz: Passed to Praat as ``maximum_formant``.
	            window_length: Passed to Praat as ``window_length``.
	            pre_emphasis: Stored on the instance for backward compatibility
	                only. It is NOT forwarded to Praat: the Burg analysis takes
	                a pre-emphasis start frequency (``pre_emphasis_from``), not
	                a filter coefficient.
	            time_step: Step used both for the Praat analysis and for the
	                grid on which the formant tracks are sampled, so the two
	                can never drift apart.
	            pre_emphasis_from: Passed to Praat as ``pre_emphasis_from``.

	        Raises:
	            ValueError: If ``time_step`` is not strictly positive.
	        """
	        if time_step <= 0:
	            raise ValueError(f"time_step must be positive, got {time_step}")

	        self.max_num_formants = max_num_formants
	        self.ceiling_hz = ceiling_hz
	        self.window_length = window_length
	        self.pre_emphasis = pre_emphasis
	        self.time_step = time_step
	        self.pre_emphasis_from = pre_emphasis_from

	    def extract(self, waveform: np.ndarray, sr: int) -> "FormantFeatures":
	        """
	        Extract formant features.

	        Args:
	            waveform: Audio waveform
	            sr: Sample rate

	        Returns:
	            FormantFeatures with F1, F2, F3 statistics. If anything in the
	            extraction raises, the failure is logged with its traceback and
	            the fixed placeholder values from ``_default_formants`` are
	            returned instead of propagating the error.
	        """
	        logger.debug("Extracting formants using Praat")

	        try:
	            # Create Praat Sound object
	            sound = parselmouth.Sound(waveform, sampling_frequency=sr)

	            # Extract formants
	            formants = sound.to_formant_burg(
	                time_step=self.time_step,
	                max_number_of_formants=self.max_num_formants,
	                maximum_formant=self.ceiling_hz,
	                window_length=self.window_length,
	                pre_emphasis_from=self.pre_emphasis_from,
	            )

	            # Sample F1, F2, F3 on a regular grid over the sound. Each
	            # contour is filtered independently, so the three arrays may
	            # have different lengths and are not index-aligned.
	            times = np.arange(0, sound.duration, self.time_step)
	            f1_contour, f2_contour, f3_contour = [
	                self._clean_contour(
	                    [formants.get_value_at_time(number, t) for t in times]
	                )
	                for number in (1, 2, 3)
	            ]

	            # Compute statistics
	            f1_mean = float(np.mean(f1_contour))
	            f1_std = float(np.std(f1_contour))
	            f2_mean = float(np.mean(f2_contour))
	            f2_std = float(np.std(f2_contour))
	            f3_mean = float(np.mean(f3_contour))
	            f3_std = float(np.std(f3_contour))

	            # Compute vowel space area (VSA) - simplified using F1 and F2
	            vowel_space_area = self._compute_vsa(f1_contour, f2_contour)

	            # NOTE(review): this is the average of the three formant means,
	            # not the average spacing between adjacent formants that
	            # "formant dispersion" usually denotes. Left unchanged because
	            # downstream consumers may depend on the current values.
	            formant_dispersion = float(np.mean([f1_mean, f2_mean, f3_mean]))

	            logger.info(
	                "Formants extracted: F1=%.0fHz, F2=%.0fHz, VSA=%.0f",
	                f1_mean,
	                f2_mean,
	                vowel_space_area,
	            )

	            return FormantFeatures(
	                f1_contour=f1_contour,
	                f2_contour=f2_contour,
	                f3_contour=f3_contour,
	                f1_mean=f1_mean,
	                f1_std=f1_std,
	                f2_mean=f2_mean,
	                f2_std=f2_std,
	                f3_mean=f3_mean,
	                f3_std=f3_std,
	                vowel_space_area=vowel_space_area,
	                formant_dispersion=formant_dispersion,
	            )

	        except Exception as e:
	            # Deliberate best-effort boundary: callers always get a
	            # FormantFeatures back. logger.exception keeps the traceback so
	            # the substitution of placeholder values is diagnosable.
	            logger.exception("Formant extraction failed: %s", e)
	            return self._default_formants()

	    @staticmethod
	    def _clean_contour(samples) -> np.ndarray:
	        """Drop undefined samples from one formant track.

	        Args:
	            samples: Iterable of sampled formant values; undefined samples
	                may be NaN or None.

	        Returns:
	            A float array holding only the defined samples, or
	            ``np.array([0.0])`` when no sample is defined.
	        """
	        # With a float dtype NumPy maps None to NaN, so a single NaN mask
	        # covers both ways an undefined sample can be represented.
	        values = np.array(list(samples), dtype=np.float64)
	        defined = values[~np.isnan(values)]
	        return defined if defined.size else np.array([0.0])

	    def _compute_vsa(self, f1: np.ndarray, f2: np.ndarray) -> float:
	        """Compute a simplified vowel space area from F1 and F2 samples.

	        Args:
	            f1: F1 samples.
	            f2: F2 samples.

	        Returns:
	            ``|(P25(f1) - P75(f1)) * (P50(f2) - P25(f2))| / 2``, or 0.0 when
	            either input has fewer than three samples.
	        """
	        if len(f1) < 3 or len(f2) < 3:
	            return 0.0

	        # Percentiles stand in for corner vowels (simplified).
	        f1_low, f1_high = np.percentile(f1, [25, 75])
	        f2_low, f2_mid = np.percentile(f2, [25, 50])

	        # Half base times height: F1 interquartile range as one side,
	        # F2 median-minus-lower-quartile as the other.
	        area = abs((f1_low - f1_high) * (f2_mid - f2_low) / 2.0)

	        return float(area)

	    def _default_formants(self) -> "FormantFeatures":
	        """Return fixed placeholder formant features used on failure.

	        The placeholders are F1=500, F2=1500, F3=2500 with zero standard
	        deviation, zero vowel space area and a dispersion of 1500 (the mean
	        of the three placeholder formants).
	        """
	        return FormantFeatures(
	            f1_contour=np.array([500.0]),
	            f2_contour=np.array([1500.0]),
	            f3_contour=np.array([2500.0]),
	            f1_mean=500.0,
	            f1_std=0.0,
	            f2_mean=1500.0,
	            f2_std=0.0,
	            f3_mean=2500.0,
	            f3_std=0.0,
	            vowel_space_area=0.0,
	            formant_dispersion=1500.0,
	        )