Spaces:

Hariharan00
/

voice-detection-api

Sleeping

voice-detection-api / ml /explanation.py

Hariharan S

Initial deployment

6822466 3 months ago

2.52 kB

	try:
	import numpy as np
	except ImportError:
	np = None

	def generate_explanation(features, ai_probability: float) -> str:
	    """
	    Generate a human-readable explanation based on audio features.

	    The probability selects one of four bands (strong AI, weak AI, strong
	    human, weak human). In the two "strong" bands up to two feature-based
	    reasons are reported; if no heuristic fires, a generic sentence for
	    that band is returned instead.

	    Args:
	        features: Extracted feature vector. May be a NumPy array or any
	            plain Python sequence supporting slicing (list, tuple, ...).
	            Layout, per the comments carried over from
	            feature_extraction.py (not visible here):
	                0-12:  MFCC mean
	                13-25: MFCC std (13 values)
	                31:    Flux std
	                33:    Pitch std
	                34:    ZCR mean (not used by this function)
	        ai_probability: Model's predicted probability of being AI.

	    Returns:
	        Explanation string.

	    Raises:
	        IndexError: If ``features`` is too short to contain index 33.
	    """
	    # Read the scalar features first so a too-short vector fails with
	    # IndexError before any averaging is attempted.
	    flux_std = features[31]
	    pitch_std = features[33]

	    # Pick the averaging strategy by capability rather than by container
	    # type: array-likes expose .mean(); lists, tuples and other plain
	    # sequences (Simulation Mode) are averaged by hand.
	    mfcc_std_values = features[13:26]
	    if hasattr(mfcc_std_values, "mean"):
	        mfcc_std_avg = mfcc_std_values.mean()
	    else:
	        mfcc_std_avg = sum(mfcc_std_values) / len(mfcc_std_values)

	    # Thresholds are heuristic, based on typical audio characteristics:
	    # AI voices often have lower variance (smoother) than human voices.
	    if ai_probability > 0.6:
	        # High confidence AI
	        checks = (
	            (mfcc_std_avg < 20, "over-smoothed spectral formants"),
	            (pitch_std < 20, "unnatural pitch uniformity"),
	            (flux_std < 5, "synthetic spectral onset patterns"),
	        )
	        fallback = "Strong synthetic audio artifacts detected"
	    elif ai_probability > 0.5:
	        # Weak confidence AI
	        return "Subtle synthetic irregularities observed in spectral features"
	    elif ai_probability < 0.4:
	        # High confidence Human
	        checks = (
	            (pitch_std > 30, "natural pitch modulation"),
	            (mfcc_std_avg > 30, "organic vocal tract variance"),
	            (flux_std > 10, "dynamic spectral flux"),
	        )
	        fallback = "Natural vocal dynamics and breathing patterns detected"
	    else:
	        # Weak confidence Human (0.4 - 0.5)
	        return "Audio features consistent with human speech, minor anomalies"

	    reasons = [label for triggered, label in checks if triggered]
	    if reasons:
	        # Report at most the first two reasons to keep the message short.
	        return f"Detected {', '.join(reasons[:2])}"
	    return fallback