# NOTE(review): this file was scraped from a Hugging Face Space page;
# the "Spaces: Running" status banner that preceded the code has been
# converted to this comment so the module parses.
import io

import librosa
import numpy as np
import soundfile as sf
import torch
from transformers import pipeline
class AudioDetector:
    """Deepfake-audio detector backed by a weighted ensemble of
    Hugging Face ``audio-classification`` models.

    Each configured model votes with an "AI probability"; votes are
    combined as a weighted mean and thresholded at 0.5. Models that
    fail to load are skipped, so the ensemble degrades gracefully to
    whatever subset loaded successfully.
    """

    # Labels (lower-cased) that a model may use to mean "synthetic audio".
    # If a model reports none of these, its AI probability is taken as 0.0.
    _AI_LABELS = ("fake", "spoof", "aivoice", "artificial", "generated")

    def __init__(self):
        """Load every model in the ensemble; failures are logged and skipped."""
        print("--- [AudioDetector] Initializing 4-Model Ensemble System... ---")
        # The Committee of Experts
        self.models_config = [
            {
                "id": "MelodyMachine/Deepfake-audio-detection-V2",
                "name": "MelodyMachine",
                "weight": 1.0,
            },
            {
                "id": "mo-thecreator/Deepfake-audio-detection",
                "name": "Mo-Creator",
                "weight": 1.0,
            },
            {
                "id": "Hemgg/Deepfake-audio-detection",
                "name": "Hemgg",
                "weight": 1.0,
            },
            {
                "id": "Gustking/wav2vec2-large-xlsr-deepfake-audio-classification",
                "name": "Gustking-XLSR",
                "weight": 1.2,  # Higher weight for the large model
            },
        ]
        self.pipelines = []
        for cfg in self.models_config:
            try:
                print(f"--- Loading Model: {cfg['name']} ({cfg['id']}) ---")
                # Load pipeline (downloads weights on first use).
                p = pipeline("audio-classification", model=cfg['id'])
                self.pipelines.append({"pipe": p, "config": cfg})
                print(f"[+] Loaded {cfg['name']}")
            except Exception as e:
                # Best effort: a missing model shrinks the ensemble
                # rather than aborting initialization.
                print(f"[-] Failed to load {cfg['name']}: {e}")
        if not self.pipelines:
            print("CRITICAL: No models could be loaded. Ensemble is empty.")

    def analyze_audio(self, audio_data: bytes, language: str) -> dict:
        """Classify raw audio bytes as AI-generated or human.

        Args:
            audio_data: Encoded audio (any format librosa can decode).
            language: Currently unused; kept for interface compatibility
                with callers that pass it.

        Returns:
            Dict with ``classification`` ("AI_GENERATED" or "HUMAN"),
            ``confidenceScore`` (probability of the chosen class, 0-1),
            and ``explanation``. On any internal failure the fail-safe
            verdict is "HUMAN" with confidence 0.0 and an ``error`` key.
        """
        try:
            # 1. Load Audio — resample to 16 kHz mono, the rate the
            # wav2vec2-style detectors expect.
            buffer = io.BytesIO(audio_data)
            y, sr = librosa.load(buffer, sr=16000)

            # 2. Extract Features (For Explanation Context Only)
            # We preserve this for generating professional justifications,
            # but the DECISION is purely model-based.
            centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))

            # 3. Running The Ensemble
            votes = []
            total_score = 0.0
            total_weight = 0.0
            print(f"\n--- Running Ensemble Inference on {len(self.pipelines)} models ---")
            for item in self.pipelines:
                p = item['pipe']
                cfg = item['config']
                weight = cfg['weight']
                try:
                    # Run Inference; top_k=None returns scores for all labels.
                    results = p(y, top_k=None)

                    # Parsing Result for AI Probability: take the score of
                    # the first label that means "Fake".
                    ai_score = 0.0
                    for r in results:
                        if r['label'].lower().strip() in self._AI_LABELS:
                            ai_score = r['score']
                            break
                    # Note: If no AI label is found (e.g. only 'real'/'human'),
                    # ai_score stays 0.0 (Human). This logic covers
                    # {0: 'real', 1: 'fake'} where 'fake' is present.
                    verdict = "AI" if ai_score > 0.5 else "HUMAN"

                    # Weighted contribution
                    votes.append({
                        "name": cfg['name'],
                        "ai_prob": ai_score,
                        "verdict": verdict,
                    })
                    total_score += ai_score * weight
                    total_weight += weight
                    print(f" > {cfg['name']}: {ai_score:.4f} ({verdict})")
                except Exception as e:
                    # One failing model must not sink the whole ensemble.
                    print(f"Error inferencing {cfg['name']}: {e}")

            # 4. Final Aggregation — weighted mean of per-model AI scores.
            if total_weight > 0:
                final_ensemble_score = total_score / total_weight
            else:
                final_ensemble_score = 0.0  # Fail safe: no model produced a score
            is_ai = final_ensemble_score > 0.5
            final_classification = "AI_GENERATED" if is_ai else "HUMAN"
            # Confidence is the probability of the winning class.
            class_confidence = final_ensemble_score if is_ai else (1.0 - final_ensemble_score)
            print(f"--- Final Ensemble Score: {final_ensemble_score:.4f} => {final_classification} (Conf: {class_confidence:.2f}) ---\n")

            # 5. Construct Explanation
            # "3 out of 4 models detected deepfake artifacts..."
            ai_votes_count = sum(1 for v in votes if v['verdict'] == 'AI')
            total_models = len(votes)
            explanations = []
            explanations.append(f"Ensemble Analysis: {ai_votes_count}/{total_models} models flagged this audio as AI-generated.")
            explanations.append(f"Aggregated Score: {final_ensemble_score*100:.1f}%.")
            if is_ai:
                # Heuristic narrative only: 2 kHz centroid is an arbitrary
                # cut for the explanation text, not part of the decision.
                if centroid > 2000:
                    explanations.append("High-frequency spectral artifacts consistent with neural vocoders detected.")
                else:
                    explanations.append("Deep learning pattern matching identified non-biological features.")
            else:
                explanations.append("Acoustic analysis confirms natural vocal resonance and organic production.")
            final_explanation = " ".join(explanations)

            return {
                "classification": final_classification,
                # Return logical confidence (prob of the chosen class)
                "confidenceScore": round(float(class_confidence), 2),
                "explanation": final_explanation,
            }
        except Exception as e:
            print(f"Analysis Failed: {e}")
            return {
                "classification": "HUMAN",  # Fail safe
                "confidenceScore": 0.0,
                "error": str(e),
                "explanation": "Analysis failed due to internal error.",
            }
# Global Instance: module-level singleton; loading all ensemble models
# happens once, at import time.
detector = AudioDetector()