# voice_detection/real_detector.py
# Commit 4f7e968 (ranar110): update response format to match the
# Multi-Language problem statement (adds the "classification" key).
import os
import warnings
# Silence noisy library warnings (transformers/librosa emit many deprecation notices).
warnings.filterwarnings("ignore")
# Process-wide cache: holds "model" and "feature_extractor" after the first load.
MODEL_CACHE = {}
MODEL_NAME = "MelodyMachine/Deepfake-audio-detection" # Hugging Face audio-deepfake classifier
def load_model():
    """Return (model, feature_extractor), loading and caching them on first use.

    On any failure (missing dependency, no network, out of memory) a
    message is printed and (None, None) is returned so callers can fall
    back gracefully.
    """
    if MODEL_CACHE.get("model") is not None:
        # Already loaded in this process — reuse the cached pair.
        return MODEL_CACHE["model"], MODEL_CACHE["feature_extractor"]

    print(f"Loading model: {MODEL_NAME}...")
    try:
        # Imported lazily so importing this module stays fast
        # (avoids startup timeout).
        from transformers import AutoFeatureExtractor, AutoModelForAudioClassification

        extractor = AutoFeatureExtractor.from_pretrained(MODEL_NAME)
        # low_cpu_mem_usage keeps peak RAM down while weights stream in.
        classifier = AutoModelForAudioClassification.from_pretrained(
            MODEL_NAME,
            low_cpu_mem_usage=True
        )
    except Exception as e:
        print(f"Error loading model: {e}")
        return None, None

    MODEL_CACHE["feature_extractor"] = extractor
    MODEL_CACHE["model"] = classifier
    print("Model loaded successfully.")
    return MODEL_CACHE["model"], MODEL_CACHE["feature_extractor"]
def preprocess_audio(file_path, max_duration=10):
    """Decode *file_path* into a mono 16 kHz waveform, capped at *max_duration* s.

    Returns (samples, sample_rate) on success, or (None, None) when the
    file cannot be decoded (or librosa is unavailable).
    """
    try:
        # Imported lazily to keep module import lightweight.
        import librosa
        import numpy as np

        # 16 kHz mono is what Wav2Vec2-style feature extractors expect.
        samples, rate = librosa.load(file_path, sr=16000, duration=max_duration)
    except Exception as exc:
        print(f"Error preprocessing audio: {exc}")
        return None, None
    return samples, rate
def analyze_audio_real(metadata):
    """Classify the audio referenced by *metadata* as human or AI-generated.

    Runs real model inference (Wav2Vec2-style audio classifier) instead of
    mock logic.

    Parameters
    ----------
    metadata : dict
        Must contain 'file_path'; may contain 'duration_seconds', used to
        bound the reported segment end time.

    Returns
    -------
    dict
        On success: is_human, classification ("HUMAN"/"AI_GENERATED"),
        confidence, detected_language, model_used, raw_label, segments.
        On failure: error, is_human=None, confidence=0.0.
    """
    # Lazy import so importing this module never pulls torch eagerly.
    import torch

    file_path = metadata.get('file_path')
    if not file_path or not os.path.exists(file_path):
        return {
            "error": "File not found",
            "is_human": None,
            "confidence": 0.0
        }

    model, feature_extractor = load_model()
    if not model or not feature_extractor:
        # Fallback if the model fails to load (e.g. no internet/memory).
        return {
            "error": "Model failed to load",
            "is_human": None,
            "confidence": 0.0
        }

    try:
        audio, sr = preprocess_audio(file_path)
        if audio is None:
            # Bug fix: include "confidence" so every error response shares
            # the same shape as the other failure paths above.
            return {"error": "Invalid audio file", "is_human": None, "confidence": 0.0}

        inputs = feature_extractor(audio, sampling_rate=sr, return_tensors="pt")

        # Inference without gradient tracking (saves memory and time).
        with torch.no_grad():
            logits = model(**inputs).logits

        # Softmax turns logits into class probabilities.
        probs = torch.nn.functional.softmax(logits, dim=-1)
        predicted_id = torch.argmax(logits, dim=-1).item()
        confidence = probs[0][predicted_id].item()

        predicted_label = model.config.id2label[predicted_id]
        # Labels containing "real" or "bona(-fide)" denote genuine human speech.
        is_human = "real" in predicted_label.lower() or "bona" in predicted_label.lower()

        return {
            "is_human": is_human,
            "classification": "HUMAN" if is_human else "AI_GENERATED",
            "confidence": round(confidence, 4),
            "detected_language": "analyzed",
            "model_used": MODEL_NAME,
            "raw_label": predicted_label,
            "segments": [
                # Only the first 10 s of audio is analyzed (preprocess cap).
                {"start": 0.0, "end": min(metadata.get('duration_seconds', 0), 10.0), "label": predicted_label}
            ]
        }
    except Exception as e:
        print(f"Inference error: {e}")
        return {
            "error": str(e),
            "is_human": None,
            "confidence": 0.0
        }