Spaces:
Sleeping
Sleeping
File size: 4,866 Bytes
c5c9261 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
import torch
import torch.nn.functional as F
import numpy as np
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
from app.config import settings
import logging
import gc
# Setup logging.
# NOTE(review): basicConfig runs at import time and configures the process-wide
# root logger at INFO level; per the stdlib docs it does nothing if the root
# logger already has handlers configured.
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module; used by VoiceDetector below.
logger = logging.getLogger(__name__)
class VoiceDetector:
    """Singleton wrapper around a Hugging Face audio-classification model.

    The first successful construction loads the feature extractor and the
    classifier named by ``settings.MODEL_NAME``; every later ``VoiceDetector()``
    call returns that same instance. Inference always runs on the CPU.
    """

    # The one shared instance; stays None until a model load has succeeded.
    _instance = None

    # Predictions below this confidence are logged as uncertain.
    LOW_CONFIDENCE_THRESHOLD = 0.6

    def __new__(cls):
        """Return the shared instance, creating it and loading the model once.

        Raises:
            RuntimeError: If the model cannot be loaded. In that case no
                instance is cached, so the next construction retries.
        """
        if cls._instance is None:
            instance = super().__new__(cls)
            instance.model = None
            instance.feature_extractor = None
            # Force CPU to save memory on free tier
            instance.device = "cpu"
            instance.load_model()
            # Publish the singleton only after the load succeeded, so a failed
            # load never leaves a half-initialised instance cached.
            cls._instance = instance
        return cls._instance

    def load_model(self):
        """Load the feature extractor and classifier for ``settings.MODEL_NAME``.

        ``self.feature_extractor`` and ``self.model`` are assigned together and
        only once everything (including the device move and ``eval()``) has
        succeeded, so a failure leaves the previous state untouched.

        Raises:
            RuntimeError: If any step of loading fails; the original exception
                is chained as ``__cause__``.
        """
        try:
            logger.info(
                "Loading model %s on %s...", settings.MODEL_NAME, self.device
            )

            # Clear memory before loading
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

            # Load with memory optimization
            feature_extractor = AutoFeatureExtractor.from_pretrained(
                settings.MODEL_NAME
            )
            model = AutoModelForAudioClassification.from_pretrained(
                settings.MODEL_NAME,
                low_cpu_mem_usage=True,  # Memory optimization
                torch_dtype=torch.float32,
            )
            model.to(self.device)
            model.eval()

            self.feature_extractor = feature_extractor
            self.model = model

            # Clear unused memory
            gc.collect()
            logger.info("Model loaded successfully.")
        except Exception as e:
            logger.exception("Failed to load model: %s", e)
            raise RuntimeError(f"Failed to load model: {e}") from e

    def calibrate_confidence(self, probs, temperature=1.5):
        """Apply temperature scaling to a tensor of probabilities.

        The probabilities are converted back to log space, divided by
        ``temperature`` and re-normalised with a softmax over the last
        dimension.

        Temperature > 1.0 makes predictions less confident.
        Temperature < 1.0 makes predictions more confident.
        Temperature == 1.0 leaves a normalised distribution unchanged
        (up to the small epsilon used to avoid ``log(0)``).

        Args:
            probs: Tensor of probabilities; classes on the last dimension.
            temperature: Positive scaling factor.

        Returns:
            Tensor of calibrated probabilities with the same shape as ``probs``.

        Raises:
            ValueError: If ``temperature`` is not strictly positive.
        """
        if temperature <= 0:
            # Dividing by zero or a negative value would yield NaNs or invert
            # the class ranking instead of calibrating it.
            raise ValueError("temperature must be positive")

        # Convert back to logits; the epsilon guards against log(0).
        logits = torch.log(probs + 1e-10)
        return F.softmax(logits / temperature, dim=-1)

    def predict(self, audio_array):
        """Classify one audio clip as human or AI-generated.

        Args:
            audio_array: Audio samples handed to the feature extractor together
                with ``settings.SAMPLE_RATE``. Presumably a single waveform;
                the ``.item()`` call below requires exactly one prediction.

        Returns:
            A ``(result_label, confidence)`` tuple where ``result_label`` is
            ``"AI_GENERATED"`` or ``"HUMAN"`` and ``confidence`` is the raw
            softmax probability (float) of the predicted class.

        Raises:
            RuntimeError: If the model cannot be loaded or inference fails;
                the original exception is chained as ``__cause__``.
        """
        # Reload lazily if either half of the pipeline is missing.
        if self.model is None or self.feature_extractor is None:
            self.load_model()

        try:
            # Prepare input
            inputs = self.feature_extractor(
                audio_array,
                sampling_rate=settings.SAMPLE_RATE,
                return_tensors="pt",
                padding=True,
            )
            inputs = {key: val.to(self.device) for key, val in inputs.items()}

            # Inference
            with torch.no_grad():
                logits = self.model(**inputs).logits

            # Use raw softmax for the base confidence
            probs = F.softmax(logits, dim=-1)

            # Get model labels from config
            id2label = self.model.config.id2label

            # Get the predicted class index
            pred_idx = torch.argmax(probs, dim=-1).item()
            label = str(id2label[pred_idx]).lower()
            confidence = probs[0][pred_idx].item()

            logger.info(
                "Model Raw Output: Index=%s, Label=%s, Confidence=%.4f",
                pred_idx,
                label,
                confidence,
            )

            # Robust Mapping Logic
            # mo-thecreator/Deepfake-audio-detection usually uses:
            # 0 -> REAL, 1 -> FAKE
            if "fake" in label or "spoof" in label:
                is_ai = True
            elif "real" in label or "bonafide" in label:
                is_ai = False
            else:
                # Unrecognised label text: fall back to the index convention.
                is_ai = pred_idx == 1

            result_label = "AI_GENERATED" if is_ai else "HUMAN"

            # Stability check: below the threshold the model is essentially
            # guessing, so flag it in the log (the result is still returned).
            if confidence < self.LOW_CONFIDENCE_THRESHOLD:
                logger.info(
                    "Low confidence (%.4f) detected. Result might be uncertain.",
                    confidence,
                )

            return result_label, confidence
        except Exception as e:
            logger.exception("Prediction error: %s", e)
            raise RuntimeError(f"Prediction failed: {e}") from e
# Module-level shared detector. Constructing it here loads the model at import
# time (VoiceDetector.__new__ calls load_model), so importing this module raises
# RuntimeError if the model cannot be loaded.
voice_detector = VoiceDetector()
|