# VoiceGuard-API / src/pipeline/detector.py
# (Refactor: project structure unified into the AI Voice Detection System.)
import yaml
import numpy as np  # NOTE(review): not referenced in this file's visible code — may be used elsewhere; confirm before removing
import os
import sys
# Add src to path if needed, or rely on root execution.
# NOTE: this append must run BEFORE the `src.*` imports below resolve.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
from src.utils.audio import load_audio, to_tensor
from src.components.feature_extractor import FeatureExtractor
from src.components.rule_based import RuleBasedDetector
from src.components.model_wrapper import ModelWrapper
from src.utils.compatibility import apply_patches
# Apply dependency patches immediately, before any component is instantiated.
apply_patches()
class VoicePipeline:
    """End-to-end voice classification pipeline.

    Combines a rule-based detector and an ML model into a weighted
    ensemble that labels audio as ``AI_GENERATED`` or ``HUMAN``.
    """

    def __init__(self, config_path: str = "config/hparams.yaml"):
        """Load configuration and initialize all pipeline components.

        Args:
            config_path: Path to the YAML hyper-parameter file. A missing
                or empty file falls back to empty defaults.
        """
        self.config = self._load_config(config_path)
        # Initialize components
        self.feature_extractor = FeatureExtractor()
        self.rule_detector = RuleBasedDetector(self.config.get("rules", {}))
        self.model_wrapper = ModelWrapper(self.config.get("model", {}))
        # VAD settings live in a separate top-level config section, so
        # inject them into the model wrapper before loading the VAD model.
        self.model_wrapper.config["vad"] = self.config.get("vad", {})
        self.model_wrapper.load_vad()  # Ensure VAD loaded

    def _load_config(self, path: str) -> dict:
        """Read a YAML config file, returning {} when missing or empty.

        ``yaml.safe_load`` returns ``None`` for an empty file; coerce that
        to an empty dict so downstream ``.get(...)`` calls never crash.
        """
        if not os.path.exists(path):
            # Fallback default if config missing
            print(f"Config not found at {path}, using defaults.")
            return {}
        with open(path, 'r') as f:
            return yaml.safe_load(f) or {}

    def process(self, audio_bytes: bytes) -> dict:
        """
        Process audio bytes and return classification result.

        Returns a dict with ``classification``, ``confidenceScore``,
        ``explanation`` and per-component ``details``, or
        ``{"error": ...}`` if any stage fails.
        """
        try:
            # 1. Load Audio
            audio_array, sr = load_audio(audio_bytes)
            # 2. Extract Features
            features = self.feature_extractor.extract(audio_array, sr)
            # 3. Rule-Based Check
            rule_label, rule_prob, rule_expl = self.rule_detector.predict(features)
            # 4. Model Prediction (convert waveform to tensor for model)
            audio_tensor = to_tensor(audio_array)
            model_prob = self.model_wrapper.predict(audio_tensor, sr)
            # 5. Ensemble: weighted average of model probability and a
            # fixed-confidence mapping of the categorical rule verdict
            # (AI_GENERATED -> 0.9, HUMAN -> 0.1), so both live on a
            # comparable 0-1 scale. Weights come from config.
            pipeline_cfg = self.config.get("pipeline", {})
            weights = pipeline_cfg.get("weights", {})
            w_model = weights.get("model", 0.7)
            w_rules = weights.get("rules", 0.3)
            rule_score = 0.9 if rule_label == "AI_GENERATED" else 0.1
            combined_score = (model_prob * w_model) + (rule_score * w_rules)
            # Threshold on the combined score decides the final label.
            thresh_ai = pipeline_cfg.get("thresholds", {}).get("ai_generated", 0.70)
            if combined_score >= thresh_ai:
                final_label = "AI_GENERATED"
                explanation = f"Detected synthetic patterns (Model: {model_prob:.2f}, Rules: {rule_label})"
            else:
                final_label = "HUMAN"
                explanation = f"Natural speech patterns (Model: {model_prob:.2f}, Rules: {rule_label})"
            return {
                "classification": final_label,
                "confidenceScore": float(combined_score),
                "explanation": explanation,
                "details": {
                    "model_probability": float(model_prob),
                    "rule_classification": rule_label,
                    "features": features,  # Optional: return features for debug
                },
            }
        except Exception as e:
            # Boundary handler: surface the failure to the API caller
            # instead of propagating, but keep the traceback for logs.
            import traceback
            traceback.print_exc()
            return {
                "error": str(e)
            }