# AI Generated Voice Detection API — FastAPI service (Hugging Face Space)
| import base64 | |
| import io | |
| import numpy as np | |
| import librosa | |
| from fastapi import FastAPI, Header, HTTPException, Depends | |
| from pydantic import BaseModel | |
| from transformers import pipeline | |
# Shared secret expected in the X-API-Key request header.
# SECURITY NOTE(review): hard-coded credential checked into source —
# move to an environment variable / secret store.
API_KEY = "SynxsOG"

# Languages the service advertises support for.
SUPPORTED_LANGUAGES = {"Tamil", "English", "Hindi", "Malayalam", "Telugu"}

# Hugging Face checkpoint used for real-vs-fake audio classification.
MODEL_NAME = "MelodyMachine/Deepfake-audio-detection-V2"

# Loaded once at import time so every request reuses the same model instance.
detector = pipeline("audio-classification", model=MODEL_NAME)

app = FastAPI(title="AI Generated Voice Detection API")
class VoiceRequest(BaseModel):
    """Request body for the voice-detection endpoint."""

    language: str     # spoken language of the clip (cf. SUPPORTED_LANGUAGES)
    audioFormat: str  # declared audio container/codec — not used by the visible code
    audioBase64: str  # base64-encoded audio payload; may carry a "data:...;base64," prefix
def verify_api_key(x_api_key: str = Header(None)):
    """FastAPI dependency: accept the request only when the X-API-Key header
    matches the configured API_KEY; otherwise reject with HTTP 401."""
    if x_api_key == API_KEY:
        return x_api_key
    raise HTTPException(status_code=401, detail="Invalid API key")
def load_audio(base64_audio: str):
    """Decode a base64 audio string and load it as a 16 kHz waveform.

    Accepts either a bare base64 payload or a data URI
    ("data:audio/...;base64,..."); anything before the last comma is dropped.
    Returns (samples, sample_rate).
    """
    payload = base64_audio.split(",")[-1]  # strip any data-URI prefix
    raw_bytes = base64.b64decode(payload)
    samples, sample_rate = librosa.load(io.BytesIO(raw_bytes), sr=16000)
    return samples, sample_rate
def get_forensics(y, sr):
    """Compute simple forensic cues of natural vocal variation.

    Returns a dict with the standard deviation of the YIN pitch track
    ("pitch_std") and of the frame-wise RMS energy ("rms_std"); higher
    values suggest the natural variability of a human speaker.
    """
    pitch_track = librosa.yin(y, fmin=50, fmax=300)
    energy_frames = librosa.feature.rms(y=y)[0]
    return {
        "pitch_std": np.nanstd(pitch_track),  # nan-safe: YIN may emit NaNs
        "rms_std": np.std(energy_frames),
    }
def hybrid_decision(model_label, model_score, forensic_data):
    """Fuse the classifier output with forensic cues into a final verdict.

    Args:
        model_label: top pipeline label; "LABEL_1" is treated as the AI/fake
            class by the surrounding code (any other label as human).
        model_score: the pipeline's confidence in that label (0..1).
        forensic_data: dict with "pitch_std" and "rms_std" from get_forensics.

    Returns:
        (classification, confidence) — classification is "AI_GENERATED" or
        "HUMAN"; confidence is the probability of the returned class,
        rounded to 4 decimals.
    """
    # Normalize the model output to P(AI).
    if model_label == "LABEL_1":
        base_prob = model_score        # model already reports P(AI)
    else:
        base_prob = 1 - model_score    # convert P(human) into P(AI)

    # Natural variation (pitch / loudness variability) lowers AI suspicion.
    adjustment = 0
    if forensic_data["pitch_std"] > 15:
        adjustment -= 0.10
    if forensic_data["rms_std"] > 0.01:
        adjustment -= 0.05

    # Clamp away from 0/1 so neither class is ever reported as certain.
    final_ai_prob = max(0.01, min(base_prob + adjustment, 0.99))

    # BUG FIX: the original tested `final_ai_prob < 0.5` for AI_GENERATED,
    # inverting every verdict (a 0.99 AI probability came back "HUMAN").
    if final_ai_prob >= 0.5:
        return "AI_GENERATED", round(final_ai_prob, 4)
    return "HUMAN", round(1 - final_ai_prob, 4)
# NOTE(review): the original had no route decorator, so this endpoint was
# never registered with `app`. Path chosen here — confirm against the API spec.
@app.post("/detect")
async def detect_voice(data: VoiceRequest, api_key: str = Depends(verify_api_key)):
    """Classify a base64-encoded audio clip as AI-generated or human.

    Requires a valid X-API-Key header (enforced by verify_api_key).
    Returns a JSON object with status, classification, language,
    confidenceScore, and a short explanation; failures are reported as
    {"status": "error", "message": ...} rather than an HTTP 500.
    """
    # SUPPORTED_LANGUAGES was defined at module level but never checked;
    # enforce it here, using the same error response shape as below.
    if data.language not in SUPPORTED_LANGUAGES:
        return {"status": "error", "message": f"Unsupported language: {data.language}"}
    try:
        y, sr = load_audio(data.audioBase64)
        preds = detector(y)            # audio-classification pipeline output
        top = preds[0]                 # highest-scoring label
        f_data = get_forensics(y, sr)
        classification, confidence = hybrid_decision(top["label"], top["score"], f_data)
        return {
            "status": "success",
            "classification": classification,
            "language": data.language,
            "confidenceScore": confidence,
            "explanation": "Verified via hybrid spectral and forensic analysis.",
        }
    except Exception as e:
        # Best-effort boundary: surface the failure in the response body
        # (preserves the original error contract for callers).
        return {"status": "error", "message": str(e)}