Spaces:

mariam-ahmed15
/

Deepfake-audio-detection

Running

File size: 1,840 Bytes

b7e88e7
 
 
 
 
 
 
3fc49d8
b7e88e7
3fc49d8
b7e88e7
 
 
 
3fc49d8
b7e88e7
 
 
 
3fc49d8
b7e88e7
 
 
 
3fc49d8
b7e88e7
 
 
 
3fc49d8
b7e88e7
 
 
 
 
 
 
 
 
 
 
 
 
 
3fc49d8
b7e88e7
 
 
 
 
 
 
 
3fc49d8
b7e88e7
 
3fc49d8
 
 
 
 
b7e88e7
 
3fc49d8
b7e88e7

import torch
import gradio as gr
import librosa
from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2FeatureExtractor

# 1. CONFIGURATION
# Pretrained checkpoint identifier passed to both from_pretrained() calls.
MODEL_ID = "facebook/wav2vec2-xls-r-300m"
# File whose contents are passed to load_state_dict() on the quantized model.
QUANTIZED_MODEL_PATH = "quantized_model.pth"


# 2. LOAD MODEL
def _load_quantized_classifier(model_id, weights_path):
    """Build the 2-label classifier, quantize it, and load its weights on CPU.

    The Linear layers are converted with dynamic int8 quantization before the
    saved state dict is applied (presumably so the module layout matches the
    checkpoint that was saved from a quantized model -- confirm against the
    export script).

    Returns:
        A ``(model, feature_extractor)`` tuple; the model is in eval mode.
    """
    print("Loading model architecture...")
    classifier = Wav2Vec2ForSequenceClassification.from_pretrained(
        model_id, num_labels=2
    )
    extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_id)

    # Apply quantization structure
    layers_to_quantize = {torch.nn.Linear}
    classifier = torch.quantization.quantize_dynamic(
        classifier, layers_to_quantize, dtype=torch.qint8
    )

    # Load weights
    print("Loading quantized weights...")
    cpu_device = torch.device('cpu')
    saved_state = torch.load(weights_path, map_location=cpu_device)
    classifier.load_state_dict(saved_state)
    classifier.eval()
    return classifier, extractor


model, feature_extractor = _load_quantized_classifier(MODEL_ID, QUANTIZED_MODEL_PATH)

# 3. PREDICTION FUNCTION
def predict_audio(audio_path):
    """Classify an audio file as real or deepfake speech.

    Args:
        audio_path: Path of the audio file to analyse, or ``None`` when no
            file was supplied.

    Returns:
        ``"No Audio Provided"`` when ``audio_path`` is ``None``,
        ``"Audio Is Empty"`` when the decoded file contains no samples, and
        otherwise a dict mapping ``"Deepfake"`` and ``"Real"`` to their
        softmax probabilities.
    """
    if audio_path is None:
        return "No Audio Provided"

    # Decode and resample; the same rate is declared to the feature extractor.
    target_sr = 16000
    speech_array, _ = librosa.load(audio_path, sr=target_sr)

    # An empty waveform gives the model nothing to classify; report it
    # explicitly instead of failing somewhere inside the forward pass.
    if speech_array.size == 0:
        return "Audio Is Empty"

    inputs = feature_extractor(
        speech_array,
        sampling_rate=target_sr,
        return_tensors="pt",
        padding=True,
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**inputs).logits

    probs = torch.nn.functional.softmax(logits, dim=-1)

    # Assumes index 0 = Real and index 1 = Deepfake -- confirm against the
    # label mapping used when the checkpoint was trained.
    real_prob = probs[0][0].item()
    fake_prob = probs[0][1].item()

    return {
        "Deepfake": fake_prob,
        "Real": real_prob,
    }

# 4. CREATE INTERFACE (Modified for Upload Only)
# Upload-only audio input, configured with type="filepath".
audio_input = gr.Audio(
    sources=["upload"],
    type="filepath",
    label="Upload Audio File",
)
# Label output limited to the top two classes.
prediction_output = gr.Label(num_top_classes=2)

iface = gr.Interface(
    fn=predict_audio,
    inputs=audio_input,
    outputs=prediction_output,
    title="Deepfake Audio Detection API",
    description="Upload an audio file (WAV/MP3) to check if it's real or fake.",
)

# Start the Gradio server for the interface.
iface.launch()