# File size: 1,443 Bytes
import os
import torch
import librosa
import numpy as np
from transformers import (
AutoFeatureExtractor,
AutoModelForAudioClassification,
)
import gradio as gr
# Hub access token; a missing HF_TOKEN environment variable raises KeyError
# here, at import time.
TOKEN = os.environ["HF_TOKEN"]

# Repository id of the pretrained deepfake-audio classifier.
MODEL_ID = "aitf-komdigi/KomdigiITS-86M-DFK-DeepfakeAudioClassification"

# The feature extractor and the classifier are loaded from the same
# repository with the same token.
feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_ID, token=TOKEN)

# eval() returns the module itself, so it can be chained onto the load to put
# the model in evaluation mode.
model = AutoModelForAudioClassification.from_pretrained(
    MODEL_ID, token=TOKEN
).eval()
def predict(audio):
    """Score an audio clip as real or AI-generated.

    Args:
        audio: ``None`` when no clip was supplied, otherwise a
            ``(sample_rate, samples)`` pair. ``samples`` is either 1-D or
            2-D; a 2-D array is averaged over axis 1 (presumably the channel
            axis, as delivered by ``gr.Audio(type="numpy")`` -- confirm).

    Returns:
        The string ``"No audio uploaded"`` when ``audio`` is ``None`` or
        contains no samples; otherwise a dict with the keys ``"Real"`` and
        ``"Fake"`` mapped to scores rounded to four decimals.
    """
    if audio is None:
        return "No audio uploaded"

    sr, waveform = audio
    waveform = np.asarray(waveform)
    if waveform.size == 0:
        # Nothing to resample or classify in a zero-length recording.
        return "No audio uploaded"

    if np.issubdtype(waveform.dtype, np.signedinteger):
        # Signed integer PCM (e.g. int16) must be scaled to [-1, 1); a bare
        # cast would leave amplitudes in the tens of thousands.
        full_scale = float(np.iinfo(waveform.dtype).max) + 1.0
        waveform = waveform.astype(np.float32) / full_scale
    else:
        waveform = waveform.astype(np.float32)

    if waveform.ndim > 1:
        # Collapse multi-channel audio to mono by averaging across axis 1.
        waveform = waveform.mean(axis=1)

    # The feature extractor below is told the audio is sampled at 16 kHz.
    waveform = librosa.resample(
        waveform,
        orig_sr=sr,
        target_sr=16000,
    )
    inputs = feature_extractor(
        waveform,
        sampling_rate=16000,
        return_tensors="pt",
    )

    with torch.no_grad():
        logits = model(**inputs).logits

    # .item() only works on a one-element tensor, so this assumes the model
    # emits a single logit per clip; its sigmoid is used as the "Fake" score.
    score = torch.sigmoid(logits).item()
    return {
        "Real": round(1.0 - score, 4),
        "Fake": round(score, 4),
    }
# Input widget: accepts an uploaded file or a microphone recording and passes
# it to predict() in Gradio's "numpy" format.
_audio_input = gr.Audio(sources=["upload", "microphone"], type="numpy")

# Output widget for whatever predict() returns.
_label_output = gr.Label()

demo = gr.Interface(
    fn=predict,
    inputs=_audio_input,
    outputs=_label_output,
    title="Audio Deepfake Detection",
    description="Detect whether an audio clip is real or AI-generated.",
)

# Start the web app as soon as the script is executed.
demo.launch()
|