Spaces: Build error

Commit 7ac54b9 · Parent(s): 9e98ed6

add the app.py file

Files changed:
- README.md +86 -0
- app.py +59 -0
- deepfake.py +62 -0
- requirements.txt +18 -0
README.md
CHANGED

@@ -12,3 +12,89 @@ short_description: A machine learning-based system for detecting deepfake
---

Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
# Deepfake-voice_detection_Yamnet

This repository contains a machine learning-based system for detecting deepfake (synthetic) voices. The system uses audio feature extraction with YAMNet and deep learning models (ANN, CNN, RNN) to differentiate between real and fake audio.

# 🚀 Features

- Audio Preprocessing: Converts raw audio into meaningful features.
- Feature Extraction: Uses YAMNet to extract embeddings from audio signals (see the sketch after this list).
- Dataset: Uses the Kaggle "In The Wild (Audio Deepfake)" dataset for training the models. [Link](https://www.kaggle.com/datasets/abdallamohamed312/in-the-wild-audio-deepfake)
- Deep Learning Models: Implements ANN, CNN, and RNN architectures for classification.
- Training & Evaluation: Trains the models on the labeled dataset and evaluates accuracy.
- Inference API: Provides an API to classify input audio as real or deepfake.

1. .png)
2. .png)
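A minimal sketch of clip-level YAMNet feature extraction, assuming the standard YAMNet model from TF Hub; the repository's exact preprocessing may differ, and the audio path is illustrative:

```python
import numpy as np
import librosa
import tensorflow_hub as hub

# YAMNet expects 16 kHz mono float32 audio in [-1, 1] and returns
# (class scores, per-frame 1024-dim embeddings, log-mel spectrogram).
yamnet = hub.load("https://tfhub.dev/google/yamnet/1")

waveform, _ = librosa.load("sample_audio/fake2.mp3", sr=16000)  # illustrative path
scores, embeddings, spectrogram = yamnet(waveform.astype(np.float32))

# Average the per-frame embeddings into one clip-level feature vector.
clip_embedding = np.mean(embeddings.numpy(), axis=0)  # shape: (1024,)
```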
# Model Performance

1. ANN model architecture
- Model training stats: accuracy and val_loss (figure 1)
- Evaluation Metrics: Accuracy, Precision, Recall, F1-score (figure 2)

| | precision | recall | f1-score | support |
|---|---|---|---|---|
| fake | 0.97 | 0.95 | 0.96 | 446 |
| real | 0.96 | 0.98 | 0.97 | 554 |
| accuracy | | | 0.97 | 1000 |
| macro avg | 0.97 | 0.97 | 0.97 | 1000 |
| weighted avg | 0.97 | 0.97 | 0.97 | 1000 |

2. CNN model architecture
- Model training stats: accuracy and val_loss (figure 1)
- Evaluation Metrics: Accuracy, Precision, Recall, F1-score (figure 2)

| | precision | recall | f1-score | support |
|---|---|---|---|---|
| fake | 0.97 | 0.96 | 0.97 | 446 |
| real | 0.97 | 0.97 | 0.97 | 554 |
| accuracy | | | 0.97 | 1000 |
| macro avg | 0.97 | 0.97 | 0.97 | 1000 |
| weighted avg | 0.97 | 0.97 | 0.97 | 1000 |

3. RNN model architecture
- Model training stats: accuracy and val_loss (figure 1)
- Evaluation Metrics: Accuracy, Precision, Recall, F1-score (figure 2)

| | precision | recall | f1-score | support |
|---|---|---|---|---|
| fake | 0.94 | 0.93 | 0.94 | 446 |
| real | 0.94 | 0.95 | 0.95 | 554 |
| accuracy | | | 0.94 | 1000 |
| macro avg | 0.94 | 0.94 | 0.94 | 1000 |
| weighted avg | 0.94 | 0.94 | 0.94 | 1000 |

Best model achieved: ANN with 97% accuracy. An illustrative classifier-head sketch follows.
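A minimal sketch of what an ANN head over clip-level 1024-dim YAMNet embeddings could look like; the hidden-layer sizes, dropout rate, and optimizer here are assumptions, not the repository's exact architecture:

```python
import tensorflow as tf

# Illustrative ANN head; input is one 1024-dim YAMNet embedding per clip.
ann = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(1024,)),
    tf.keras.layers.Dense(512, activation="relu"),   # layer sizes are assumptions
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(256, activation="relu"),
    tf.keras.layers.Dense(2, activation="softmax"),  # classes: fake, real
])
ann.compile(optimizer="adam",
            loss="sparse_categorical_crossentropy",
            metrics=["accuracy"])
```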
# 🛠️ Setup & Installation

1. Clone the repository:
```bash
git clone https://github.com/KaushiML3/Deepfake-voice-detection_Yamnet.git
cd Deepfake-voice-detection_Yamnet
```

2. Install dependencies:
```bash
pip install -r requirements.txt
```

3. Run inference:
- Change directory to the API folder, then run:
```bash
python main.py
```
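This commit also adds a Streamlit front end (app.py, shown next); assuming the standard Streamlit CLI, it can be started locally with:

```bash
streamlit run app.py
```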
app.py
ADDED
@@ -0,0 +1,59 @@
```python
import io
import os

import streamlit as st

from deepfake import infa_deepfake

# Resolve paths relative to this file so the app works from any working directory.
current_direction = os.path.dirname(os.path.abspath(__file__))

# Streamlit UI
st.title("Deepfake Audio Detection")
st.write("Upload an audio file to check if it's real or fake.")


st.header("Prediction for sample audio")
# Sample audio selection.
# NOTE: both bundled samples are files named fake*; the "Real Sample" entry
# points at sample_audio/fake2.mp3 as committed.
sample_options = {
    "Choose a sample...": None,
    "Real Sample": os.path.join(current_direction, "sample_audio/fake2.mp3"),
    "Fake Sample": os.path.join(current_direction, "sample_audio/fake3.mp3"),
}

selected_sample = st.selectbox("Select a sample audio for testing:", list(sample_options.keys()))

if selected_sample != "Choose a sample...":
    sample_path = sample_options[selected_sample]

    report = infa_deepfake(sample_path)
    if report["status"] == 1:
        # Display the audio player and the prediction result.
        st.audio(sample_path, format="audio/wav")
        st.write(f"**Prediction:** {report['class']}")
        st.write(f"**Confidence Score:** {report['prediction']}")
    else:
        # Inference failed; report["class"] carries the error message.
        st.audio(sample_path, format="audio/wav")
        st.write(f"**Message:** {report['class']}")


st.header("Prediction for your audio")

uploaded_file = st.file_uploader("Upload Audio", type=["wav", "mp3"])

if uploaded_file is not None:
    # Read the uploaded file into memory and run inference on it.
    report = infa_deepfake(io.BytesIO(uploaded_file.read()))
    if report["status"] == 1:
        st.audio(uploaded_file, format="audio/wav")
        st.write(f"**Prediction:** {report['class']}")
        st.write(f"**Confidence Score:** {report['prediction']}")
    else:
        st.audio(uploaded_file, format="audio/wav")
        st.write(f"**Message:** {report['class']}")
```
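For reference, a sketch of the contract between app.py and infa_deepfake (defined in deepfake.py below); the numeric values are illustrative, not real outputs:

```python
# Success: "class" is the predicted label, "prediction" maps class names to raw scores.
# {"status": 1, "class": "REAL", "prediction": {"FAKE": 0.03, "REAL": 0.97}}
# Failure: "status" is 0 and "class" carries the error message string.
# {"status": 0, "class": "file not found", "prediction": None}
```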
deepfake.py
ADDED
@@ -0,0 +1,62 @@
```python
import os

import librosa
import tensorflow as tf
import tensorflow_hub as hub

# Load the saved classifier once at import time.
current_direction = os.path.dirname(os.path.abspath(__file__))
deepfake_model = tf.saved_model.load(os.path.join(current_direction, "ann_human_or_bot"))


def load_wav_16k_mono(filename):
    """Load an audio file as a float array resampled to 16 kHz mono."""
    try:
        sound_sample, sr = librosa.load(filename, sr=16000)
        return 1, sound_sample
    except Exception as e:
        return 0, str(e)


def infa_deepfake(audio_path):
    try:
        status, testing_wav_data = load_wav_16k_mono(audio_path)

        # The SavedModel exposes its prediction function through a signature.
        infer = deepfake_model.signatures['serving_default']
        if status == 1:
            # The model expects a 1-D float32 waveform tensor.
            input_tensor = tf.convert_to_tensor(testing_wav_data, dtype=tf.float32)

            # Get the prediction output.
            output = infer(input_tensor)
            predictions = output['output_0']  # Adjust 'output_0' based on your model's output signature.

            my_classes = ['FAKE', 'REAL']

            human_bot = my_classes[tf.math.argmax(predictions)]

            # Map each class name to its raw score.
            score = dict(zip(my_classes, list(predictions.numpy())))

            return {"status": 1, "class": human_bot, "prediction": score}
        else:
            # Loading the audio failed; testing_wav_data holds the error message.
            return {"status": 0, "class": testing_wav_data, "prediction": None}

    except Exception as e:
        return {"status": 0, "class": str(e), "prediction": None}


# report = infa_deepfake("sample_audio/fake2.mp3")
# print(report)
```
requirements.txt
ADDED
@@ -0,0 +1,18 @@
```
tensorflow==2.18.0
keras==3.8.0
#transformers==4.48.3
scikit-learn==1.6.1
#torch==2.5.1

numpy==2.0.2
pandas==2.2.3
librosa==0.10.2
tensorflow_hub==0.16.1

streamlit
matplotlib

#fastapi==0.115.8
#uvicorn==0.34.0
python-multipart==0.0.20
```