KaushiGihan commited on
Commit
7ac54b9
·
1 Parent(s): 9e98ed6

add the app.py file

Browse files
Files changed (4) hide show
  1. README.md +86 -0
  2. app.py +59 -0
  3. deepfake.py +62 -0
  4. requirements.txt +18 -0
README.md CHANGED
@@ -12,3 +12,89 @@ short_description: A machine learning-based system for detecting deepfake
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
15
+
16
+
17
+
18
+ # Deepfake-voice_detection_Yamnet
19
+ This repository contains a machine learning-based system to detect deepfake (synthetic) voices. The system utilizes audio feature extraction techniques such as YAMNet and deep learning models (ANN, CNN, RNN) to differentiate between real and fake audio.
20
+
21
+ # 🚀 Features
22
+ - Audio Preprocessing: Converts raw audio into meaningful features.
23
+ - Feature Extraction: Uses YAMNet to extract embeddings from audio signals.
24
+ - Dataset : Uses the kaggle "In The Wild (audio Deepfake)" dataset for training the model.[Link](https://www.kaggle.com/datasets/abdallamohamed312/in-the-wild-audio-deepfake)
25
+ - Deep Learning Models: Implements ANN, CNN, and RNN architectures for classification.
26
+ - Training & Evaluation: Trains models with labeled datasets and evaluates accuracy.
27
+ - Inference API: Provides an API to classify input audio as real or deepfake.
28
+ 1. ![image](https://github.com/KaushiML3/Deepfake-voice-detection_Yamnet/blob/main/src_img/Screenshot%20(97).png)
29
+ 2. ![image](https://github.com/KaushiML3/Deepfake-voice-detection_Yamnet/blob/main/src_img/Screenshot%20(98).png)
30
+
31
+ # Model Performance
32
+
33
+ 1. ANN model architecture
34
+ - Model training stats: Accuracy and Val_loss
35
+ 1. ![image](https://github.com/KaushiML3/Deepfake-voice-detection_Yamnet/blob/main/src_img/ann%20s.png)
36
+ - Evaluation Metrics: Accuracy, Precision, Recall, F1-score
37
+ 2. ![image](https://github.com/KaushiML3/Deepfake-voice-detection_Yamnet/blob/main/src_img/ann%20c.png)
38
+
39
+ precision recall f1-score support
40
+ fake 0.97 0.95 0.96 446
41
+ real 0.96 0.98 0.97 554
42
+
43
+ accuracy 0.97 1000
44
+ macro avg 0.97 0.97 0.97 1000
45
+ weighted avg 0.97 0.97 0.97 1000
46
+
47
+
48
+ 2. CNN model architecture
49
+ - Model training stats: Accuracy and Val_loss
50
+ 1. ![image](https://github.com/KaushiML3/Deepfake-voice-detection_Yamnet/blob/main/src_img/cnn%20s.png)
51
+ - Evaluation Metrics: Accuracy, Precision, Recall, F1-score
52
+ 2. ![image](https://github.com/KaushiML3/Deepfake-voice-detection_Yamnet/blob/main/src_img/cnn%20c.png)
53
+
54
+ precision recall f1-score support
55
+ fake 0.97 0.96 0.97 446
56
+ real 0.97 0.97 0.97 554
57
+
58
+ accuracy 0.97 1000
59
+ macro avg 0.97 0.97 0.97 1000
60
+ weighted avg 0.97 0.97 0.97 1000
61
+
62
+
63
+ 3. RNN model architecture
64
+ - Model training stats: Accuracy and Val_loss
65
+ 1. ![image](https://github.com/KaushiML3/Deepfake-voice-detection_Yamnet/blob/main/src_img/rnn%20s.png)
66
+ - Evaluation Metrics: Accuracy, Precision, Recall, F1-score
67
+ 2. ![image](https://github.com/KaushiML3/Deepfake-voice-detection_Yamnet/blob/main/src_img/rnn%20c.png)
68
+
69
+ precision recall f1-score support
70
+ fake 0.94 0.93 0.94 446
71
+ real 0.94 0.95 0.95 554
72
+
73
+ accuracy 0.94 1000
74
+ macro avg 0.94 0.94 0.94 1000
75
+ weighted avg 0.94 0.94 0.94 1000
76
+
77
+ Best Model Achieved: ANN with 97% accuracy
78
+
79
+
80
+ # 🛠️ Setup & Installation
81
+
82
+ 1.Clone the repository:
83
+ ```bash
84
+ git clone https://github.com/KaushiML3/Deepfake-voice-detection_Yamnet.git
85
+ cd Deepfake-voice-detection_Yamnet
86
+ ```
87
+
88
+ 2.Install dependencies:
89
+ ```bash
90
+ pip install -r requirements.txt
91
+
92
+ ```
93
+
94
+ 3.Run inference
95
+ - change the working directory to the API folder
96
+ ```bash
97
+ python main.py
98
+
99
+ ```
100
+
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import streamlit as st
import numpy as np
import tensorflow as tf
import librosa
#import librosa.display
import io
import os

from deepfake import infa_deepfake


def _render_report(audio_source, report, audio_format):
    """Play `audio_source` and display the classifier verdict from `report`.

    `report` is the dict returned by deepfake.infa_deepfake():
    status == 1 carries a class label and confidence scores;
    status == 0 carries an error message in the 'class' field.
    """
    st.audio(audio_source, format=audio_format)
    if report["status"] == 1:
        st.write(f"**Prediction:** {report['class']}")
        st.write(f"**Confidence Score:** {report['prediction']}")
    else:
        st.write(f"**Message:** {report['class']}")


# Resolve sample paths relative to this file so the app works from any CWD.
current_direction = os.path.dirname(os.path.abspath(__file__))

# Streamlit UI
st.title("Deepfake Audio Detection")
st.write("Upload an audio file to check if it's real or fake.")


st.header("Prediction for sample audio")
# Bundled demo clips.
# NOTE(review): "Real Sample" points at fake2.mp3 — confirm this file really
# contains genuine speech, otherwise the demo label is misleading.
sample_options = {
    "Choose a sample...": None,
    "Real Sample": os.path.join(current_direction, "sample_audio/fake2.mp3"),
    "Fake Sample": os.path.join(current_direction, "sample_audio/fake3.mp3"),
}

selected_sample = st.selectbox("Select a sample audio for testing:", list(sample_options.keys()))

if selected_sample != "Choose a sample...":
    sample_path = sample_options[selected_sample]
    report = infa_deepfake(sample_path)
    # Bug fix: the bundled samples are MP3 files, so advertise the audio/mp3
    # MIME type (previously audio/wav).
    _render_report(sample_path, report, "audio/mp3")


st.header("Prediction for your audio")

uploaded_file = st.file_uploader("Upload Audio", type=["wav", "mp3"])

if uploaded_file is not None:
    # Hand the classifier its own in-memory copy so the player below can
    # still render the upload object after its stream was consumed.
    report = infa_deepfake(io.BytesIO(uploaded_file.read()))
    # Pick the MIME type from the uploaded file's extension (wav or mp3).
    audio_format = "audio/mp3" if uploaded_file.name.lower().endswith(".mp3") else "audio/wav"
    _render_report(uploaded_file, report, audio_format)
deepfake.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import numpy as np
import pandas as pd
import librosa
import os

import tensorflow as tf
import tensorflow_hub as hub

# Load the exported SavedModel once at import time, resolved relative to this
# file so inference works regardless of the current working directory.
# NOTE(review): "ann_human_or_bot" is presumably the ANN classifier described
# in the README — confirm the directory name matches the deployed artifact.
current_direction = os.path.dirname(os.path.abspath(__file__))
deepfake_model = tf.saved_model.load(os.path.join(current_direction,"ann_human_or_bot"))
def load_wav_16k_mono(filename):
    """Decode an audio file to a 16 kHz mono float waveform.

    Despite the historical name, librosa decodes any format it supports
    (wav, mp3, ...), not only WAV.

    Args:
        filename: path or file-like object accepted by ``librosa.load``.

    Returns:
        ``(1, samples)`` on success, where ``samples`` is a 1-D float array
        resampled to 16 kHz mono, or ``(0, error_message)`` when decoding
        fails (status-tuple contract expected by ``infa_deepfake``).
    """
    try:
        # sr=16000 resamples on load; librosa.load returns mono by default.
        sound_sample, _sr = librosa.load(filename, sr=16000)
        return 1, sound_sample
    except Exception as e:
        # Best-effort boundary: report the failure as data, never raise.
        return 0, str(e)
def infa_deepfake(audio_path):
    """Classify an audio clip as real ('REAL') or deepfake ('FAKE').

    Args:
        audio_path: path or file-like object readable by librosa.

    Returns:
        dict with keys:
            status: 1 on success, 0 on failure.
            class: predicted label on success, otherwise an error message.
            prediction: {label: score} mapping on success, else None.
    """
    try:
        status, testing_wav_data = load_wav_16k_mono(audio_path)

        if status == 1:
            # Fetch the inference entry point only after a successful decode;
            # previously this ran (and could raise) even when loading failed.
            infer = deepfake_model.signatures['serving_default']

            input_tensor = tf.convert_to_tensor(testing_wav_data, dtype=tf.float32)
            output = infer(input_tensor)
            # NOTE(review): 'output_0' assumes the model's default export
            # signature — confirm if the SavedModel is re-exported.
            predictions = output['output_0']

            my_classes = ['FAKE', 'REAL']
            # assumes predictions is a 1-D score vector aligned with
            # my_classes — TODO confirm against the model's output signature.
            human_bot = my_classes[tf.math.argmax(predictions)]
            score = dict(zip(my_classes, list(predictions.numpy())))

            return {"status": 1, "class": human_bot, "prediction": score}
        else:
            # Decoding failed: 'testing_wav_data' holds the error text.
            return {"status": 0, "class": testing_wav_data, "prediction": None}

    except Exception as e:
        # Last-resort guard so the UI always receives a structured report.
        return {"status": 0, "class": str(e), "prediction": None}


#report=infa_deepfake("sample_audio/fake2.mp3")
#print(report)
requirements.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ tensorflow==2.18.0
2
+ keras==3.8.0
3
+ #transformers==4.48.3
4
+ scikit-learn==1.6.1
5
+ #torch==2.5.1
6
+
7
+ numpy==2.0.2
8
+ pandas==2.2.3
9
+ librosa==0.10.2
10
+ tensorflow_hub==0.16.1
11
+
12
+ streamlit
13
+ matplotlib
14
+
15
+
16
+ #fastapi==0.115.8
17
+ #uvicorn==0.34.0
18
+ python-multipart==0.0.20