bhoumik12 committed (verified)
Commit 960b635 · 1 Parent(s): d461d9b

Upload 5 files

Files changed (5)
  1. app.py +147 -0
  2. audio_backend.py +23 -0
  3. audio_utils.py +42 -0
  4. image_backend.py +89 -0
  5. requirements.txt +10 -0
app.py ADDED
@@ -0,0 +1,147 @@
import gradio as gr

# ---- IMPORT BACKENDS ----
from image_backend import predict_image_pil
from audio_backend import predict_audio


# =========================
# IMAGE LOGIC (UNCHANGED)
# =========================
def analyze_image(image):
    label, confidence, heatmap = predict_image_pil(image)

    if label == "Fake":
        if confidence >= 90:
            risk = "🚨 High likelihood of Deepfake"
        elif confidence >= 60:
            risk = "⚠️ Possibly Deepfake"
        else:
            risk = "⚠️ Uncertain Deepfake"
    else:
        if confidence >= 90:
            risk = "✅ Likely Real"
        elif confidence >= 60:
            risk = "⚠️ Possibly Real"
        else:
            risk = "⚠️ Uncertain – Needs Review"

    return label, f"{confidence} %", risk, heatmap


# =========================
# AUDIO LOGIC (UNCHANGED)
# =========================
def analyze_audio(audio_path):
    # predict_audio returns (label, confidence, spectrogram); the
    # spectrogram is not displayed in this UI, so it is discarded here.
    label, confidence, _spec = predict_audio(audio_path)

    # The backend returns "Fake"/"Real", so compare case-insensitively.
    if label.lower() == "fake":
        if confidence >= 90:
            risk = "🚨 High likelihood of Deepfake"
        elif confidence >= 60:
            risk = "⚠️ Possibly Deepfake"
        else:
            risk = "⚠️ Uncertain – Needs Review"
    else:
        if confidence >= 90:
            risk = "✅ Likely Real"
        elif confidence >= 60:
            risk = "⚠️ Possibly Real"
        else:
            risk = "⚠️ Uncertain – Needs Review"

    return label.capitalize(), f"{confidence} %", risk


# =========================
# UI (EXACT SAME)
# =========================
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 Unified Deepfake Detection System")

    with gr.Tabs():

        # HOME TAB
        with gr.Tab("🏠 Home"):
            gr.Markdown(
                """
                ## Welcome 👋
                Select the type of media you want to analyze:
                """
            )

            gr.Markdown("### 🔍 Choose Detection Mode")
            gr.Markdown("- 🖼 **Image Deepfake Detection**\n- 🎧 **Audio Deepfake Detection**")

            gr.Markdown(
                """
                👉 Use the tabs above to switch between Image and Audio detection.
                """
            )

        # IMAGE TAB
        with gr.Tab("🖼 Image Deepfake"):
            gr.Markdown("# 🖼 Deepfake Image Detection System")

            with gr.Row():
                with gr.Column(scale=1):
                    image_input = gr.Image(
                        label="Upload Image",
                        type="pil",
                        height=280
                    )
                    img_submit = gr.Button("Submit")
                    img_clear = gr.Button("Clear")

                with gr.Column(scale=2):
                    img_pred = gr.Text(label="Prediction")
                    img_conf = gr.Text(label="Confidence")
                    img_risk = gr.Text(label="Risk Assessment")
                    img_heatmap = gr.Image(
                        label="Explainability Heatmap",
                        height=280
                    )

            img_submit.click(
                fn=analyze_image,
                inputs=image_input,
                outputs=[img_pred, img_conf, img_risk, img_heatmap]
            )

            # Clear resets the heatmap as well as the text fields.
            img_clear.click(
                fn=lambda: (None, "", "", "", None),
                inputs=None,
                outputs=[image_input, img_pred, img_conf, img_risk, img_heatmap]
            )

        # AUDIO TAB
        with gr.Tab("🎧 Audio Deepfake"):
            gr.Markdown("# 🎧 Deepfake Audio Detection System")

            with gr.Row():
                with gr.Column(scale=1):
                    audio_input = gr.Audio(
                        label="Upload Audio (.wav)",
                        type="filepath"
                    )
                    aud_submit = gr.Button("Submit")
                    aud_clear = gr.Button("Clear")

                with gr.Column(scale=2):
                    aud_pred = gr.Text(label="Prediction")
                    aud_conf = gr.Text(label="Confidence")
                    aud_risk = gr.Text(label="Risk Assessment")

            aud_submit.click(
                fn=analyze_audio,
                inputs=audio_input,
                outputs=[aud_pred, aud_conf, aud_risk]
            )

            # Clear resets the risk field as well.
            aud_clear.click(
                fn=lambda: (None, "", "", ""),
                inputs=None,
                outputs=[audio_input, aud_pred, aud_conf, aud_risk]
            )

if __name__ == "__main__":
    demo.launch()
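A quick way to sanity-check the wiring without opening the browser is to call the two handlers directly. A minimal sketch, relying on the __main__ guard above so the import does not launch the server; samples/face.jpg and samples/clip.wav are hypothetical test files, not part of the commit:

# Hypothetical smoke test for app.py's handlers (placeholder paths).
from PIL import Image
from app import analyze_image, analyze_audio

label, conf, risk, heatmap = analyze_image(Image.open("samples/face.jpg"))
print(label, conf, risk)

label, conf, risk = analyze_audio("samples/clip.wav")
print(label, conf, risk)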
audio_backend.py ADDED
@@ -0,0 +1,23 @@
import tensorflow as tf
import numpy as np
from audio_utils import audio_to_spectrogram

MODEL_PATH = "models/audio_vit_savedmodel"

model = tf.saved_model.load(MODEL_PATH)
infer = model.signatures["serving_default"]


def predict_audio(wav_file):
    spec_img = audio_to_spectrogram(wav_file)

    # Scale to [0, 1] and add a batch dimension.
    x = spec_img.astype("float32") / 255.0
    x = np.expand_dims(x, axis=0)

    preds = infer(tf.constant(x))
    prob = list(preds.values())[0].numpy()[0][0]  # P(fake)

    label = "Fake" if prob >= 0.5 else "Real"
    # Report confidence in the predicted class rather than P(fake) itself,
    # so "Real" predictions can also clear the 60/90 risk thresholds.
    confidence = max(prob, 1 - prob) * 100

    # The spectrogram is returned as well so callers can display it.
    return label, round(confidence, 2), spec_img
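Because predict_audio takes the first value of whatever serving_default returns and indexes [0][0], it is worth confirming the exported signature really is a single per-clip probability. A small sketch using the standard SavedModel inspection attributes; the shapes in the comments are assumptions, not guaranteed by the export:

# Inspect the serving signature that predict_audio depends on.
import tensorflow as tf

model = tf.saved_model.load("models/audio_vit_savedmodel")
infer = model.signatures["serving_default"]

print(infer.structured_input_signature)  # expect a (None, 224, 224, 3) float32 input
print(infer.structured_outputs)          # expect a single (None, 1) sigmoid output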
audio_utils.py ADDED
@@ -0,0 +1,42 @@
import librosa
import numpy as np
import cv2

SR = 16000
DURATION = 4.0
N_MELS = 192
N_FFT = 2048
HOP_LENGTH = 160
IMG_SIZE = 224


def audio_to_spectrogram(wav_path):
    # Load, trim leading/trailing silence, then pad or crop to a fixed length.
    y, _ = librosa.load(wav_path, sr=SR)
    y, _ = librosa.effects.trim(y, top_db=30)

    target = int(SR * DURATION)

    if len(y) < target:
        pad = target - len(y)
        y = np.pad(y, (pad // 2, pad - pad // 2))
    else:
        y = y[:target]

    mel = librosa.feature.melspectrogram(
        y=y,
        sr=SR,
        n_fft=N_FFT,
        hop_length=HOP_LENGTH,
        n_mels=N_MELS
    )

    logmel = librosa.power_to_db(mel, ref=np.max)

    # Min-max normalize; the epsilon guards against division by zero
    # when the input is silent (constant spectrogram).
    logmel = (logmel - logmel.min()) / (logmel.max() - logmel.min() + 1e-8)

    # Render as a 3-channel 224x224 uint8 image for the ViT backbone.
    img = (logmel * 255).astype(np.uint8)
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    return img
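A one-line check that the preprocessing emits exactly the 224×224×3 uint8 image the audio model is fed; samples/clip.wav is a placeholder path:

# Verify audio_to_spectrogram's output shape and dtype (hypothetical file).
from audio_utils import audio_to_spectrogram

img = audio_to_spectrogram("samples/clip.wav")
assert img.shape == (224, 224, 3) and img.dtype == "uint8", (img.shape, img.dtype)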
image_backend.py ADDED
@@ -0,0 +1,89 @@
import torch
from torchvision import transforms
from transformers import ViTForImageClassification, ViTConfig
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import io

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

config = ViTConfig.from_pretrained(
    "google/vit-base-patch16-224",
    num_labels=2,
    output_attentions=True
)

model = ViTForImageClassification.from_pretrained(
    "google/vit-base-patch16-224",
    config=config,
    ignore_mismatched_sizes=True
)

model.load_state_dict(
    torch.load("model/vit_real_fake_best.pth", map_location=device)
)

model.to(device)
model.eval()


transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225]
    )
])


def get_attention_map(model, img_tensor):
    # Average the last layer's heads, then take the CLS token's attention
    # over the patch tokens as a coarse saliency map.
    with torch.no_grad():
        outputs = model(img_tensor, output_attentions=True)
    attn = outputs.attentions[-1].mean(dim=1)[0]
    cls_attn = attn[0, 1:]  # CLS row, patch columns (drop CLS->CLS)

    grid = int(cls_attn.size(0) ** 0.5)  # 196 patches -> 14x14 grid
    cls_attn = cls_attn.reshape(grid, grid).cpu().numpy()

    cls_attn = (cls_attn - cls_attn.min()) / (cls_attn.max() - cls_attn.min() + 1e-8)
    return cls_attn


def overlay(image, heatmap):
    heatmap = np.uint8(255 * heatmap)
    heatmap = Image.fromarray(heatmap).resize(image.size)

    fig, ax = plt.subplots(figsize=(4, 4))
    ax.imshow(image)
    ax.imshow(heatmap, cmap="jet", alpha=0.5)
    ax.axis("off")

    buf = io.BytesIO()
    plt.savefig(buf, format="png", bbox_inches="tight", pad_inches=0)
    plt.close(fig)
    buf.seek(0)

    return Image.open(buf)


def predict_image_pil(image):
    image = image.convert("RGB")

    x = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        outputs = model(x)
        logits = outputs.logits
        pred = torch.argmax(logits, dim=1).item()

    # Class index 0 is "Fake" under this checkpoint's label order.
    label = "Fake" if pred == 0 else "Real"

    heat = get_attention_map(model, x)
    heatmap_img = overlay(image, heat)

    confidence = torch.softmax(logits, dim=1)[0][pred].item() * 100

    return label, round(confidence, 2), heatmap_img
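get_attention_map assumes a square patch grid, which holds for ViT-base at 224×224 with 16×16 patches: (224 / 16)² = 196 patch tokens plus one CLS token, so the attention matrix is 197×197 and the reshape target is 14×14. A quick check of that arithmetic:

# Confirm the token-grid arithmetic behind get_attention_map's reshape.
image_size, patch_size = 224, 16
grid = image_size // patch_size      # 14
num_patches = grid * grid            # 196 patch tokens after the CLS slot
assert int(num_patches ** 0.5) == grid
print(grid, num_patches, num_patches + 1)  # 14 196 197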
requirements.txt ADDED
@@ -0,0 +1,10 @@
gradio
torch
torchvision
transformers
tensorflow
librosa
opencv-python
matplotlib
pillow
numpy