Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
# app.py — DeepFake
|
| 2 |
import os
|
| 3 |
import subprocess
|
| 4 |
import tempfile
|
|
@@ -16,7 +16,7 @@ import gradio as gr
|
|
| 16 |
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
|
| 17 |
|
| 18 |
# ==========================================
|
| 19 |
-
# 1. MODEL LOADING
|
| 20 |
# ==========================================
|
| 21 |
MODEL_NAME = "Hemgg/Deepfake-audio-detection"
|
| 22 |
print("[+] Loading AI forensic model...")
|
|
@@ -33,12 +33,9 @@ print(f"[+] Model loaded on {device}")
|
|
| 33 |
# 2. AUDIO / VIDEO PREPROCESSING
|
| 34 |
# ==========================================
|
| 35 |
def convert_to_audio(file_path):
|
| 36 |
-
"""Convert video to 16kHz mono WAV or pass audio through."""
|
| 37 |
ext = os.path.splitext(file_path)[1].lower().lstrip('.')
|
| 38 |
-
|
| 39 |
if ext in ["wav", "mp3", "flac", "m4a", "ogg", "aac", "wma"]:
|
| 40 |
return file_path
|
| 41 |
-
|
| 42 |
if ext in ["mp4", "mkv", "avi", "mov", "webm", "flv"]:
|
| 43 |
print("[+] Video detected β extracting audio via ffmpeg...")
|
| 44 |
out = tempfile.mktemp(suffix=".wav")
|
|
@@ -50,7 +47,6 @@ def convert_to_audio(file_path):
|
|
| 50 |
]
|
| 51 |
subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
|
| 52 |
return out
|
| 53 |
-
|
| 54 |
raise ValueError(f"Unsupported file format: {ext}")
|
| 55 |
|
| 56 |
def load_audio(path):
|
|
@@ -59,25 +55,15 @@ def load_audio(path):
|
|
| 59 |
return audio
|
| 60 |
|
| 61 |
# ==========================================
|
| 62 |
-
# 3.
|
| 63 |
# ==========================================
|
| 64 |
def predict(audio):
|
| 65 |
-
inputs = extractor(
|
| 66 |
-
audio,
|
| 67 |
-
sampling_rate=16000,
|
| 68 |
-
return_tensors="pt",
|
| 69 |
-
padding=True
|
| 70 |
-
).to(device)
|
| 71 |
-
|
| 72 |
with torch.no_grad():
|
| 73 |
logits = model(**inputs).logits
|
| 74 |
probs = torch.softmax(logits, dim=-1)[0]
|
|
|
|
| 75 |
|
| 76 |
-
return float(probs[0]), float(probs[1]) # (human_prob, ai_prob)
|
| 77 |
-
|
| 78 |
-
# ==========================================
|
| 79 |
-
# 4. DSP FEATURE ANALYSIS
|
| 80 |
-
# ==========================================
|
| 81 |
def audio_features(audio):
|
| 82 |
mfcc = librosa.feature.mfcc(y=audio, sr=16000)
|
| 83 |
return {
|
|
@@ -85,261 +71,606 @@ def audio_features(audio):
|
|
| 85 |
"energy": float(np.mean(audio ** 2))
|
| 86 |
}
|
| 87 |
|
| 88 |
-
# ==========================================
|
| 89 |
-
# 5. ENSEMBLE SCORING ENGINE
|
| 90 |
-
# ==========================================
|
| 91 |
def analyze(file_path):
|
| 92 |
audio_path = convert_to_audio(file_path)
|
| 93 |
audio = load_audio(audio_path)
|
| 94 |
-
|
| 95 |
human_p, ai_p = predict(audio)
|
| 96 |
feats = audio_features(audio)
|
| 97 |
-
|
| 98 |
-
# Ensemble: 60% neural + 40% DSP anomaly signal
|
| 99 |
anomaly = (feats["mfcc"] / 500.0) + (feats["energy"] * 2.0)
|
| 100 |
ai_score = np.clip((ai_p * 0.6 + anomaly * 0.4), 0.0, 1.0)
|
| 101 |
|
| 102 |
if ai_score < 0.35:
|
| 103 |
-
verdict = "
|
| 104 |
level = "LOW RISK"
|
|
|
|
|
|
|
|
|
|
| 105 |
elif ai_score < 0.65:
|
| 106 |
-
verdict = "
|
| 107 |
level = "MEDIUM RISK"
|
|
|
|
|
|
|
|
|
|
| 108 |
else:
|
| 109 |
-
verdict = "
|
| 110 |
level = "HIGH RISK"
|
|
|
|
|
|
|
|
|
|
| 111 |
|
| 112 |
confidence = int(ai_score * 100)
|
| 113 |
-
return verdict, level, confidence, ai_score, feats, audio_path
|
| 114 |
|
| 115 |
# ==========================================
|
| 116 |
-
#
|
| 117 |
# ==========================================
|
| 118 |
def generate_audio_plots(audio_path):
|
| 119 |
y, sr = librosa.load(audio_path, sr=16000, duration=10)
|
|
|
|
|
|
|
| 120 |
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
#
|
| 125 |
-
ax1.
|
| 126 |
-
|
| 127 |
-
ax1.set_title('Audio Waveform', color='white', fontsize=12)
|
| 128 |
-
ax1.tick_params(colors='white')
|
| 129 |
|
| 130 |
-
#
|
| 131 |
-
ax2.set_facecolor('#1a1a2e')
|
| 132 |
mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
|
| 133 |
mel_db = librosa.power_to_db(mel, ref=np.max)
|
| 134 |
-
img = librosa.display.specshow(
|
| 135 |
-
mel_db, sr=sr, ax=ax2,
|
| 136 |
-
x_axis='time', y_axis='mel', cmap='magma'
|
| 137 |
-
)
|
| 138 |
cbar = plt.colorbar(img, ax=ax2, format='%+2.0f dB')
|
| 139 |
-
cbar.ax.yaxis.set_tick_params(color='
|
| 140 |
-
plt.setp(plt.getp(cbar.ax.axes, 'yticklabels'), color='
|
| 141 |
-
ax2.set_title('Mel Spectrogram', color='
|
| 142 |
-
ax2.tick_params(colors='
|
| 143 |
-
ax2.yaxis.label.set_color('
|
| 144 |
-
ax2.xaxis.label.set_color('
|
|
|
|
|
|
|
| 145 |
|
| 146 |
plt.tight_layout()
|
| 147 |
plot_path = '/tmp/audio_analysis.png'
|
| 148 |
-
plt.savefig(plot_path, facecolor='#
|
| 149 |
plt.close()
|
| 150 |
return plot_path
|
| 151 |
|
| 152 |
# ==========================================
|
| 153 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
# ==========================================
|
| 155 |
def detect_audio(audio_file):
|
| 156 |
if audio_file is None:
|
| 157 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
|
| 159 |
try:
|
| 160 |
-
verdict, level, confidence, ai_score, feats, audio_path = analyze(audio_file)
|
| 161 |
plot_path = generate_audio_plots(audio_path)
|
| 162 |
percentage = ai_score * 100
|
| 163 |
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
#
|
| 178 |
-
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
""
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
|
| 188 |
except Exception as e:
|
| 189 |
-
return None, f"β
|
| 190 |
|
| 191 |
|
| 192 |
def detect_video(video_file):
|
| 193 |
if video_file is None:
|
| 194 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
|
| 196 |
try:
|
| 197 |
-
|
| 198 |
-
verdict, level, confidence, ai_score, feats, audio_path = analyze(video_file)
|
| 199 |
plot_path = generate_audio_plots(audio_path)
|
| 200 |
percentage = ai_score * 100
|
| 201 |
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
#
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
|
| 230 |
except Exception as e:
|
| 231 |
-
return None, f"β
|
|
|
|
| 232 |
|
| 233 |
# ==========================================
|
| 234 |
-
#
|
| 235 |
# ==========================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
def build_ui():
|
| 237 |
with gr.Blocks(
|
| 238 |
-
title="
|
| 239 |
theme=gr.themes.Base(
|
| 240 |
-
primary_hue="
|
| 241 |
neutral_hue="slate",
|
|
|
|
| 242 |
),
|
| 243 |
-
css=
|
| 244 |
-
.gradio-container { max-width: 1100px; margin: auto; }
|
| 245 |
-
.result-box { border-radius: 12px; padding: 16px; }
|
| 246 |
-
h1 { text-align: center; }
|
| 247 |
-
.score-display { font-size: 48px; font-weight: bold; text-align: center; }
|
| 248 |
-
"""
|
| 249 |
) as demo:
|
| 250 |
|
|
|
|
| 251 |
gr.HTML("""
|
| 252 |
-
<div style="text-align:center; padding: 20px
|
| 253 |
-
<
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
</p>
|
| 258 |
</div>
|
| 259 |
""")
|
| 260 |
|
| 261 |
with gr.Tabs():
|
| 262 |
|
| 263 |
-
#
|
| 264 |
-
|
| 265 |
-
|
|
|
|
| 266 |
with gr.Row():
|
| 267 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
audio_input = gr.Audio(
|
| 269 |
-
label="
|
| 270 |
-
type="filepath"
|
|
|
|
| 271 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
audio_btn = gr.Button("π Analyze Audio", variant="primary", size="lg")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
|
|
|
|
| 274 |
with gr.Column(scale=2):
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
)
|
| 280 |
-
audio_plot = gr.Image(label="π Audio Analysis")
|
| 281 |
-
audio_result = gr.Markdown(label="π Detailed Report")
|
| 282 |
|
| 283 |
audio_btn.click(
|
| 284 |
fn=detect_audio,
|
| 285 |
inputs=[audio_input],
|
| 286 |
-
outputs=[audio_plot, audio_result,
|
| 287 |
)
|
| 288 |
|
| 289 |
-
#
|
| 290 |
-
|
| 291 |
-
|
|
|
|
| 292 |
with gr.Row():
|
| 293 |
-
with gr.Column(scale=1):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
video_input = gr.Video(
|
| 295 |
-
label="
|
|
|
|
| 296 |
)
|
|
|
|
|
|
|
|
|
|
| 297 |
video_btn = gr.Button("π Analyze Video", variant="primary", size="lg")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
|
| 299 |
with gr.Column(scale=2):
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
)
|
| 305 |
-
video_plot = gr.Image(label="π Audio Analysis")
|
| 306 |
-
video_result = gr.Markdown(label="π Detailed Report")
|
| 307 |
|
| 308 |
video_btn.click(
|
| 309 |
fn=detect_video,
|
| 310 |
inputs=[video_input],
|
| 311 |
-
outputs=[video_plot, video_result,
|
| 312 |
)
|
| 313 |
|
| 314 |
-
#
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
###
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
-
|
| 339 |
-
-
|
| 340 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 341 |
""")
|
| 342 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 343 |
return demo
|
| 344 |
|
| 345 |
|
|
@@ -348,5 +679,5 @@ if __name__ == "__main__":
|
|
| 348 |
demo.launch(
|
| 349 |
server_name="0.0.0.0",
|
| 350 |
server_port=7860,
|
| 351 |
-
share=False
|
| 352 |
)
|
|
|
|
| 1 |
+
# app.py — DeepFake AI Forensics (Premium UI + Animated Waves + Confidence Circle)
|
| 2 |
import os
|
| 3 |
import subprocess
|
| 4 |
import tempfile
|
|
|
|
| 16 |
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
|
| 17 |
|
| 18 |
# ==========================================
|
| 19 |
+
# 1. MODEL LOADING
|
| 20 |
# ==========================================
|
| 21 |
MODEL_NAME = "Hemgg/Deepfake-audio-detection"
|
| 22 |
print("[+] Loading AI forensic model...")
|
|
|
|
| 33 |
# 2. AUDIO / VIDEO PREPROCESSING
|
| 34 |
# ==========================================
|
| 35 |
def convert_to_audio(file_path):
|
|
|
|
| 36 |
ext = os.path.splitext(file_path)[1].lower().lstrip('.')
|
|
|
|
| 37 |
if ext in ["wav", "mp3", "flac", "m4a", "ogg", "aac", "wma"]:
|
| 38 |
return file_path
|
|
|
|
| 39 |
if ext in ["mp4", "mkv", "avi", "mov", "webm", "flv"]:
|
| 40 |
print("[+] Video detected β extracting audio via ffmpeg...")
|
| 41 |
out = tempfile.mktemp(suffix=".wav")
|
|
|
|
| 47 |
]
|
| 48 |
subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
|
| 49 |
return out
|
|
|
|
| 50 |
raise ValueError(f"Unsupported file format: {ext}")
|
| 51 |
|
| 52 |
def load_audio(path):
|
|
|
|
| 55 |
return audio
|
| 56 |
|
| 57 |
# ==========================================
|
| 58 |
+
# 3. INFERENCE & DSP
|
| 59 |
# ==========================================
|
| 60 |
def predict(audio):
    """Run the audio classifier on a waveform and return two class probabilities.

    The waveform is passed to the module-level ``extractor`` as 16 kHz audio,
    the resulting tensors are moved to ``device`` and fed to ``model`` with
    gradient tracking disabled.

    Returns:
        A ``(p0, p1)`` tuple of Python floats: the softmax probabilities of
        class index 0 and class index 1 for the first item in the batch.
        NOTE(review): callers treat index 0 as "human" and index 1 as "AI";
        that ordering comes from the model's label mapping, which is not
        visible here -- confirm against ``model.config.id2label``.
    """
    features = extractor(audio, sampling_rate=16000, return_tensors="pt", padding=True)
    features = features.to(device)

    with torch.no_grad():
        raw_scores = model(**features).logits
        class_probs = torch.softmax(raw_scores, dim=-1)[0]

    first_class = float(class_probs[0])
    second_class = float(class_probs[1])
    return first_class, second_class
|
| 66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
def audio_features(audio):
|
| 68 |
mfcc = librosa.feature.mfcc(y=audio, sr=16000)
|
| 69 |
return {
|
|
|
|
| 71 |
"energy": float(np.mean(audio ** 2))
|
| 72 |
}
|
| 73 |
|
|
|
|
|
|
|
|
|
|
| 74 |
def analyze(file_path):
    """Score a media file and map the score to a verdict and display styling.

    The file is converted/loaded through ``convert_to_audio`` and
    ``load_audio``, then scored by ``predict`` and ``audio_features``. The
    final score is a clipped blend: 60% of the classifier's second
    probability plus 40% of a DSP "anomaly" term
    (``mfcc / 500 + energy * 2``).

    Returns:
        A 9-tuple ``(verdict, level, confidence, ai_score, feats, audio_path,
        color, icon, glow)`` where ``confidence`` is ``int(ai_score * 100)``
        and ``ai_score`` lies in [0, 1].
    """
    audio_path = convert_to_audio(file_path)
    audio = load_audio(audio_path)

    # Only the second probability feeds the ensemble; the first is unused.
    _, ai_p = predict(audio)
    feats = audio_features(audio)

    anomaly = (feats["mfcc"] / 500.0) + (feats["energy"] * 2.0)
    ai_score = np.clip((ai_p * 0.6 + anomaly * 0.4), 0.0, 1.0)

    # (exclusive upper bound, (verdict, level, color, icon, glow)), checked in order.
    tiers = (
        (0.35, ("HUMAN VOICE", "LOW RISK", "#00ff88", "π§", "rgba(0,255,136,0.25)")),
        (0.65, ("UNCERTAIN / MIXED", "MEDIUM RISK", "#ffcc00", "β οΈ", "rgba(255,204,0,0.25)")),
    )
    top_tier = ("AI / SYNTHETIC VOICE", "HIGH RISK", "#ff4444", "π€", "rgba(255,68,68,0.25)")

    verdict, level, color, icon, glow = next(
        (styling for bound, styling in tiers if ai_score < bound),
        top_tier,
    )

    confidence = int(ai_score * 100)
    return verdict, level, confidence, ai_score, feats, audio_path, color, icon, glow
|
| 103 |
|
| 104 |
# ==========================================
|
| 105 |
+
# 4. VISUALIZATION
|
| 106 |
# ==========================================
|
| 107 |
def generate_audio_plots(audio_path):
    """Render a waveform and mel-spectrogram image for an audio file.

    Only the first 10 seconds are loaded (resampled to 16 kHz). The two plots
    are stacked vertically on a dark background and saved as a PNG.

    Args:
        audio_path: Path to an audio file readable by ``librosa.load``.

    Returns:
        Path of the PNG that was written. Each call writes to its own
        temporary file: a single fixed path would let overlapping requests
        overwrite one another's image before it is displayed.
    """
    y, sr = librosa.load(audio_path, sr=16000, duration=10)
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 7))

    try:
        fig.patch.set_facecolor('#08080f')

        # Top panel: time-domain waveform.
        ax1.set_facecolor('#0f0f1a')
        librosa.display.waveshow(y, sr=sr, ax=ax1, color='#00d4ff', alpha=0.85)
        ax1.set_title('Waveform Analysis', color='#c0c0e0', fontsize=13, fontweight='bold', pad=12)
        ax1.tick_params(colors='#555588')
        for spine in ax1.spines.values():
            spine.set_color('#222244')

        # Bottom panel: mel spectrogram in dB relative to the peak.
        ax2.set_facecolor('#0f0f1a')
        mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
        mel_db = librosa.power_to_db(mel, ref=np.max)
        img = librosa.display.specshow(mel_db, sr=sr, ax=ax2, x_axis='time', y_axis='mel', cmap='magma')
        cbar = fig.colorbar(img, ax=ax2, format='%+2.0f dB')
        cbar.ax.yaxis.set_tick_params(color='#555588')
        plt.setp(plt.getp(cbar.ax.axes, 'yticklabels'), color='#555588')
        ax2.set_title('Mel Spectrogram', color='#c0c0e0', fontsize=13, fontweight='bold', pad=12)
        ax2.tick_params(colors='#555588')
        ax2.yaxis.label.set_color('#555588')
        ax2.xaxis.label.set_color('#555588')
        for spine in ax2.spines.values():
            spine.set_color('#222244')

        fig.tight_layout()

        # Reserve a unique file name, then let matplotlib write to it.
        with tempfile.NamedTemporaryFile(
            prefix="audio_analysis_", suffix=".png", delete=False
        ) as tmp:
            plot_path = tmp.name
        fig.savefig(plot_path, facecolor='#08080f', bbox_inches='tight', dpi=150)
    finally:
        # Close this specific figure even if plotting failed, so figures
        # do not accumulate in pyplot's global registry.
        plt.close(fig)

    return plot_path
|
| 138 |
|
| 139 |
# ==========================================
|
| 140 |
+
# 5. HTML BUILDERS
|
| 141 |
+
# ==========================================
|
| 142 |
+
def confidence_circle(percentage, color):
    """Build an SVG circular progress indicator as an HTML string.

    Args:
        percentage: Fill level of the ring. Values outside 0-100 are clamped
            so the dash offset never goes negative or exceeds the
            circumference.
        color: CSS colour for the ring and the number. It is also
            concatenated with ``40`` for the glow, so a 6-digit hex value is
            expected.

    Returns:
        HTML markup containing the ring and a centred "<n>% Confidence" label.
    """
    import math  # local import: the file's import block is outside this view

    radius = 50
    pct = max(0, min(100, percentage))
    circumference = 2 * math.pi * radius
    # The dash offset is the *unfilled* part of the ring.
    offset = circumference - (pct / 100) * circumference
    return f"""
    <div style="display: flex; flex-direction: column; align-items: center; justify-content: center; margin: 10px 0;">
        <div style="position: relative; width: 140px; height: 140px; filter: drop-shadow(0 0 12px {color}40);">
            <svg width="140" height="140" viewBox="0 0 120 120" style="transform: rotate(-90deg);">
                <circle cx="60" cy="60" r="{radius}" stroke="#1a1a2e" stroke-width="10" fill="none"/>
                <circle cx="60" cy="60" r="{radius}" stroke="{color}" stroke-width="10" fill="none"
                        stroke-linecap="round"
                        stroke-dasharray="{circumference}"
                        stroke-dashoffset="{offset}"
                        style="transition: stroke-dashoffset 1.2s ease-out;"/>
            </svg>
            <div style="position: absolute; top: 50%; left: 50%; transform: translate(-50%, -50%); text-align: center;">
                <div style="font-size: 2em; font-weight: 800; color: {color}; line-height: 1;">{pct}%</div>
                <div style="font-size: 0.65em; color: #555588; text-transform: uppercase; letter-spacing: 1px;">Confidence</div>
            </div>
        </div>
    </div>
    """
|
| 165 |
+
|
| 166 |
+
# (height, animation duration, animation delay) for each equalizer bar, left to right.
_EQ_BARS = (
    ("40%", "0.8s", "0s"),
    ("70%", "0.9s", "0.1s"),
    ("50%", "0.7s", "0.2s"),
    ("80%", "1.0s", "0.15s"),
    ("60%", "0.85s", "0.05s"),
    ("90%", "0.75s", "0.25s"),
    ("45%", "0.95s", "0.3s"),
    ("65%", "0.8s", "0.12s"),
)

# Decorative animated equalizer shown next to placeholder messages. The bars
# reference an `eq-bounce` animation that is not defined in this snippet.
EQUALIZER_HTML = (
    '\n<div style="display: flex; align-items: flex-end; justify-content: center; height: 50px; gap: 5px; margin: 16px 0;">\n'
    + "".join(
        f'<div class="eq-bar" style="width: 6px; height: {bar_height}; background: linear-gradient(to top, #4f46e5, #00d4ff); border-radius: 3px; animation: eq-bounce {duration} infinite ease-in-out {delay};"></div>\n'
        for bar_height, duration, delay in _EQ_BARS
    )
    + "</div>\n"
)
|
| 178 |
+
|
| 179 |
+
# ==========================================
|
| 180 |
+
# 6. GRADIO HANDLERS
|
| 181 |
# ==========================================
|
| 182 |
# Footer shown under the audio report: legend for the three score bands.
_AUDIO_FOOTER_HTML = """<strong style="color: #555588;">Interpretation Guide:</strong><br>
            <span style="color: #00ff88;">β 0β35%</span> Very likely genuine human voice |
            <span style="color: #ffcc00;">β 35β65%</span> Mixed signal, manual review advised |
            <span style="color: #ff4444;">β 65β100%</span> Strong synthetic / AI indicators detected"""

# Footer shown under the video report: reminder that only the audio track was scanned.
_VIDEO_FOOTER_HTML = """<strong style="color: #555588;">Analysis Note:</strong> Deepfake videos frequently contain synthetic audio tracks.
            This scan was performed on the extracted audio waveform. Always verify visual cues separately for complete assessment."""


def _placeholder_result(missing_message, hint):
    """Return the 5-tuple shown when no file was supplied."""
    return (
        None,
        f'<div style="text-align:center;color:#ff4444;padding:30px;">{missing_message}</div>',
        "Waiting...",
        "#666666",
        EQUALIZER_HTML + f'<div style="text-align:center;color:#444466;font-size:0.9em;">{hint}</div>',
    )


def _error_result(exc):
    """Return the 5-tuple shown when analysis raised ``exc``."""
    import html  # local import: the file's import block is outside this view

    # Exception text can contain user-controlled data (e.g. the uploaded file
    # name), so escape it before embedding it in markup.
    safe_message = html.escape(str(exc))
    return (
        None,
        f"<div style='color:#ff4444;padding:30px;'>β Analysis Error: {safe_message}</div>",
        "Error",
        "#ff4444",
        "",
    )


def _build_report(file_path, verdict_label, dsp_title, footer_html):
    """Analyze ``file_path`` and return the 5-tuple for a successful run."""
    verdict, level, confidence, ai_score, feats, audio_path, color, icon, glow = analyze(file_path)
    plot_path = generate_audio_plots(audio_path)
    percentage = ai_score * 100

    if percentage < 35:
        status_emoji, status_text = "π’", "LIKELY REAL"
    elif percentage < 65:
        status_emoji, status_text = "π‘", "SUSPICIOUS"
    else:
        status_emoji, status_text = "π΄", "HIGH RISK"

    circle = confidence_circle(confidence, color)

    # NOTE(review): the "Neural Score" cell shows ai_score, which analyze()
    # computes as a blend of the classifier output and DSP features -- confirm
    # the label is intended.
    result_html = f"""
    <div style="background: linear-gradient(145deg, #0c0c14 0%, #141424 100%);
                border: 1px solid {color}30; border-radius: 20px; padding: 28px;
                box-shadow: 0 0 40px {glow}, inset 0 1px 0 rgba(255,255,255,0.03);">

        <div style="display: flex; align-items: center; gap: 20px; margin-bottom: 24px; flex-wrap: wrap;">
            <div style="font-size: 3em; line-height: 1;">{icon}</div>
            <div style="flex: 1; min-width: 200px;">
                <div style="font-size: 0.8em; color: #555588; text-transform: uppercase; letter-spacing: 2px; margin-bottom: 4px;">{verdict_label}</div>
                <div style="font-size: 1.5em; font-weight: 800; color: {color}; letter-spacing: -0.5px;">{verdict}</div>
            </div>
            <div style="min-width: 140px;">
                {circle}
            </div>
        </div>

        <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(140px, 1fr)); gap: 12px; margin-bottom: 24px;">
            <div style="background: #08080f; border-radius: 12px; padding: 16px; border-left: 3px solid {color};">
                <div style="font-size: 0.7em; color: #444466; text-transform: uppercase; letter-spacing: 1px; margin-bottom: 6px;">AI Probability</div>
                <div style="font-size: 1.5em; font-weight: 700; color: {color};">{percentage:.1f}%</div>
            </div>
            <div style="background: #08080f; border-radius: 12px; padding: 16px; border-left: 3px solid {color};">
                <div style="font-size: 0.7em; color: #444466; text-transform: uppercase; letter-spacing: 1px; margin-bottom: 6px;">Risk Level</div>
                <div style="font-size: 1.2em; font-weight: 700; color: {color};">{level}</div>
            </div>
            <div style="background: #08080f; border-radius: 12px; padding: 16px; border-left: 3px solid #00d4ff;">
                <div style="font-size: 0.7em; color: #444466; text-transform: uppercase; letter-spacing: 1px; margin-bottom: 6px;">Confidence</div>
                <div style="font-size: 1.2em; font-weight: 700; color: #00d4ff;">{confidence}%</div>
            </div>
            <div style="background: #08080f; border-radius: 12px; padding: 16px; border-left: 3px solid #ffcc00;">
                <div style="font-size: 0.7em; color: #444466; text-transform: uppercase; letter-spacing: 1px; margin-bottom: 6px;">Status</div>
                <div style="font-size: 1em; font-weight: 600; color: #ffcc00;">{status_emoji} {status_text}</div>
            </div>
        </div>

        <div style="background: #08080f; border-radius: 12px; padding: 18px; margin-bottom: 20px;">
            <div style="font-size: 0.75em; color: #444466; text-transform: uppercase; letter-spacing: 1.5px; margin-bottom: 12px;">{dsp_title}</div>
            <div style="display: flex; justify-content: space-around; font-family: 'SF Mono', monospace; font-size: 0.9em; flex-wrap: wrap; gap: 12px;">
                <div style="text-align: center;">
                    <div style="color: #555588; font-size: 0.8em;">MFCC Variance</div>
                    <div style="color: #e0e0ff; font-weight: 600;">{feats['mfcc']:.4f}</div>
                </div>
                <div style="text-align: center;">
                    <div style="color: #555588; font-size: 0.8em;">Signal Energy</div>
                    <div style="color: #e0e0ff; font-weight: 600;">{feats['energy']:.6f}</div>
                </div>
                <div style="text-align: center;">
                    <div style="color: #555588; font-size: 0.8em;">Neural Score</div>
                    <div style="color: #e0e0ff; font-weight: 600;">{ai_score:.4f}</div>
                </div>
            </div>
        </div>

        <div style="font-size: 0.8em; color: #3a3a55; border-top: 1px solid #1a1a2e; padding-top: 14px; line-height: 1.6;">
            {footer_html}
        </div>
    </div>
    """
    return plot_path, result_html, f"{percentage:.1f}%", color, ""


def detect_audio(audio_file):
    """Gradio handler: analyze an uploaded audio file.

    Args:
        audio_file: Path of the uploaded audio, or ``None`` if nothing was
            uploaded.

    Returns:
        A 5-tuple ``(plot_path, report_html, score_text, color, extra_html)``.
        ``plot_path`` is ``None`` for the placeholder and error cases. Any
        exception raised during analysis is reported in ``report_html``
        (HTML-escaped) instead of propagating.
    """
    if audio_file is None:
        return _placeholder_result(
            "β No audio file provided",
            "Upload audio to begin forensic analysis",
        )

    # Top-level UI boundary: turn any failure into an on-page message.
    try:
        return _build_report(
            audio_file,
            verdict_label="Final Verdict",
            dsp_title="π¬ DSP Forensic Signatures",
            footer_html=_AUDIO_FOOTER_HTML,
        )
    except Exception as e:
        return _error_result(e)


def detect_video(video_file):
    """Gradio handler: analyze the audio track of an uploaded video.

    Args:
        video_file: Path of the uploaded video, or ``None`` if nothing was
            uploaded.

    Returns:
        The same 5-tuple shape as ``detect_audio``; the report is labelled as
        an audio-track verdict and carries a note that visuals were not
        analyzed.
    """
    if video_file is None:
        return _placeholder_result(
            "β No video file provided",
            "Upload video to extract & analyze audio track",
        )

    # Top-level UI boundary: turn any failure into an on-page message.
    try:
        return _build_report(
            video_file,
            verdict_label="Audio Track Verdict",
            dsp_title="π¬ DSP Forensic Signatures (Audio Track)",
            footer_html=_VIDEO_FOOTER_HTML,
        )
    except Exception as e:
        return _error_result(e)
|
| 352 |
+
|
| 353 |
|
| 354 |
# ==========================================
|
| 355 |
+
# 7. GRADIO UI
|
| 356 |
# ==========================================
|
| 357 |
+
CUSTOM_CSS = """
|
| 358 |
+
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&display=swap');
|
| 359 |
+
|
| 360 |
+
.gradio-container {
|
| 361 |
+
max-width: 1250px !important;
|
| 362 |
+
margin: auto !important;
|
| 363 |
+
font-family: 'Inter', sans-serif !important;
|
| 364 |
+
background: #050508 !important;
|
| 365 |
+
}
|
| 366 |
+
body { background: #050508 !important; }
|
| 367 |
+
|
| 368 |
+
/* Tabs */
|
| 369 |
+
.tab-nav {
|
| 370 |
+
background: #0f0f1a !important;
|
| 371 |
+
border-radius: 14px !important;
|
| 372 |
+
padding: 6px !important;
|
| 373 |
+
border: 1px solid #1e1e32 !important;
|
| 374 |
+
margin-bottom: 24px !important;
|
| 375 |
+
gap: 6px !important;
|
| 376 |
+
}
|
| 377 |
+
.tab-nav button {
|
| 378 |
+
color: #555588 !important;
|
| 379 |
+
font-weight: 600 !important;
|
| 380 |
+
border-radius: 10px !important;
|
| 381 |
+
padding: 10px 28px !important;
|
| 382 |
+
border: none !important;
|
| 383 |
+
background: transparent !important;
|
| 384 |
+
transition: all 0.3s ease !important;
|
| 385 |
+
}
|
| 386 |
+
.tab-nav button:hover { color: #8888bb !important; }
|
| 387 |
+
.tab-nav button.selected {
|
| 388 |
+
background: linear-gradient(135deg, #4f46e5 0%, #7c3aed 100%) !important;
|
| 389 |
+
color: #ffffff !important;
|
| 390 |
+
box-shadow: 0 4px 20px rgba(124, 58, 237, 0.35) !important;
|
| 391 |
+
}
|
| 392 |
+
|
| 393 |
+
/* Inputs & Upload */
|
| 394 |
+
.upload-container {
|
| 395 |
+
background: #0a0a12 !important;
|
| 396 |
+
border: 2px dashed #252540 !important;
|
| 397 |
+
border-radius: 16px !important;
|
| 398 |
+
transition: all 0.3s ease !important;
|
| 399 |
+
}
|
| 400 |
+
.upload-container:hover {
|
| 401 |
+
border-color: #4f46e5 !important;
|
| 402 |
+
background: #0f0f1a !important;
|
| 403 |
+
box-shadow: 0 0 30px rgba(79, 70, 229, 0.1) !important;
|
| 404 |
+
}
|
| 405 |
+
|
| 406 |
+
/* Buttons */
|
| 407 |
+
button.primary {
|
| 408 |
+
background: linear-gradient(135deg, #4f46e5 0%, #7c3aed 100%) !important;
|
| 409 |
+
border: none !important;
|
| 410 |
+
border-radius: 12px !important;
|
| 411 |
+
font-weight: 700 !important;
|
| 412 |
+
letter-spacing: 0.5px !important;
|
| 413 |
+
padding: 14px 32px !important;
|
| 414 |
+
box-shadow: 0 4px 24px rgba(124, 58, 237, 0.3) !important;
|
| 415 |
+
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
|
| 416 |
+
}
|
| 417 |
+
button.primary:hover {
|
| 418 |
+
transform: translateY(-2px) !important;
|
| 419 |
+
box-shadow: 0 8px 32px rgba(124, 58, 237, 0.45) !important;
|
| 420 |
+
}
|
| 421 |
+
|
| 422 |
+
/* Equalizer Animation */
|
| 423 |
+
@keyframes eq-bounce {
|
| 424 |
+
0%, 100% { transform: scaleY(0.25); opacity: 0.5; }
|
| 425 |
+
50% { transform: scaleY(1); opacity: 1; }
|
| 426 |
+
}
|
| 427 |
+
|
| 428 |
+
/* Scrollbar */
|
| 429 |
+
::-webkit-scrollbar { width: 8px; }
|
| 430 |
+
::-webkit-scrollbar-track { background: #08080f; }
|
| 431 |
+
::-webkit-scrollbar-thumb { background: #252540; border-radius: 4px; }
|
| 432 |
+
::-webkit-scrollbar-thumb:hover { background: #4f46e5; }
|
| 433 |
+
|
| 434 |
+
/* Format badges helper */
|
| 435 |
+
.format-badge {
|
| 436 |
+
display: inline-block;
|
| 437 |
+
background: #12121a;
|
| 438 |
+
border: 1px solid #252540;
|
| 439 |
+
color: #6666aa;
|
| 440 |
+
padding: 4px 12px;
|
| 441 |
+
border-radius: 20px;
|
| 442 |
+
font-size: 0.75em;
|
| 443 |
+
font-weight: 600;
|
| 444 |
+
letter-spacing: 0.5px;
|
| 445 |
+
}
|
| 446 |
+
"""
|
| 447 |
+
|
| 448 |
def build_ui():
    """Assemble the Gradio interface and return it without launching.

    The layout is a header, three tabs (audio detection, video detection and a
    static "How It Works" page) and a footer, all inside one ``gr.Blocks``
    styled with ``CUSTOM_CSS``.

    Each detection tab binds its button to ``detect_audio`` / ``detect_video``.
    The bindings supply five output slots; a detector return visible in this
    file yields, in order: plot path, report HTML, score text such as
    ``"87.3%"``, a colour string, and an HTML string for the equalizer slot.
    Previously the score textbox was listed twice in ``outputs``, so the colour
    string was written to it after the percentage and the percentage was never
    shown. The colour now goes to a dedicated ``gr.State`` holder per tab, so
    the textbox keeps the percentage and the handlers' return contract is
    unchanged.

    Returns:
        The constructed ``gr.Blocks`` instance.
    """
    # NOTE(review): several user-facing literals below contain mis-decoded
    # characters (e.g. "π", "β", "β’"). They are reproduced exactly as found;
    # restore the intended emoji/punctuation from the authoritative file.
    # NOTE(review): the "Maximum file size" figures are display text only;
    # nothing in this function enforces them.
    with gr.Blocks(
        title="DeepFake AI Forensics",
        theme=gr.themes.Base(
            primary_hue="violet",
            neutral_hue="slate",
            font=["Inter", "system-ui", "sans-serif"],
        ),
        css=CUSTOM_CSS,
    ) as demo:

        # Header
        gr.HTML("""
<div style="text-align: center; padding: 40px 20px 10px 20px;">
<div style="display: inline-block; position: relative;">
<div style="position: absolute; top: -30px; left: 50%; transform: translateX(-50%); width: 280px; height: 280px;
background: radial-gradient(circle, rgba(124,58,237,0.12) 0%, transparent 70%); border-radius: 50%; pointer-events: none;"></div>
<h1 style="font-size: 2.8em; font-weight: 800; margin: 0;
background: linear-gradient(135deg, #c4b5fd 0%, #60a5fa 40%, #00d4ff 100%);
-webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text;
letter-spacing: -1.5px; position: relative;">
π DeepFake AI Forensics
</h1>
</div>
<p style="font-size: 1.05em; color: #444466; margin-top: 14px; max-width: 560px; margin-left: auto; margin-right: auto; line-height: 1.6;">
Neural + DSP ensemble detection for synthetic voice identification.
</p>
</div>
""")

        with gr.Tabs():

            # ---------------------------------------------
            # AUDIO TAB
            # ---------------------------------------------
            with gr.TabItem("π Audio Detection", id=0):
                with gr.Row():
                    # LEFT: Upload & Controls
                    with gr.Column(scale=1, min_width=340):
                        gr.Markdown("### Upload Audio File", elem_classes="section-title")

                        # Format badges
                        gr.HTML("""
<div style="margin-bottom: 12px; display: flex; flex-wrap: wrap; gap: 6px;">
<span class="format-badge">MP3</span>
<span class="format-badge">WAV</span>
<span class="format-badge">M4A</span>
<span class="format-badge">FLAC</span>
<span class="format-badge">OGG</span>
</div>
<div style="font-size: 0.8em; color: #444466; margin-bottom: 16px; display: flex; align-items: center; gap: 6px;">
<span style="font-size: 1.2em;">π</span>
<span>Maximum file size: <strong style="color: #6666aa;">50 MB</strong></span>
</div>
""")

                        audio_input = gr.Audio(
                            label="",
                            type="filepath",
                            elem_classes="upload-container"
                        )

                        # Decorative equalizer; rendered from the start and
                        # replaced by the handler's fifth return value.
                        audio_waves = gr.HTML(value=EQUALIZER_HTML + '<div style="text-align:center;color:#333355;font-size:0.85em;">Audio waveform ready for analysis</div>')

                        audio_btn = gr.Button("π Analyze Audio", variant="primary", size="lg")

                        # Big score display
                        audio_score_text = gr.Textbox(
                            label="",
                            value="--%",
                            interactive=False,
                            elem_classes="score-display"
                        )

                        # Invisible sink for the handler's colour value so it
                        # no longer overwrites the percentage shown above.
                        audio_score_color = gr.State(value="")

                    # RIGHT: Results
                    with gr.Column(scale=2):
                        gr.Markdown("### Forensic Analysis Report", elem_classes="section-title")

                        audio_plot = gr.Image(
                            label="",
                            show_label=False,
                            elem_classes="result-image"
                        )

                        audio_result = gr.HTML(
                            value="""
<div style="background: #0a0a12; border: 2px dashed #1e1e32; border-radius: 20px; padding: 50px 30px; text-align: center; margin-top: 8px;">
<div style="font-size: 3em; margin-bottom: 16px;">π</div>
<div style="color: #333355; font-size: 1.1em; font-weight: 600;">Results will appear here</div>
<div style="color: #222244; font-size: 0.9em; margin-top: 8px;">Upload an audio file and click analyze to begin</div>
</div>
"""
                        )

                # One distinct component per returned value, in return order.
                audio_btn.click(
                    fn=detect_audio,
                    inputs=[audio_input],
                    outputs=[audio_plot, audio_result, audio_score_text, audio_score_color, audio_waves]
                )

            # ---------------------------------------------
            # VIDEO TAB
            # ---------------------------------------------
            with gr.TabItem("π¬ Video Detection", id=1):
                with gr.Row():
                    with gr.Column(scale=1, min_width=340):
                        gr.Markdown("### Upload Video File", elem_classes="section-title")

                        gr.HTML("""
<div style="margin-bottom: 12px; display: flex; flex-wrap: wrap; gap: 6px;">
<span class="format-badge">MP4</span>
<span class="format-badge">MOV</span>
<span class="format-badge">AVI</span>
<span class="format-badge">MKV</span>
<span class="format-badge">WEBM</span>
</div>
<div style="font-size: 0.8em; color: #444466; margin-bottom: 16px; display: flex; align-items: center; gap: 6px;">
<span style="font-size: 1.2em;">π</span>
<span>Maximum file size: <strong style="color: #6666aa;">100 MB</strong></span>
</div>
""")

                        video_input = gr.Video(
                            label="",
                            elem_classes="upload-container"
                        )

                        video_waves = gr.HTML(value=EQUALIZER_HTML + '<div style="text-align:center;color:#333355;font-size:0.85em;">Video loaded β audio track ready for extraction</div>')

                        video_btn = gr.Button("π Analyze Video", variant="primary", size="lg")

                        video_score_text = gr.Textbox(
                            label="",
                            value="--%",
                            interactive=False
                        )

                        # Invisible sink for the handler's colour value (see
                        # the audio tab for the rationale).
                        video_score_color = gr.State(value="")

                    with gr.Column(scale=2):
                        gr.Markdown("### Forensic Analysis Report", elem_classes="section-title")

                        video_plot = gr.Image(
                            label="",
                            show_label=False
                        )

                        video_result = gr.HTML(
                            value="""
<div style="background: #0a0a12; border: 2px dashed #1e1e32; border-radius: 20px; padding: 50px 30px; text-align: center; margin-top: 8px;">
<div style="font-size: 3em; margin-bottom: 16px;">π¬</div>
<div style="color: #333355; font-size: 1.1em; font-weight: 600;">Results will appear here</div>
<div style="color: #222244; font-size: 0.9em; margin-top: 8px;">Upload a video to extract & analyze its audio track</div>
</div>
"""
                        )

                video_btn.click(
                    fn=detect_video,
                    inputs=[video_input],
                    outputs=[video_plot, video_result, video_score_text, video_score_color, video_waves]
                )

            # ---------------------------------------------
            # ABOUT TAB (static content only)
            # ---------------------------------------------
            with gr.TabItem("βΉοΈ How It Works", id=2):
                gr.HTML("""
<div style="max-width: 900px; margin: auto; padding: 20px 0 40px 0;">
<h2 style="color: #c0c0e0; font-size: 1.7em; margin-bottom: 28px; text-align: center; font-weight: 700;">π§ Detection Pipeline</h2>

<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(260px, 1fr)); gap: 18px; margin-bottom: 36px;">
<div style="background: linear-gradient(145deg, #0c0c14 0%, #141424 100%); border: 1px solid #1e1e32; border-radius: 16px; padding: 24px;">
<div style="font-size: 2em; margin-bottom: 10px;">π§ </div>
<h3 style="color: #a78bfa; margin: 0 0 6px 0; font-size: 1.1em;">Transformer Classifier</h3>
<p style="color: #444466; font-size: 0.9em; line-height: 1.5; margin: 0;">
<code style="background: #0a0a12; padding: 2px 6px; border-radius: 4px; color: #8888bb;">Hemgg/Deepfake-audio-detection</code>
base model running on GPU/CPU with HuggingFace Transformers.
</p>
</div>
<div style="background: linear-gradient(145deg, #0c0c14 0%, #141424 100%); border: 1px solid #1e1e32; border-radius: 16px; padding: 24px;">
<div style="font-size: 2em; margin-bottom: 10px;">π</div>
<h3 style="color: #60a5fa; margin: 0 0 6px 0; font-size: 1.1em;">DSP Ensemble</h3>
<p style="color: #444466; font-size: 0.9em; line-height: 1.5; margin: 0;">
MFCC variance + signal energy anomaly detection fused with neural output (60/40 weighting).
</p>
</div>
<div style="background: linear-gradient(145deg, #0c0c14 0%, #141424 100%); border: 1px solid #1e1e32; border-radius: 16px; padding: 24px;">
<div style="font-size: 2em; margin-bottom: 10px;">π¬</div>
<h3 style="color: #00d4ff; margin: 0 0 6px 0; font-size: 1.1em;">Video Extraction</h3>
<p style="color: #444466; font-size: 0.9em; line-height: 1.5; margin: 0;">
FFmpeg extracts mono 16kHz audio from any video format before forensic scanning.
</p>
</div>
</div>

<h3 style="color: #c0c0e0; margin-bottom: 16px; font-size: 1.2em;">π Score Interpretation</h3>
<div style="background: #08080f; border-radius: 14px; padding: 20px; border: 1px solid #1a1a2e; margin-bottom: 28px;">
<div style="display: flex; align-items: center; margin-bottom: 10px; padding: 10px 14px; background: #0c0c14; border-radius: 10px; border-left: 4px solid #00ff88;">
<span style="color: #00ff88; font-weight: 700; min-width: 70px; font-size: 0.95em;">0β35%</span>
<span style="color: #444466; margin-left: 12px; font-size: 0.9em;">π’ Very likely genuine / human-created</span>
</div>
<div style="display: flex; align-items: center; margin-bottom: 10px; padding: 10px 14px; background: #0c0c14; border-radius: 10px; border-left: 4px solid #ffcc00;">
<span style="color: #ffcc00; font-weight: 700; min-width: 70px; font-size: 0.95em;">35β65%</span>
<span style="color: #444466; margin-left: 12px; font-size: 0.9em;">π‘ Uncertain / mixed signal β manual review recommended</span>
</div>
<div style="display: flex; align-items: center; padding: 10px 14px; background: #0c0c14; border-radius: 10px; border-left: 4px solid #ff4444;">
<span style="color: #ff4444; font-weight: 700; min-width: 70px; font-size: 0.95em;">65β100%</span>
<span style="color: #444466; margin-left: 12px; font-size: 0.9em;">π΄ Strong AI-generated / synthetic voice indicators</span>
</div>
</div>

<div style="background: #140a0a; border: 1px solid #331a1a; border-radius: 14px; padding: 20px; color: #884444; font-size: 0.88em; line-height: 1.6;">
<strong style="color: #cc5555;">β οΈ Important Limitations</strong><br><br>
No automated detector is 100% accurate. Adversarial AI models may evade detection.
Compressed or noisy audio reduces reliability. Always use human expert judgment for critical decisions.
</div>
</div>
""")

        # Footer
        gr.HTML("""
<div style="text-align: center; padding: 30px 20px; color: #2a2a44; font-size: 0.82em; border-top: 1px solid #12121a; margin-top: 10px;">
Neural Audio Forensics β’ Powered by HuggingFace Transformers & DSP Signal Processing
</div>
""")

    return demo
|
| 675 |
|
| 676 |
|
|
|
|
| 679 |
demo.launch(
|
| 680 |
server_name="0.0.0.0",
|
| 681 |
server_port=7860,
|
| 682 |
+
share=False
|
| 683 |
)
|