Spaces:

UAJK-Practice
/

DEEPFAKEAI

Sleeping

App Files Files Community

DEEPFAKEAI / app.py

Khalil09

Create app.py

5249f52 verified 29 days ago

raw

history blame contribute delete

14.4 kB

	# app.py — This is your main Hugging Face Spaces app
	import gradio as gr
	import torch
	import numpy as np
	import librosa
	import cv2
	import os
	import matplotlib.pyplot as plt
	import matplotlib
	matplotlib.use('Agg')
	from PIL import Image
	import io
	import base64
	import warnings
	warnings.filterwarnings('ignore')

	# ─── Paste your model classes here (or import them) ──────────────────────────
	# Define (or paste) the AudioDeepfakeDetector and VideoDeepfakeDetector classes
	# here, together with the predict_audio / predict_video helpers that
	# detect_audio() and detect_video() call when trained weights are present,
	# OR use import statements if you structured it as a package:
	# from model.audio_detector import AudioDeepfakeDetector, extract_audio_features, predict_audio
	# from model.video_detector import VideoDeepfakeDetector, predict_video

	# For Spaces, we'll use lightweight pretrained models from HuggingFace Hub
	# as fallback if custom models aren't trained yet:

	from transformers import pipeline

	# Locations of the optional trained model weights (relative paths, so they
	# are resolved against the process's working directory).
	# For production, replace with your trained model weights
	AUDIO_MODEL_PATH = "audio_model.pth"
	VIDEO_MODEL_PATH = "video_model.pth"

	# Evaluated once at import time and based on the audio weights only; when
	# False, the heuristic fallbacks in this file are used instead.
	USE_TRAINED_MODEL = os.path.exists(AUDIO_MODEL_PATH)

	# ─── Fallback: Use transformers pipeline ─────────────────────────────────────
	def analyze_audio_transformers(audio_path):
	    """Estimate how AI-like an audio clip is with a spectral heuristic.

	    Fallback used when no trained audio model is available. The score is
	    derived from how regular the zero-crossing rate and the spectral
	    centroid are across the first five seconds of the clip (resampled to
	    16 kHz). It is a placeholder, not a real deepfake classifier.

	    Args:
	        audio_path: Path to an audio file that ``librosa.load`` can read.

	    Returns:
	        A float in [0, 1]; higher means "more AI-like". Returns 0.5
	        (uncertain) when the clip is empty/silent or cannot be analysed.
	    """
	    try:
	        # TODO: plug in a real deepfake classifier here. The author's
	        # suggestion was a transformers "audio-classification" pipeline with
	        # model="mo-aqrabi/deepfake-audio-detection". No pipeline is built
	        # for now: loading one on every call is expensive and its output
	        # was never used by the heuristic below.
	        y, sr = librosa.load(audio_path, sr=16000, duration=5)

	        # With no signal both regularity terms below evaluate to 1, which
	        # would report silence as 100% AI-generated. Treat it as uncertain.
	        if y.size == 0 or not np.any(y):
	            return 0.5

	        zcr = librosa.feature.zero_crossing_rate(y)
	        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)

	        # Heuristic: AI audio tends to have very regular ZCR and centroid.
	        # Regularity is 1 minus the coefficient of variation; the epsilon
	        # guards against division by zero.
	        zcr_regularity = 1 - (np.std(zcr) / (np.mean(zcr) + 1e-6))
	        spectral_regularity = 1 - (
	            np.std(spectral_centroid) / (np.mean(spectral_centroid) + 1e-6)
	        )

	        # Combine into a rough AI score; clip because either term can be
	        # negative when the variation exceeds the mean.
	        ai_score = np.clip((zcr_regularity + spectral_regularity) / 2, 0, 1)
	        return float(ai_score)
	    except Exception as e:
	        # Deliberate best-effort: any failure degrades to "uncertain".
	        print(f"Fallback audio analysis error: {e}")
	        return 0.5  # Uncertain


	# ─── Main Detection Functions ─────────────────────────────────────────────────

	def _plot_audio_analysis(y, sr):
	    """Draw the waveform and mel spectrogram of *y*; return the PNG's path."""
	    import tempfile

	    # Imported explicitly: depending on the librosa release, the `display`
	    # submodule is not available as an attribute after a bare `import librosa`.
	    import librosa.display as librosa_display

	    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 6))
	    try:
	        fig.patch.set_facecolor('#0d0d0d')

	        # Waveform
	        ax1.set_facecolor('#1a1a2e')
	        librosa_display.waveshow(y, sr=sr, ax=ax1, color='#00d4ff')
	        ax1.set_title('Audio Waveform', color='white', fontsize=12)
	        ax1.tick_params(colors='white')

	        # Mel Spectrogram
	        ax2.set_facecolor('#1a1a2e')
	        mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
	        mel_db = librosa.power_to_db(mel, ref=np.max)
	        img = librosa_display.specshow(
	            mel_db, sr=sr, ax=ax2, x_axis='time', y_axis='mel', cmap='magma'
	        )
	        fig.colorbar(img, ax=ax2, format='%+2.0f dB')
	        ax2.set_title('Mel Spectrogram', color='white', fontsize=12)
	        ax2.tick_params(colors='white')
	        ax2.yaxis.label.set_color('white')
	        ax2.xaxis.label.set_color('white')

	        fig.tight_layout()

	        # One file per call so simultaneous requests cannot overwrite each
	        # other's plot. The file is intentionally left on disk because the
	        # caller hands its path to the UI.
	        with tempfile.NamedTemporaryFile(
	            prefix='audio_analysis_', suffix='.png', delete=False
	        ) as tmp:
	            plot_path = tmp.name
	        fig.savefig(plot_path, facecolor='#0d0d0d', bbox_inches='tight')
	        return plot_path
	    finally:
	        # Always release the figure, even when plotting fails.
	        plt.close(fig)


	def detect_audio(audio_file):
	    """Run the full audio analysis pipeline for one uploaded file.

	    Args:
	        audio_file: Path to the audio file, or None when nothing was uploaded.

	    Returns:
	        A 3-tuple ``(plot_path, report_markdown, score_label)``:
	        the path of the generated waveform/spectrogram PNG (None on failure),
	        a Markdown report (or an error message), and the AI probability as a
	        string such as ``"73.2%"`` (``"N/A"`` on error, None when no file
	        was provided). Exceptions are not propagated; they are reported in
	        the second element.
	    """
	    if audio_file is None:
	        return None, "❌ No audio file provided", None

	    try:
	        if USE_TRAINED_MODEL:
	            # NOTE(review): predict_audio is not defined in this file as shown;
	            # it must be pasted/imported alongside the trained weights.
	            score = predict_audio(audio_file, AUDIO_MODEL_PATH)
	        else:
	            score = analyze_audio_transformers(audio_file)

	        percentage = score * 100
	        verdict = "🤖 AI GENERATED" if score > 0.5 else "✅ REAL / HUMAN"
	        # Confidence is the probability of whichever class was chosen.
	        confidence = max(score, 1 - score) * 100

	        if percentage > 75:
	            status = '⚠️ HIGH RISK'
	        elif percentage > 50:
	            status = '🟡 SUSPICIOUS'
	        else:
	            status = '🟢 LIKELY REAL'

	        # Generate waveform + spectrogram plot (first 10 s, 16 kHz).
	        y, sr = librosa.load(audio_file, sr=16000, duration=10)
	        plot_path = _plot_audio_analysis(y, sr)

	        # Built line by line with plain "|" so the table renders as Markdown
	        # regardless of how this source file is indented.
	        result_text = "\n".join([
	            "## 🔊 Audio Analysis Result",
	            "",
	            "| Metric | Value |",
	            "|--------|-------|",
	            f"| AI Probability | {percentage:.1f}% |",
	            f"| Verdict | {verdict} |",
	            f"| Confidence | {confidence:.1f}% |",
	            f"| Status | {status} |",
	            "",
	            "### Interpretation",
	            "- 0-30%: Very likely genuine/human-created",
	            "- 30-50%: Possibly human, some AI characteristics",
	            "- 50-70%: Likely AI-generated, needs verification",
	            "- 70-100%: Almost certainly AI-generated",
	        ])
	        return plot_path, result_text, f"{percentage:.1f}%"

	    except Exception as e:
	        # UI boundary: surface the problem to the user instead of crashing.
	        return None, f"❌ Error analyzing audio: {str(e)}", "N/A"


	def _plot_video_analysis(score, frame_scores, verdict):
	    """Draw the per-frame score curve and verdict pie; return the PNG's path."""
	    import tempfile

	    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
	    try:
	        fig.patch.set_facecolor('#0d0d0d')

	        # Frame scores over time
	        ax1.set_facecolor('#1a1a2e')
	        frames_x = range(len(frame_scores))
	        frame_pct = [f * 100 for f in frame_scores]
	        ax1.plot(frames_x, frame_pct, 'cyan', linewidth=2)
	        ax1.fill_between(frames_x, frame_pct, alpha=0.3, color='cyan')
	        ax1.axhline(y=50, color='red', linestyle='--', alpha=0.7,
	                    label='Threshold (50%)')
	        ax1.set_xlabel('Frame', color='white')
	        ax1.set_ylabel('AI Score (%)', color='white')
	        ax1.set_title('Per-Frame AI Score', color='white')
	        ax1.tick_params(colors='white')
	        ax1.legend(facecolor='#1a1a2e', labelcolor='white')
	        ax1.set_ylim(0, 100)

	        # Score breakdown pie
	        ax2.set_facecolor('#1a1a2e')
	        ax2.pie(
	            [score * 100, (1 - score) * 100],
	            labels=['AI Generated', 'Real/Human'],
	            colors=['#FF4444', '#00CC44'],
	            autopct='%1.1f%%',
	            textprops={'color': 'white', 'fontsize': 12},
	            startangle=90,
	        )
	        ax2.set_title(f'Final Verdict: {verdict}', color='white', fontsize=13)

	        fig.tight_layout()

	        # One file per call so simultaneous requests cannot overwrite each
	        # other's plot. The file is intentionally left on disk because the
	        # caller hands its path to the UI.
	        with tempfile.NamedTemporaryFile(
	            prefix='video_analysis_', suffix='.png', delete=False
	        ) as tmp:
	            plot_path = tmp.name
	        fig.savefig(plot_path, facecolor='#0d0d0d', bbox_inches='tight')
	        return plot_path
	    finally:
	        # Always release the figure, even when plotting fails.
	        plt.close(fig)


	def detect_video(video_file):
	    """Run the full video analysis pipeline for one uploaded file.

	    Args:
	        video_file: Path to the video file, or None when nothing was uploaded.

	    Returns:
	        A 3-tuple ``(plot_path, report_markdown, score_label)``:
	        the path of the generated PNG (None on failure), a Markdown report
	        (or an error message), and the AI probability as a string such as
	        ``"41.0%"`` (``"N/A"`` on error, None when no file was provided).
	        Exceptions are not propagated; they are reported in the second
	        element.
	    """
	    if video_file is None:
	        return None, "❌ No video file provided", None

	    try:
	        # The trained path needs both the global flag (which reflects the
	        # audio weights) and the video weights file to exist.
	        if USE_TRAINED_MODEL and os.path.exists(VIDEO_MODEL_PATH):
	            # NOTE(review): predict_video is not defined in this file as shown;
	            # it must be pasted/imported alongside the trained weights.
	            result = predict_video(video_file, VIDEO_MODEL_PATH)
	            score = result['final_score']
	            frame_scores = result['per_frame_scores']
	            temporal = result['temporal_score']
	        else:
	            # Fallback: basic frame analysis
	            score, frame_scores, temporal = analyze_video_basic(video_file)

	        percentage = score * 100
	        verdict = "🤖 AI GENERATED" if score > 0.5 else "✅ REAL / HUMAN"

	        if percentage > 75:
	            status = '⚠️ HIGH RISK'
	        elif percentage > 50:
	            status = '🟡 SUSPICIOUS'
	        else:
	            status = '🟢 LIKELY REAL'

	        plot_path = _plot_video_analysis(score, frame_scores, verdict)

	        # Built line by line with plain "|" so the table renders as Markdown
	        # regardless of how this source file is indented.
	        result_text = "\n".join([
	            "## 🎬 Video Analysis Result",
	            "",
	            "| Metric | Value |",
	            "|--------|-------|",
	            f"| AI Probability | {percentage:.1f}% |",
	            f"| Verdict | {verdict} |",
	            f"| Frames Analyzed | {len(frame_scores)} |",
	            f"| Temporal Inconsistency | {temporal*100:.1f}% |",
	            f"| Status | {status} |",
	            "",
	            "### What we checked:",
	            "- ✓ Face region analysis per frame",
	            "- ✓ Temporal consistency between frames",
	            "- ✓ Artifact patterns typical of AI generation",
	            "- ✓ Natural motion flow analysis",
	        ])
	        return plot_path, result_text, f"{percentage:.1f}%"

	    except Exception as e:
	        # UI boundary: surface the problem to the user instead of crashing.
	        return None, f"❌ Error analyzing video: {str(e)}", "N/A"


	def analyze_video_basic(video_path, num_samples=20, diff_scale=50.0):
	    """Score a video with a simple frame-difference heuristic (no trained model).

	    Up to ``num_samples`` frames are sampled evenly across the video. Each
	    sampled frame is converted to grayscale and compared with the previously
	    sampled one; the standard deviation of the absolute difference, divided
	    by ``diff_scale`` and clipped to [0, 1], is that pair's score.

	    Args:
	        video_path: Path to a video file readable by OpenCV.
	        num_samples: Maximum number of frames to sample.
	        diff_scale: Difference standard deviation that maps to a score of 1.

	    Returns:
	        A 3-tuple ``(avg_score, frame_scores, temporal)``: the mean pair
	        score, the list of pair scores, and their standard deviation capped
	        at 1.0. Returns ``(0.5, [0.5], 0.5)`` when no frame pair could be
	        scored (unreadable file, or fewer than two readable frames).
	    """
	    cap = cv2.VideoCapture(video_path)
	    frame_scores = []
	    try:
	        total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
	        if total > 0:
	            # np.unique drops the repeated indices linspace produces for
	            # clips shorter than num_samples; without it a frame would be
	            # diffed against itself and contribute a bogus score of 0.
	            indices = np.unique(
	                np.linspace(0, total - 1, num_samples, dtype=int)
	            )
	        else:
	            indices = []

	        prev_gray = None
	        for idx in indices:
	            cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
	            ret, frame = cap.read()
	            if not ret:
	                continue
	            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

	            if prev_gray is not None:
	                # Spread of the inter-frame difference, normalised to [0, 1].
	                diff = cv2.absdiff(gray, prev_gray)
	                score = float(np.clip(np.std(diff) / diff_scale, 0, 1))
	                frame_scores.append(score)
	            prev_gray = gray
	    finally:
	        # Release the capture even if decoding raises part-way through.
	        cap.release()

	    if not frame_scores:
	        return 0.5, [0.5], 0.5

	    avg_score = float(np.mean(frame_scores))
	    temporal = float(np.std(frame_scores))
	    return avg_score, frame_scores, min(temporal, 1.0)


	# ─── Gradio UI ────────────────────────────────────────────────────────────────

	def build_ui():
	    """Build the Gradio interface and return it without launching.

	    The UI has three tabs: audio detection (wired to ``detect_audio``),
	    video detection (wired to ``detect_video``) and a static "How It Works"
	    page. Each detection button feeds the uploaded file to its function and
	    shows the returned plot, Markdown report and score label.

	    Returns:
	        The ``gr.Blocks`` application object.
	    """
	    # Written with plain "|" (a backslash-escaped pipe is shown literally by
	    # Markdown and prevents the tables from rendering) and joined line by
	    # line so the result does not depend on this file's indentation.
	    about_markdown = "\n".join([
	        "## 🧠 Detection Methodology",
	        "",
	        "### Audio Analysis",
	        "| Feature | What it detects |",
	        "|---------|----------------|",
	        "| MFCC (40 coefficients) | Unnatural vocal tract patterns |",
	        "| Mel Spectrogram | Frequency distribution anomalies |",
	        "| Zero Crossing Rate | Overly smooth AI transitions |",
	        "| Spectral Centroid | Frequency center shifts |",
	        "| Tonnetz | Harmonic content irregularities |",
	        "",
	        "### Video Analysis",
	        "| Method | What it detects |",
	        "|--------|----------------|",
	        "| EfficientNet-B4 CNN | Per-frame visual artifacts |",
	        "| Optical Flow | Temporal inconsistencies |",
	        "| Face Detection | Blending boundary anomalies |",
	        "| Ensemble Scoring | Combined confidence score |",
	        "",
	        "### Score Interpretation",
	        "- 0-30%: 🟢 Very likely genuine",
	        "- 30-50%: 🟡 Some AI characteristics, inconclusive",
	        "- 50-70%: 🟠 Likely AI-generated",
	        "- 70-100%: 🔴 Almost certainly AI-generated",
	        "",
	        "### ⚠️ Limitations",
	        "- No detector is 100% accurate",
	        "- Newer AI models may evade detection",
	        "- Low quality media reduces accuracy",
	        "- Always combine with human judgment",
	    ])

	    with gr.Blocks(
	        title="🔍 DeepFake AI Detector",
	        theme=gr.themes.Base(
	            primary_hue="blue",
	            neutral_hue="slate",
	        ),
	        css="""
	        .gradio-container { max-width: 1100px; margin: auto; }
	        .result-box { border-radius: 12px; padding: 16px; }
	        h1 { text-align: center; }
	        .score-display { font-size: 48px; font-weight: bold; text-align: center; }
	        """
	    ) as demo:

	        gr.HTML("""
	        <div style="text-align:center; padding: 20px 0;">
	        <h1 style="font-size: 2.5em; font-weight: 800;">🔍 DeepFake AI Detector</h1>
	        <p style="font-size: 1.1em; color: #888;">
	        Upload audio or video to detect AI generation.<br>
	        Get a precise percentage score of how much AI was used.
	        </p>
	        </div>
	        """)

	        with gr.Tabs():

	            # ── Audio Tab ──────────────────────────────────────────────────
	            with gr.TabItem("🔊 Audio Detection"):
	                gr.Markdown("### Upload an audio file to check if it's AI-generated")
	                with gr.Row():
	                    with gr.Column(scale=1):
	                        # type="filepath" so detect_audio receives a path string.
	                        audio_input = gr.Audio(
	                            label="Upload Audio (MP3, WAV, M4A)",
	                            type="filepath"
	                        )
	                        audio_btn = gr.Button("🔍 Analyze Audio", variant="primary", size="lg")

	                    with gr.Column(scale=2):
	                        audio_score = gr.Label(label="🎯 AI Score", num_top_classes=1)
	                        audio_plot = gr.Image(label="📊 Audio Analysis")
	                        audio_result = gr.Markdown(label="📋 Detailed Report")

	                # Output order must match detect_audio's return tuple:
	                # (plot path, report markdown, score label).
	                audio_btn.click(
	                    fn=detect_audio,
	                    inputs=[audio_input],
	                    outputs=[audio_plot, audio_result, audio_score]
	                )

	                gr.Examples(
	                    examples=[],
	                    inputs=audio_input,
	                    label="Example files (add your own samples)"
	                )

	            # ── Video Tab ──────────────────────────────────────────────────
	            with gr.TabItem("🎬 Video Detection"):
	                gr.Markdown("### Upload a video file to check if it's a deepfake")
	                with gr.Row():
	                    with gr.Column(scale=1):
	                        video_input = gr.Video(
	                            label="Upload Video (MP4, AVI, MOV)",
	                        )
	                        video_btn = gr.Button("🔍 Analyze Video", variant="primary", size="lg")

	                    with gr.Column(scale=2):
	                        video_score = gr.Label(label="🎯 AI Score", num_top_classes=1)
	                        video_plot = gr.Image(label="📊 Frame Analysis")
	                        video_result = gr.Markdown(label="📋 Detailed Report")

	                # Output order must match detect_video's return tuple:
	                # (plot path, report markdown, score label).
	                video_btn.click(
	                    fn=detect_video,
	                    inputs=[video_input],
	                    outputs=[video_plot, video_result, video_score]
	                )

	            # ── About Tab ─────────────────────────────────────────────────
	            with gr.TabItem("ℹ️ How It Works"):
	                gr.Markdown(about_markdown)

	    return demo


	if __name__ == "__main__":
	    # Script entry point: build the interface, then serve it on all network
	    # interfaces at port 7860 and request a public share link for testing.
	    launch_kwargs = {
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "share": True,  # Creates public URL for testing
	    }
	    demo = build_ui()
	    demo.launch(**launch_kwargs)