Spaces:

Jabrave
/

deepfake-api

Running

App Files Files Community

deepfake-api / app.py

Jabrave

Update app.py

6495d4e verified 13 days ago

raw

history blame contribute delete

7.72 kB

	from transformers import AutoFeatureExtractor
	from transformers import AutoModelForAudioClassification
	import librosa
	from detect_face import detect_face
	from transformers import AutoModelForImageClassification
	from transformers import AutoImageProcessor
	from PIL import Image
	import torch
	import gradio as gr
	from extract_frames import extract_frames

	import os
	import shutil

	# =========================
	# Load the main model
	# =========================
	_MAIN_REPO = "Jabrave/deepfake-detector"
	model = AutoModelForImageClassification.from_pretrained(_MAIN_REPO)
	processor = AutoImageProcessor.from_pretrained(_MAIN_REPO)

	# =========================
	# Load the face model
	# =========================
	_FACE_REPO = "Jabrave/face-detector"
	face_model = AutoModelForImageClassification.from_pretrained(_FACE_REPO)
	face_processor = AutoImageProcessor.from_pretrained(_FACE_REPO)

	# =========================
	# Load the voice model
	# =========================
	_VOICE_REPO = "Jabrave/voice-detector"
	voice_model = AutoModelForAudioClassification.from_pretrained(_VOICE_REPO)
	voice_processor = AutoFeatureExtractor.from_pretrained(_VOICE_REPO)

	# =========================
	# function predict model
	# =========================
	def predict_with_model(image, model, processor):
	    """Classify a single image and report the winning label.

	    Args:
	        image: Image handed to ``processor``. If it exposes a string
	            ``mode`` attribute other than ``"RGB"`` (as grayscale, RGBA or
	            palette PIL images do), it is converted to RGB first.
	        model: Callable invoked with the processor output as keyword
	            arguments. Its result must expose ``logits`` and the model
	            must expose ``config.id2label``.
	        processor: Callable invoked as
	            ``processor(images=image, return_tensors="pt")``.

	    Returns:
	        dict: ``{"label": <id2label entry of the top class>,
	        "confidence": <softmax probability of that class as a percentage,
	        rounded to 2 decimals>}``.
	    """
	    # Normalise the colour mode up front: checkpoints whose processor is
	    # configured for three channels can fail on 1- or 4-channel input.
	    # The ``str`` check keeps arrays/tensors (no string ``mode``) untouched.
	    mode = getattr(image, "mode", None)
	    if isinstance(mode, str) and mode != "RGB":
	        image = image.convert("RGB")

	    inputs = processor(images=image, return_tensors="pt")

	    # Inference only, so skip gradient bookkeeping.
	    with torch.no_grad():
	        logits = model(**inputs).logits

	    # ``.item()`` assumes a single image per call (batch size 1).
	    predicted_class = logits.argmax(-1).item()
	    probabilities = torch.softmax(logits, dim=-1)[0]
	    confidence = probabilities[predicted_class].item()

	    return {
	        "label": model.config.id2label[predicted_class],
	        "confidence": round(confidence * 100, 2),
	    }


	def predict_audio(audio_path):
	    """Classify an audio file with the voice model.

	    Args:
	        audio_path: Path of the audio file to analyse.

	    Returns:
	        dict: ``{"label": <id2label entry of the top class>,
	        "confidence": <softmax probability of that class as a percentage,
	        rounded to 2 decimals>}``.

	    Raises:
	        ValueError: If ``audio_path`` is empty or ``None``.
	    """
	    # Fail early with a clear message instead of an opaque loader error
	    # when the request carries no file.
	    if not audio_path:
	        raise ValueError("No audio file was provided.")

	    # Resample to the rate the feature extractor declares, falling back to
	    # 16 kHz (the previously hard-coded value) when it declares none.
	    target_rate = getattr(voice_processor, "sampling_rate", None) or 16000

	    waveform, _ = librosa.load(audio_path, sr=target_rate)

	    inputs = voice_processor(
	        waveform,
	        sampling_rate=target_rate,
	        return_tensors="pt",
	    )

	    # Inference only, so skip gradient bookkeeping.
	    with torch.no_grad():
	        logits = voice_model(**inputs).logits

	    # ``.item()`` assumes one clip per call (batch size 1).
	    predicted_class = logits.argmax(-1).item()
	    confidence = torch.softmax(logits, dim=-1)[0][predicted_class].item()

	    return {
	        "label": voice_model.config.id2label[predicted_class],
	        "confidence": round(confidence * 100, 2),
	    }

	# =========================
	# IMAGE PREDICT
	# =========================
	def predict(image):
	    """Score one image by combining a full-image check with per-face checks.

	    The image is written to ``temp_image.jpg``, classified as a whole with
	    the main model, then every face path returned by ``detect_face`` is
	    classified with the face model. The temporary file and the ``faces``
	    directory are removed afterwards, even when a step fails.

	    Args:
	        image: Pixel array accepted by ``PIL.Image.fromarray``.

	    Returns:
	        dict with:
	            ``label``: ``"artificial"`` if the full image or any face is
	                labelled anything other than ``"real"``, else ``"real"``.
	            ``final_score``: Mean of ``full_image_score`` and ``face_score``.
	            ``full_image_score``: Confidence reported for the full image.
	            ``face_score``: Mean confidence over all faces, or the
	                full-image confidence when no face was returned.
	            ``faces_detected``: Number of face paths returned.

	    Raises:
	        ValueError: If ``image`` is ``None``.
	    """
	    # Fail early with a clear message when the request carries no image.
	    if image is None:
	        raise ValueError("No image was provided.")

	    # NOTE(review): the temp file and "faces" directory use fixed names, so
	    # concurrent requests would share them — confirm requests are serialised.
	    temp_path = "temp_image.jpg"

	    try:
	        # JPEG cannot store an alpha channel, so normalise to RGB first.
	        Image.fromarray(image).convert("RGB").save(temp_path)

	        # ----------------------
	        # Analyse the full image
	        # ----------------------
	        # The context manager releases the file handle before cleanup.
	        with Image.open(temp_path) as full_image:
	            full_result = predict_with_model(full_image, model, processor)

	        # ----------------------
	        # detect faces
	        # ----------------------
	        os.makedirs("faces", exist_ok=True)
	        faces = detect_face(temp_path)

	        face_scores = []
	        fake_face_found = False

	        for face_path in faces:
	            with Image.open(face_path) as face_image:
	                face_result = predict_with_model(
	                    face_image,
	                    face_model,
	                    face_processor,
	                )

	            face_scores.append(face_result["confidence"])

	            if face_result["label"] != "real":
	                fake_face_found = True

	        # ----------------------
	        # combine score
	        # ----------------------
	        full_score = full_result["confidence"]

	        # With no faces the full-image score stands in for the face score.
	        avg_face_score = (
	            sum(face_scores) / len(face_scores)
	            if face_scores
	            else full_score
	        )

	        final_score = (full_score + avg_face_score) / 2

	        is_fake = full_result["label"] != "real" or fake_face_found

	        return {
	            "label": "artificial" if is_fake else "real",
	            "final_score": round(final_score, 2),
	            "full_image_score": round(full_score, 2),
	            "face_score": round(avg_face_score, 2),
	            "faces_detected": len(faces),
	        }
	    finally:
	        # cleanup — runs on success and on failure so nothing is left behind
	        if os.path.exists(temp_path):
	            os.remove(temp_path)

	        if os.path.exists("faces"):
	            shutil.rmtree("faces")

	# =========================
	# VIDEO PREDICT
	# =========================
	def predict_video(video_path):
	    """Score a video frame by frame and aggregate the result.

	    Frames are written to ``frames`` by ``extract_frames``. Each frame is
	    classified as a whole with the main model, and every face path
	    returned by ``detect_face`` for that frame is classified with the face
	    model. A frame counts as fake when the frame itself or any of its
	    faces is labelled anything other than ``"real"``. The ``frames`` and
	    ``faces`` directories are removed afterwards, even when a step fails.

	    Args:
	        video_path: Path of the video file to analyse.

	    Returns:
	        dict with:
	            ``label``: ``"artificial"`` when more than 30% of the frames
	                count as fake, else ``"real"`` (also the result when no
	                frame was extracted).
	            ``final_score``: Mean of ``full_image_score`` and ``face_score``.
	            ``fake_frames``: Number of frames counted as fake.
	            ``total_frames``: Number of frames analysed.
	            ``full_image_score``: Mean full-frame confidence (0 if there
	                are no frames).
	            ``face_score``: Mean confidence over all faces of all frames,
	                or ``full_image_score`` when no face was returned.

	    Raises:
	        ValueError: If ``video_path`` is empty or ``None``.
	    """
	    # Fail early with a clear message when the request carries no file.
	    if not video_path:
	        raise ValueError("No video file was provided.")

	    # Share of fake frames above which the whole video is called artificial.
	    fake_ratio_threshold = 0.3

	    # NOTE(review): "frames" and "faces" are fixed names, so concurrent
	    # requests would share them — confirm requests are serialised.
	    work_dirs = ("frames", "faces")

	    try:
	        # cleanup folders left over from an earlier run, then recreate them
	        for folder in work_dirs:
	            if os.path.exists(folder):
	                shutil.rmtree(folder)

	        for folder in work_dirs:
	            os.makedirs(folder, exist_ok=True)

	        # extract frames
	        extract_frames(video_path, "frames")

	        fake_frames = 0
	        total_frames = 0
	        full_scores = []
	        face_scores = []

	        for frame in os.listdir("frames"):
	            frame_path = os.path.join("frames", frame)

	            # ----------------------
	            # Analyse the full frame
	            # ----------------------
	            # The context manager releases the file handle before cleanup.
	            with Image.open(frame_path) as frame_image:
	                full_result = predict_with_model(
	                    frame_image,
	                    model,
	                    processor,
	                )

	            full_scores.append(full_result["confidence"])

	            # ----------------------
	            # detect faces
	            # ----------------------
	            face_fake_found = False

	            for face_path in detect_face(frame_path):
	                with Image.open(face_path) as face_image:
	                    face_result = predict_with_model(
	                        face_image,
	                        face_model,
	                        face_processor,
	                    )

	                face_scores.append(face_result["confidence"])

	                if face_result["label"] != "real":
	                    face_fake_found = True

	            # ----------------------
	            # final frame decision
	            # ----------------------
	            if full_result["label"] != "real" or face_fake_found:
	                fake_frames += 1

	            total_frames += 1

	        # ----------------------
	        # final score
	        # ----------------------
	        avg_full = sum(full_scores) / len(full_scores) if full_scores else 0

	        # With no faces the full-frame average stands in for the face score.
	        avg_face = (
	            sum(face_scores) / len(face_scores)
	            if face_scores
	            else avg_full
	        )

	        final_score = (avg_full + avg_face) / 2

	        is_fake = fake_frames > total_frames * fake_ratio_threshold

	        return {
	            "label": "artificial" if is_fake else "real",
	            "final_score": round(final_score, 2),
	            "fake_frames": fake_frames,
	            "total_frames": total_frames,
	            "full_image_score": round(avg_full, 2),
	            "face_score": round(avg_face, 2),
	        }
	    finally:
	        # cleanup — runs on success and on failure so nothing is left behind
	        for folder in work_dirs:
	            if os.path.exists(folder):
	                shutil.rmtree(folder)

	# =========================
	# UI
	# =========================
	def _json_interface(fn, input_component, title):
	    """Build a Gradio interface that shows ``fn``'s result as JSON."""
	    return gr.Interface(
	        fn=fn,
	        inputs=input_component,
	        outputs=gr.JSON(),
	        title=title,
	    )


	# One interface per media type, each wired to its predictor.
	image_ui = _json_interface(predict, gr.Image(), "Image Deepfake Detector")
	video_ui = _json_interface(predict_video, gr.Video(), "Video Deepfake Detector")
	audio_ui = _json_interface(
	    predict_audio,
	    gr.Audio(type="filepath"),
	    "Voice Deepfake Detector",
	)

	# Group the three interfaces as tabs of a single app.
	demo = gr.TabbedInterface(
	    interface_list=[image_ui, video_ui, audio_ui],
	    tab_names=["Image", "Video", "Audio"],
	)

	demo.launch()