Spaces:
Running
Running
| from transformers import AutoFeatureExtractor | |
| from transformers import AutoModelForAudioClassification | |
| import librosa | |
| from detect_face import detect_face | |
| from transformers import AutoModelForImageClassification | |
| from transformers import AutoImageProcessor | |
| from PIL import Image | |
| import torch | |
| import gradio as gr | |
| from extract_frames import extract_frames | |
| import os | |
| import shutil | |
# =========================
# Main model (scores the full image / full video frame)
# =========================
model = AutoModelForImageClassification.from_pretrained("Jabrave/deepfake-detector")
processor = AutoImageProcessor.from_pretrained("Jabrave/deepfake-detector")

# =========================
# Face model (scores individual face crops)
# =========================
face_model = AutoModelForImageClassification.from_pretrained("Jabrave/face-detector")
face_processor = AutoImageProcessor.from_pretrained("Jabrave/face-detector")

# =========================
# Voice model (scores audio clips)
# =========================
voice_model = AutoModelForAudioClassification.from_pretrained("Jabrave/voice-detector")
voice_processor = AutoFeatureExtractor.from_pretrained("Jabrave/voice-detector")
# =========================
# Shared image-classification helper
# =========================
def predict_with_model(image, model, processor):
    """Classify one image and report the winning label with its confidence.

    Args:
        image: The image to classify; forwarded to ``processor`` as
            ``images=``.
        model: Classifier invoked as ``model(**inputs)``. Its result must
            expose ``.logits`` and the model must expose ``config.id2label``.
        processor: Callable that turns the image into model inputs; it is
            asked for PyTorch tensors (``return_tensors="pt"``).

    Returns:
        dict: ``{"label": <id2label entry of the top class>,
        "confidence": <softmax probability of that class, as a percentage
        rounded to 2 decimals>}``.
    """
    encoded = processor(images=image, return_tensors="pt")

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        scores = model(**encoded).logits

    # .item() only works for a single prediction, so row 0 below is the
    # one and only row of the batch.
    top_index = scores.argmax(-1).item()
    probabilities = torch.softmax(scores, dim=1)
    top_probability = probabilities[0][top_index].item()

    return {
        "label": model.config.id2label[top_index],
        "confidence": round(top_probability * 100, 2),
    }
def predict_audio(audio_path):
    """Classify an audio clip with the voice model.

    Args:
        audio_path: Path to the audio file to analyze. Empty/None when the
            user submitted the form without providing audio.

    Returns:
        dict: ``{"label": <id2label entry of the top class>,
        "confidence": <softmax probability of that class, as a percentage
        rounded to 2 decimals>}``.

    Raises:
        gr.Error: If no audio was provided or the clip contains no samples.
    """
    # Fail with a readable UI message instead of an obscure loader error.
    if not audio_path:
        raise gr.Error("Please upload or record an audio clip first.")

    # Single source of truth: the rate the audio is resampled to must be
    # the same rate that is declared to the feature extractor.
    sample_rate = 16000

    waveform, _ = librosa.load(audio_path, sr=sample_rate)
    if waveform.size == 0:
        raise gr.Error("The audio clip is empty.")

    inputs = voice_processor(
        waveform,
        sampling_rate=sample_rate,
        return_tensors="pt",
    )

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        logits = voice_model(**inputs).logits

    predicted_class = logits.argmax(-1).item()
    confidence = torch.softmax(logits, dim=1)[0][predicted_class].item()

    return {
        "label": voice_model.config.id2label[predicted_class],
        "confidence": round(confidence * 100, 2),
    }
# =========================
# IMAGE PREDICT
# =========================
def predict(image):
    """Classify a single image as "real" or "artificial".

    The whole image is scored with the main model, then every face crop
    returned by ``detect_face`` is scored with the face model. The image is
    labelled "artificial" when the full image or any face gets a label
    other than "real".

    Args:
        image: Pixel array accepted by ``PIL.Image.fromarray``, or None when
            the form was submitted without an image.

    Returns:
        dict: ``label`` ("real"/"artificial"), ``final_score`` (mean of the
        full-image score and the average face score), ``full_image_score``,
        ``face_score`` (falls back to the full-image score when no face was
        found) and ``faces_detected``.

    Raises:
        gr.Error: If no image was provided.
    """
    import tempfile  # local import: only this function needs it

    if image is None:
        raise gr.Error("Please provide an image to analyze.")

    # A unique temp file (instead of a fixed "temp_image.jpg") keeps
    # overlapping requests from overwriting each other's input image.
    fd, temp_path = tempfile.mkstemp(suffix=".jpg")
    os.close(fd)

    try:
        # JPEG cannot store an alpha channel, so normalize to RGB first.
        Image.fromarray(image).convert("RGB").save(temp_path)

        # ----------------------
        # analyze the full image
        # ----------------------
        # "with" releases the file handle before the temp file is deleted.
        with Image.open(temp_path) as full_image:
            full_result = predict_with_model(full_image, model, processor)

        # ----------------------
        # detect faces
        # ----------------------
        # NOTE(review): presumably detect_face writes its crops into
        # ./faces (the folder is created before and removed after the
        # call) - confirm against detect_face. The folder name is shared
        # with predict_video, so the two can still interfere when they
        # run at the same time.
        os.makedirs("faces", exist_ok=True)
        faces = detect_face(temp_path)

        face_scores = []
        fake_face_found = False
        for face_path in faces:
            with Image.open(face_path) as face_image:
                face_result = predict_with_model(
                    face_image,
                    face_model,
                    face_processor,
                )
            face_scores.append(face_result["confidence"])
            if face_result["label"] != "real":
                fake_face_found = True

        # ----------------------
        # combine score
        # ----------------------
        # NOTE(review): each "confidence" is the probability of whichever
        # class was predicted, so these averages can mix "how real" with
        # "how fake" values. Kept as-is to preserve the reported numbers.
        full_score = full_result["confidence"]
        avg_face_score = (
            sum(face_scores) / len(face_scores)
            if face_scores
            else full_score
        )
        final_score = (full_score + avg_face_score) / 2

        is_artificial = full_result["label"] != "real" or fake_face_found

        return {
            "label": "artificial" if is_artificial else "real",
            "final_score": round(final_score, 2),
            "full_image_score": round(full_score, 2),
            "face_score": round(avg_face_score, 2),
            "faces_detected": len(faces),
        }
    finally:
        # Runs on success and on failure so temp files never pile up.
        if os.path.exists(temp_path):
            os.remove(temp_path)
        if os.path.exists("faces"):
            shutil.rmtree("faces")
# =========================
# VIDEO PREDICT
# =========================
def predict_video(video_path):
    """Classify a video as "real" or "artificial" frame by frame.

    ``extract_frames`` is called with the "frames" folder, and every file
    found in that folder afterwards is scored with the main model. Each
    face crop returned by ``detect_face`` for a frame is scored with the
    face model. A frame counts as fake when the full frame or any of its
    faces gets a label other than "real"; the video is "artificial" when
    more than 30% of its frames are fake.

    Args:
        video_path: Path to the video file to analyze. Empty/None when the
            form was submitted without a video.

    Returns:
        dict: ``label``, ``final_score`` (mean of the two averages below),
        ``fake_frames``, ``total_frames``, ``full_image_score`` (average
        over frames) and ``face_score`` (average over all faces; falls back
        to ``full_image_score`` when no face was found).

    Raises:
        gr.Error: If no video was provided or no frame could be extracted.
    """
    if not video_path:
        raise gr.Error("Please provide a video to analyze.")

    # Share of fake-looking frames above which the whole video is flagged.
    fake_frame_ratio = 0.3

    # Start from empty working folders so leftovers of an earlier run
    # are never analyzed.
    for folder in ("frames", "faces"):
        if os.path.exists(folder):
            shutil.rmtree(folder)
    for folder in ("frames", "faces"):
        os.makedirs(folder, exist_ok=True)

    try:
        # extract frames
        extract_frames(video_path, "frames")

        # Sorted so frames are processed in a reproducible order.
        frame_files = sorted(os.listdir("frames"))
        if not frame_files:
            # Without this check a video that yields no frames would be
            # reported as "real" with a score of 0.
            raise gr.Error("No frames could be extracted from the video.")

        fake_frames = 0
        full_scores = []
        face_scores = []

        for frame in frame_files:
            frame_path = os.path.join("frames", frame)

            # ----------------------
            # analyze the full frame
            # ----------------------
            # "with" closes each file handle right away; otherwise one
            # handle per frame stays open until garbage collection.
            with Image.open(frame_path) as frame_image:
                full_result = predict_with_model(
                    frame_image,
                    model,
                    processor,
                )
            full_scores.append(full_result["confidence"])

            # ----------------------
            # detect faces
            # ----------------------
            face_fake_found = False
            for face_path in detect_face(frame_path):
                with Image.open(face_path) as face_image:
                    face_result = predict_with_model(
                        face_image,
                        face_model,
                        face_processor,
                    )
                face_scores.append(face_result["confidence"])
                if face_result["label"] != "real":
                    face_fake_found = True

            # ----------------------
            # final frame decision
            # ----------------------
            if full_result["label"] != "real" or face_fake_found:
                fake_frames += 1

        total_frames = len(frame_files)

        # ----------------------
        # final score
        # ----------------------
        # NOTE(review): each "confidence" is the probability of whichever
        # class was predicted, so these averages can mix "how real" with
        # "how fake" values. Kept as-is to preserve the reported numbers.
        avg_full = sum(full_scores) / len(full_scores)
        avg_face = (
            sum(face_scores) / len(face_scores)
            if face_scores
            else avg_full
        )
        final_score = (avg_full + avg_face) / 2

        is_artificial = fake_frames > total_frames * fake_frame_ratio

        return {
            "label": "artificial" if is_artificial else "real",
            "final_score": round(final_score, 2),
            "fake_frames": fake_frames,
            "total_frames": total_frames,
            "full_image_score": round(avg_full, 2),
            "face_score": round(avg_face, 2),
        }
    finally:
        # Runs on success and on failure so extracted frames and face
        # crops never pile up on disk.
        for folder in ("frames", "faces"):
            if os.path.exists(folder):
                shutil.rmtree(folder)
# =========================
# UI
# =========================
# One interface per media type; each shows the prediction dict as JSON.
image_ui = gr.Interface(
    title="Image Deepfake Detector",
    fn=predict,
    inputs=gr.Image(),
    outputs=gr.JSON(),
)

video_ui = gr.Interface(
    title="Video Deepfake Detector",
    fn=predict_video,
    inputs=gr.Video(),
    outputs=gr.JSON(),
)

# type="filepath" makes the audio input hand predict_audio a file path.
audio_ui = gr.Interface(
    title="Voice Deepfake Detector",
    fn=predict_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.JSON(),
)

# Combine the three interfaces into one tabbed app and start serving it.
demo = gr.TabbedInterface(
    [image_ui, video_ui, audio_ui],
    ["Image", "Video", "Audio"],
)
demo.launch()