faisalishfaq2005 committed on
Commit
31ad805
·
1 Parent(s): 8fbfa4a

uploading data on huggingface

Browse files
Files changed (6) hide show
  1. README.md +115 -1
  2. config.json +14 -0
  3. inference.py +91 -0
  4. model.pth +3 -0
  5. model.py +0 -0
  6. requirements.txt +0 -0
README.md CHANGED
@@ -1,3 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
- license: mit
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Deepfake Detection with Improved EfficientViT
2
+
3
+ ## Model Architecture
4
+
5
+ ![Model Architecture](assets/architecture.png)
6
+
7
+ ## Inference Pipeline
8
+
9
+ ![Inference Pipeline](assets/inference_pipeline.png)
10
+
11
+
12
+ This repository contains a **PyTorch model for deepfake detection** based on an improved **EfficientViT** architecture, trained on video data.
13
+
14
+ The model predicts whether a video is **real (0)** or **fake (1)** using both visual information and temporal cues.
15
+
16
  ---
17
+
18
+ ## 🧩 Model Description
19
+
20
+ **Architecture:** Improved EfficientViT
21
+ **Backbone:** EfficientNet-B0 for feature extraction
22
+ **Head:** Transformer-based temporal modeling with classification head
23
+ **Input:** Video frames (224×224 RGB images)
24
+ **Output:** Binary label (0=Real, 1=Fake) and frame-level probabilities
25
+
26
+ **Key Features:**
27
+
28
+ - Extracts faces from frames using MTCNN
29
+ - Supports inference on raw video files
30
+ - Provides frame-level probabilities for fine-grained analysis
31
+
32
  ---
33
+
34
+ ## 📁 Repository Structure
35
+
36
+ ```
37
+ deepfake-efficientvit/
38
+
39
+ ├── model.py # ImprovedEfficientViT class
40
+ ├── inference.py # Functions to run inference on videos
41
+ ├── model.pth # Trained weights
42
+ ├── config.json # Optional model metadata
43
+ ├── requirements.txt # Required packages
44
+ ├── README.md
45
+
46
+ ```
47
+
48
+ ## ⚡ Installation
49
+ git clone https://huggingface.co/faisalishfaq2005/deepfake-detection-efficientnet-vit
50
+
51
+ cd deepfake-detection-efficientnet-vit
52
+
53
+ pip install -r requirements.txt
54
+
55
+ ## 🚀 Usage
56
+ # 1. Programmatic Inference
57
+
58
+ ```python
59
+
60
+ from huggingface_hub import hf_hub_download
61
+ import torch
62
+ from model import ImprovedEfficientViT
63
+ from inference import predict_vedio # your inference function
64
+
65
+ # 1️⃣ Download the checkpoint from Hugging Face
66
+ checkpoint_path = hf_hub_download(
67
+ repo_id="faisalishfaq2005/deepfake-detection-efficientnet-vit",
68
+ filename="model.pth"
69
+ )
70
+
71
+ # 2️⃣ Load the model
72
+ model = ImprovedEfficientViT()
73
+ model.load_state_dict(torch.load(checkpoint_path, map_location="cpu"))
74
+ model.eval()
75
+
76
+ # 3️⃣ Run inference on a video
77
+ video_path = "sample_video.mp4"
78
+ result = predict_vedio(video_path, model)
79
+ print(result)
80
+ # Example Output: {'class': 1}
81
+
82
+
83
+ ```
84
+ # 2. Manual Download
85
+
86
+ Go to the Hugging Face model page
87
+
88
+ Download:
89
+
90
+ model.pth
91
+
92
+ model.py
93
+
94
+ inference.py
95
+
96
+ Place them in the same folder locally.
97
+
98
+ Install requirements and run predict_vedio().
99
+
100
+ ## 📄 License
101
+
102
+ This model is released under the MIT License.
103
+ You are free to use, modify, and distribute it, with attribution.
104
+
105
+ ## 📚 Citation
106
+
107
+ If you use this model in your research, please cite:
108
+
109
+ ```bibtex
110
+ @inproceedings{faisalishfaq2025efficientvit,
111
+ title={Deepfake Detection with EfficientNet and ViT},
112
+ author={Faisal Ishfaq},
113
+ year={2025}
114
+ }
115
+ ```
116
+
117
+
config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "efficientnetb0_Vit_blocks_multi_head_attention",
3
+ "framework": "pytorch",
4
+ "architecture": {
5
+ "input": {
6
+ "video_frames":"20 frames per video" ,
7
+ "image_size": [224, 224]
8
+
9
+ },
10
+ "output_classes": ["real", "fake"]
11
+ },
12
+ "pretrained": true,
13
+ "model_file": "model.pth"
14
+ }
inference.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from torchvision import transforms
2
+ import torch
3
+ from PIL import Image
4
+ from model import ImprovedEfficientViT
5
+
6
+ import os
7
+ import cv2
8
+ from mtcnn import MTCNN
9
+
10
def extract_faces(video_path, target_frames=20):
    """Sample frames from a video and return the confident face crops found in them.

    Frames are visited at a fixed step of ``total_frames // target_frames``
    (at least 1). Every face that MTCNN reports with confidence >= 0.9 in a
    sampled frame is cropped, resized to 224x224 and collected.

    Args:
        video_path: Path passed to ``cv2.VideoCapture``.
        target_frames: Approximate number of frames to sample. The result is
            not guaranteed to have exactly this many entries: the integer step
            can visit more frames than requested, frames may fail to decode,
            and a frame may contribute zero or several faces.

    Returns:
        A list of 224x224 RGB face crops (arrays in the frame's RGB order).
        Empty if the video cannot be opened or no confident face is found.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Could not open video {video_path}")
        return []

    face_images = []
    try:
        # Built only once the video is known to be readable.
        detector = MTCNN()

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_interval = max(total_frames // target_frames, 1)

        for i in range(0, total_frames, frame_interval):
            cap.set(cv2.CAP_PROP_POS_FRAMES, i)
            ret, frame = cap.read()
            if not ret:
                continue

            # OpenCV decodes to BGR; the detector is fed RGB.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            for face in detector.detect_faces(rgb_frame):
                if face['confidence'] < 0.9:
                    continue

                x, y, w, h = face['box']
                # Compute the far edges from the *unclamped* origin so that a
                # box starting outside the frame is trimmed rather than
                # shifted; clamp to 0 so a negative stop never turns into a
                # "from the end" slice.
                x1, y1 = max(x, 0), max(y, 0)
                x2, y2 = max(x + w, 0), max(y + h, 0)
                face_img = rgb_frame[y1:y2, x1:x2]

                if face_img.size == 0:
                    continue

                face_images.append(cv2.resize(face_img, (224, 224)))
    finally:
        # Release the capture even if decoding or detection raises.
        cap.release()

    return face_images
50
+
51
+ from torchvision import transforms
52
# Per-face preprocessing pipeline used before inference: convert the crop to a
# PIL image, resize to 224x224, convert to a tensor, then normalise with
# mean 0.5 / std 0.5.
transform_vedio = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ]
)
59
+
60
+
61
def predict_vedio(video_path, model_vedio, real_frame_threshold=3, target_frames=20):
    """Classify a video as real (0) or fake (1) by voting over its face crops.

    Each face crop returned by ``extract_faces`` is preprocessed with
    ``transform_vedio`` and scored by the model; a crop whose sigmoid
    probability is not above 0.5 counts as a "real" vote. The video is
    labelled real only when the number of real votes exceeds
    ``real_frame_threshold``.

    Args:
        video_path: Path of the video to analyse.
        model_vedio: Model called on a single-image batch; its output is
            passed through a sigmoid and read with ``.item()``, so it is
            expected to produce one value per image.
        real_frame_threshold: Number of real votes that must be exceeded for
            the video to be labelled real. Defaults to 3.
        target_frames: Forwarded to ``extract_faces``. Defaults to 20.

    Returns:
        ``{"class": 0}`` for real or ``{"class": 1}`` for fake.

    Note:
        When no face is extracted there are no real votes, so the result is
        ``{"class": 1}`` (with the default threshold). The model is moved to
        CUDA when available, otherwise CPU, and stays on that device.
    """
    faces = extract_faces(video_path, target_frames=target_frames)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model_vedio.to(device)

    # Score in eval mode regardless of how the caller left the model, then
    # restore the previous mode so this call does not disturb training code.
    was_training = model_vedio.training
    model_vedio.eval()

    real_votes = 0
    try:
        with torch.no_grad():
            for face in faces:
                batch = transform_vedio(face).to(device).unsqueeze(0)
                prob = torch.sigmoid(model_vedio(batch))
                is_fake = prob.item() > 0.5
                if not is_fake:
                    real_votes += 1
    finally:
        model_vedio.train(was_training)

    predicted_class = 0 if real_votes > real_frame_threshold else 1
    return {
        "class": predicted_class
    }
91
+
model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:734e07e648d846edff79edc9a25fb35ae3d885b732a12032698ef70948a47904
3
+ size 66414828
model.py ADDED
File without changes
requirements.txt ADDED
File without changes