shunliwang committed on May 7

Commit

8bc3305

1 Parent(s): 2e0a93e

update

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +2 -35
README.md +271 -3
preprocessing/config.yaml +52 -0
preprocessing/dataset2lmdb_test.py +99 -0
preprocessing/dataset_json/Celeb-DF-v2.json +3 -0
preprocessing/dataset_json/DF40_all.json +3 -0
preprocessing/dataset_json/DFDC.json +3 -0
preprocessing/dataset_json/DFDCP.json +3 -0
preprocessing/dataset_json/DeepFakeDetection.json +3 -0
preprocessing/dataset_json/DiffFace.json +3 -0
preprocessing/dataset_json/DreamBooth.json +3 -0
preprocessing/dataset_json/FF-DF.json +3 -0
preprocessing/dataset_json/FF-F2F.json +3 -0
preprocessing/dataset_json/FF-FS.json +3 -0
preprocessing/dataset_json/FF-NT.json +3 -0
preprocessing/dataset_json/FaceForensics++.json +3 -0
preprocessing/dataset_json/FaceShifter.json +3 -0
preprocessing/dataset_json/GPT4o.json +3 -0
preprocessing/dataset_json/HPS.json +3 -0
preprocessing/dataset_json/Hart.json +3 -0
preprocessing/dataset_json/Imagic.json +3 -0
preprocessing/dataset_json/Infinity.json +3 -0
preprocessing/dataset_json/LoRA.json +3 -0
preprocessing/dataset_json/MidJourney.json +3 -0
preprocessing/dataset_json/Midjourney_diff.json +3 -0
preprocessing/dataset_json/SRI.json +3 -0
preprocessing/dataset_json/SRI_hq.json +3 -0
preprocessing/dataset_json/abstract_dataset.py +668 -0
preprocessing/dataset_json/gpa.json +3 -0
preprocessing/dataset_json/heygen.json +3 -0
preprocessing/dataset_json/others/Chameleon.json +3 -0
preprocessing/dataset_json/others/CoDiff.json +3 -0
preprocessing/dataset_json/others/CollabDiff.json +3 -0
preprocessing/dataset_json/others/DCFace.json +3 -0
preprocessing/dataset_json/others/DeeperForensics-1.0.json +3 -0
preprocessing/dataset_json/others/DiT_cdf.json +3 -0
preprocessing/dataset_json/others/DiT_ff.json +3 -0
preprocessing/dataset_json/others/EFSAll_cdf.json +3 -0
preprocessing/dataset_json/others/EFSAll_ff.json +3 -0
preprocessing/dataset_json/others/FRAll_cdf.json +3 -0
preprocessing/dataset_json/others/FRAll_ff.json +3 -0
preprocessing/dataset_json/others/FSAll_cdf.json +3 -0
preprocessing/dataset_json/others/FSAll_ff.json +3 -0
preprocessing/dataset_json/others/FaceForensics++_vae.json +3 -0
preprocessing/dataset_json/others/FreeDoM_I.json +3 -0
preprocessing/dataset_json/others/FreeDoM_T.json +3 -0
preprocessing/dataset_json/others/MRAA_cdf.json +3 -0
preprocessing/dataset_json/others/MRAA_ff.json +3 -0
preprocessing/dataset_json/others/SDXL.json +3 -0
preprocessing/dataset_json/others/SDXL_Refine.json +3 -0

.gitattributes CHANGED Viewed

@@ -1,35 +1,2 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text


1	+ *.json filter=lfs diff=lfs merge=lfs -text
2	+ *.dat filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,3 +1,271 @@
----
-license: mit
----

+# DFG - Deepfake Genome Codebase
+## 1. Environment Setup
+Create and activate the conda environment:
+```bash
+# Create a new conda environment (Python 3.10 recommended)
+conda create -n dfg python=3.10 -y
+# Activate the environment
+conda activate dfg
+# Install dependencies
+pip install -r requirements.txt
+```
+## 2. Dataset Configuration
+Before training or testing, you need to update the **dataset global path** to match your actual data location.
+Open `training/dataset/abstract_dataset.py` and modify the `DATASET_GLOBAL_PATH` variable:
+```python
+# Change this to your actual dataset root path
+DATASET_GLOBAL_PATH = "/your/actual/dataset/path/"
+```
+This path should point to the root directory containing your deepfake detection datasets (e.g., `DeepFakeGenome`, `deepfake_detecton_dataset`, etc.).
+## 3. Project and Dataset Structure
+```
+DFG/
+├── preprocessing/
+│   └── dataset_json/          # Dataset index JSON files
+│       ├── protocol_2_train.json
+│       ├── protocol_2_test.json
+│       ├── protocol_3_test.json
+│       ├── protocol_4_test.json
+│       └── ...
+├── training/
+│   ├── config/
+│   │   └── detector/          # Detector config YAML files
+│   ├── detectors/             # Detector implementations
+│   │   ├── __init__.py        # Register all detectors here
+│   │   ├── base_detector.py
+│   │   └── ...
+│   ├── networks/              # Backbone network implementations
+│   ├── loss/                  # Loss function definitions
+│   ├── metrics/               # Evaluation metrics
+│   ├── train.py               # Training entry point
+│   └── test_pall.py           # Testing entry point
+├── train.sh                   # Training script examples
+├── test.sh                    # Testing script examples
+├── requirements.txt           # Python dependencies
+└── README.md
+```
+## 4. Training
+Refer to `train.sh` for all training commands. Example:
+```bash
+python -m torch.distributed.launch --master_port=29503 --nproc_per_node=8 training/train.py \
+    --detector_path ./training/config/detector/clip_large_fft.yaml \
+    --no-save_feat --ddp
+```
+Key arguments:
+- `--master_port`: port for distributed training (change if port conflicts occur)
+- `--nproc_per_node`: number of GPUs
+- `--detector_path`: path to the detector config YAML
+- `--no-save_feat`: disable feature saving during training
+- `--ddp`: enable DistributedDataParallel
+## 5. Testing
+Refer to `test.sh` for all testing commands. Example:
+```bash
+# Test on protocol 2 & 3
+python -m torch.distributed.launch --master_port=29510 --nproc_per_node=8 training/test_pall.py --ddp \
+    --test_dataset "protocol_2_test" "protocol_3_test" \
+    --detector_path ./training/config/detector/clip_large_fft.yaml \
+    --weights_path logs/clip_models/clip_large_fft_2025-11-08-13-56-51
+# Test on protocol 4
+python -m torch.distributed.launch --master_port=29512 --nproc_per_node=8 training/test_pall.py --ddp \
+    --test_dataset "protocol_4_test" \
+    --detector_path ./training/config/detector/clip_large_fft.yaml \
+    --weights_path logs/clip_models/clip_large_fft_2025-11-08-13-56-51 \
+    --test_config test_config_p4.yaml
+```
+Key arguments:
+- `--test_dataset`: one or more dataset names (must match JSON filenames under `preprocessing/dataset_json/`)
+- `--weights_path`: path to trained model checkpoint directory
+- `--test_config`: additional test configuration (required for protocol 4)
+## 6. Adding a Custom Detector
+To integrate your own detector into the framework, follow these three steps:
+### Step 1: Create the detector config YAML
+Create a new file under `training/config/detector/`, e.g., `my_detector.yaml`:
+```yaml
+# log dir
+log_dir: logs/my_detector
+# model setting
+pretrained: null
+model_name: my_detector
+backbone_name: resnet34
+# backbone setting
+backbone_config:
+  mode: original
+  num_classes: 2
+  inc: 3
+  dropout: false
+# dataset
+all_dataset: [FaceForensics++, FF-F2F, FF-DF, FF-FS, FF-NT, FaceShifter, DeepFakeDetection, Celeb-DF-v1, Celeb-DF-v2, DFDCP, DFDC, DeeperForensics-1.0, UADFV]
+train_dataset: [protocol_2_train]
+test_dataset: [protocol_2_test]
+compression: c23
+train_batchSize: 64
+test_batchSize: 64
+workers: 8
+frame_num: {'train': 16, 'test': 16}
+resolution: 224
+with_mask: false
+with_landmark: false
+# data augmentation
+use_data_augmentation: false
+data_aug:
+  flip_prob: 0.5
+  rotate_prob: 0.5
+  rotate_limit: [-10, 10]
+  blur_prob: 0.5
+  blur_limit: [3, 7]
+  brightness_prob: 0.5
+  brightness_limit: [-0.1, 0.1]
+  contrast_limit: [-0.1, 0.1]
+  quality_lower: 40
+  quality_upper: 100
+# mean and std for normalization
+mean: [0.485, 0.456, 0.406]
+std: [0.229, 0.224, 0.225]
+# optimizer config
+optimizer:
+  type: adam
+  adam:
+    lr: 0.0002
+    beta1: 0.9
+    beta2: 0.999
+    eps: 0.00000001
+    weight_decay: 0.0005
+    amsgrad: false
+# training config
+lr_scheduler: null
+nEpochs: 20
+start_epoch: 0
+save_epoch: 1
+rec_iter: 100
+logdir: ./logs
+manualSeed: 1024
+save_ckpt: true
+save_feat: true
+# loss function
+loss_func: cross_entropy
+losstype: null
+# metric
+metric_scoring: auc
+# cuda
+ngpu: 1
+cuda: true
+cudnn: true
+save_avg: true
+save_latest_ckpt: true
+```
+### Step 2: Create the detector Python file
+Create `training/detectors/my_detector.py`:
+```python
+import torch
+import torch.nn as nn
+from metrics.base_metrics_class import calculate_metrics_for_train
+from .base_detector import AbstractDetector
+from detectors import DETECTOR
+from networks import BACKBONE
+from loss import LOSSFUNC
+# Registers the class in DETECTOR under the name 'my_detector'
+# (presumably looked up through `model_name` in the YAML above; verify against the registry).
+@DETECTOR.register_module(module_name='my_detector')
+class MyDetector(AbstractDetector):
+    def __init__(self, config):
+        super().__init__()
+        self.config = config
+        self.backbone = self.build_backbone(config)
+        # Instantiate the loss class selected by config['loss_func'].
+        self.loss_func = LOSSFUNC[config['loss_func']]()
+    def build_backbone(self, config):
+        # Look up the backbone class by name and construct it from 'backbone_config'.
+        backbone = BACKBONE[config['backbone_name']](config['backbone_config'])
+        return backbone
+    def features(self, data_dict: dict) -> torch.Tensor:
+        return self.backbone(data_dict['image'])
+    def classifier(self, features: torch.Tensor) -> torch.Tensor:
+        # NOTE(review): self.fc is never assigned in this snippet; unless
+        # AbstractDetector provides it, define the classification head in __init__.
+        return self.fc(features)
+    def get_losses(self, data_dict: dict, pred_dict: dict) -> dict:
+        label = data_dict['label']
+        pred = pred_dict['cls']
+        loss = self.loss_func(pred, label)
+        # The loss is returned under the key 'overall'.
+        return {'overall': loss}
+    def get_train_metrics(self, data_dict: dict, pred_dict: dict) -> dict:
+        label = data_dict['label']
+        pred = pred_dict['cls']
+        auc, eer, acc, ap = calculate_metrics_for_train(label.detach(), pred.detach())
+        return {'acc': acc, 'auc': auc, 'eer': eer, 'ap': ap}
+    def forward(self, data_dict: dict, inference=False) -> dict:
+        # `inference` is accepted but not used in this example.
+        features = self.features(data_dict)
+        pred = self.classifier(features)
+        # Softmax over dim 1, keeping column 1 as the reported probability
+        # (presumably the "fake" class; confirm against label_dict).
+        prob = torch.softmax(pred, dim=1)[:, 1]
+        pred_dict = {'cls': pred, 'prob': prob, 'feat': features}
+        return pred_dict
+```
+### Step 3: Register the detector in `__init__.py`
+Add the following import line to `training/detectors/__init__.py`:
+```python
+from .my_detector import MyDetector
+```
+That's it! Now you can train and test with your custom detector:
+```bash
+# Train
+python -m torch.distributed.launch --master_port=29503 --nproc_per_node=8 training/train.py \
+    --detector_path ./training/config/detector/my_detector.yaml \
+    --no-save_feat --ddp
+# Test
+python -m torch.distributed.launch --master_port=29510 --nproc_per_node=8 training/test_pall.py --ddp \
+    --test_dataset "protocol_2_test" "protocol_3_test" \
+    --detector_path ./training/config/detector/my_detector.yaml \
+    --weights_path logs/my_detector/<your_checkpoint_folder>
+```

preprocessing/config.yaml ADDED Viewed

	@@ -0,0 +1,52 @@

+preprocess:
+  dataset_name: # the name of dataset
+    choices: ['FaceForensics++','Celeb-DF-v1', 'Celeb-DF-v2', 'DFDCP', 'DFDC', 'DeeperForensics-1.0','UADFV']
+    default: 'FaceForensics++'
+  dataset_root_path: # the root path to the dataset
+    type: str
+    default: 'F:\'
+  comp: # the compression level of videos, only in the dataset of FaceForensics++.
+    choices: ['raw', 'c23', 'c40']
+    default: 'c23'
+  mode: # frame sampling strategy: extract a fixed number of frames per video ('fixed_num_frames'), or one frame every 'stride' frames ('fixed_stride').
+    choices: ['fixed_num_frames', 'fixed_stride']
+    default: 'fixed_num_frames'
+  stride: # when 'mode' is 'fixed_stride', 'stride' is the number of frames to skip between each frame extracted.
+    type: int
+    default: 10
+  num_frames: # when 'mode' is 'fixed_num_frames', 'num_frames' is the number of frames to extract from each video.
+    type: int
+    default: 32
+rearrange:
+  dataset_name: # the name of dataset
+    choices: ['FaceForensics++', 'DeepFakeDetection', 'Celeb-DF-v1', 'Celeb-DF-v2','DFDCP', 'DFDC', 'DeeperForensics-1.0','UADFV','FaceShifter']
+    default: 'FaceForensics++'
+  dataset_root_path: # the root path to the dataset
+    type: str
+    default: ''
+  output_file_path: # the path where the generated dataset JSON is written
+    type: str
+    default: '../preprocessing/dataset_json_v6'
+  comp: # the compression level of videos, only in the dataset of FaceForensics++.
+    choices: ['raw', 'c23', 'c40']
+    default: 'c23'
+  perturbation: # Extensive real-world perturbations are applied to DeeperForensics-1.0 dataset
+    choices: ['end_to_end','end_to_end_level_1','end_to_end_level_2','end_to_end_level_3','end_to_end_level_4',
+              'end_to_end_level_5','end_to_end_mix_2_distortions','end_to_end_mix_3_distortions',
+              'end_to_end_mix_4_distortions','end_to_end_random_level','reenact_postprocess']
+    default: 'end_to_end'
+to_lmdb:
+  dataset_name: # the name of dataset
+    choices: ['FaceForensics++', 'DeepFakeDetection', 'Celeb-DF-v1', 'Celeb-DF-v2','DFDCP', 'DFDC', 'DeeperForensics-1.0','UADFV','FaceShifter']
+    default: 'FaceForensics++'
+  dataset_root_path: # the root path to the dataset
+    type: str
+    default: './datasets_v2'
+  output_lmdb_dir: # the directory where the generated LMDB databases are written
+    type: str
+    default: './datasets_lmdbs'
+  comp: # the compression level of videos, only in the dataset of FaceForensics++.
+    choices: ['raw', 'c23', 'c40']
+    default: 'c23'

preprocessing/dataset2lmdb_test.py ADDED Viewed

	@@ -0,0 +1,99 @@

+import os
+import json
+import cv2
+import lmdb
+import yaml
+from PIL import Image
+import io
+import numpy as np
+def file_to_binary(file_path):
+    """Return the content of ``file_path`` as ``bytes``.
+
+    A path ending in ``.npy`` is loaded with ``np.load`` and serialised with
+    ``ndarray.tobytes()``, so only the raw element buffer is returned (the
+    .npy header carrying shape and dtype is not part of the result). Any
+    other file is read verbatim in binary mode.
+    """
+    if not file_path.endswith('.npy'):
+        with open(file_path, 'rb') as handle:
+            return handle.read()
+    return np.load(file_path).tobytes()
+def create_lmdb_dataset(source_folder, lmdb_path, dataset_name, map_size):
+    """Pack every file found under ``source_folder`` into one LMDB environment.
+
+    Each file is stored under the UTF-8 key
+    ``"<dataset_name>/<path relative to source_folder>"`` (the relative part
+    uses the OS path separator) with the bytes returned by
+    ``file_to_binary`` as its value.
+
+    Args:
+        source_folder: Root directory to walk; symlinked directories are followed.
+        lmdb_path: Location of the LMDB environment passed to ``lmdb.open``.
+        dataset_name: Prefix prepended to every key.
+        map_size: Maximum size of the LMDB map, in bytes.
+    """
+    db = lmdb.open(lmdb_path, map_size=map_size)
+    try:
+        # One write transaction for the whole walk: it commits when the
+        # ``with`` block exits normally and aborts if an exception escapes.
+        with db.begin(write=True) as txn:
+            for root, _dirs, files in os.walk(source_folder, followlinks=True):
+                print(root)
+                # Skip any directory whose path contains 'video'. The test is
+                # applied to the full path, so a source_folder whose own path
+                # contains 'video' produces an empty database.
+                if 'video' in root:
+                    continue
+                for file in files:
+                    print(file)
+                    image_path = os.path.join(root, file)
+                    relative_path = f"{dataset_name}/" + os.path.relpath(image_path, source_folder)
+                    print("relative_path:", relative_path)
+                    print("image_path:", image_path)
+                    txn.put(relative_path.encode('utf-8'), file_to_binary(image_path))
+    finally:
+        # Release the environment handle even when walking or writing fails.
+        db.close()
+def read_lmdb(lmdb_dir_path, key='npy_test\\000_003\\000.npy', dtype=np.uint32, shape=(81, 2)):
+    """Fetch one record from a generated LMDB and decode it as an array.
+
+    Intended as a spot check of the keys and values written by
+    ``create_lmdb_dataset``.
+
+    Args:
+        lmdb_dir_path: Path of the LMDB environment to open.
+        key: Key to look up; defaults to the sample key used for validation.
+        dtype: Element type used to interpret the stored bytes.
+        shape: Shape the decoded buffer is reshaped to.
+
+    Returns:
+        np.ndarray: The decoded record.
+
+    Raises:
+        KeyError: If ``key`` is not present in the database.
+    """
+    env = lmdb.open(lmdb_dir_path)
+    try:
+        with env.begin(write=False) as txn:
+            binary = txn.get(key.encode())
+    finally:
+        # Always release the environment, including on lookup errors.
+        env.close()
+    if binary is None:
+        # txn.get returns None for a missing key; fail with a clear message
+        # instead of an opaque error inside np.frombuffer.
+        raise KeyError(f"key {key!r} not found in LMDB at {lmdb_dir_path!r}")
+    # For image records, decode instead with
+    # cv2.imdecode(np.frombuffer(binary, dtype=np.uint8), cv2.IMREAD_COLOR).
+    return np.frombuffer(binary, dtype=dtype).reshape(shape)
+# Usage example: python dataset2lmdb_test.py --dataset_size 25
+import argparse
+def main():
+    """Build the LMDB for the dataset selected in the ``to_lmdb`` config section.
+
+    Reads ``--dataset_size`` (GB) from the command line and the dataset
+    name, dataset root and output directory from the YAML config, then
+    calls ``create_lmdb_dataset``.
+    """
+    # Parse arguments here rather than at import time, so importing this
+    # module has no command-line side effects.
+    parser = argparse.ArgumentParser(description='Process some inputs.')
+    # Optional with a default: the original combined default=25 with
+    # required=True, which made the default unreachable.
+    parser.add_argument('--dataset_size', type=int, default=25,
+                        help='lmdb requires pre-specifying the total dataset size (GB)')
+    args = parser.parse_args()
+    # NOTE(review): the config added alongside this script is named
+    # config.yaml; confirm that './config_DFo.yaml' is the intended path.
+    yaml_path = './config_DFo.yaml'
+    try:
+        with open(yaml_path, 'r') as f:
+            config = yaml.safe_load(f)
+    except yaml.YAMLError as e:
+        # Stop here: continuing without a parsed config would only fail
+        # later with a NameError.
+        raise SystemExit(f"YAML file parsing error: {e}")
+    config = config['to_lmdb']
+    dataset_name = config['dataset_name']['default']
+    dataset_root_path = config['dataset_root_path']['default']
+    output_lmdb_dir = config['output_lmdb_dir']['default']
+    os.makedirs(output_lmdb_dir, exist_ok=True)
+    dataset_dir_path = f"{dataset_root_path}/{dataset_name}"
+    lmdb_path = f"{output_lmdb_dir}/{dataset_name}_lmdb"
+    # LMDB needs the map size in bytes; the CLI value is in GB.
+    create_lmdb_dataset(dataset_dir_path, lmdb_path, dataset_name,
+                        map_size=args.dataset_size * 1024 * 1024 * 1024)
+    #read_lmdb(lmdb_path)
+if __name__ == '__main__':
+    main()

preprocessing/dataset_json/Celeb-DF-v2.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:113fcde0ea7b1a03caf63e2ed2f3e6d80bf99efe18073ca05c606c9d0b260804
+size 20076776

preprocessing/dataset_json/DF40_all.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6308d04ffd0e9da59a7df058bf6a27ae41da0a15f03add8a11f694f510a5b2f6
+size 125339450

preprocessing/dataset_json/DFDC.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8d1184758620c71b68ad8715e068644ed9792bdc6b2feba9cf0b7f8a98a7e00d
+size 44499938

preprocessing/dataset_json/DFDCP.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ed5022e36380b3c1ca21941e95bad7bcf08fc3c58e50441012757189eed1868d
+size 27634090

preprocessing/dataset_json/DeepFakeDetection.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aed7e4a257feb622435119bec621dc09b0614823e4e3e1186bc4b280a394fa90
+size 45849312

preprocessing/dataset_json/DiffFace.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f3c8ff3368ae4c6ae5950ccc79d63603279c0c103264d2215eebd270e6a7535f
+size 7177344

preprocessing/dataset_json/DreamBooth.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2e4cbfa4d0efef6f4b9f8fec1ad4be9efdad7def115bf993e1684c631b1bedbc
+size 7841108

preprocessing/dataset_json/FF-DF.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:be532fe67b2bebaaaf3a81237ccc518ba8ded043564b187c923e7c6e79bc242b
+size 6633592

preprocessing/dataset_json/FF-F2F.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0cb9c3e9e209dfd45390b7da7a882fcefab4af4a53bd672c999b594dda49cee3
+size 6647968

preprocessing/dataset_json/FF-FS.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3f14cc304d80e39597d280234c654aca1fb5af3615a26b3c431e662cb50e2c23
+size 6615423

preprocessing/dataset_json/FF-NT.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:112494833ab34ab6b9476dc2632aed5c5928961b004e7267f3690c8b70b1c947
+size 6804515

preprocessing/dataset_json/FaceForensics++.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:18802f85a0de861d07140fafe4fbbdda67167afb110d0b6f5cece1738c7428c8
+size 17184826

preprocessing/dataset_json/FaceShifter.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f2031400f8aea5369ebad30c5b9839db370c1bf7f7ba16183a9f9ed833b30904
+size 6695159

preprocessing/dataset_json/GPT4o.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6d5c880684793cc10837cb5b39e2e19cdf5c0ab460f8cb6f4214fe0f65ef8571
+size 247155

preprocessing/dataset_json/HPS.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8334f78d11be167a14492562af36dc45e11899d6d9e6949e51a5c8d252e8c89b
+size 8968435

preprocessing/dataset_json/Hart.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f69d96d5d5aed7be81ad805e113c6128d1965b275a9586ff704d72abcdd2df50
+size 4182208

preprocessing/dataset_json/Imagic.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8bcfb6a163b2c9e41d9c9444208a99cd04abfa634c209604348518cad56ad3eb
+size 7937325

preprocessing/dataset_json/Infinity.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5cd859f6e5aa9fdcb2e18bcc3a095c0ef73d9355fc3d083255291b186bbe7bbe
+size 4332314

preprocessing/dataset_json/LoRA.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fc22024e17c872fda7beb88c09007e2c7a78b78255248a7f58e98ba58ee58517
+size 7655888

preprocessing/dataset_json/MidJourney.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5f9e087f7d7525fdfe756a017cc5f4b88b8ea8056954f5f5b2bbec5c99192c8b
+size 767342

preprocessing/dataset_json/Midjourney_diff.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c3238e3ba309dc523560756611ae404ebf3d21084211fe05064452a38b5cf6c8
+size 8330096

preprocessing/dataset_json/SRI.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6b750334121350c9c4f2e5790b1917400c26fd4af4323aec185415985993afe7
+size 1307793

preprocessing/dataset_json/SRI_hq.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d1f4d7b697768c8a37d4409cbd8fc2c11c3d54a37697007c79bc6830c6965a6d
+size 1206968

preprocessing/dataset_json/abstract_dataset.py ADDED Viewed

	@@ -0,0 +1,668 @@

+# author: Zhiyuan Yan
+# email: zhiyuanyan@link.cuhk.edu.cn
+# date: 2023-03-30
+# description: Abstract Base Class for all types of deepfake datasets.
+import sys
+import lmdb
+sys.path.append('.')
+import os
+import math
+import yaml
+import glob
+import json
+import numpy as np
+from copy import deepcopy
+import cv2
+import random
+from PIL import Image
+from collections import defaultdict
+import torch
+from torch.autograd import Variable
+from torch.utils import data
+from torchvision import transforms as T
+import albumentations as A
+from .albu import IsotropicResize
+FFpp_pool=['FaceForensics++','FaceShifter','DeepFakeDetection','FF-DF','FF-F2F','FF-FS','FF-NT']#
+import pdb
+def all_in_pool(inputs,pool):
+    """Return True when every element of ``inputs`` is contained in ``pool``.
+
+    An empty ``inputs`` yields True.
+    """
+    return all(candidate in pool for candidate in inputs)
+class DeepfakeAbstractBaseDataset(data.Dataset):
+    """
+    Abstract base class for all deepfake datasets.
+    """
+    def __init__(self, config=None, mode='train'):
+        """Collect the frame/label lists for one split and build the augmentation.
+
+        Args:
+            config (dict): Configuration parameters. Although the default is
+                ``None``, a dict is required: 'compression' and 'frame_num'
+                are read unconditionally, plus 'train_dataset' (a list of
+                dataset names) or 'test_dataset' (one dataset name)
+                depending on ``mode``. Optional keys: 'video_mode',
+                'clip_size', 'lmdb'; 'lmdb_dir' is read only when 'lmdb'
+                is truthy.
+            mode (str): A string indicating the mode (train or test).
+        Raises:
+            NotImplementedError: If mode is not train or test.
+            ValueError: If lmdb is enabled for training on several datasets
+                that are not all members of ``FFpp_pool``.
+            AssertionError: If no image or no label was collected.
+        """
+        # Set the configuration and mode
+        self.config = config
+        self.mode = mode
+        self.compression = config['compression']
+        self.frame_num = config['frame_num'][mode]
+        # Optional settings fall back to defaults when absent from the config
+        self.video_level = config.get('video_mode', False)
+        self.clip_size = config.get('clip_size', None)
+        self.lmdb = config.get('lmdb', False)
+        # Dataset lists, filled in below
+        self.image_list = []
+        self.label_list = []
+        if mode == 'train':
+            dataset_list = config['train_dataset']
+            # Training data from every listed dataset is pooled together
+            image_list, label_list = [], []
+            for one_data in dataset_list:
+                tmp_image, tmp_label, _ = self.collect_img_and_label_for_one_dataset(one_data)
+                image_list.extend(tmp_image)
+                label_list.extend(tmp_label)
+            if self.lmdb:
+                # Several datasets can share one LMDB only when all of them
+                # live in the FaceForensics++ database.
+                if len(dataset_list) > 1 and not all_in_pool(dataset_list, FFpp_pool):
+                    raise ValueError('Training with multiple dataset and lmdb is not implemented yet.')
+                self.env = self._open_lmdb_env(dataset_list[0])
+        elif mode == 'test':
+            one_data = config['test_dataset']
+            # Test datasets are evaluated separately, so only one is collected
+            image_list, label_list, _ = self.collect_img_and_label_for_one_dataset(one_data)
+            if self.lmdb:
+                self.env = self._open_lmdb_env(one_data)
+        else:
+            raise NotImplementedError('Only train and test modes are supported.')
+        assert len(image_list)!=0 and len(label_list)!=0, f"Collect nothing for {mode} mode!"
+        self.image_list, self.label_list = image_list, label_list
+        # Create a dictionary containing the image and label lists
+        self.data_dict = {
+            'image': self.image_list,
+            'label': self.label_list,
+        }
+        self.transform = self.init_data_aug_method()
+    def _open_lmdb_env(self, dataset_name):
+        """Open, read-only, the LMDB environment that holds ``dataset_name``.
+
+        Members of ``FFpp_pool`` share the single 'FaceForensics++_lmdb'
+        database; any other dataset uses '<dataset_name>_lmdb'. Both are
+        resolved under ``self.config['lmdb_dir']``.
+        """
+        lmdb_name = 'FaceForensics++' if dataset_name in FFpp_pool else dataset_name
+        lmdb_path = os.path.join(self.config['lmdb_dir'], f"{lmdb_name}_lmdb")
+        return lmdb.open(lmdb_path, create=False, subdir=True, readonly=True, lock=False)
+    def init_data_aug_method(self):
+        """Create the albumentations pipeline that ``__init__`` stores in ``self.transform``.
+
+        Every probability and limit is hard-coded below; the ``data_aug``
+        section of ``self.config`` is not read by this method. (An earlier,
+        config-driven pipeline with rotation, Gaussian blur and
+        IsotropicResize was disabled in favour of this fixed one.)
+
+        Returns:
+            A.Compose: The transforms composed in the order listed.
+        """
+        augmentations = [
+            A.HorizontalFlip(p=0.5),
+            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
+            A.HueSaturationValue(p=0.3),
+            # compression quality in 40-100, applied with p=0.1
+            A.ImageCompression(quality_lower=40, quality_upper=100, p=0.1),
+            A.GaussNoise(p=0.1),
+            A.MotionBlur(p=0.1),
+            A.CLAHE(p=0.1),
+            A.ChannelShuffle(p=0.1),
+            A.Cutout(p=0.1),
+            A.RandomGamma(p=0.3),
+            A.GlassBlur(p=0.3),
+        ]
+        return A.Compose(augmentations)
+    def rescale_landmarks(self, landmarks, original_size=256, new_size=224):
+        """Scale landmark coordinates from ``original_size`` to ``new_size``.
+
+        Returns ``landmarks`` multiplied by ``new_size / original_size``.
+        """
+        return landmarks * (new_size / original_size)
+    def collect_img_and_label_for_one_dataset(self, dataset_name: str):
+        """Collects image and label lists.
+        Args:
+            dataset_name (str): A list containing one dataset information. e.g., 'FF-F2F'
+        Returns:
+            list: A list of image paths.
+            list: A list of labels.
+        Raises:
+            ValueError: If image paths or labels are not found.
+            NotImplementedError: If the dataset is not implemented yet.
+        """
+        # Initialize the label and frame path lists
+        label_list = []
+        frame_path_list = []
+        # Record video name for video-level metrics
+        video_name_list = []
+        # Try to get the dataset information from the JSON file
+        if not os.path.exists(self.config['dataset_json_folder']):
+            self.config['dataset_json_folder'] = self.config['dataset_json_folder'].replace('/Youtu_Pangu_Security_Public', '/Youtu_Pangu_Security/public')
+        try:
+            with open(os.path.join(self.config['dataset_json_folder'], dataset_name + '.json'), 'r') as f:
+                dataset_info = json.load(f)
+        except Exception as e:
+            print(e)
+            raise ValueError(f'dataset {dataset_name} not exist!')
+        # If JSON file exists, do the following data collection
+        # FIXME: ugly, need to be modified here.
+        cp = None
+        if dataset_name == 'FaceForensics++_c40':
+            dataset_name = 'FaceForensics++'
+            cp = 'c40'
+        elif dataset_name == 'FF-DF_c40':
+            dataset_name = 'FF-DF'
+            cp = 'c40'
+        elif dataset_name == 'FF-F2F_c40':
+            dataset_name = 'FF-F2F'
+            cp = 'c40'
+        elif dataset_name == 'FF-FS_c40':
+            dataset_name = 'FF-FS'
+            cp = 'c40'
+        elif dataset_name == 'FF-NT_c40':
+            dataset_name = 'FF-NT'
+            cp = 'c40'
+        # Get the information for the current dataset
+        for label in dataset_info[dataset_name]:
+            sub_dataset_info = dataset_info[dataset_name][label][self.mode]
+            # Special case for FaceForensics++ and DeepFakeDetection, choose the compression type
+            # NOTE
+            if cp == None and dataset_name in ['FF-DF', 'FF-F2F', 'FF-FS', 'FF-NT', 'FaceForensics++','DeepFakeDetection','FaceShifter','ivy_fake_train','ivy_fake_test',
+                                                'ivy_fake_test_Deepfakes','ivy_fake_test_NeuralTextures','ivy_fake_test_FaceSwap','ivy_fake_test_Face2Face']:
+                sub_dataset_info = sub_dataset_info[self.compression]
+            elif cp == 'c40' and dataset_name in ['FF-DF', 'FF-F2F', 'FF-FS', 'FF-NT', 'FaceForensics++','DeepFakeDetection','FaceShifter']:
+                sub_dataset_info = sub_dataset_info['c40']
+            # Iterate over the videos in the dataset
+            for video_name, video_info in sub_dataset_info.items():
+                # Unique video name
+                unique_video_name = video_info['label'] + '_' + video_name
+                # Get the label and frame paths for the current video
+                if video_info['label'] not in self.config['label_dict']:
+                    raise ValueError(f'Label {video_info["label"]} is not found in the configuration file.')
+                label = self.config['label_dict'][video_info['label']]
+                frame_paths = video_info['frames']
+                # sorted video path to the lists
+                if '\\' in frame_paths[0]:
+                    frame_paths = sorted(frame_paths, key=lambda x: int(x.split('\\')[-1].split('.')[0]))
+                else:
+                    frame_paths = sorted(frame_paths, key=lambda x: int(x.split('/')[-1].split('.')[0]))
+                # Consider the case when the actual number of frames (e.g., 270) is larger than the specified (i.e., self.frame_num=32)
+                # In this case, we select self.frame_num frames from the original 270 frames
+                total_frames = len(frame_paths)
+                if self.frame_num < total_frames:
+                    total_frames = self.frame_num
+                    if self.video_level:
+                        # Select clip_size continuous frames
+                        start_frame = random.randint(0, total_frames - self.frame_num) if self.mode == 'train' else 0
+                        frame_paths = frame_paths[start_frame:start_frame + self.frame_num]  # update total_frames
+                    else:
+                        # Select self.frame_num frames evenly distributed throughout the video
+                        step = total_frames // self.frame_num
+                        frame_paths = [frame_paths[i] for i in range(0, total_frames, step)][:self.frame_num]
+                # If video-level methods, crop clips from the selected frames if needed
+                if self.video_level:
+                    if self.clip_size is None:
+                        raise ValueError('clip_size must be specified when video_level is True.')
+                    # Check if the number of total frames is greater than or equal to clip_size
+                    if total_frames >= self.clip_size:
+                        # Initialize an empty list to store the selected continuous frames
+                        selected_clips = []
+                        # Calculate the number of clips to select
+                        num_clips = total_frames // self.clip_size
+                        if num_clips > 1:
+                            # Calculate the step size between each clip
+                            clip_step = (total_frames - self.clip_size) // (num_clips - 1)
+                            # Select clip_size continuous frames from each part of the video
+                            for i in range(num_clips):
+                                # Ensure start_frame + self.clip_size - 1 does not exceed the index of the last frame
+                                start_frame = random.randrange(i * clip_step, min((i + 1) * clip_step, total_frames - self.clip_size + 1)) if self.mode == 'train' else i * clip_step
+                                continuous_frames = frame_paths[start_frame:start_frame + self.clip_size]
+                                assert len(continuous_frames) == self.clip_size, 'clip_size is not equal to the length of frame_path_list'
+                                selected_clips.append(continuous_frames)
+                        else:
+                            start_frame = random.randrange(0, total_frames - self.clip_size + 1) if self.mode == 'train' else 0
+                            continuous_frames = frame_paths[start_frame:start_frame + self.clip_size]
+                            assert len(continuous_frames)==self.clip_size, 'clip_size is not equal to the length of frame_path_list'
+                            selected_clips.append(continuous_frames)
+                        # Append the list of selected clips and append the label
+                        label_list.extend([label] * len(selected_clips))
+                        frame_path_list.extend(selected_clips)
+                        # video name save
+                        video_name_list.extend([unique_video_name] * len(selected_clips))
+                    else:
+                        print(f"Skipping video {unique_video_name} because it has less than clip_size ({self.clip_size}) frames ({total_frames}).")
+                # Otherwise, extend the label and frame paths to the lists according to the number of frames
+                else:
+                    # Extend the label and frame paths to the lists according to the number of frames
+                    label_list.extend([label] * total_frames)
+                    frame_path_list.extend(frame_paths)
+                    # video name save
+                    video_name_list.extend([unique_video_name] * len(frame_paths))
+        # Shuffle the label and frame path lists in the same order
+        shuffled = list(zip(label_list, frame_path_list, video_name_list))
+        random.shuffle(shuffled)
+        label_list, frame_path_list, video_name_list = zip(*shuffled)
+        return frame_path_list, label_list, video_name_list
+    def load_rgb(self, file_path):
+        """
+        Load an RGB image from a file path and resize it to a specified resolution.
+        Args:
+            file_path: A string indicating the path to the image file.
+        Returns:
+            An Image object containing the loaded and resized image.
+        Raises:
+            ValueError: If the loaded image is None.
+        """
+        size = self.config['resolution'] # if self.mode == "train" else self.config['resolution']
+        if not self.lmdb:
+            # if not file_path[0] == '.':
+            #     file_path =  f'./{self.config["rgb_dir"]}\\'+file_path
+            if not os.path.exists(file_path):
+                file_path = file_path.replace('\\', '/')
+            assert os.path.exists(file_path), f"{file_path} does not exist"
+            img = cv2.imread(file_path)
+            if img is None:
+                raise ValueError('Loaded image is None: {}'.format(file_path))
+        elif self.lmdb:
+            with self.env.begin(write=False) as txn:
+                # transfer the path format from rgb-path to lmdb-key
+                if file_path[0]=='.':
+                    file_path=file_path.replace('./datasets\\','')
+                image_bin = txn.get(file_path.encode())
+                image_buf = np.frombuffer(image_bin, dtype=np.uint8)
+                img = cv2.imdecode(image_buf, cv2.IMREAD_COLOR)
+        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+        img = cv2.resize(img, (size, size), interpolation=cv2.INTER_CUBIC)
+        return Image.fromarray(np.array(img, dtype=np.uint8))
+    def load_mask(self, file_path):
+        """
+        Load a binary mask image from a file path and resize it to a specified resolution.
+        Args:
+            file_path: A string indicating the path to the mask file.
+        Returns:
+            A numpy array containing the loaded and resized mask.
+        Raises:
+            None.
+        """
+        size = self.config['resolution']
+        if file_path is None:
+            return np.zeros((size, size, 1))
+        if not self.lmdb:
+            # if not file_path[0] == '.':
+            #     file_path =  f'./{self.config["rgb_dir"]}\\'+file_path
+            if os.path.exists(file_path):
+                mask = cv2.imread(file_path, 0)
+                if mask is None:
+                    mask = np.zeros((size, size))
+            else:
+                return np.zeros((size, size, 1))
+        else:
+            with self.env.begin(write=False) as txn:
+                # transfer the path format from rgb-path to lmdb-key
+                if file_path[0]=='.':
+                    file_path=file_path.replace('./datasets\\','')
+                image_bin = txn.get(file_path.encode())
+                if image_bin is None:
+                    mask = np.zeros((size, size,3))
+                else:
+                    image_buf = np.frombuffer(image_bin, dtype=np.uint8)
+                    mask = cv2.imdecode(image_buf, cv2.IMREAD_COLOR)
+        mask = cv2.resize(mask, (size, size)) / 255
+        mask = np.expand_dims(mask, axis=2)
+        return np.float32(mask)
+    def load_landmark(self, file_path):
+        """
+        Load 2D facial landmarks from a file path.
+        Args:
+            file_path: A string indicating the path to the landmark file.
+        Returns:
+            A numpy array containing the loaded landmarks.
+        Raises:
+            None.
+        """
+        if file_path is None:
+            return np.zeros((81, 2))
+        if not self.lmdb:
+            # if not file_path[0] == '.':
+            #     file_path =  f'./{self.config["rgb_dir"]}\\'+file_path
+            if os.path.exists(file_path):
+                landmark = np.load(file_path)
+            else:
+                return np.zeros((81, 2))
+        else:
+            with self.env.begin(write=False) as txn:
+                # transfer the path format from rgb-path to lmdb-key
+                if file_path[0]=='.':
+                    file_path=file_path.replace('./datasets\\','')
+                binary = txn.get(file_path.encode())
+                landmark = np.frombuffer(binary, dtype=np.uint32).reshape((81, 2))
+                landmark=self.rescale_landmarks(np.float32(landmark), original_size=256, new_size=self.config['resolution'])
+        return landmark
+    def to_tensor(self, img):
+        """
+        Convert an image to a PyTorch tensor.
+        """
+        return T.ToTensor()(img)
+    def normalize(self, img):
+        """
+        Normalize an image.
+        """
+        mean = self.config['mean']
+        std = self.config['std']
+        normalize = T.Normalize(mean=mean, std=std)
+        return normalize(img)
+    def data_aug(self, img, landmark=None, mask=None, augmentation_seed=None):
+        """
+        Apply data augmentation to an image, landmark, and mask.
+        Args:
+            img: An Image object containing the image to be augmented.
+            landmark: A numpy array containing the 2D facial landmarks to be augmented.
+            mask: A numpy array containing the binary mask to be augmented.
+        Returns:
+            The augmented image, landmark, and mask.
+        """
+        # Set the seed for the random number generator
+        if augmentation_seed is not None:
+            random.seed(augmentation_seed)
+            np.random.seed(augmentation_seed)
+        # Create a dictionary of arguments
+        kwargs = {'image': img}
+        # Check if the landmark and mask are not None
+        if landmark is not None:
+            kwargs['keypoints'] = landmark
+            kwargs['keypoint_params'] = A.KeypointParams(format='xy')
+        if mask is not None:
+            mask = mask.squeeze(2)
+            if mask.max() > 0:
+                kwargs['mask'] = mask
+        # Apply data augmentation
+        transformed = self.transform(**kwargs)
+        # Get the augmented image, landmark, and mask
+        # NOTE
+        # augmented_img = transformed['image']
+        augmented_img = kwargs['image']
+        augmented_landmark = transformed.get('keypoints')
+        augmented_mask = transformed.get('mask',mask)
+        # Convert the augmented landmark to a numpy array
+        if augmented_landmark is not None:
+            augmented_landmark = np.array(augmented_landmark)
+        # Reset the seeds to ensure different transformations for different videos
+        if augmentation_seed is not None:
+            random.seed()
+            np.random.seed()
+        return augmented_img, augmented_landmark, augmented_mask
+    def __getitem__(self, index, no_norm=False):
+        """
+        Returns the data point at the given index.
+        Args:
+            index (int): The index of the data point.
+        Returns:
+            A tuple containing the image tensor, the label tensor, the landmark tensor,
+            and the mask tensor.
+        """
+        # Get the image paths and label
+        image_paths = self.data_dict['image'][index]
+        label = self.data_dict['label'][index]
+        # Image-level: FaceForensics++\manipulated_sequences\NeuralTextures\c23\frames\487_477\000.png
+        # Video-level: image_paths ['FaceForensics++\\original_sequences\\youtube\\c23\\frames\\977\\000.png', ..., 'FaceForensics++\\original_sequences\\youtube\\c23\\frames\\977\\314.png']
+        if not isinstance(image_paths, list):
+            image_paths = [image_paths]  # for the image-level IO, only one frame is used
+        image_tensors = []
+        landmark_tensors = []
+        mask_tensors = []
+        augmentation_seed = None
+        for image_path in image_paths:
+            # Initialize a new seed for data augmentation at the start of each video
+            if self.video_level and image_path == image_paths[0]:
+                augmentation_seed = random.randint(0, 2**32 - 1)
+            # Get the mask and landmark paths
+            mask_path = image_path.replace('frames', 'masks')  # Use .png for mask
+            landmark_path = image_path.replace('frames', 'landmarks').replace('.png', '.npy')  # Use .npy for landmark
+            # Load the image
+            try:
+                image = self.load_rgb(image_path)
+            except Exception as e:
+                # Skip this image and return the first one
+                print(f"Error loading image at index {index}: {e}")
+                return self.__getitem__(0)
+            image = np.array(image)  # Convert to numpy array for data augmentation
+            # Load mask and landmark (if needed)
+            if self.config['with_mask']:
+                mask = self.load_mask(mask_path)
+            else:
+                mask = None
+            if self.config['with_landmark']:
+                landmarks = self.load_landmark(landmark_path)
+            else:
+                landmarks = None
+            # Do Data Augmentation
+            if self.mode == 'train' and self.config['use_data_augmentation']:
+                image_trans, landmarks_trans, mask_trans = self.data_aug(image, landmarks, mask, augmentation_seed)
+            else:
+                # if self.mode == 'train':
+                #     print("Train w/o data_augmentation")
+                image_trans, landmarks_trans, mask_trans = deepcopy(image), deepcopy(landmarks), deepcopy(mask)
+            # To tensor and normalize
+            if not no_norm:
+                image_trans = self.normalize(self.to_tensor(image_trans))
+                if self.config['with_landmark']:
+                    landmarks_trans = torch.from_numpy(landmarks)
+                if self.config['with_mask']:
+                    mask_trans = torch.from_numpy(mask_trans)
+            image_tensors.append(image_trans)
+            landmark_tensors.append(landmarks_trans)
+            mask_tensors.append(mask_trans)
+        if self.video_level:
+            # Stack image tensors along a new dimension (time)
+            image_tensors = torch.stack(image_tensors, dim=0)
+            # Stack landmark and mask tensors along a new dimension (time)
+            if not any(landmark is None or (isinstance(landmark, list) and None in landmark) for landmark in landmark_tensors):
+                landmark_tensors = torch.stack(landmark_tensors, dim=0)
+            if not any(m is None or (isinstance(m, list) and None in m) for m in mask_tensors):
+                mask_tensors = torch.stack(mask_tensors, dim=0)
+        else:
+            # Get the first image tensor
+            image_tensors = image_tensors[0]
+            # Get the first landmark and mask tensors
+            if not any(landmark is None or (isinstance(landmark, list) and None in landmark) for landmark in landmark_tensors):
+                landmark_tensors = landmark_tensors[0]
+            if not any(m is None or (isinstance(m, list) and None in m) for m in mask_tensors):
+                mask_tensors = mask_tensors[0]
+        return image_tensors, label, landmark_tensors, mask_tensors
+    @staticmethod
+    def collate_fn(batch):
+        """
+        Collate a batch of data points.
+        Args:
+            batch (list): A list of tuples containing the image tensor, the label tensor,
+                          the landmark tensor, and the mask tensor.
+        Returns:
+            A tuple containing the image tensor, the label tensor, the landmark tensor,
+            and the mask tensor.
+        """
+        # Separate the image, label, landmark, and mask tensors
+        images, labels, landmarks, masks = zip(*batch)
+        # Stack the image, label, landmark, and mask tensors
+        images = torch.stack(images, dim=0)
+        labels = torch.LongTensor(labels)
+        # Special case for landmarks and masks if they are None
+        if not any(landmark is None or (isinstance(landmark, list) and None in landmark) for landmark in landmarks):
+            landmarks = torch.stack(landmarks, dim=0)
+        else:
+            landmarks = None
+        if not any(m is None or (isinstance(m, list) and None in m) for m in masks):
+            masks = torch.stack(masks, dim=0)
+        else:
+            masks = None
+        # Create a dictionary of the tensors
+        data_dict = {}
+        data_dict['image'] = images
+        data_dict['label'] = labels
+        data_dict['landmark'] = landmarks
+        data_dict['mask'] = masks
+        return data_dict
+    def __len__(self):
+        """
+        Return the length of the dataset.
+        Args:
+            None.
+        Returns:
+            An integer indicating the length of the dataset.
+        Raises:
+            AssertionError: If the number of images and labels in the dataset are not equal.
+        """
+        assert len(self.image_list) == len(self.label_list), 'Number of images and labels are not equal'
+        return len(self.image_list)
+if __name__ == "__main__":
+    with open('/data/home/zhiyuanyan/DeepfakeBench/training/config/detector/video_baseline.yaml', 'r') as f:
+        config = yaml.safe_load(f)
+    train_set = DeepfakeAbstractBaseDataset(
+                config = config,
+                mode = 'train',
+            )
+    train_data_loader = \
+        torch.utils.data.DataLoader(
+            dataset=train_set,
+            batch_size=config['train_batchSize'],
+            shuffle=True,
+            num_workers=0,
+            collate_fn=train_set.collate_fn,
+        )
+    from tqdm import tqdm
+    for iteration, batch in enumerate(tqdm(train_data_loader)):
+        # print(iteration)
+        ...
+        # if iteration > 10:
+        #     break

preprocessing/dataset_json/gpa.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3fb111163a61bdb552b4354897adddb63e73a15153cd5f51c4b76b3c226a5e9c
+size 4382973

preprocessing/dataset_json/heygen.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3bb0f12c3d3fc0d045056b8b04d9f158275dde13c651343a667099f869c8bc96
+size 1681524

preprocessing/dataset_json/others/Chameleon.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a2a154223e90dfc697d094f6a7f44888e22c39e1241be584994e099519970ea4
+size 12495937

preprocessing/dataset_json/others/CoDiff.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d2255d70cec56cbfacec704cbbda4b240d4c110835b3454c82873fb04f2f8ad9
+size 7851178

preprocessing/dataset_json/others/CollabDiff.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:564cf3e540fa21664e2711ae75013343281c880e5584bc2e8d5c6fc06d15990f
+size 473682

preprocessing/dataset_json/others/DCFace.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6622e0a0dd8c552ab2b233c394848053a1b20fa461fea8e74ab99de5bcb87d32
+size 8019871

preprocessing/dataset_json/others/DeeperForensics-1.0.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9c3d582eae7754cb005a687d607e25861585247a6a81908364f32eab85d6af66
+size 2606195

preprocessing/dataset_json/others/DiT_cdf.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cb15e8b0b4bd900d6251b39f785145bc628a2e148bc7d5791db06fdc0c8b4492
+size 5204091

preprocessing/dataset_json/others/DiT_ff.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a1725104dfcc9b459b1e671a16bcc37ca1c1fd02cdad86aa2c1361edb47629c4
+size 4677394

preprocessing/dataset_json/others/EFSAll_cdf.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ae2487011b7dd66954830da3f84208ac280772159c950f50446ec6114db96e82
+size 55801088

preprocessing/dataset_json/others/EFSAll_ff.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:56daa5d2d011ad0bcef828979a11786ab0599f63063aea7c61dbd208f6e053ce
+size 48399606

preprocessing/dataset_json/others/FRAll_cdf.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1db14cdd1e09a50eddeed1260fdc799e2370a5f8944b755d4dbe3cf640a79dfd
+size 51106195

preprocessing/dataset_json/others/FRAll_ff.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9cbca5ced198c1f795a7a304c705d2fe4af5006adc41cf7f898f6f6c3de74624
+size 67347051

preprocessing/dataset_json/others/FSAll_cdf.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:73939286d610f0d2da4cc589285690762ce070f77beb2b2cfa2b8068126f7efb
+size 34632459

preprocessing/dataset_json/others/FSAll_ff.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ffaac2b1110f1616950364320eef1fd75e60e08273c73e1600413c4aba99fe05
+size 53468909

preprocessing/dataset_json/others/FaceForensics++_vae.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7a55d9e412945fbf17cd1f1f9c2fbabb43074541ba8707772c343821d108b39a
+size 22330288

preprocessing/dataset_json/others/FreeDoM_I.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b0b820bb3074d176ce55feabc88effe1d60c1eb9d8c4207916fbe98eb239c0c7
+size 7371858

preprocessing/dataset_json/others/FreeDoM_T.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:868d8666710d5a52172e20b66b2ff4831fb8371867d91f3ed066af6326cb97e6
+size 7445199

preprocessing/dataset_json/others/MRAA_cdf.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4aa412d4c096609acf6474322137de6d1723e765002d7a52282e1a8f7f217a9e
+size 4030920

preprocessing/dataset_json/others/MRAA_ff.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:057ba54e1573ea011a07f9fd1713adac01a96dc822e27ce1acba1d1d3730843b
+size 5454366

preprocessing/dataset_json/others/SDXL.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:beb9ca562ef02ee5ef292bdcc46683c4a6fd42834ae6ad3680b0c4257ca87675
+size 8257844

preprocessing/dataset_json/others/SDXL_Refine.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d739eb33786955d6a894c803fc6da0aefc275d78877f8c109718992c32eb9e21
+size 8324707