# File size: 3,048 Bytes
# 1444206
# Run-level settings for the custom_laanet task.
TASK: custom_laanet
PRECISION: float32
METRICS_BASE: binary
SEED: 317
DATA_RELOAD: false
# NOTE(review): Resume and begin_epoch are also set under TRAIN (resume,
# begin_epoch) with the same values; confirm which copy the loader reads and
# keep the two in sync.
Resume: true
begin_epoch: 100

# Network definition: type PoseEfficientNet, model_name efficientnet-b4.
MODEL:
  # PRETRAINED_PATH: ''
  type: PoseEfficientNet
  model_name: efficientnet-b4
  num_layers: B4
  include_top: false
  include_hm_decoder: true
  head_conv: 64
  # Presumably these toggle which backbone feature levels are used -
  # TODO confirm against the model code.
  use_c2: false
  use_c3: true
  use_c4: true
  use_c51: true
  efpn: true
  tfpn: false
  se_layer: false
  # Output heads; the values are presumably per-head channel counts -
  # TODO confirm.
  heads:
    hm: 1
    cls: 1
    cstency: 256
  INIT_WEIGHTS:
    pretrained: true
    advprop: true

# Dataset, per-video sampling, split locations and augmentation settings.
DATASET:
  type: BinaryFaceForensic
  FROM_FILE: false
  PIN_MEMORY: true
  NUM_WORKERS: 7
  COLOR_NORM: 'simple'
  # NOTE(review): this mean/std pair differs from TRANSFORM.normalize below;
  # presumably COLOR_NORM decides which one is applied - confirm.
  mean: [0.485, 0.456, 0.406]
  std: [0.229, 0.224, 0.225]
  # Declared exactly once: duplicate mapping keys are invalid YAML and are
  # rejected by strict loaders.
  IMAGE_SUFFIX: png
  COMPRESSION: c0
  IMAGE_SIZE: [384, 384]
  HEATMAP_SIZE: [96, 96]  # [IMAGE_SIZE // 4, IMAGE_SIZE // 4]
  SIGMA: 2
  ADAPTIVE_SIGMA: true
  HEATMAP_TYPE: gaussian
  SPLIT_IMAGE: false
  DATA:
    TYPE: frames
    # Number of samples taken per video for each split when ACTIVE is true
    # (presumably frames, given TYPE above - confirm).
    SAMPLES_PER_VIDEO:
      ACTIVE: true
      TRAIN: 8
      VAL: 8
      TEST: 32
    TRAIN:
      NAME: custom_dataset
      ROOT: ./datasets/train/
      FROM_FILE: false
      FAKETYPE: [fake]
      LABEL_FOLDER: [real, fake]
    # NOTE(review): VAL and TEST both read from ./datasets/test/ - confirm
    # that validating on the test split is intended.
    VAL:
      NAME: custom_dataset
      ROOT: ./datasets/test/
      FROM_FILE: false
      FAKETYPE: [fake]
      LABEL_FOLDER: [real, fake]
    TEST:
      NAME: custom_dataset
      ROOT: ./datasets/test/
      FROM_FILE: false
      FAKETYPE: [fake]
      LABEL_FOLDER: [real, fake]
  TRANSFORM:
    geometry:
      type: GeometryTransform
      # Format: [h, w, p] where p is a probability. If no affine transform,
      # set p=1.
      resize: [384, 384, 0]
      normalize: 0
      horizontal_flip: 0.5
      cropping: [0.15, 0.5]  # Format: [crop_limit, prob]
      scale: [0.15, 0.5]  # Format: [scale_limit, prob]
      rand_erasing: [0.5, 1]  # Format: [p, max_count]
    color:
      type: ColorJitterTransform
      clahe: 0.0
      colorjitter: 0.3
      gaussianblur: 0.3
      gaussnoise: 0.3
      # Format: [prob, lower quality, upper quality]
      jpegcompression: [0.5, 40, 100]
      rgbshift: 0.3
      randomcontrast: 0.0
      randomgamma: 0.5
      randombrightness: 1
      huesat: 1
    normalize:
      mean: [0.5, 0.5, 0.5]
      std: [0.5, 0.5, 0.5]

# Training schedule, loss weighting, optimizer and debug settings.
TRAIN:
  # Declared exactly once: duplicate mapping keys are invalid YAML and are
  # rejected by strict loaders.
  resume: true
  gpus: [0]
  pretrained_model: './logs/27-03-2025/PoseEfficientNet_custom_laanet_model_final.pth'
  batch_size: 32
  lr: 0.00005
  epochs: 150
  begin_epoch: 100
  warm_up: 6
  every_val_epochs: 1
  loss:
    type: CombinedFocalLoss
    use_target_weight: false
    # Per-term weights; the terms set to 0 are presumably disabled - confirm
    # against the loss implementation.
    cls_lmda: 1
    dst_hm_cls_lmda: 0
    offset_lmda: 0
    hm_lmda: 100
    cstency_lmda: 100
    mse_reduction: sum
    ce_reduction: mean
  optimizer: SAM
  distributed: false
  tensorboard: false
  lr_scheduler:
    # type: MultiStepLR
    # NOTE(review): every milestone is below begin_epoch (100); confirm the
    # scheduler behaves as intended when training is resumed.
    milestones: [5, 15, 20, 25]
    gamma: 0.5
  freeze_backbone: true
  debug:
    active: false
    save_hm_gt: true
    save_hm_pred: true

# Evaluation settings.
TEST:
  gpus: [0]
  subtask: 'eval'
  # Left as an empty string here.
  test_file: ''
  vis_hm: true
  threshold: 0.5
  flip_test: true
  video_level: true
  # Weights file used for testing; note this path differs from
  # TRAIN.pretrained_model.
  pretrained: './training/weights/final_model.pth'