File size: 6,838 Bytes
ff0e79e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
# Hybrid Document Forgery Detection - Configuration

# System Settings
# Runtime options: compute device, data-loading workers, memory pinning, and
# the random seed.
system:
  device: cuda  # cuda or cpu
  num_workers: 0  # Reduced to avoid multiprocessing errors
  # NOTE(review): presumably forwarded to the data loader together with
  # num_workers — confirm against the training code.
  pin_memory: true
  # Same value as classifier.params.random_state.
  seed: 42

# Data Settings
# Input size, batch size, class count, the dataset registry, and the
# chunk / mixing schedule used for DocTamper training.
data:
  image_size: 384
  batch_size: 8  # Kept small for 16GB RAM
  num_classes: 3  # copy_move, splicing, text_substitution

  # Dataset registry.
  # min_region_area is written as a fraction; the trailing comment on each
  # entry gives the same value as a percentage.
  datasets:
    doctamper:
      path: datasets/DocTamper
      type: lmdb
      has_pixel_mask: true
      min_region_area: 0.001  # 0.1%

    rtm:
      path: datasets/RealTextManipulation
      type: folder
      has_pixel_mask: true
      min_region_area: 0.0003  # 0.03%

    # The only dataset without pixel masks; deskew and denoising are skipped.
    casia:
      path: 'datasets/CASIA 1.0 dataset'  # quoted: the path contains spaces
      type: folder
      has_pixel_mask: false
      min_region_area: 0.001  # 0.1%
      skip_deskew: true
      skip_denoising: true

    receipts:
      path: datasets/findit2
      type: folder
      has_pixel_mask: true
      min_region_area: 0.0005  # 0.05%

    fcd:
      path: datasets/DocTamper/DocTamperV1-FCD
      type: lmdb
      has_pixel_mask: true
      min_region_area: 0.00035  # 0.035% (larger forgeries, keep 99%)

    scd:
      path: datasets/DocTamper/DocTamperV1-SCD
      type: lmdb
      has_pixel_mask: true
      min_region_area: 0.00009  # 0.009% (small forgeries, keep 91.5%)

  # Chunked training for DocTamper (RAM constraint).
  # The four chunks cover 0.0-1.0 in equal quarters.
  chunked_training:
    enabled: true
    dataset: doctamper
    chunks:
      - start: 0.0
        end: 0.25
        name: chunk_1
      - start: 0.25
        end: 0.5
        name: chunk_2
      - start: 0.5
        end: 0.75
        name: chunk_3
      - start: 0.75
        end: 1.0
        name: chunk_4

    # Mixed dataset training (TrainingSet + FCD + SCD); ratios sum to 1.0.
    mixing_ratios:
      doctamper: 0.70  # 70% TrainingSet (maintains baseline)
      scd: 0.20        # 20% SCD (handles small forgeries, 0.88% avg)
      fcd: 0.10        # 10% FCD (adds diversity, 3.55% avg)

# Preprocessing
# Global preprocessing defaults, followed by per-dataset overrides.
preprocessing:
  deskew: true
  normalize: true
  noise_threshold: 15.0  # Laplacian variance threshold
  median_filter_size: 3
  gaussian_sigma: 0.8
  
  # Dataset-aware preprocessing
  # casia switches deskew and denoising off here; data.datasets.casia also
  # sets skip_deskew / skip_denoising to true, so keep the two in sync.
  dataset_specific:
    casia:
      deskew: false
      denoising: false

# Augmentation (Training only)
# Each entry names a transform `type` and its `prob`; jpeg_compression also
# carries a two-element `quality` list.
augmentation:
  enabled: true

  # Common augmentations
  common:
    - type: noise
      prob: 0.3
    - type: motion_blur
      prob: 0.2
    - type: jpeg_compression
      prob: 0.3
      quality: [60, 95]
    - type: lighting
      prob: 0.3
    - type: perspective
      prob: 0.2

  # Dataset-specific augmentations (receipts)
  receipts:
    - type: stain
      prob: 0.2
    - type: fold
      prob: 0.15

# Model Architecture
# Mask-prediction network: an encoder backbone, a lightweight decoder, and a
# single-channel output.
model:
  # Encoder
  encoder:
    name: mobilenetv3_small_100
    pretrained: true
    features_only: true
  
  # Decoder
  decoder:
    name: unet_lite
    # NOTE(review): if the encoder is created through timm, check this list
    # against the backbone's feature_info.channels(); the reported per-stage
    # widths for mobilenetv3_small_100 may differ from the values below.
    channels: [16, 24, 40, 48, 96]  # MobileNetV3-Small feature channels
    upsampling: bilinear
    use_depthwise_separable: true
  
  # Output
  output_channels: 1  # Binary forgery mask

# Loss Function
# BCE and Dice terms, each with a weight of 1.0.
loss:
  # Dataset-aware loss
  # NOTE(review): "datasets with pixel masks" presumably means those with
  # has_pixel_mask: true under data.datasets — confirm in the loss code.
  use_dice: true  # Only for datasets with pixel masks
  bce_weight: 1.0
  dice_weight: 1.0

# Training
# Epoch budget, optimizer, learning-rate schedule, early stopping, and
# checkpointing.
training:
  epochs: 30  # Per chunk (increased for single-pass training)
  learning_rate: 0.001  # Higher initial LR for faster convergence
  weight_decay: 0.0001  # Slight increase for better regularization
  
  # Optimizer
  optimizer: adamw
  
  # Scheduler
  # NOTE(review): T_0 / T_mult look like the arguments of PyTorch's
  # CosineAnnealingWarmRestarts; confirm how warmup_epochs and min_lr are
  # layered on top of it by the training code.
  scheduler:
    type: cosine_annealing_warm_restarts
    T_0: 10  # Restart every 10 epochs
    T_mult: 2  # Double restart period each time
    warmup_epochs: 3  # Warmup for first 3 epochs
    min_lr: 0.00001  # End at 1/100th of initial LR
  
  # Early stopping
  # Watches val_dice with mode: max.
  early_stopping:
    enabled: true
    patience: 10  # Increased to allow more exploration
    min_delta: 0.0005  # Accept smaller improvements (0.05%)
    restore_best_weights: true  # Restore best model when stopping
    monitor: val_dice
    mode: max
  
  # Checkpointing
  # Monitors val_dice, the same metric early_stopping watches.
  checkpoint:
    save_best: true
    save_every: 5  # Save every 5 epochs
    save_last: true  # Also save last checkpoint
    monitor: val_dice

# Mask Refinement
# Post-processing of the predicted mask: binarisation threshold, morphology
# kernel sizes, and the minimum region area per dataset.
mask_refinement:
  threshold: 0.5
  morphology:
    closing_kernel: 5
    opening_kernel: 3

  # Adaptive thresholds per dataset.
  # Values mirror data.datasets.<name>.min_region_area. Datasets without an
  # entry presumably fall back to `default` — confirm in the refinement code.
  min_region_area:
    rtm: 0.0003
    receipts: 0.0005
    # fcd and scd are listed explicitly because their dataset-level thresholds
    # are well below `default`; relying on the fallback would filter at 0.001
    # instead.
    fcd: 0.00035
    scd: 0.00009
    default: 0.001

# Feature Extraction
# Five feature groups (deep, statistical, frequency, noise, ocr), each with
# its own `enabled` switch, followed by the normalization settings.
features:
  # Deep features
  deep:
    enabled: true
    pooling: gap  # Global Average Pooling
  
  # Statistical & Shape features
  statistical:
    enabled: true
    features:
      - area
      - perimeter
      - aspect_ratio
      - solidity
      - eccentricity
      - entropy
  
  # Frequency-domain features
  frequency:
    enabled: true
    features:
      - dct_coefficients
      - high_frequency_energy
      - wavelet_energy
  
  # Noise & ELA features
  noise:
    enabled: true
    features:
      - ela_mean
      - ela_variance
      - noise_residual
  
  # OCR-consistency features (text documents only)
  # NOTE(review): how a document is judged to be a "text document" when
  # gated is true is not visible in this file — confirm in the extractor.
  ocr:
    enabled: true
    gated: true  # Only for text documents
    features:
      - confidence_deviation
      - spacing_irregularity
      - stroke_width_variation
  
  # Feature normalization
  normalization:
    method: standard_scaler
    handle_missing: true

# LightGBM Classifier
# Multiclass gradient-boosted classifier settings plus the confidence
# threshold for its predictions.
classifier:
  model: lightgbm
  params:
    objective: multiclass
    num_class: 3  # Same count as data.num_classes
    boosting_type: gbdt
    num_leaves: 31
    learning_rate: 0.05
    n_estimators: 200
    max_depth: 7
    min_child_samples: 20
    subsample: 0.8
    # LightGBM ignores `subsample` (bagging_fraction) while the bagging
    # frequency is 0, which is its default; 1 resamples rows every iteration
    # so the 0.8 row subsampling actually takes effect.
    subsample_freq: 1
    colsample_bytree: 0.8
    reg_alpha: 0.1
    reg_lambda: 0.1
    random_state: 42  # Same value as system.seed

  # Confidence threshold
  confidence_threshold: 0.6

# Metrics
# Metric names per task, plus a per-dataset switch for localization metrics.
metrics:
  # Localization metrics (only for datasets with pixel masks)
  localization:
    - iou
    - dice
    - precision
    - recall

  # Classification metrics
  classification:
    - accuracy
    - f1_score
    - precision
    - recall
    - confusion_matrix

  # Dataset-aware metric computation.
  # Each flag matches data.datasets.<name>.has_pixel_mask. fcd and scd are
  # listed explicitly (both have pixel masks) so their behaviour does not
  # depend on how a missing entry is treated.
  compute_localization:
    doctamper: true
    rtm: true
    casia: false
    receipts: true
    fcd: true
    scd: true

# Outputs
# Output directory layout and visualization options.
outputs:
  base_dir: outputs
  
  # Subdirectories
  # Each path is spelled out in full, including the base_dir prefix.
  # NOTE(review): presumably not derived from base_dir automatically, so
  # update these together if base_dir changes — confirm against the loader.
  checkpoints: outputs/checkpoints
  logs: outputs/logs
  plots: outputs/plots
  results: outputs/results
  
  # Visualization
  visualization:
    save_mask: true
    save_overlay: true
    save_json: true
    overlay_alpha: 0.5
    colormap: jet

# Deployment
# ONNX export settings.
deployment:
  export_onnx: true
  onnx_path: outputs/model.onnx  # Directly under outputs.base_dir
  quantization: false
  # NOTE(review): presumably the ONNX opset passed to the exporter — confirm.
  opset_version: 14

# Logging
# Log level plus on/off switches for tensorboard, csv, and console.
# NOTE(review): the three switches presumably select log output targets —
# confirm against the logging setup code.
logging:
  level: INFO
  tensorboard: true
  csv: true
  console: true