---
# Hybrid Document Forgery Detection - Configuration
#
# NOTE(review): this file was recovered from a whitespace-mangled copy; the
# nesting of `datasets`, `chunked_training` and `mixing_ratios` under `data`
# is inferred from the sub-comment style — confirm against the config loader.

# System Settings
system:
  device: cuda  # cuda or cpu
  num_workers: 0  # Reduced to avoid multiprocessing errors
  pin_memory: true
  seed: 42

# Data Settings
data:
  image_size: 384
  batch_size: 8  # Reduced for 16GB RAM
  num_classes: 3  # copy_move, splicing, text_substitution

  # Dataset paths
  datasets:
    doctamper:
      path: datasets/DocTamper
      type: lmdb
      has_pixel_mask: true
      min_region_area: 0.001  # 0.1%
    rtm:
      path: datasets/RealTextManipulation
      type: folder
      has_pixel_mask: true
      min_region_area: 0.0003  # 0.03%
    casia:
      path: "datasets/CASIA 1.0 dataset"  # quoted: path contains spaces
      type: folder
      has_pixel_mask: false
      min_region_area: 0.001  # 0.1%
      skip_deskew: true
      skip_denoising: true
    receipts:
      path: datasets/findit2
      type: folder
      has_pixel_mask: true
      min_region_area: 0.0005  # 0.05%
    fcd:
      path: datasets/DocTamper/DocTamperV1-FCD
      type: lmdb
      has_pixel_mask: true
      min_region_area: 0.00035  # 0.035% (larger forgeries, keep 99%)
    scd:
      path: datasets/DocTamper/DocTamperV1-SCD
      type: lmdb
      has_pixel_mask: true
      min_region_area: 0.00009  # 0.009% (small forgeries, keep 91.5%)

  # Chunked training for DocTamper (RAM constraint)
  chunked_training:
    enabled: true
    dataset: doctamper
    chunks:
      - {start: 0.0, end: 0.25, name: "chunk_1"}
      - {start: 0.25, end: 0.5, name: "chunk_2"}
      - {start: 0.5, end: 0.75, name: "chunk_3"}
      - {start: 0.75, end: 1.0, name: "chunk_4"}

  # Mixed dataset training (TrainingSet + FCD + SCD)
  mixing_ratios:
    doctamper: 0.70  # 70% TrainingSet (maintains baseline)
    scd: 0.20  # 20% SCD (handles small forgeries, 0.88% avg)
    fcd: 0.10  # 10% FCD (adds diversity, 3.55% avg)

# Preprocessing
preprocessing:
  deskew: true
  normalize: true
  noise_threshold: 15.0  # Laplacian variance threshold
  median_filter_size: 3
  gaussian_sigma: 0.8

  # Dataset-aware preprocessing
  dataset_specific:
    casia:
      deskew: false
      denoising: false

# Augmentation (Training only)
augmentation:
  enabled: true

  # Common augmentations
  common:
    - {type: "noise", prob: 0.3}
    - {type: "motion_blur", prob: 0.2}
    - {type: "jpeg_compression", prob: 0.3, quality: [60, 95]}
    - {type: "lighting", prob: 0.3}
    - {type: "perspective", prob: 0.2}

  # Dataset-specific augmentations
  receipts:
    - {type: "stain", prob: 0.2}
    - {type: "fold", prob: 0.15}

# Model Architecture
model:
  # Encoder
  encoder:
    name: mobilenetv3_small_100
    pretrained: true
    features_only: true

  # Decoder
  decoder:
    name: unet_lite
    channels: [16, 24, 40, 48, 96]  # MobileNetV3-Small feature channels
    upsampling: bilinear
    use_depthwise_separable: true

  # Output
  output_channels: 1  # Binary forgery mask

# Loss Function
loss:
  # Dataset-aware loss
  use_dice: true  # Only for datasets with pixel masks
  bce_weight: 1.0
  dice_weight: 1.0

# Training
training:
  epochs: 30  # Per chunk (increased for single-pass training)
  learning_rate: 0.001  # Higher initial LR for faster convergence
  weight_decay: 0.0001  # Slight increase for better regularization

  # Optimizer
  optimizer: adamw

  # Scheduler
  scheduler:
    type: cosine_annealing_warm_restarts
    T_0: 10  # Restart every 10 epochs
    T_mult: 2  # Double restart period each time
    warmup_epochs: 3  # Warmup for first 3 epochs
    min_lr: 0.00001  # End at 1/100th of initial LR

  # Early stopping
  early_stopping:
    enabled: true
    patience: 10  # Increased to allow more exploration
    min_delta: 0.0005  # Accept smaller improvements (0.05%)
    restore_best_weights: true  # Restore best model when stopping
    monitor: val_dice
    mode: max

  # Checkpointing
  checkpoint:
    save_best: true
    save_every: 5  # Save every 5 epochs
    save_last: true  # Also save last checkpoint
    monitor: val_dice

# Mask Refinement
mask_refinement:
  threshold: 0.5
  morphology:
    closing_kernel: 5
    opening_kernel: 3

  # Adaptive thresholds per dataset
  min_region_area:
    rtm: 0.0003
    receipts: 0.0005
    default: 0.001

# Feature Extraction
features:
  # Deep features
  deep:
    enabled: true
    pooling: gap  # Global Average Pooling

  # Statistical & Shape features
  statistical:
    enabled: true
    features:
      - area
      - perimeter
      - aspect_ratio
      - solidity
      - eccentricity
      - entropy

  # Frequency-domain features
  frequency:
    enabled: true
    features:
      - dct_coefficients
      - high_frequency_energy
      - wavelet_energy

  # Noise & ELA features
  noise:
    enabled: true
    features:
      - ela_mean
      - ela_variance
      - noise_residual

  # OCR-consistency features (text documents only)
  ocr:
    enabled: true
    gated: true  # Only for text documents
    features:
      - confidence_deviation
      - spacing_irregularity
      - stroke_width_variation

  # Feature normalization
  normalization:
    method: standard_scaler
    handle_missing: true

# LightGBM Classifier
classifier:
  model: lightgbm
  params:
    objective: multiclass
    num_class: 3
    boosting_type: gbdt
    num_leaves: 31
    learning_rate: 0.05
    n_estimators: 200
    max_depth: 7
    min_child_samples: 20
    subsample: 0.8
    colsample_bytree: 0.8
    reg_alpha: 0.1
    reg_lambda: 0.1
    random_state: 42

  # Confidence threshold
  confidence_threshold: 0.6

# Metrics
metrics:
  # Localization metrics (only for datasets with pixel masks)
  localization:
    - iou
    - dice
    - precision
    - recall

  # Classification metrics
  classification:
    - accuracy
    - f1_score
    - precision
    - recall
    - confusion_matrix

  # Dataset-aware metric computation
  compute_localization:
    doctamper: true
    rtm: true
    casia: false
    receipts: true

# Outputs
outputs:
  base_dir: outputs

  # Subdirectories
  checkpoints: outputs/checkpoints
  logs: outputs/logs
  plots: outputs/plots
  results: outputs/results

# Visualization
visualization:
  save_mask: true
  save_overlay: true
  save_json: true
  overlay_alpha: 0.5
  colormap: jet

# Deployment
deployment:
  export_onnx: true
  onnx_path: outputs/model.onnx
  quantization: false
  opset_version: 14

# Logging
logging:
  level: INFO
  tensorboard: true
  csv: true
  console: true