---
# Hybrid Document Forgery Detection - Configuration
#
# NOTE(review): this file was restored from a copy whose indentation had been
# stripped and whose lines were wrapped in table pipes. Keys and values are
# unchanged, but the nesting below was reconstructed from the section comments
# and key names. Confirm these placements against the config loader:
#   - data.datasets, data.chunked_training, data.mixing_ratios
#   - data.datasets.casia.skip_deskew / skip_denoising
#   - training.checkpoint
#   - outputs.visualization

# System Settings
system:
  device: cuda  # cuda or cpu
  num_workers: 0  # Reduced to avoid multiprocessing errors
  pin_memory: true
  seed: 42

# Data Settings
data:
  image_size: 384
  batch_size: 8  # Reduced for 16GB RAM
  num_classes: 3  # copy_move, splicing, text_substitution

  # Dataset paths
  datasets:
    doctamper:
      path: datasets/DocTamper
      type: lmdb
      has_pixel_mask: true
      min_region_area: 0.001  # 0.1%
    rtm:
      path: datasets/RealTextManipulation
      type: folder
      has_pixel_mask: true
      min_region_area: 0.0003  # 0.03%
    casia:
      # Quoted because the directory name contains spaces.
      path: 'datasets/CASIA 1.0 dataset'
      type: folder
      has_pixel_mask: false
      min_region_area: 0.001  # 0.1%
      # NOTE(review): these two flags repeat what
      # preprocessing.dataset_specific.casia expresses; keep them in sync.
      skip_deskew: true
      skip_denoising: true
    receipts:
      path: datasets/findit2
      type: folder
      has_pixel_mask: true
      min_region_area: 0.0005  # 0.05%
    fcd:
      path: datasets/DocTamper/DocTamperV1-FCD
      type: lmdb
      has_pixel_mask: true
      min_region_area: 0.00035  # 0.035% (larger forgeries, keep 99%)
    scd:
      path: datasets/DocTamper/DocTamperV1-SCD
      type: lmdb
      has_pixel_mask: true
      min_region_area: 0.00009  # 0.009% (small forgeries, keep 91.5%)

  # Chunked training for DocTamper (RAM constraint)
  chunked_training:
    enabled: true
    dataset: doctamper
    # Each chunk is a [start, end) fraction of the dataset; the four chunks
    # below tile 0.0-1.0 in quarters.
    chunks:
      - {start: 0.0, end: 0.25, name: "chunk_1"}
      - {start: 0.25, end: 0.5, name: "chunk_2"}
      - {start: 0.5, end: 0.75, name: "chunk_3"}
      - {start: 0.75, end: 1.0, name: "chunk_4"}

  # Mixed dataset training (TrainingSet + FCD + SCD); ratios sum to 1.0
  mixing_ratios:
    doctamper: 0.70  # 70% TrainingSet (maintains baseline)
    scd: 0.20  # 20% SCD (handles small forgeries, 0.88% avg)
    fcd: 0.10  # 10% FCD (adds diversity, 3.55% avg)

# Preprocessing
preprocessing:
  deskew: true
  normalize: true
  noise_threshold: 15.0  # Laplacian variance threshold
  median_filter_size: 3
  gaussian_sigma: 0.8

  # Dataset-aware preprocessing (per-dataset overrides of the flags above)
  dataset_specific:
    casia:
      deskew: false
      denoising: false

# Augmentation (Training only)
augmentation:
  enabled: true

  # Common augmentations
  common:
    - {type: "noise", prob: 0.3}
    - {type: "motion_blur", prob: 0.2}
    - {type: "jpeg_compression", prob: 0.3, quality: [60, 95]}
    - {type: "lighting", prob: 0.3}
    - {type: "perspective", prob: 0.2}

  # Dataset-specific augmentations
  receipts:
    - {type: "stain", prob: 0.2}
    - {type: "fold", prob: 0.15}

# Model Architecture
model:
  # Encoder
  encoder:
    name: mobilenetv3_small_100
    pretrained: true
    features_only: true

  # Decoder
  decoder:
    name: unet_lite
    channels: [16, 24, 40, 48, 96]  # MobileNetV3-Small feature channels
    upsampling: bilinear
    use_depthwise_separable: true

  # Output
  output_channels: 1  # Binary forgery mask

# Loss Function
loss:
  # Dataset-aware loss
  use_dice: true  # Only for datasets with pixel masks
  bce_weight: 1.0
  dice_weight: 1.0

# Training
training:
  epochs: 30  # Per chunk (increased for single-pass training)
  learning_rate: 0.001  # Higher initial LR for faster convergence
  weight_decay: 0.0001  # Slight increase for better regularization

  # Optimizer
  optimizer: adamw

  # Scheduler
  scheduler:
    type: cosine_annealing_warm_restarts
    T_0: 10  # Restart every 10 epochs
    T_mult: 2  # Double restart period each time
    warmup_epochs: 3  # Warmup for first 3 epochs
    min_lr: 0.00001  # End at 1/100th of initial LR

  # Early stopping
  early_stopping:
    enabled: true
    patience: 10  # Increased to allow more exploration
    min_delta: 0.0005  # Accept smaller improvements (0.05%)
    restore_best_weights: true  # Restore best model when stopping
    monitor: val_dice
    mode: max

  # Checkpointing
  checkpoint:
    save_best: true
    save_every: 5  # Save every 5 epochs
    save_last: true  # Also save last checkpoint
    monitor: val_dice

# Mask Refinement
mask_refinement:
  threshold: 0.5
  morphology:
    closing_kernel: 5
    opening_kernel: 3

  # Adaptive thresholds per dataset
  # NOTE(review): fcd and scd have no entry here, so they would take
  # `default` (0.001) rather than the 0.00035 / 0.00009 declared under
  # data.datasets. Confirm which of the two settings the pipeline reads.
  min_region_area:
    rtm: 0.0003
    receipts: 0.0005
    default: 0.001

# Feature Extraction
features:
  # Deep features
  deep:
    enabled: true
    pooling: gap  # Global Average Pooling

  # Statistical & Shape features
  statistical:
    enabled: true
    features:
      - area
      - perimeter
      - aspect_ratio
      - solidity
      - eccentricity
      - entropy

  # Frequency-domain features
  frequency:
    enabled: true
    features:
      - dct_coefficients
      - high_frequency_energy
      - wavelet_energy

  # Noise & ELA features
  noise:
    enabled: true
    features:
      - ela_mean
      - ela_variance
      - noise_residual

  # OCR-consistency features (text documents only)
  ocr:
    enabled: true
    gated: true  # Only for text documents
    features:
      - confidence_deviation
      - spacing_irregularity
      - stroke_width_variation

  # Feature normalization
  normalization:
    method: standard_scaler
    handle_missing: true

# LightGBM Classifier
classifier:
  model: lightgbm
  params:
    objective: multiclass
    num_class: 3  # Same value as data.num_classes
    boosting_type: gbdt
    num_leaves: 31
    learning_rate: 0.05
    n_estimators: 200
    max_depth: 7
    min_child_samples: 20
    subsample: 0.8
    colsample_bytree: 0.8
    reg_alpha: 0.1
    reg_lambda: 0.1
    random_state: 42

  # Confidence threshold
  confidence_threshold: 0.6

# Metrics
metrics:
  # Localization metrics (only for datasets with pixel masks)
  localization:
    - iou
    - dice
    - precision
    - recall

  # Classification metrics
  classification:
    - accuracy
    - f1_score
    - precision
    - recall
    - confusion_matrix

  # Dataset-aware metric computation
  # NOTE(review): fcd and scd are declared with has_pixel_mask: true under
  # data.datasets but have no entry here. Confirm how the metrics code
  # treats a dataset that is missing from this map.
  compute_localization:
    doctamper: true
    rtm: true
    casia: false
    receipts: true

# Outputs
outputs:
  base_dir: outputs

  # Subdirectories
  checkpoints: outputs/checkpoints
  logs: outputs/logs
  plots: outputs/plots
  results: outputs/results

  # Visualization
  visualization:
    save_mask: true
    save_overlay: true
    save_json: true
    overlay_alpha: 0.5
    colormap: jet

# Deployment
deployment:
  export_onnx: true
  onnx_path: outputs/model.onnx
  quantization: false
  opset_version: 14

# Logging
logging:
  level: INFO
  tensorboard: true
  csv: true
  console: true