Spaces:
Sleeping
Sleeping
Upload 54 files
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- app.py +231 -0
- config.yaml +297 -0
- models/best_doctamper.pth +3 -0
- models/classifier/classifier_metadata.json +821 -0
- models/classifier/lightgbm_model.txt +0 -0
- models/classifier/scaler.joblib +3 -0
- src/__init__.py +32 -0
- src/__pycache__/__init__.cpython-312.pyc +0 -0
- src/config/__init__.py +5 -0
- src/config/__pycache__/__init__.cpython-312.pyc +0 -0
- src/config/__pycache__/config_loader.cpython-312.pyc +0 -0
- src/config/config_loader.py +117 -0
- src/data/__init__.py +23 -0
- src/data/__pycache__/__init__.cpython-312.pyc +0 -0
- src/data/__pycache__/augmentation.cpython-312.pyc +0 -0
- src/data/__pycache__/datasets.cpython-312.pyc +0 -0
- src/data/__pycache__/preprocessing.cpython-312.pyc +0 -0
- src/data/augmentation.py +150 -0
- src/data/datasets.py +541 -0
- src/data/preprocessing.py +226 -0
- src/features/__init__.py +32 -0
- src/features/__pycache__/__init__.cpython-312.pyc +0 -0
- src/features/__pycache__/feature_extraction.cpython-312.pyc +0 -0
- src/features/__pycache__/region_extraction.cpython-312.pyc +0 -0
- src/features/feature_extraction.py +485 -0
- src/features/region_extraction.py +226 -0
- src/inference/__init__.py +5 -0
- src/inference/__pycache__/__init__.cpython-312.pyc +0 -0
- src/inference/__pycache__/pipeline.cpython-312.pyc +0 -0
- src/inference/pipeline.py +359 -0
- src/models/__init__.py +19 -0
- src/models/__pycache__/__init__.cpython-312.pyc +0 -0
- src/models/__pycache__/decoder.cpython-312.pyc +0 -0
- src/models/__pycache__/encoder.cpython-312.pyc +0 -0
- src/models/__pycache__/losses.cpython-312.pyc +0 -0
- src/models/__pycache__/network.cpython-312.pyc +0 -0
- src/models/decoder.py +186 -0
- src/models/encoder.py +75 -0
- src/models/losses.py +168 -0
- src/models/network.py +133 -0
- src/training/__init__.py +24 -0
- src/training/__pycache__/__init__.cpython-312.pyc +0 -0
- src/training/__pycache__/classifier.cpython-312.pyc +0 -0
- src/training/__pycache__/metrics.cpython-312.pyc +0 -0
- src/training/__pycache__/trainer.cpython-312.pyc +0 -0
- src/training/classifier.py +282 -0
- src/training/metrics.py +305 -0
- src/training/trainer.py +450 -0
- src/utils/__init__.py +28 -0
- src/utils/__pycache__/__init__.cpython-312.pyc +0 -0
app.py
ADDED
|
@@ -0,0 +1,231 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Document Forgery Detection - Gradio Interface for Hugging Face Spaces
|
| 3 |
+
|
| 4 |
+
This app provides a web interface for detecting and classifying document forgeries.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import gradio as gr
|
| 8 |
+
import torch
|
| 9 |
+
import cv2
|
| 10 |
+
import numpy as np
|
| 11 |
+
from PIL import Image
|
| 12 |
+
import json
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
import sys
|
| 15 |
+
|
| 16 |
+
# Add src to path
|
| 17 |
+
sys.path.insert(0, str(Path(__file__).parent))
|
| 18 |
+
|
| 19 |
+
from src.models import get_model
|
| 20 |
+
from src.config import get_config
|
| 21 |
+
from src.data.preprocessing import DocumentPreprocessor
|
| 22 |
+
from src.data.augmentation import DatasetAwareAugmentation
|
| 23 |
+
from src.features.region_extraction import get_mask_refiner, get_region_extractor
|
| 24 |
+
from src.features.feature_extraction import get_feature_extractor
|
| 25 |
+
from src.training.classifier import ForgeryClassifier
|
| 26 |
+
|
| 27 |
+
# Class names
|
| 28 |
+
CLASS_NAMES = {0: 'Copy-Move', 1: 'Splicing', 2: 'Generation'}
|
| 29 |
+
CLASS_COLORS = {
|
| 30 |
+
0: (255, 0, 0), # Red for Copy-Move
|
| 31 |
+
1: (0, 255, 0), # Green for Splicing
|
| 32 |
+
2: (0, 0, 255) # Blue for Generation
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class ForgeryDetector:
|
| 37 |
+
"""Main forgery detection pipeline"""
|
| 38 |
+
|
| 39 |
+
def __init__(self):
|
| 40 |
+
print("Loading models...")
|
| 41 |
+
|
| 42 |
+
# Load config
|
| 43 |
+
self.config = get_config('config.yaml')
|
| 44 |
+
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 45 |
+
|
| 46 |
+
# Load segmentation model
|
| 47 |
+
self.model = get_model(self.config).to(self.device)
|
| 48 |
+
checkpoint = torch.load('models/segmentation_model.pth', map_location=self.device)
|
| 49 |
+
self.model.load_state_dict(checkpoint['model_state_dict'])
|
| 50 |
+
self.model.eval()
|
| 51 |
+
|
| 52 |
+
# Load classifier
|
| 53 |
+
self.classifier = ForgeryClassifier(self.config)
|
| 54 |
+
self.classifier.load('models/classifier')
|
| 55 |
+
|
| 56 |
+
# Initialize components
|
| 57 |
+
self.preprocessor = DocumentPreprocessor(self.config, 'doctamper')
|
| 58 |
+
self.augmentation = DatasetAwareAugmentation(self.config, 'doctamper', is_training=False)
|
| 59 |
+
self.mask_refiner = get_mask_refiner(self.config)
|
| 60 |
+
self.region_extractor = get_region_extractor(self.config)
|
| 61 |
+
self.feature_extractor = get_feature_extractor(self.config, is_text_document=True)
|
| 62 |
+
|
| 63 |
+
print("✓ Models loaded successfully!")
|
| 64 |
+
|
| 65 |
+
def detect(self, image):
|
| 66 |
+
"""
|
| 67 |
+
Detect forgeries in document image
|
| 68 |
+
|
| 69 |
+
Args:
|
| 70 |
+
image: PIL Image or numpy array
|
| 71 |
+
|
| 72 |
+
Returns:
|
| 73 |
+
overlay_image: Image with detection overlay
|
| 74 |
+
results_json: Detection results as JSON
|
| 75 |
+
"""
|
| 76 |
+
# Convert PIL to numpy
|
| 77 |
+
if isinstance(image, Image.Image):
|
| 78 |
+
image = np.array(image)
|
| 79 |
+
|
| 80 |
+
# Convert to RGB
|
| 81 |
+
if len(image.shape) == 2:
|
| 82 |
+
image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
|
| 83 |
+
elif image.shape[2] == 4:
|
| 84 |
+
image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
|
| 85 |
+
|
| 86 |
+
original_image = image.copy()
|
| 87 |
+
|
| 88 |
+
# Preprocess
|
| 89 |
+
preprocessed, _ = self.preprocessor(image, None)
|
| 90 |
+
|
| 91 |
+
# Augment
|
| 92 |
+
augmented = self.augmentation(preprocessed, None)
|
| 93 |
+
image_tensor = augmented['image'].unsqueeze(0).to(self.device)
|
| 94 |
+
|
| 95 |
+
# Run localization
|
| 96 |
+
with torch.no_grad():
|
| 97 |
+
logits, decoder_features = self.model(image_tensor)
|
| 98 |
+
prob_map = torch.sigmoid(logits).cpu().numpy()[0, 0]
|
| 99 |
+
|
| 100 |
+
# Refine mask
|
| 101 |
+
binary_mask = (prob_map > 0.5).astype(np.uint8)
|
| 102 |
+
refined_mask = self.mask_refiner.refine(binary_mask, original_size=original_image.shape[:2])
|
| 103 |
+
|
| 104 |
+
# Extract regions
|
| 105 |
+
regions = self.region_extractor.extract(refined_mask, prob_map, original_image)
|
| 106 |
+
|
| 107 |
+
# Classify regions
|
| 108 |
+
results = []
|
| 109 |
+
for region in regions:
|
| 110 |
+
# Extract features
|
| 111 |
+
features = self.feature_extractor.extract(
|
| 112 |
+
preprocessed,
|
| 113 |
+
region['region_mask'],
|
| 114 |
+
[f.cpu() for f in decoder_features]
|
| 115 |
+
)
|
| 116 |
+
|
| 117 |
+
# Classify
|
| 118 |
+
predictions, confidences = self.classifier.predict(features)
|
| 119 |
+
forgery_type = int(predictions[0])
|
| 120 |
+
confidence = float(confidences[0])
|
| 121 |
+
|
| 122 |
+
if confidence > 0.6: # Confidence threshold
|
| 123 |
+
results.append({
|
| 124 |
+
'region_id': region['region_id'],
|
| 125 |
+
'bounding_box': region['bounding_box'],
|
| 126 |
+
'forgery_type': CLASS_NAMES[forgery_type],
|
| 127 |
+
'confidence': confidence
|
| 128 |
+
})
|
| 129 |
+
|
| 130 |
+
# Create visualization
|
| 131 |
+
overlay = self._create_overlay(original_image, results)
|
| 132 |
+
|
| 133 |
+
# Create JSON response
|
| 134 |
+
json_results = {
|
| 135 |
+
'num_detections': len(results),
|
| 136 |
+
'detections': results,
|
| 137 |
+
'model_info': {
|
| 138 |
+
'segmentation_dice': '75%',
|
| 139 |
+
'classifier_accuracy': '92%'
|
| 140 |
+
}
|
| 141 |
+
}
|
| 142 |
+
|
| 143 |
+
return overlay, json_results
|
| 144 |
+
|
| 145 |
+
def _create_overlay(self, image, results):
|
| 146 |
+
"""Create overlay visualization"""
|
| 147 |
+
overlay = image.copy()
|
| 148 |
+
|
| 149 |
+
# Draw bounding boxes and labels
|
| 150 |
+
for result in results:
|
| 151 |
+
bbox = result['bounding_box']
|
| 152 |
+
x, y, w, h = bbox
|
| 153 |
+
|
| 154 |
+
forgery_type = result['forgery_type']
|
| 155 |
+
confidence = result['confidence']
|
| 156 |
+
|
| 157 |
+
# Get color
|
| 158 |
+
forgery_id = [k for k, v in CLASS_NAMES.items() if v == forgery_type][0]
|
| 159 |
+
color = CLASS_COLORS[forgery_id]
|
| 160 |
+
|
| 161 |
+
# Draw rectangle
|
| 162 |
+
cv2.rectangle(overlay, (x, y), (x+w, y+h), color, 2)
|
| 163 |
+
|
| 164 |
+
# Draw label
|
| 165 |
+
label = f"{forgery_type}: {confidence:.1%}"
|
| 166 |
+
label_size, _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
|
| 167 |
+
cv2.rectangle(overlay, (x, y-label_size[1]-10), (x+label_size[0], y), color, -1)
|
| 168 |
+
cv2.putText(overlay, label, (x, y-5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
|
| 169 |
+
|
| 170 |
+
# Add legend
|
| 171 |
+
if len(results) > 0:
|
| 172 |
+
legend_y = 30
|
| 173 |
+
cv2.putText(overlay, f"Detected {len(results)} forgery region(s)",
|
| 174 |
+
(10, legend_y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)
|
| 175 |
+
|
| 176 |
+
return overlay
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
# Initialize detector
|
| 180 |
+
detector = ForgeryDetector()
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def detect_forgery(image):
|
| 184 |
+
"""Gradio interface function"""
|
| 185 |
+
try:
|
| 186 |
+
overlay, results = detector.detect(image)
|
| 187 |
+
return overlay, json.dumps(results, indent=2)
|
| 188 |
+
except Exception as e:
|
| 189 |
+
return None, f"Error: {str(e)}"
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
# Create Gradio interface
|
| 193 |
+
demo = gr.Interface(
|
| 194 |
+
fn=detect_forgery,
|
| 195 |
+
inputs=gr.Image(type="pil", label="Upload Document Image"),
|
| 196 |
+
outputs=[
|
| 197 |
+
gr.Image(type="numpy", label="Detection Result"),
|
| 198 |
+
gr.JSON(label="Detection Details")
|
| 199 |
+
],
|
| 200 |
+
title="📄 Document Forgery Detector",
|
| 201 |
+
description="""
|
| 202 |
+
Upload a document image to detect and classify forgeries.
|
| 203 |
+
|
| 204 |
+
**Supported Forgery Types:**
|
| 205 |
+
- 🔴 Copy-Move: Duplicated regions within the document
|
| 206 |
+
- 🟢 Splicing: Content from different sources
|
| 207 |
+
- 🔵 Generation: AI-generated or synthesized content
|
| 208 |
+
|
| 209 |
+
**Model Performance:**
|
| 210 |
+
- Localization: 75% Dice Score
|
| 211 |
+
- Classification: 92% Accuracy
|
| 212 |
+
""",
|
| 213 |
+
examples=[
|
| 214 |
+
["examples/sample1.jpg"],
|
| 215 |
+
["examples/sample2.jpg"],
|
| 216 |
+
],
|
| 217 |
+
article="""
|
| 218 |
+
### About
|
| 219 |
+
This model uses a hybrid deep learning approach:
|
| 220 |
+
1. **Localization**: MobileNetV3-Small + UNet-Lite (detects WHERE)
|
| 221 |
+
2. **Classification**: LightGBM with hybrid features (detects WHAT)
|
| 222 |
+
|
| 223 |
+
Trained on DocTamper dataset (140K samples).
|
| 224 |
+
""",
|
| 225 |
+
theme=gr.themes.Soft(),
|
| 226 |
+
allow_flagging="never"
|
| 227 |
+
)
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
if __name__ == "__main__":
|
| 231 |
+
demo.launch()
|
config.yaml
ADDED
|
@@ -0,0 +1,297 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Hybrid Document Forgery Detection - Configuration
|
| 2 |
+
|
| 3 |
+
# System Settings
|
| 4 |
+
system:
|
| 5 |
+
device: cuda # cuda or cpu
|
| 6 |
+
num_workers: 0 # Reduced to avoid multiprocessing errors
|
| 7 |
+
pin_memory: true
|
| 8 |
+
seed: 42
|
| 9 |
+
|
| 10 |
+
# Data Settings
|
| 11 |
+
data:
|
| 12 |
+
image_size: 384
|
| 13 |
+
batch_size: 8 # Reduced for 16GB RAM
|
| 14 |
+
num_classes: 3 # copy_move, splicing, text_substitution
|
| 15 |
+
|
| 16 |
+
# Dataset paths
|
| 17 |
+
datasets:
|
| 18 |
+
doctamper:
|
| 19 |
+
path: datasets/DocTamper
|
| 20 |
+
type: lmdb
|
| 21 |
+
has_pixel_mask: true
|
| 22 |
+
min_region_area: 0.001 # 0.1%
|
| 23 |
+
|
| 24 |
+
rtm:
|
| 25 |
+
path: datasets/RealTextManipulation
|
| 26 |
+
type: folder
|
| 27 |
+
has_pixel_mask: true
|
| 28 |
+
min_region_area: 0.0003 # 0.03%
|
| 29 |
+
|
| 30 |
+
casia:
|
| 31 |
+
path: datasets/CASIA 1.0 dataset
|
| 32 |
+
type: folder
|
| 33 |
+
has_pixel_mask: false
|
| 34 |
+
min_region_area: 0.001 # 0.1%
|
| 35 |
+
skip_deskew: true
|
| 36 |
+
skip_denoising: true
|
| 37 |
+
|
| 38 |
+
receipts:
|
| 39 |
+
path: datasets/findit2
|
| 40 |
+
type: folder
|
| 41 |
+
has_pixel_mask: true
|
| 42 |
+
min_region_area: 0.0005 # 0.05%
|
| 43 |
+
|
| 44 |
+
fcd:
|
| 45 |
+
path: datasets/DocTamper/DocTamperV1-FCD
|
| 46 |
+
type: lmdb
|
| 47 |
+
has_pixel_mask: true
|
| 48 |
+
min_region_area: 0.00035 # 0.035% (larger forgeries, keep 99%)
|
| 49 |
+
|
| 50 |
+
scd:
|
| 51 |
+
path: datasets/DocTamper/DocTamperV1-SCD
|
| 52 |
+
type: lmdb
|
| 53 |
+
has_pixel_mask: true
|
| 54 |
+
min_region_area: 0.00009 # 0.009% (small forgeries, keep 91.5%)
|
| 55 |
+
|
| 56 |
+
# Chunked training for DocTamper (RAM constraint)
|
| 57 |
+
chunked_training:
|
| 58 |
+
enabled: true
|
| 59 |
+
dataset: doctamper
|
| 60 |
+
chunks:
|
| 61 |
+
- {start: 0.0, end: 0.25, name: "chunk_1"}
|
| 62 |
+
- {start: 0.25, end: 0.5, name: "chunk_2"}
|
| 63 |
+
- {start: 0.5, end: 0.75, name: "chunk_3"}
|
| 64 |
+
- {start: 0.75, end: 1.0, name: "chunk_4"}
|
| 65 |
+
|
| 66 |
+
# Mixed dataset training (TrainingSet + FCD + SCD)
|
| 67 |
+
mixing_ratios:
|
| 68 |
+
doctamper: 0.70 # 70% TrainingSet (maintains baseline)
|
| 69 |
+
scd: 0.20 # 20% SCD (handles small forgeries, 0.88% avg)
|
| 70 |
+
fcd: 0.10 # 10% FCD (adds diversity, 3.55% avg)
|
| 71 |
+
|
| 72 |
+
# Preprocessing
|
| 73 |
+
preprocessing:
|
| 74 |
+
deskew: true
|
| 75 |
+
normalize: true
|
| 76 |
+
noise_threshold: 15.0 # Laplacian variance threshold
|
| 77 |
+
median_filter_size: 3
|
| 78 |
+
gaussian_sigma: 0.8
|
| 79 |
+
|
| 80 |
+
# Dataset-aware preprocessing
|
| 81 |
+
dataset_specific:
|
| 82 |
+
casia:
|
| 83 |
+
deskew: false
|
| 84 |
+
denoising: false
|
| 85 |
+
|
| 86 |
+
# Augmentation (Training only)
|
| 87 |
+
augmentation:
|
| 88 |
+
enabled: true
|
| 89 |
+
|
| 90 |
+
# Common augmentations
|
| 91 |
+
common:
|
| 92 |
+
- {type: "noise", prob: 0.3}
|
| 93 |
+
- {type: "motion_blur", prob: 0.2}
|
| 94 |
+
- {type: "jpeg_compression", prob: 0.3, quality: [60, 95]}
|
| 95 |
+
- {type: "lighting", prob: 0.3}
|
| 96 |
+
- {type: "perspective", prob: 0.2}
|
| 97 |
+
|
| 98 |
+
# Dataset-specific augmentations
|
| 99 |
+
receipts:
|
| 100 |
+
- {type: "stain", prob: 0.2}
|
| 101 |
+
- {type: "fold", prob: 0.15}
|
| 102 |
+
|
| 103 |
+
# Model Architecture
|
| 104 |
+
model:
|
| 105 |
+
# Encoder
|
| 106 |
+
encoder:
|
| 107 |
+
name: mobilenetv3_small_100
|
| 108 |
+
pretrained: true
|
| 109 |
+
features_only: true
|
| 110 |
+
|
| 111 |
+
# Decoder
|
| 112 |
+
decoder:
|
| 113 |
+
name: unet_lite
|
| 114 |
+
channels: [16, 24, 40, 48, 96] # MobileNetV3-Small feature channels
|
| 115 |
+
upsampling: bilinear
|
| 116 |
+
use_depthwise_separable: true
|
| 117 |
+
|
| 118 |
+
# Output
|
| 119 |
+
output_channels: 1 # Binary forgery mask
|
| 120 |
+
|
| 121 |
+
# Loss Function
|
| 122 |
+
loss:
|
| 123 |
+
# Dataset-aware loss
|
| 124 |
+
use_dice: true # Only for datasets with pixel masks
|
| 125 |
+
bce_weight: 1.0
|
| 126 |
+
dice_weight: 1.0
|
| 127 |
+
|
| 128 |
+
# Training
|
| 129 |
+
training:
|
| 130 |
+
epochs: 30 # Per chunk (increased for single-pass training)
|
| 131 |
+
learning_rate: 0.001 # Higher initial LR for faster convergence
|
| 132 |
+
weight_decay: 0.0001 # Slight increase for better regularization
|
| 133 |
+
|
| 134 |
+
# Optimizer
|
| 135 |
+
optimizer: adamw
|
| 136 |
+
|
| 137 |
+
# Scheduler
|
| 138 |
+
scheduler:
|
| 139 |
+
type: cosine_annealing_warm_restarts
|
| 140 |
+
T_0: 10 # Restart every 10 epochs
|
| 141 |
+
T_mult: 2 # Double restart period each time
|
| 142 |
+
warmup_epochs: 3 # Warmup for first 3 epochs
|
| 143 |
+
min_lr: 0.00001 # End at 1/100th of initial LR
|
| 144 |
+
|
| 145 |
+
# Early stopping
|
| 146 |
+
early_stopping:
|
| 147 |
+
enabled: true
|
| 148 |
+
patience: 10 # Increased to allow more exploration
|
| 149 |
+
min_delta: 0.0005 # Accept smaller improvements (0.05%)
|
| 150 |
+
restore_best_weights: true # Restore best model when stopping
|
| 151 |
+
monitor: val_dice
|
| 152 |
+
mode: max
|
| 153 |
+
|
| 154 |
+
# Checkpointing
|
| 155 |
+
checkpoint:
|
| 156 |
+
save_best: true
|
| 157 |
+
save_every: 5 # Save every 5 epochs
|
| 158 |
+
save_last: true # Also save last checkpoint
|
| 159 |
+
monitor: val_dice
|
| 160 |
+
|
| 161 |
+
# Mask Refinement
|
| 162 |
+
mask_refinement:
|
| 163 |
+
threshold: 0.5
|
| 164 |
+
morphology:
|
| 165 |
+
closing_kernel: 5
|
| 166 |
+
opening_kernel: 3
|
| 167 |
+
|
| 168 |
+
# Adaptive thresholds per dataset
|
| 169 |
+
min_region_area:
|
| 170 |
+
rtm: 0.0003
|
| 171 |
+
receipts: 0.0005
|
| 172 |
+
default: 0.001
|
| 173 |
+
|
| 174 |
+
# Feature Extraction
|
| 175 |
+
features:
|
| 176 |
+
# Deep features
|
| 177 |
+
deep:
|
| 178 |
+
enabled: true
|
| 179 |
+
pooling: gap # Global Average Pooling
|
| 180 |
+
|
| 181 |
+
# Statistical & Shape features
|
| 182 |
+
statistical:
|
| 183 |
+
enabled: true
|
| 184 |
+
features:
|
| 185 |
+
- area
|
| 186 |
+
- perimeter
|
| 187 |
+
- aspect_ratio
|
| 188 |
+
- solidity
|
| 189 |
+
- eccentricity
|
| 190 |
+
- entropy
|
| 191 |
+
|
| 192 |
+
# Frequency-domain features
|
| 193 |
+
frequency:
|
| 194 |
+
enabled: true
|
| 195 |
+
features:
|
| 196 |
+
- dct_coefficients
|
| 197 |
+
- high_frequency_energy
|
| 198 |
+
- wavelet_energy
|
| 199 |
+
|
| 200 |
+
# Noise & ELA features
|
| 201 |
+
noise:
|
| 202 |
+
enabled: true
|
| 203 |
+
features:
|
| 204 |
+
- ela_mean
|
| 205 |
+
- ela_variance
|
| 206 |
+
- noise_residual
|
| 207 |
+
|
| 208 |
+
# OCR-consistency features (text documents only)
|
| 209 |
+
ocr:
|
| 210 |
+
enabled: true
|
| 211 |
+
gated: true # Only for text documents
|
| 212 |
+
features:
|
| 213 |
+
- confidence_deviation
|
| 214 |
+
- spacing_irregularity
|
| 215 |
+
- stroke_width_variation
|
| 216 |
+
|
| 217 |
+
# Feature normalization
|
| 218 |
+
normalization:
|
| 219 |
+
method: standard_scaler
|
| 220 |
+
handle_missing: true
|
| 221 |
+
|
| 222 |
+
# LightGBM Classifier
|
| 223 |
+
classifier:
|
| 224 |
+
model: lightgbm
|
| 225 |
+
params:
|
| 226 |
+
objective: multiclass
|
| 227 |
+
num_class: 3
|
| 228 |
+
boosting_type: gbdt
|
| 229 |
+
num_leaves: 31
|
| 230 |
+
learning_rate: 0.05
|
| 231 |
+
n_estimators: 200
|
| 232 |
+
max_depth: 7
|
| 233 |
+
min_child_samples: 20
|
| 234 |
+
subsample: 0.8
|
| 235 |
+
colsample_bytree: 0.8
|
| 236 |
+
reg_alpha: 0.1
|
| 237 |
+
reg_lambda: 0.1
|
| 238 |
+
random_state: 42
|
| 239 |
+
|
| 240 |
+
# Confidence threshold
|
| 241 |
+
confidence_threshold: 0.6
|
| 242 |
+
|
| 243 |
+
# Metrics
|
| 244 |
+
metrics:
|
| 245 |
+
# Localization metrics (only for datasets with pixel masks)
|
| 246 |
+
localization:
|
| 247 |
+
- iou
|
| 248 |
+
- dice
|
| 249 |
+
- precision
|
| 250 |
+
- recall
|
| 251 |
+
|
| 252 |
+
# Classification metrics
|
| 253 |
+
classification:
|
| 254 |
+
- accuracy
|
| 255 |
+
- f1_score
|
| 256 |
+
- precision
|
| 257 |
+
- recall
|
| 258 |
+
- confusion_matrix
|
| 259 |
+
|
| 260 |
+
# Dataset-aware metric computation
|
| 261 |
+
compute_localization:
|
| 262 |
+
doctamper: true
|
| 263 |
+
rtm: true
|
| 264 |
+
casia: false
|
| 265 |
+
receipts: true
|
| 266 |
+
|
| 267 |
+
# Outputs
|
| 268 |
+
outputs:
|
| 269 |
+
base_dir: outputs
|
| 270 |
+
|
| 271 |
+
# Subdirectories
|
| 272 |
+
checkpoints: outputs/checkpoints
|
| 273 |
+
logs: outputs/logs
|
| 274 |
+
plots: outputs/plots
|
| 275 |
+
results: outputs/results
|
| 276 |
+
|
| 277 |
+
# Visualization
|
| 278 |
+
visualization:
|
| 279 |
+
save_mask: true
|
| 280 |
+
save_overlay: true
|
| 281 |
+
save_json: true
|
| 282 |
+
overlay_alpha: 0.5
|
| 283 |
+
colormap: jet
|
| 284 |
+
|
| 285 |
+
# Deployment
|
| 286 |
+
deployment:
|
| 287 |
+
export_onnx: true
|
| 288 |
+
onnx_path: outputs/model.onnx
|
| 289 |
+
quantization: false
|
| 290 |
+
opset_version: 14
|
| 291 |
+
|
| 292 |
+
# Logging
|
| 293 |
+
logging:
|
| 294 |
+
level: INFO
|
| 295 |
+
tensorboard: true
|
| 296 |
+
csv: true
|
| 297 |
+
console: true
|
models/best_doctamper.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d049ca9d4dc28c8d01519f8faab1ec131a05de877da9703ee5bb0e9322095ad2
|
| 3 |
+
size 14283981
|
models/classifier/classifier_metadata.json
ADDED
|
@@ -0,0 +1,821 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"confidence_threshold": 0.6,
|
| 3 |
+
"class_names": [
|
| 4 |
+
"copy_move",
|
| 5 |
+
"splicing",
|
| 6 |
+
"text_substitution"
|
| 7 |
+
],
|
| 8 |
+
"feature_names": [
|
| 9 |
+
"deep_0",
|
| 10 |
+
"deep_1",
|
| 11 |
+
"deep_2",
|
| 12 |
+
"deep_3",
|
| 13 |
+
"deep_4",
|
| 14 |
+
"deep_5",
|
| 15 |
+
"deep_6",
|
| 16 |
+
"deep_7",
|
| 17 |
+
"deep_8",
|
| 18 |
+
"deep_9",
|
| 19 |
+
"deep_10",
|
| 20 |
+
"deep_11",
|
| 21 |
+
"deep_12",
|
| 22 |
+
"deep_13",
|
| 23 |
+
"deep_14",
|
| 24 |
+
"deep_15",
|
| 25 |
+
"deep_16",
|
| 26 |
+
"deep_17",
|
| 27 |
+
"deep_18",
|
| 28 |
+
"deep_19",
|
| 29 |
+
"deep_20",
|
| 30 |
+
"deep_21",
|
| 31 |
+
"deep_22",
|
| 32 |
+
"deep_23",
|
| 33 |
+
"deep_24",
|
| 34 |
+
"deep_25",
|
| 35 |
+
"deep_26",
|
| 36 |
+
"deep_27",
|
| 37 |
+
"deep_28",
|
| 38 |
+
"deep_29",
|
| 39 |
+
"deep_30",
|
| 40 |
+
"deep_31",
|
| 41 |
+
"deep_32",
|
| 42 |
+
"deep_33",
|
| 43 |
+
"deep_34",
|
| 44 |
+
"deep_35",
|
| 45 |
+
"deep_36",
|
| 46 |
+
"deep_37",
|
| 47 |
+
"deep_38",
|
| 48 |
+
"deep_39",
|
| 49 |
+
"deep_40",
|
| 50 |
+
"deep_41",
|
| 51 |
+
"deep_42",
|
| 52 |
+
"deep_43",
|
| 53 |
+
"deep_44",
|
| 54 |
+
"deep_45",
|
| 55 |
+
"deep_46",
|
| 56 |
+
"deep_47",
|
| 57 |
+
"deep_48",
|
| 58 |
+
"deep_49",
|
| 59 |
+
"deep_50",
|
| 60 |
+
"deep_51",
|
| 61 |
+
"deep_52",
|
| 62 |
+
"deep_53",
|
| 63 |
+
"deep_54",
|
| 64 |
+
"deep_55",
|
| 65 |
+
"deep_56",
|
| 66 |
+
"deep_57",
|
| 67 |
+
"deep_58",
|
| 68 |
+
"deep_59",
|
| 69 |
+
"deep_60",
|
| 70 |
+
"deep_61",
|
| 71 |
+
"deep_62",
|
| 72 |
+
"deep_63",
|
| 73 |
+
"deep_64",
|
| 74 |
+
"deep_65",
|
| 75 |
+
"deep_66",
|
| 76 |
+
"deep_67",
|
| 77 |
+
"deep_68",
|
| 78 |
+
"deep_69",
|
| 79 |
+
"deep_70",
|
| 80 |
+
"deep_71",
|
| 81 |
+
"deep_72",
|
| 82 |
+
"deep_73",
|
| 83 |
+
"deep_74",
|
| 84 |
+
"deep_75",
|
| 85 |
+
"deep_76",
|
| 86 |
+
"deep_77",
|
| 87 |
+
"deep_78",
|
| 88 |
+
"deep_79",
|
| 89 |
+
"deep_80",
|
| 90 |
+
"deep_81",
|
| 91 |
+
"deep_82",
|
| 92 |
+
"deep_83",
|
| 93 |
+
"deep_84",
|
| 94 |
+
"deep_85",
|
| 95 |
+
"deep_86",
|
| 96 |
+
"deep_87",
|
| 97 |
+
"deep_88",
|
| 98 |
+
"deep_89",
|
| 99 |
+
"deep_90",
|
| 100 |
+
"deep_91",
|
| 101 |
+
"deep_92",
|
| 102 |
+
"deep_93",
|
| 103 |
+
"deep_94",
|
| 104 |
+
"deep_95",
|
| 105 |
+
"deep_96",
|
| 106 |
+
"deep_97",
|
| 107 |
+
"deep_98",
|
| 108 |
+
"deep_99",
|
| 109 |
+
"deep_100",
|
| 110 |
+
"deep_101",
|
| 111 |
+
"deep_102",
|
| 112 |
+
"deep_103",
|
| 113 |
+
"deep_104",
|
| 114 |
+
"deep_105",
|
| 115 |
+
"deep_106",
|
| 116 |
+
"deep_107",
|
| 117 |
+
"deep_108",
|
| 118 |
+
"deep_109",
|
| 119 |
+
"deep_110",
|
| 120 |
+
"deep_111",
|
| 121 |
+
"deep_112",
|
| 122 |
+
"deep_113",
|
| 123 |
+
"deep_114",
|
| 124 |
+
"deep_115",
|
| 125 |
+
"deep_116",
|
| 126 |
+
"deep_117",
|
| 127 |
+
"deep_118",
|
| 128 |
+
"deep_119",
|
| 129 |
+
"deep_120",
|
| 130 |
+
"deep_121",
|
| 131 |
+
"deep_122",
|
| 132 |
+
"deep_123",
|
| 133 |
+
"deep_124",
|
| 134 |
+
"deep_125",
|
| 135 |
+
"deep_126",
|
| 136 |
+
"deep_127",
|
| 137 |
+
"deep_128",
|
| 138 |
+
"deep_129",
|
| 139 |
+
"deep_130",
|
| 140 |
+
"deep_131",
|
| 141 |
+
"deep_132",
|
| 142 |
+
"deep_133",
|
| 143 |
+
"deep_134",
|
| 144 |
+
"deep_135",
|
| 145 |
+
"deep_136",
|
| 146 |
+
"deep_137",
|
| 147 |
+
"deep_138",
|
| 148 |
+
"deep_139",
|
| 149 |
+
"deep_140",
|
| 150 |
+
"deep_141",
|
| 151 |
+
"deep_142",
|
| 152 |
+
"deep_143",
|
| 153 |
+
"deep_144",
|
| 154 |
+
"deep_145",
|
| 155 |
+
"deep_146",
|
| 156 |
+
"deep_147",
|
| 157 |
+
"deep_148",
|
| 158 |
+
"deep_149",
|
| 159 |
+
"deep_150",
|
| 160 |
+
"deep_151",
|
| 161 |
+
"deep_152",
|
| 162 |
+
"deep_153",
|
| 163 |
+
"deep_154",
|
| 164 |
+
"deep_155",
|
| 165 |
+
"deep_156",
|
| 166 |
+
"deep_157",
|
| 167 |
+
"deep_158",
|
| 168 |
+
"deep_159",
|
| 169 |
+
"deep_160",
|
| 170 |
+
"deep_161",
|
| 171 |
+
"deep_162",
|
| 172 |
+
"deep_163",
|
| 173 |
+
"deep_164",
|
| 174 |
+
"deep_165",
|
| 175 |
+
"deep_166",
|
| 176 |
+
"deep_167",
|
| 177 |
+
"deep_168",
|
| 178 |
+
"deep_169",
|
| 179 |
+
"deep_170",
|
| 180 |
+
"deep_171",
|
| 181 |
+
"deep_172",
|
| 182 |
+
"deep_173",
|
| 183 |
+
"deep_174",
|
| 184 |
+
"deep_175",
|
| 185 |
+
"deep_176",
|
| 186 |
+
"deep_177",
|
| 187 |
+
"deep_178",
|
| 188 |
+
"deep_179",
|
| 189 |
+
"deep_180",
|
| 190 |
+
"deep_181",
|
| 191 |
+
"deep_182",
|
| 192 |
+
"deep_183",
|
| 193 |
+
"deep_184",
|
| 194 |
+
"deep_185",
|
| 195 |
+
"deep_186",
|
| 196 |
+
"deep_187",
|
| 197 |
+
"deep_188",
|
| 198 |
+
"deep_189",
|
| 199 |
+
"deep_190",
|
| 200 |
+
"deep_191",
|
| 201 |
+
"deep_192",
|
| 202 |
+
"deep_193",
|
| 203 |
+
"deep_194",
|
| 204 |
+
"deep_195",
|
| 205 |
+
"deep_196",
|
| 206 |
+
"deep_197",
|
| 207 |
+
"deep_198",
|
| 208 |
+
"deep_199",
|
| 209 |
+
"deep_200",
|
| 210 |
+
"deep_201",
|
| 211 |
+
"deep_202",
|
| 212 |
+
"deep_203",
|
| 213 |
+
"deep_204",
|
| 214 |
+
"deep_205",
|
| 215 |
+
"deep_206",
|
| 216 |
+
"deep_207",
|
| 217 |
+
"deep_208",
|
| 218 |
+
"deep_209",
|
| 219 |
+
"deep_210",
|
| 220 |
+
"deep_211",
|
| 221 |
+
"deep_212",
|
| 222 |
+
"deep_213",
|
| 223 |
+
"deep_214",
|
| 224 |
+
"deep_215",
|
| 225 |
+
"deep_216",
|
| 226 |
+
"deep_217",
|
| 227 |
+
"deep_218",
|
| 228 |
+
"deep_219",
|
| 229 |
+
"deep_220",
|
| 230 |
+
"deep_221",
|
| 231 |
+
"deep_222",
|
| 232 |
+
"deep_223",
|
| 233 |
+
"deep_224",
|
| 234 |
+
"deep_225",
|
| 235 |
+
"deep_226",
|
| 236 |
+
"deep_227",
|
| 237 |
+
"deep_228",
|
| 238 |
+
"deep_229",
|
| 239 |
+
"deep_230",
|
| 240 |
+
"deep_231",
|
| 241 |
+
"deep_232",
|
| 242 |
+
"deep_233",
|
| 243 |
+
"deep_234",
|
| 244 |
+
"deep_235",
|
| 245 |
+
"deep_236",
|
| 246 |
+
"deep_237",
|
| 247 |
+
"deep_238",
|
| 248 |
+
"deep_239",
|
| 249 |
+
"deep_240",
|
| 250 |
+
"deep_241",
|
| 251 |
+
"deep_242",
|
| 252 |
+
"deep_243",
|
| 253 |
+
"deep_244",
|
| 254 |
+
"deep_245",
|
| 255 |
+
"deep_246",
|
| 256 |
+
"deep_247",
|
| 257 |
+
"deep_248",
|
| 258 |
+
"deep_249",
|
| 259 |
+
"deep_250",
|
| 260 |
+
"deep_251",
|
| 261 |
+
"deep_252",
|
| 262 |
+
"deep_253",
|
| 263 |
+
"deep_254",
|
| 264 |
+
"deep_255",
|
| 265 |
+
"area",
|
| 266 |
+
"perimeter",
|
| 267 |
+
"aspect_ratio",
|
| 268 |
+
"solidity",
|
| 269 |
+
"eccentricity",
|
| 270 |
+
"entropy",
|
| 271 |
+
"dct_mean",
|
| 272 |
+
"dct_std",
|
| 273 |
+
"high_freq_energy",
|
| 274 |
+
"wavelet_cA",
|
| 275 |
+
"wavelet_cH",
|
| 276 |
+
"wavelet_cV",
|
| 277 |
+
"wavelet_cD",
|
| 278 |
+
"wavelet_entropy_H",
|
| 279 |
+
"wavelet_entropy_V",
|
| 280 |
+
"wavelet_entropy_D",
|
| 281 |
+
"ela_mean",
|
| 282 |
+
"ela_var",
|
| 283 |
+
"ela_max",
|
| 284 |
+
"noise_residual_mean",
|
| 285 |
+
"noise_residual_var",
|
| 286 |
+
"ocr_conf_mean",
|
| 287 |
+
"ocr_conf_std",
|
| 288 |
+
"spacing_irregularity",
|
| 289 |
+
"text_density",
|
| 290 |
+
"stroke_mean",
|
| 291 |
+
"stroke_std"
|
| 292 |
+
],
|
| 293 |
+
"feature_importance": [
|
| 294 |
+
151.5697784423828,
|
| 295 |
+
8.955550193786621,
|
| 296 |
+
32.9064998626709,
|
| 297 |
+
151.0029697418213,
|
| 298 |
+
19.174699783325195,
|
| 299 |
+
157.97871017456055,
|
| 300 |
+
45.12229919433594,
|
| 301 |
+
19.72992992401123,
|
| 302 |
+
105.08611106872559,
|
| 303 |
+
0.0,
|
| 304 |
+
148.97894096374512,
|
| 305 |
+
35.71831035614014,
|
| 306 |
+
50.15155029296875,
|
| 307 |
+
71.74272060394287,
|
| 308 |
+
43.958970069885254,
|
| 309 |
+
129.9348111152649,
|
| 310 |
+
27.99122953414917,
|
| 311 |
+
61.592909812927246,
|
| 312 |
+
295.4245676994324,
|
| 313 |
+
61.00736045837402,
|
| 314 |
+
28.548550128936768,
|
| 315 |
+
0.0,
|
| 316 |
+
54.50248908996582,
|
| 317 |
+
93.74169921875,
|
| 318 |
+
120.9488091468811,
|
| 319 |
+
148.32109832763672,
|
| 320 |
+
30.55735969543457,
|
| 321 |
+
59.058170318603516,
|
| 322 |
+
82.7595911026001,
|
| 323 |
+
49.24997901916504,
|
| 324 |
+
0.0,
|
| 325 |
+
23.502280235290527,
|
| 326 |
+
392.399715423584,
|
| 327 |
+
551.6174192428589,
|
| 328 |
+
0.0,
|
| 329 |
+
50.8812894821167,
|
| 330 |
+
60.7820405960083,
|
| 331 |
+
78.98891925811768,
|
| 332 |
+
0.0,
|
| 333 |
+
9.173580169677734,
|
| 334 |
+
631.6932668685913,
|
| 335 |
+
42.097740173339844,
|
| 336 |
+
305.0536642074585,
|
| 337 |
+
416.94709300994873,
|
| 338 |
+
92.70171976089478,
|
| 339 |
+
66.76712036132812,
|
| 340 |
+
1435.1315097808838,
|
| 341 |
+
0.0,
|
| 342 |
+
126.6096019744873,
|
| 343 |
+
111.61981964111328,
|
| 344 |
+
124.68002033233643,
|
| 345 |
+
46.16030025482178,
|
| 346 |
+
12.660099983215332,
|
| 347 |
+
115.48313999176025,
|
| 348 |
+
86.43069076538086,
|
| 349 |
+
16.674290657043457,
|
| 350 |
+
110.49228954315186,
|
| 351 |
+
0.0,
|
| 352 |
+
98.00746059417725,
|
| 353 |
+
98.95538091659546,
|
| 354 |
+
41.432090759277344,
|
| 355 |
+
11.24590015411377,
|
| 356 |
+
65.1699800491333,
|
| 357 |
+
9.251449584960938,
|
| 358 |
+
100.24416923522949,
|
| 359 |
+
109.5842399597168,
|
| 360 |
+
83.83185005187988,
|
| 361 |
+
196.82151079177856,
|
| 362 |
+
0.0,
|
| 363 |
+
455.4096431732178,
|
| 364 |
+
120.69411087036133,
|
| 365 |
+
23.130990028381348,
|
| 366 |
+
18.21858024597168,
|
| 367 |
+
69.65920066833496,
|
| 368 |
+
82.33455085754395,
|
| 369 |
+
0.0,
|
| 370 |
+
82.21379089355469,
|
| 371 |
+
119.78182220458984,
|
| 372 |
+
65.07565069198608,
|
| 373 |
+
53.62262964248657,
|
| 374 |
+
247.53085803985596,
|
| 375 |
+
144.45191097259521,
|
| 376 |
+
38.63272047042847,
|
| 377 |
+
82.24878883361816,
|
| 378 |
+
60.303489685058594,
|
| 379 |
+
8.717499732971191,
|
| 380 |
+
412.6672077178955,
|
| 381 |
+
54.25755023956299,
|
| 382 |
+
0.0,
|
| 383 |
+
23.141600608825684,
|
| 384 |
+
62.88635063171387,
|
| 385 |
+
144.1060814857483,
|
| 386 |
+
352.47050952911377,
|
| 387 |
+
23.701799392700195,
|
| 388 |
+
180.19217205047607,
|
| 389 |
+
74.43132972717285,
|
| 390 |
+
0.0,
|
| 391 |
+
92.36961936950684,
|
| 392 |
+
418.40467262268066,
|
| 393 |
+
163.96015119552612,
|
| 394 |
+
136.4917197227478,
|
| 395 |
+
8.362039566040039,
|
| 396 |
+
10.378399848937988,
|
| 397 |
+
30.465800285339355,
|
| 398 |
+
47.935009479522705,
|
| 399 |
+
28.957390308380127,
|
| 400 |
+
61.46374034881592,
|
| 401 |
+
11.319199562072754,
|
| 402 |
+
142.72890949249268,
|
| 403 |
+
0.0,
|
| 404 |
+
140.48277807235718,
|
| 405 |
+
59.3709602355957,
|
| 406 |
+
9.517510414123535,
|
| 407 |
+
22.945700645446777,
|
| 408 |
+
85.35987043380737,
|
| 409 |
+
25.964330196380615,
|
| 410 |
+
18.778900146484375,
|
| 411 |
+
79.01968955993652,
|
| 412 |
+
74.93959999084473,
|
| 413 |
+
0.0,
|
| 414 |
+
36.94928026199341,
|
| 415 |
+
47.99788188934326,
|
| 416 |
+
84.99461078643799,
|
| 417 |
+
65.24014949798584,
|
| 418 |
+
128.61994075775146,
|
| 419 |
+
71.96449947357178,
|
| 420 |
+
0.0,
|
| 421 |
+
60.59358024597168,
|
| 422 |
+
0.0,
|
| 423 |
+
144.41107177734375,
|
| 424 |
+
119.25859117507935,
|
| 425 |
+
0.0,
|
| 426 |
+
29.235299110412598,
|
| 427 |
+
75.50409030914307,
|
| 428 |
+
0.0,
|
| 429 |
+
0.0,
|
| 430 |
+
133.30608654022217,
|
| 431 |
+
50.813700675964355,
|
| 432 |
+
7.879730224609375,
|
| 433 |
+
80.23723936080933,
|
| 434 |
+
28.72357988357544,
|
| 435 |
+
85.63543939590454,
|
| 436 |
+
88.70749998092651,
|
| 437 |
+
0.0,
|
| 438 |
+
38.14083003997803,
|
| 439 |
+
10.110199928283691,
|
| 440 |
+
223.45562982559204,
|
| 441 |
+
0.0,
|
| 442 |
+
189.3048586845398,
|
| 443 |
+
11.311699867248535,
|
| 444 |
+
87.91403198242188,
|
| 445 |
+
45.88195037841797,
|
| 446 |
+
57.93142032623291,
|
| 447 |
+
621.7998056411743,
|
| 448 |
+
151.6710205078125,
|
| 449 |
+
55.90662956237793,
|
| 450 |
+
310.18284845352173,
|
| 451 |
+
0.0,
|
| 452 |
+
37.39265060424805,
|
| 453 |
+
142.64961051940918,
|
| 454 |
+
86.32072973251343,
|
| 455 |
+
167.73473930358887,
|
| 456 |
+
135.1251916885376,
|
| 457 |
+
67.87245082855225,
|
| 458 |
+
25.777999877929688,
|
| 459 |
+
82.70090961456299,
|
| 460 |
+
160.77113008499146,
|
| 461 |
+
0.0,
|
| 462 |
+
109.31087112426758,
|
| 463 |
+
36.81955051422119,
|
| 464 |
+
21.341699600219727,
|
| 465 |
+
39.508570194244385,
|
| 466 |
+
0.0,
|
| 467 |
+
12.186599731445312,
|
| 468 |
+
52.13583946228027,
|
| 469 |
+
242.86930990219116,
|
| 470 |
+
0.0,
|
| 471 |
+
27.03380012512207,
|
| 472 |
+
11.51550006866455,
|
| 473 |
+
102.65280055999756,
|
| 474 |
+
8.523859977722168,
|
| 475 |
+
105.87909126281738,
|
| 476 |
+
0.0,
|
| 477 |
+
191.5287847518921,
|
| 478 |
+
16.16029930114746,
|
| 479 |
+
43.0986704826355,
|
| 480 |
+
0.0,
|
| 481 |
+
54.736299991607666,
|
| 482 |
+
145.84991836547852,
|
| 483 |
+
62.068660736083984,
|
| 484 |
+
72.52587032318115,
|
| 485 |
+
81.85652828216553,
|
| 486 |
+
25.7001895904541,
|
| 487 |
+
36.71660041809082,
|
| 488 |
+
78.73716068267822,
|
| 489 |
+
145.95945167541504,
|
| 490 |
+
146.47522068023682,
|
| 491 |
+
23.559300422668457,
|
| 492 |
+
39.53977966308594,
|
| 493 |
+
194.42743015289307,
|
| 494 |
+
66.81133842468262,
|
| 495 |
+
0.0,
|
| 496 |
+
156.6984510421753,
|
| 497 |
+
671.7460441589355,
|
| 498 |
+
38.70531988143921,
|
| 499 |
+
0.0,
|
| 500 |
+
356.6153998374939,
|
| 501 |
+
0.0,
|
| 502 |
+
0.0,
|
| 503 |
+
166.1197419166565,
|
| 504 |
+
0.0,
|
| 505 |
+
73.76784992218018,
|
| 506 |
+
82.50808954238892,
|
| 507 |
+
249.50656414031982,
|
| 508 |
+
21.96009922027588,
|
| 509 |
+
43.69997024536133,
|
| 510 |
+
0.0,
|
| 511 |
+
95.96379089355469,
|
| 512 |
+
80.70125961303711,
|
| 513 |
+
0.0,
|
| 514 |
+
0.0,
|
| 515 |
+
31.88983964920044,
|
| 516 |
+
301.3817310333252,
|
| 517 |
+
0.0,
|
| 518 |
+
15.77073049545288,
|
| 519 |
+
396.3671169281006,
|
| 520 |
+
83.96024990081787,
|
| 521 |
+
265.5281705856323,
|
| 522 |
+
47.332489013671875,
|
| 523 |
+
0.0,
|
| 524 |
+
268.84939098358154,
|
| 525 |
+
58.15328025817871,
|
| 526 |
+
31.172239780426025,
|
| 527 |
+
30.765819549560547,
|
| 528 |
+
10.469799995422363,
|
| 529 |
+
16.379559993743896,
|
| 530 |
+
28.163670539855957,
|
| 531 |
+
199.17678022384644,
|
| 532 |
+
112.94913101196289,
|
| 533 |
+
5.905869960784912,
|
| 534 |
+
719.0067505836487,
|
| 535 |
+
157.29250049591064,
|
| 536 |
+
92.6033205986023,
|
| 537 |
+
73.79398918151855,
|
| 538 |
+
24.25756072998047,
|
| 539 |
+
0.0,
|
| 540 |
+
31.15705966949463,
|
| 541 |
+
50.47894048690796,
|
| 542 |
+
73.0004301071167,
|
| 543 |
+
131.88961124420166,
|
| 544 |
+
0.0,
|
| 545 |
+
44.40921926498413,
|
| 546 |
+
59.08494997024536,
|
| 547 |
+
60.722700119018555,
|
| 548 |
+
108.21477127075195,
|
| 549 |
+
78.56892967224121,
|
| 550 |
+
486.87088108062744,
|
| 551 |
+
235.95975875854492,
|
| 552 |
+
1809.188328742981,
|
| 553 |
+
396.9979257583618,
|
| 554 |
+
441.098051071167,
|
| 555 |
+
218.83313035964966,
|
| 556 |
+
265.3398394584656,
|
| 557 |
+
595.3824620246887,
|
| 558 |
+
6126.337133407593,
|
| 559 |
+
3245.946928501129,
|
| 560 |
+
170.21856021881104,
|
| 561 |
+
262.3172616958618,
|
| 562 |
+
98.2627010345459,
|
| 563 |
+
146.45634078979492,
|
| 564 |
+
135.70992946624756,
|
| 565 |
+
34.09130001068115,
|
| 566 |
+
14156.531812667847,
|
| 567 |
+
227.55861043930054,
|
| 568 |
+
121.6160798072815,
|
| 569 |
+
409.0565061569214,
|
| 570 |
+
282.5465121269226,
|
| 571 |
+
481.5555577278137,
|
| 572 |
+
291.560200214386,
|
| 573 |
+
797.986575126648,
|
| 574 |
+
246.7717628479004,
|
| 575 |
+
6129.707794189453,
|
| 576 |
+
957.9258012771606,
|
| 577 |
+
4484.775461196899,
|
| 578 |
+
5722.659900188446,
|
| 579 |
+
393.6506414413452,
|
| 580 |
+
882.6219139099121,
|
| 581 |
+
264.54289960861206,
|
| 582 |
+
79.82537126541138,
|
| 583 |
+
228.20479917526245,
|
| 584 |
+
155.19043970108032,
|
| 585 |
+
319.6992588043213,
|
| 586 |
+
391.5327887535095,
|
| 587 |
+
2005.5544757843018,
|
| 588 |
+
0.0,
|
| 589 |
+
1028.816568851471,
|
| 590 |
+
577.8704214096069,
|
| 591 |
+
159.98183917999268,
|
| 592 |
+
138.31745052337646,
|
| 593 |
+
115.26242113113403,
|
| 594 |
+
117.50687980651855,
|
| 595 |
+
0.0,
|
| 596 |
+
270.78229904174805,
|
| 597 |
+
300.6347818374634,
|
| 598 |
+
164.85750007629395,
|
| 599 |
+
542.5208883285522,
|
| 600 |
+
10002.710669994354,
|
| 601 |
+
502.5058374404907,
|
| 602 |
+
6619.406281471252,
|
| 603 |
+
194.39686965942383,
|
| 604 |
+
0.0,
|
| 605 |
+
239.30037021636963,
|
| 606 |
+
129.93587112426758,
|
| 607 |
+
149.23295974731445,
|
| 608 |
+
57.12141132354736,
|
| 609 |
+
152.30589962005615,
|
| 610 |
+
590.8979144096375,
|
| 611 |
+
125.51728057861328,
|
| 612 |
+
216.1852297782898,
|
| 613 |
+
4445.603507041931,
|
| 614 |
+
0.0,
|
| 615 |
+
97.60689973831177,
|
| 616 |
+
497.5633420944214,
|
| 617 |
+
699.1335229873657,
|
| 618 |
+
159.68335962295532,
|
| 619 |
+
127.93899154663086,
|
| 620 |
+
148.00423860549927,
|
| 621 |
+
385.3561215400696,
|
| 622 |
+
1255.3204145431519,
|
| 623 |
+
170.33005905151367,
|
| 624 |
+
564.577874660492,
|
| 625 |
+
1513.99400806427,
|
| 626 |
+
254.163161277771,
|
| 627 |
+
782.5869626998901,
|
| 628 |
+
166.38124132156372,
|
| 629 |
+
4800.666547775269,
|
| 630 |
+
271.63431215286255,
|
| 631 |
+
225.10281944274902,
|
| 632 |
+
674.5281610488892,
|
| 633 |
+
198.04610967636108,
|
| 634 |
+
4262.1786432266235,
|
| 635 |
+
0.0,
|
| 636 |
+
0.0,
|
| 637 |
+
749.2932777404785,
|
| 638 |
+
50.16440010070801,
|
| 639 |
+
350.71588039398193,
|
| 640 |
+
169.4644889831543,
|
| 641 |
+
3843.8212938308716,
|
| 642 |
+
0.0,
|
| 643 |
+
0.0,
|
| 644 |
+
1463.2607378959656,
|
| 645 |
+
0.0,
|
| 646 |
+
914.5419778823853,
|
| 647 |
+
213.03434944152832,
|
| 648 |
+
32.90106964111328,
|
| 649 |
+
119.6264705657959,
|
| 650 |
+
137.204270362854,
|
| 651 |
+
359.72862100601196,
|
| 652 |
+
75.62465047836304,
|
| 653 |
+
446.62164974212646,
|
| 654 |
+
105.61136054992676,
|
| 655 |
+
2787.228641986847,
|
| 656 |
+
311.6961917877197,
|
| 657 |
+
156.06305074691772,
|
| 658 |
+
1498.6027584075928,
|
| 659 |
+
185.69973182678223,
|
| 660 |
+
147.8509397506714,
|
| 661 |
+
12.531700134277344,
|
| 662 |
+
0.0,
|
| 663 |
+
192.53613948822021,
|
| 664 |
+
424.5432171821594,
|
| 665 |
+
259.268039226532,
|
| 666 |
+
175.13502979278564,
|
| 667 |
+
281.5383825302124,
|
| 668 |
+
299.1759967803955,
|
| 669 |
+
227.893488407135,
|
| 670 |
+
136.72871112823486,
|
| 671 |
+
416.3120012283325,
|
| 672 |
+
115.03175830841064,
|
| 673 |
+
0.0,
|
| 674 |
+
144.02852058410645,
|
| 675 |
+
208.2749309539795,
|
| 676 |
+
160.34006214141846,
|
| 677 |
+
109.58282947540283,
|
| 678 |
+
1500.150812625885,
|
| 679 |
+
4945.450592041016,
|
| 680 |
+
2852.855231285095,
|
| 681 |
+
881.7318058013916,
|
| 682 |
+
397.0553340911865,
|
| 683 |
+
315.55763959884644,
|
| 684 |
+
2086.7152404785156,
|
| 685 |
+
1611.37087059021,
|
| 686 |
+
2103.3109679222107,
|
| 687 |
+
3135.3377957344055,
|
| 688 |
+
2692.6771001815796,
|
| 689 |
+
4584.85631608963,
|
| 690 |
+
1700.0699429512024,
|
| 691 |
+
883.6995916366577,
|
| 692 |
+
33464.33708667755,
|
| 693 |
+
574.8801603317261,
|
| 694 |
+
2229.160650253296,
|
| 695 |
+
379.5017247200012,
|
| 696 |
+
905.5721397399902,
|
| 697 |
+
493.963942527771,
|
| 698 |
+
4049.96994638443,
|
| 699 |
+
189.95257091522217,
|
| 700 |
+
61.00449848175049,
|
| 701 |
+
450.8264832496643,
|
| 702 |
+
398.1711621284485,
|
| 703 |
+
38847.667073726654,
|
| 704 |
+
1835.184115409851,
|
| 705 |
+
2697.096595287323,
|
| 706 |
+
4710.6771783828735,
|
| 707 |
+
5588.210665225983,
|
| 708 |
+
1004.0054593086243,
|
| 709 |
+
652.6680641174316,
|
| 710 |
+
2031.7795896530151,
|
| 711 |
+
367.2168278694153,
|
| 712 |
+
2698.1613121032715,
|
| 713 |
+
591.61465883255,
|
| 714 |
+
448.26813650131226,
|
| 715 |
+
849.9976563453674,
|
| 716 |
+
8368.735646724701,
|
| 717 |
+
414.3280692100525,
|
| 718 |
+
3544.0216879844666,
|
| 719 |
+
679.3534464836121,
|
| 720 |
+
247.58060026168823,
|
| 721 |
+
402.0281286239624,
|
| 722 |
+
5822.276999950409,
|
| 723 |
+
1743.6888279914856,
|
| 724 |
+
2081.8095812797546,
|
| 725 |
+
1696.2736263275146,
|
| 726 |
+
197.28233861923218,
|
| 727 |
+
3321.6009736061096,
|
| 728 |
+
2298.3414697647095,
|
| 729 |
+
2910.3161034584045,
|
| 730 |
+
296.4575996398926,
|
| 731 |
+
14755.747835159302,
|
| 732 |
+
6977.302089691162,
|
| 733 |
+
3608.7710394859314,
|
| 734 |
+
289.08115005493164,
|
| 735 |
+
2645.5259099006653,
|
| 736 |
+
158.54701232910156,
|
| 737 |
+
490.0809507369995,
|
| 738 |
+
1880.1874709129333,
|
| 739 |
+
1493.8953075408936,
|
| 740 |
+
609.5897555351257,
|
| 741 |
+
462.8165135383606,
|
| 742 |
+
243.31624794006348,
|
| 743 |
+
150.1076784133911,
|
| 744 |
+
6197.5719475746155,
|
| 745 |
+
1036.8616194725037,
|
| 746 |
+
5302.397746086121,
|
| 747 |
+
1388.753752708435,
|
| 748 |
+
2091.038170814514,
|
| 749 |
+
785.7442808151245,
|
| 750 |
+
377.4342908859253,
|
| 751 |
+
3640.3371028900146,
|
| 752 |
+
1029.8467602729797,
|
| 753 |
+
296.86861085891724,
|
| 754 |
+
1221.5854263305664,
|
| 755 |
+
535.2803363800049,
|
| 756 |
+
2508.307864189148,
|
| 757 |
+
3831.0581674575806,
|
| 758 |
+
2263.3348484039307,
|
| 759 |
+
926.5323433876038,
|
| 760 |
+
8959.179275035858,
|
| 761 |
+
309.04264068603516,
|
| 762 |
+
1767.5786666870117,
|
| 763 |
+
2107.6189522743225,
|
| 764 |
+
155.21375036239624,
|
| 765 |
+
378.6039876937866,
|
| 766 |
+
2220.862048149109,
|
| 767 |
+
1505.2828221321106,
|
| 768 |
+
517.8384418487549,
|
| 769 |
+
4313.928272247314,
|
| 770 |
+
342.4098491668701,
|
| 771 |
+
1310.0776271820068,
|
| 772 |
+
434.5597867965698,
|
| 773 |
+
2071.2271361351013,
|
| 774 |
+
0.0,
|
| 775 |
+
8595.476936340332,
|
| 776 |
+
202.46072053909302,
|
| 777 |
+
366.71736097335815,
|
| 778 |
+
7074.809521198273,
|
| 779 |
+
6.880340099334717,
|
| 780 |
+
1959.3085498809814,
|
| 781 |
+
636.0715098381042,
|
| 782 |
+
9.84004020690918,
|
| 783 |
+
386.9805417060852,
|
| 784 |
+
2382.4822087287903,
|
| 785 |
+
2317.9521684646606,
|
| 786 |
+
2793.7392020225525,
|
| 787 |
+
1188.6612939834595,
|
| 788 |
+
933.1099715232849,
|
| 789 |
+
4565.712460041046,
|
| 790 |
+
14641.29742860794,
|
| 791 |
+
15552.311092853546,
|
| 792 |
+
56185.89445209503,
|
| 793 |
+
97331.36661911011,
|
| 794 |
+
87548.01149320602,
|
| 795 |
+
521853.7248663902,
|
| 796 |
+
2643.261353492737,
|
| 797 |
+
20220.717566013336,
|
| 798 |
+
79148.93348503113,
|
| 799 |
+
17449.243332386017,
|
| 800 |
+
13258.27445936203,
|
| 801 |
+
6109.533164024353,
|
| 802 |
+
6781.56981420517,
|
| 803 |
+
3942.6140484809875,
|
| 804 |
+
8469.07410955429,
|
| 805 |
+
40318.94767665863,
|
| 806 |
+
156345.23027658463,
|
| 807 |
+
12197.998657226562,
|
| 808 |
+
22888.345291614532,
|
| 809 |
+
10946.28234910965,
|
| 810 |
+
204263.674387455,
|
| 811 |
+
229631.36437797546,
|
| 812 |
+
1945.9702520370483,
|
| 813 |
+
3069.6773653030396,
|
| 814 |
+
6425.405041217804,
|
| 815 |
+
508.55564069747925,
|
| 816 |
+
8993.14672756195,
|
| 817 |
+
0.0,
|
| 818 |
+
0.0,
|
| 819 |
+
0.0
|
| 820 |
+
]
|
| 821 |
+
}
|
models/classifier/lightgbm_model.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
models/classifier/scaler.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:347b85c4f3e4bcbda0599f607a1ad5194c01655baca73b6e2ee72a9ba50dcf84
|
| 3 |
+
size 13207
|
src/__init__.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Hybrid Document Forgery Detection & Localization System
|
| 3 |
+
|
| 4 |
+
A robust hybrid (Deep Learning + Classical ML) system for multi-type
|
| 5 |
+
document forgery detection and localization.
|
| 6 |
+
|
| 7 |
+
Architecture:
|
| 8 |
+
- Deep Learning: MobileNetV3-Small + UNet-Lite for pixel-level localization
|
| 9 |
+
- Classical ML: LightGBM for interpretable forgery classification
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
__version__ = "1.0.0"
|
| 13 |
+
|
| 14 |
+
from .config import get_config
|
| 15 |
+
from .models import get_model, get_loss_function
|
| 16 |
+
from .data import get_dataset
|
| 17 |
+
from .features import get_feature_extractor, get_mask_refiner, get_region_extractor
|
| 18 |
+
from .training import get_trainer, get_metrics_tracker
|
| 19 |
+
from .inference import get_pipeline
|
| 20 |
+
|
| 21 |
+
__all__ = [
|
| 22 |
+
'get_config',
|
| 23 |
+
'get_model',
|
| 24 |
+
'get_loss_function',
|
| 25 |
+
'get_dataset',
|
| 26 |
+
'get_feature_extractor',
|
| 27 |
+
'get_mask_refiner',
|
| 28 |
+
'get_region_extractor',
|
| 29 |
+
'get_trainer',
|
| 30 |
+
'get_metrics_tracker',
|
| 31 |
+
'get_pipeline'
|
| 32 |
+
]
|
src/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (993 Bytes). View file
|
|
|
src/config/__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Configuration module"""
|
| 2 |
+
|
| 3 |
+
from .config_loader import Config, get_config
|
| 4 |
+
|
| 5 |
+
__all__ = ['Config', 'get_config']
|
src/config/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (291 Bytes). View file
|
|
|
src/config/__pycache__/config_loader.cpython-312.pyc
ADDED
|
Binary file (5.42 kB). View file
|
|
|
src/config/config_loader.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Configuration loader for Hybrid Document Forgery Detection System
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import yaml
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from typing import Dict, Any
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class Config:
    """Loads a YAML configuration file and exposes typed convenience accessors.

    Values are retrieved with dot-notation keys (e.g. ``model.encoder.name``);
    the per-dataset helpers wrap the ``data.datasets.<name>`` section.
    """

    def __init__(self, config_path: str = "config.yaml"):
        """
        Load configuration from YAML file

        Args:
            config_path: Path to configuration file

        Raises:
            FileNotFoundError: If the configuration file does not exist.
        """
        self.config_path = Path(config_path)
        self.config = self._load_config()

    def _load_config(self) -> Dict[str, Any]:
        """Parse the YAML configuration file into a plain dictionary."""
        if not self.config_path.exists():
            raise FileNotFoundError(f"Config file not found: {self.config_path}")

        # safe_load avoids executing arbitrary YAML tags from the config file.
        with open(self.config_path, 'r', encoding='utf-8') as f:
            return yaml.safe_load(f)

    def get(self, key: str, default: Any = None) -> Any:
        """
        Get configuration value using dot notation

        Args:
            key: Configuration key (e.g., 'model.encoder.name')
            default: Default value if key not found

        Returns:
            Configuration value, or ``default`` when any path segment
            is missing or the intermediate value is not a dict.
        """
        value = self.config
        for part in key.split('.'):
            if isinstance(value, dict) and part in value:
                value = value[part]
            else:
                return default
        return value

    def get_dataset_config(self, dataset_name: str) -> Dict[str, Any]:
        """
        Get dataset-specific configuration

        Args:
            dataset_name: Dataset name (doctamper, rtm, casia, receipts)

        Returns:
            Dataset configuration dictionary; {} when the dataset (or the
            whole 'data.datasets' section) is absent.
        """
        # Use the dot-notation get() so a config missing 'data'/'datasets'
        # yields {} instead of raising KeyError (previous code indexed
        # self.config['data']['datasets'] directly).
        datasets = self.get('data.datasets', {}) or {}
        return datasets.get(dataset_name, {})

    def has_pixel_mask(self, dataset_name: str) -> bool:
        """Check if dataset has pixel-level masks"""
        return self.get_dataset_config(dataset_name).get('has_pixel_mask', False)

    def should_skip_deskew(self, dataset_name: str) -> bool:
        """Check if deskewing should be skipped for dataset"""
        return self.get_dataset_config(dataset_name).get('skip_deskew', False)

    def should_skip_denoising(self, dataset_name: str) -> bool:
        """Check if denoising should be skipped for dataset"""
        return self.get_dataset_config(dataset_name).get('skip_denoising', False)

    def get_min_region_area(self, dataset_name: str) -> float:
        """Get minimum region area threshold for dataset (defaults to 0.001)."""
        return self.get_dataset_config(dataset_name).get('min_region_area', 0.001)

    def should_compute_localization_metrics(self, dataset_name: str) -> bool:
        """Check if localization metrics should be computed for dataset"""
        # Robust to a config without a 'metrics' section (previously a
        # direct self.config['metrics'] lookup that could raise KeyError).
        compute_config = self.get('metrics.compute_localization', {}) or {}
        return compute_config.get(dataset_name, False)

    def __getitem__(self, key: str) -> Any:
        """Dictionary-style access; returns None (not KeyError) for missing keys."""
        return self.get(key)

    def __repr__(self) -> str:
        return f"Config(path={self.config_path})"
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
# Module-level cache holding the process-wide Config singleton.
_config = None


def get_config(config_path: str = "config.yaml") -> Config:
    """
    Return the shared global :class:`Config` instance, creating it on first use.

    Args:
        config_path: Path to configuration file. NOTE(review): only honored
            on the very first call — once the singleton is cached, later
            calls return it regardless of the path passed; confirm callers
            never rely on switching config files at runtime.

    Returns:
        The cached Config instance.
    """
    global _config
    if _config is not None:
        return _config
    _config = Config(config_path)
    return _config
|
src/data/__init__.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Data module"""
|
| 2 |
+
|
| 3 |
+
from .preprocessing import DocumentPreprocessor, preprocess_image
|
| 4 |
+
from .augmentation import DatasetAwareAugmentation, get_augmentation
|
| 5 |
+
from .datasets import (
|
| 6 |
+
DocTamperDataset,
|
| 7 |
+
RTMDataset,
|
| 8 |
+
CASIADataset,
|
| 9 |
+
ReceiptsDataset,
|
| 10 |
+
get_dataset
|
| 11 |
+
)
|
| 12 |
+
|
| 13 |
+
__all__ = [
|
| 14 |
+
'DocumentPreprocessor',
|
| 15 |
+
'preprocess_image',
|
| 16 |
+
'DatasetAwareAugmentation',
|
| 17 |
+
'get_augmentation',
|
| 18 |
+
'DocTamperDataset',
|
| 19 |
+
'RTMDataset',
|
| 20 |
+
'CASIADataset',
|
| 21 |
+
'ReceiptsDataset',
|
| 22 |
+
'get_dataset'
|
| 23 |
+
]
|
src/data/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (572 Bytes). View file
|
|
|
src/data/__pycache__/augmentation.cpython-312.pyc
ADDED
|
Binary file (5.94 kB). View file
|
|
|
src/data/__pycache__/datasets.cpython-312.pyc
ADDED
|
Binary file (21.2 kB). View file
|
|
|
src/data/__pycache__/preprocessing.cpython-312.pyc
ADDED
|
Binary file (9.38 kB). View file
|
|
|
src/data/augmentation.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Dataset-aware augmentation for training
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import cv2
|
| 6 |
+
import numpy as np
|
| 7 |
+
import albumentations as A
|
| 8 |
+
from albumentations.pytorch import ToTensorV2
|
| 9 |
+
from typing import Dict, Any, Optional
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class DatasetAwareAugmentation:
|
| 13 |
+
"""Dataset-aware augmentation pipeline"""
|
| 14 |
+
|
| 15 |
+
def __init__(self, config, dataset_name: str, is_training: bool = True):
|
| 16 |
+
"""
|
| 17 |
+
Initialize augmentation pipeline
|
| 18 |
+
|
| 19 |
+
Args:
|
| 20 |
+
config: Configuration object
|
| 21 |
+
dataset_name: Dataset name
|
| 22 |
+
is_training: Whether in training mode
|
| 23 |
+
"""
|
| 24 |
+
self.config = config
|
| 25 |
+
self.dataset_name = dataset_name
|
| 26 |
+
self.is_training = is_training
|
| 27 |
+
|
| 28 |
+
# Build augmentation pipeline
|
| 29 |
+
self.transform = self._build_transform()
|
| 30 |
+
|
| 31 |
+
def _build_transform(self) -> A.Compose:
|
| 32 |
+
"""Build albumentations transform pipeline"""
|
| 33 |
+
|
| 34 |
+
transforms = []
|
| 35 |
+
|
| 36 |
+
if self.is_training and self.config.get('augmentation.enabled', True):
|
| 37 |
+
# Common augmentations
|
| 38 |
+
common_augs = self.config.get('augmentation.common', [])
|
| 39 |
+
|
| 40 |
+
for aug_config in common_augs:
|
| 41 |
+
aug_type = aug_config.get('type')
|
| 42 |
+
prob = aug_config.get('prob', 0.5)
|
| 43 |
+
|
| 44 |
+
if aug_type == 'noise':
|
| 45 |
+
transforms.append(
|
| 46 |
+
A.GaussNoise(var_limit=(10.0, 50.0), p=prob)
|
| 47 |
+
)
|
| 48 |
+
|
| 49 |
+
elif aug_type == 'motion_blur':
|
| 50 |
+
transforms.append(
|
| 51 |
+
A.MotionBlur(blur_limit=7, p=prob)
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
+
elif aug_type == 'jpeg_compression':
|
| 55 |
+
quality_range = aug_config.get('quality', [60, 95])
|
| 56 |
+
transforms.append(
|
| 57 |
+
A.ImageCompression(quality_lower=quality_range[0],
|
| 58 |
+
quality_upper=quality_range[1],
|
| 59 |
+
p=prob)
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
+
elif aug_type == 'lighting':
|
| 63 |
+
transforms.append(
|
| 64 |
+
A.OneOf([
|
| 65 |
+
A.RandomBrightnessContrast(p=1.0),
|
| 66 |
+
A.RandomGamma(p=1.0),
|
| 67 |
+
A.HueSaturationValue(p=1.0),
|
| 68 |
+
], p=prob)
|
| 69 |
+
)
|
| 70 |
+
|
| 71 |
+
elif aug_type == 'perspective':
|
| 72 |
+
transforms.append(
|
| 73 |
+
A.Perspective(scale=(0.02, 0.05), p=prob)
|
| 74 |
+
)
|
| 75 |
+
|
| 76 |
+
# Dataset-specific augmentations
|
| 77 |
+
if self.dataset_name == 'receipts':
|
| 78 |
+
receipt_augs = self.config.get('augmentation.receipts', [])
|
| 79 |
+
|
| 80 |
+
for aug_config in receipt_augs:
|
| 81 |
+
aug_type = aug_config.get('type')
|
| 82 |
+
prob = aug_config.get('prob', 0.5)
|
| 83 |
+
|
| 84 |
+
if aug_type == 'stain':
|
| 85 |
+
# Simulate stains using random blobs
|
| 86 |
+
transforms.append(
|
| 87 |
+
A.RandomShadow(
|
| 88 |
+
shadow_roi=(0, 0, 1, 1),
|
| 89 |
+
num_shadows_lower=1,
|
| 90 |
+
num_shadows_upper=3,
|
| 91 |
+
shadow_dimension=5,
|
| 92 |
+
p=prob
|
| 93 |
+
)
|
| 94 |
+
)
|
| 95 |
+
|
| 96 |
+
elif aug_type == 'fold':
|
| 97 |
+
# Simulate folds using grid distortion
|
| 98 |
+
transforms.append(
|
| 99 |
+
A.GridDistortion(num_steps=5, distort_limit=0.1, p=prob)
|
| 100 |
+
)
|
| 101 |
+
|
| 102 |
+
# Always convert to tensor
|
| 103 |
+
transforms.append(ToTensorV2())
|
| 104 |
+
|
| 105 |
+
return A.Compose(
|
| 106 |
+
transforms,
|
| 107 |
+
additional_targets={'mask': 'mask'}
|
| 108 |
+
)
|
| 109 |
+
|
| 110 |
+
def __call__(self, image: np.ndarray, mask: Optional[np.ndarray] = None) -> Dict[str, Any]:
    """
    Apply the augmentation pipeline to an image (and optional mask).

    Args:
        image: Input image (H, W, 3), float32 in [0, 1]
        mask: Optional mask (H, W), uint8 in {0, 1}

    Returns:
        Dictionary with 'image' (float tensor in [0, 1]) and, when a mask
        was given, 'mask' as a (1, H, W) float tensor.
    """
    # Albumentations works on uint8 arrays, so scale up before transforming.
    img_u8 = (image * 255).astype(np.uint8)

    if mask is None:
        # Image-only path: transform, then scale back to [0, 1].
        result = self.transform(image=img_u8)
        result['image'] = result['image'].float() / 255.0
        return result

    # Joint image+mask path: the mask is scaled to {0, 255} so spatial
    # transforms treat it like an image, then rescaled and given a
    # leading channel dimension on the way out.
    msk_u8 = (mask * 255).astype(np.uint8)
    result = self.transform(image=img_u8, mask=msk_u8)
    result['image'] = result['image'].float() / 255.0
    result['mask'] = (result['mask'].float() / 255.0).unsqueeze(0)
    return result
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def get_augmentation(config, dataset_name: str, is_training: bool = True) -> DatasetAwareAugmentation:
    """
    Build an augmentation pipeline for the given dataset and split.

    Args:
        config: Configuration object
        dataset_name: Dataset name
        is_training: Whether in training mode

    Returns:
        A ready-to-use :class:`DatasetAwareAugmentation` pipeline.
    """
    pipeline = DatasetAwareAugmentation(config, dataset_name, is_training)
    return pipeline
|
src/data/datasets.py
ADDED
|
@@ -0,0 +1,541 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Dataset loaders for document forgery detection
|
| 3 |
+
Implements Critical Fix #7: Image-level train/test splits
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import lmdb
|
| 8 |
+
import cv2
|
| 9 |
+
import numpy as np
|
| 10 |
+
import torch
|
| 11 |
+
from torch.utils.data import Dataset
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
from typing import Tuple, Optional, List
|
| 14 |
+
import json
|
| 15 |
+
from PIL import Image
|
| 16 |
+
|
| 17 |
+
from .preprocessing import DocumentPreprocessor
|
| 18 |
+
from .augmentation import DatasetAwareAugmentation
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class DocTamperDataset(Dataset):
    """
    DocTamper dataset loader (LMDB-based).

    Implements chunked loading for RAM constraints (Critical Fix #7:
    image-level chunk boundaries) and lazy LMDB initialization so the
    dataset object can be pickled into DataLoader worker processes.
    """

    def __init__(self,
                 config,
                 split: str = 'train',
                 chunk_start: float = 0.0,
                 chunk_end: float = 1.0):
        """
        Initialize DocTamper dataset.

        Args:
            config: Configuration object
            split: 'train', 'val', or 'test'
            chunk_start: Start ratio for chunked training (0.0 to 1.0)
            chunk_end: End ratio for chunked training (0.0 to 1.0)
        """
        self.config = config
        self.split = split
        self.dataset_name = 'doctamper'

        # Resolve the dataset root from the project configuration.
        dataset_config = config.get_dataset_config(self.dataset_name)
        self.data_path = Path(dataset_config['path'])

        # Map split to the on-disk folder names; 'val' and 'test' both use
        # the official testing set, anything else falls back to training.
        if split in ('val', 'test'):
            lmdb_folder = 'DocTamperV1-TestingSet'
        else:
            lmdb_folder = 'DocTamperV1-TrainingSet'

        self.lmdb_path = str(self.data_path / lmdb_folder)

        if not Path(self.lmdb_path).exists():
            raise FileNotFoundError(f"LMDB folder not found: {self.lmdb_path}")

        # LAZY INITIALIZATION: do not keep an LMDB env open here — env
        # handles are not picklable, which breaks DataLoader workers.
        # Open temporarily just to count entries (image + label per sample).
        temp_env = lmdb.open(self.lmdb_path, readonly=True, lock=False)
        try:
            with temp_env.begin() as txn:
                self.length = txn.stat()['entries'] // 2
        finally:
            temp_env.close()

        # The real env is opened lazily on first __getitem__ (see `env`).
        self._env = None

        # Critical Fix #7: chunk at the image level (not region level) so a
        # sample never straddles two chunks.
        self.chunk_start = int(self.length * chunk_start)
        self.chunk_end = int(self.length * chunk_end)
        self.chunk_length = self.chunk_end - self.chunk_start

        print(f"DocTamper {split}: Total={self.length}, "
              f"Chunk=[{self.chunk_start}:{self.chunk_end}], "
              f"Length={self.chunk_length}")

        # Preprocessing / augmentation are dataset-aware.
        self.preprocessor = DocumentPreprocessor(config, self.dataset_name)
        self.augmentation = DatasetAwareAugmentation(
            config,
            self.dataset_name,
            is_training=(split == 'train')
        )

    @property
    def env(self):
        """Lazy LMDB environment initialization for multiprocessing compatibility."""
        if self._env is None:
            self._env = lmdb.open(self.lmdb_path, readonly=True, lock=False,
                                  max_readers=32, readahead=False)
        return self._env

    def __len__(self) -> int:
        return self.chunk_length

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor, dict]:
        """
        Get item from dataset.

        Tries the requested sample and skips forward (wrapping within the
        chunk) when a key is missing or undecodable; after `max_attempts`
        failures a zero-valued dummy sample is returned instead of raising.

        Args:
            idx: Index within the chunk

        Returns:
            image: (3, H, W) tensor
            mask: (1, H, W) tensor
            metadata: Dictionary with additional info
        """
        max_attempts = 10
        original_idx = idx

        # Initialize so the fallback below works no matter which retry path
        # (missing key, decode failure, or exception) exhausted the attempts.
        # BUG FIX: previously `image`/`mask` were only bound on success or on
        # an exception during the *last* attempt; ten consecutive missing
        # keys left them unbound and raised NameError.
        image = None
        mask = None
        global_idx = self.chunk_start + original_idx

        for attempt in range(max_attempts):
            try:
                # Map chunk-relative index to the global LMDB index.
                global_idx = self.chunk_start + idx

                with self.env.begin() as txn:
                    # DocTamper key format: image-XXXXXXXXX / label-XXXXXXXXX
                    # (9 digits, dash separator).
                    img_key = f'image-{global_idx:09d}'.encode()
                    label_key = f'label-{global_idx:09d}'.encode()

                    img_buf = txn.get(img_key)
                    label_buf = txn.get(label_key)

                if img_buf is None:
                    # Sample missing — try the next index within the chunk.
                    idx = (idx + 1) % self.chunk_length
                    continue

                # Decode image (BGR, as produced by cv2.imdecode).
                img_array = np.frombuffer(img_buf, dtype=np.uint8)
                image = cv2.imdecode(img_array, cv2.IMREAD_COLOR)

                if image is None:
                    # Corrupt/undecodable image — try the next index.
                    idx = (idx + 1) % self.chunk_length
                    continue

                # Decode label/mask; fall back to an all-authentic mask when
                # the label is absent or not a decodable image.
                if label_buf is not None:
                    label_array = np.frombuffer(label_buf, dtype=np.uint8)
                    mask = cv2.imdecode(label_array, cv2.IMREAD_GRAYSCALE)
                    if mask is None:
                        mask = np.zeros(image.shape[:2], dtype=np.uint8)
                else:
                    mask = np.zeros(image.shape[:2], dtype=np.uint8)

                # Successfully loaded — stop retrying.
                break

            except Exception:
                # Read/decode blew up — advance and retry.
                idx = (idx + 1) % self.chunk_length

        if image is None:
            # Every attempt failed: emit a dummy sample rather than crash a
            # whole training epoch. uint8 zeros match the normal decode path.
            print(f"Warning: Could not load sample at idx {original_idx}, creating dummy sample")
            image = np.zeros((384, 384, 3), dtype=np.uint8)
            mask = np.zeros((384, 384), dtype=np.uint8)
            global_idx = self.chunk_start + original_idx

        # Preprocess (deskew/resize/normalize, dataset-aware).
        image, mask = self.preprocessor(image, mask)

        # Augment (train-time transforms + tensor conversion).
        augmented = self.augmentation(image, mask)
        image = augmented['image']
        mask = augmented['mask']

        metadata = {
            'dataset': self.dataset_name,
            'index': global_idx,
            'has_pixel_mask': True
        }

        return image, mask, metadata

    def __del__(self):
        """Close the LMDB environment if it was ever opened."""
        if hasattr(self, '_env') and self._env is not None:
            self._env.close()
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
|
| 194 |
+
class RTMDataset(Dataset):
    """Real Text Manipulation dataset loader (VOC-style folder layout)."""

    def __init__(self, config, split: str = 'train'):
        """
        Initialize RTM dataset.

        Args:
            config: Configuration object
            split: 'train' or 'test'
        """
        self.config = config
        self.split = split
        self.dataset_name = 'rtm'

        # Resolve the dataset root from the project configuration.
        dataset_config = config.get_dataset_config(self.dataset_name)
        self.data_path = Path(dataset_config['path'])

        # Load split file; skip blank lines so a trailing newline does not
        # produce an empty image id (which would later fail to load).
        split_file = self.data_path / f'{split}.txt'
        with open(split_file, 'r') as f:
            self.image_ids = [line.strip() for line in f if line.strip()]

        self.images_dir = self.data_path / 'JPEGImages'
        self.masks_dir = self.data_path / 'SegmentationClass'

        print(f"RTM {split}: {len(self.image_ids)} images")

        # Preprocessing / augmentation are dataset-aware.
        self.preprocessor = DocumentPreprocessor(config, self.dataset_name)
        self.augmentation = DatasetAwareAugmentation(
            config,
            self.dataset_name,
            is_training=(split == 'train')
        )

    def __len__(self) -> int:
        return len(self.image_ids)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor, dict]:
        """Get item from dataset."""
        image_id = self.image_ids[idx]

        # Load image; cv2.imread returns None (not an exception) on failure,
        # so fail fast with a clear message instead of a cryptic TypeError.
        img_path = self.images_dir / f'{image_id}.jpg'
        image = cv2.imread(str(img_path))
        if image is None:
            raise FileNotFoundError(f"Failed to read image: {img_path}")

        # Load mask (same failure mode as above).
        mask_path = self.masks_dir / f'{image_id}.png'
        mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise FileNotFoundError(f"Failed to read mask: {mask_path}")

        # Binarize mask: any non-zero class id counts as tampered.
        mask = (mask > 0).astype(np.uint8)

        # Preprocess (deskew/resize/normalize, dataset-aware).
        image, mask = self.preprocessor(image, mask)

        # Augment (train-time transforms + tensor conversion).
        augmented = self.augmentation(image, mask)
        image = augmented['image']
        mask = augmented['mask']

        metadata = {
            'dataset': self.dataset_name,
            'image_id': image_id,
            'has_pixel_mask': True
        }

        return image, mask, metadata
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
class CASIADataset(Dataset):
    """
    CASIA v1.0 dataset loader.

    Image-level labels only (no pixel masks).
    Implements Critical Fix #6: CASIA image-level handling.
    """

    def __init__(self, config, split: str = 'train'):
        """
        Initialize CASIA dataset.

        Args:
            config: Configuration object
            split: 'train' or 'test'
        """
        self.config = config
        self.split = split
        self.dataset_name = 'casia'

        # Resolve the dataset root from the project configuration.
        dataset_config = config.get_dataset_config(self.dataset_name)
        self.data_path = Path(dataset_config['path'])

        # Authentic (Au) vs tampered (Tp) image folders.
        self.authentic_dir = self.data_path / 'Au'
        self.tampered_dir = self.data_path / 'Tp'

        # Gather image paths. BUG FIX: Path.glob order is filesystem-
        # dependent, so without sorting the seeded permutation below would
        # produce a DIFFERENT train/test split on different machines
        # (silent train/test leakage across environments).
        authentic_images = sorted(self.authentic_dir.glob('*.jpg')) + \
                           sorted(self.authentic_dir.glob('*.png'))
        tampered_images = sorted(self.tampered_dir.glob('*.jpg')) + \
                          sorted(self.tampered_dir.glob('*.png'))

        # Build (path, label) pairs: 0 = authentic, 1 = tampered.
        self.samples = []
        for img_path in authentic_images:
            self.samples.append((img_path, 0))
        for img_path in tampered_images:
            self.samples.append((img_path, 1))

        # Critical Fix #7: image-level split (80/20), deterministic now that
        # the sample list order is stable.
        np.random.seed(42)
        indices = np.random.permutation(len(self.samples))
        split_idx = int(len(self.samples) * 0.8)

        if split == 'train':
            indices = indices[:split_idx]
        else:
            indices = indices[split_idx:]

        self.samples = [self.samples[i] for i in indices]

        print(f"CASIA {split}: {len(self.samples)} images")

        # Preprocessing / augmentation are dataset-aware.
        self.preprocessor = DocumentPreprocessor(config, self.dataset_name)
        self.augmentation = DatasetAwareAugmentation(
            config,
            self.dataset_name,
            is_training=(split == 'train')
        )

    def __len__(self) -> int:
        return len(self.samples)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor, dict]:
        """Get item from dataset."""
        img_path, label = self.samples[idx]

        # Load image; fail fast with a clear message on unreadable files
        # (cv2.imread returns None rather than raising).
        image = cv2.imread(str(img_path))
        if image is None:
            raise FileNotFoundError(f"Failed to read image: {img_path}")

        # Critical Fix #6: image-level mask — the whole frame carries the
        # image label (all zeros for authentic, all ones for tampered).
        h, w = image.shape[:2]
        mask = np.ones((h, w), dtype=np.uint8) * label

        # Preprocess (deskew/resize/normalize, dataset-aware).
        image, mask = self.preprocessor(image, mask)

        # Augment (train-time transforms + tensor conversion).
        augmented = self.augmentation(image, mask)
        image = augmented['image']
        mask = augmented['mask']

        metadata = {
            'dataset': self.dataset_name,
            'image_path': str(img_path),
            'has_pixel_mask': False,  # Image-level only
            'label': label
        }

        return image, mask, metadata
|
| 360 |
+
|
| 361 |
+
|
| 362 |
+
class ReceiptsDataset(Dataset):
    """Find-It-Again receipts dataset loader (JSON annotations + bboxes)."""

    def __init__(self, config, split: str = 'train'):
        """
        Initialize receipts dataset.

        Args:
            config: Configuration object
            split: 'train', 'val', or 'test'
        """
        self.config = config
        self.split = split
        self.dataset_name = 'receipts'

        # Resolve the dataset root from the project configuration.
        dataset_config = config.get_dataset_config(self.dataset_name)
        self.data_path = Path(dataset_config['path'])

        # Load per-split annotation file (list of records with
        # 'image_path' and optional 'bboxes' entries).
        split_file = self.data_path / f'{split}.json'
        with open(split_file, 'r') as f:
            self.annotations = json.load(f)

        print(f"Receipts {split}: {len(self.annotations)} images")

        # Preprocessing / augmentation are dataset-aware.
        self.preprocessor = DocumentPreprocessor(config, self.dataset_name)
        self.augmentation = DatasetAwareAugmentation(
            config,
            self.dataset_name,
            is_training=(split == 'train')
        )

    def __len__(self) -> int:
        return len(self.annotations)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor, dict]:
        """Get item from dataset."""
        ann = self.annotations[idx]

        # Load image; fail fast with a clear message on unreadable files.
        img_path = self.data_path / ann['image_path']
        image = cv2.imread(str(img_path))
        if image is None:
            raise FileNotFoundError(f"Failed to read image: {img_path}")

        # Rasterize bounding boxes (x, y, w, h) into a binary mask.
        h, w = image.shape[:2]
        mask = np.zeros((h, w), dtype=np.uint8)

        for bbox in ann.get('bboxes', []):
            x, y, w_box, h_box = (int(v) for v in bbox)
            # BUG FIX: clamp to image bounds. Negative x/y would previously
            # wrap around via numpy negative indexing and paint the wrong
            # region (or nothing at all).
            x0, y0 = max(x, 0), max(y, 0)
            x1, y1 = min(x + w_box, w), min(y + h_box, h)
            if x1 > x0 and y1 > y0:
                mask[y0:y1, x0:x1] = 1

        # Preprocess (deskew/resize/normalize, dataset-aware).
        image, mask = self.preprocessor(image, mask)

        # Augment (train-time transforms + tensor conversion).
        augmented = self.augmentation(image, mask)
        image = augmented['image']
        mask = augmented['mask']

        metadata = {
            'dataset': self.dataset_name,
            'image_path': str(img_path),
            'has_pixel_mask': True
        }

        return image, mask, metadata
|
| 431 |
+
|
| 432 |
+
|
| 433 |
+
class FCDDataset(DocTamperDataset):
    """FCD (Forgery Classification Dataset) loader - inherits from DocTamperDataset"""

    def __init__(self, config, split: str = 'train'):
        # Deliberately does NOT call DocTamperDataset.__init__: FCD keeps its
        # LMDB directly at the dataset root and needs no split-folder mapping.
        self.config = config
        self.split = split
        self.dataset_name = 'fcd'

        # Resolve the LMDB location from the project configuration.
        ds_cfg = config.get_dataset_config(self.dataset_name)
        self.data_path = Path(ds_cfg['path'])
        self.lmdb_path = str(self.data_path)

        if not Path(self.lmdb_path).exists():
            raise FileNotFoundError(f"LMDB folder not found: {self.lmdb_path}")

        # Count samples: entries come in image/label pairs.
        counter_env = lmdb.open(self.lmdb_path, readonly=True, lock=False)
        try:
            with counter_env.begin() as txn:
                self.length = txn.stat()['entries'] // 2
        finally:
            counter_env.close()

        # Real env is opened lazily by the inherited `env` property.
        self._env = None

        # FCD is small, so the whole set is a single chunk.
        self.chunk_start, self.chunk_end = 0, self.length
        self.chunk_length = self.length

        print(f"FCD {split}: {self.length} samples")

        # Preprocessing / augmentation are dataset-aware.
        self.preprocessor = DocumentPreprocessor(config, self.dataset_name)
        self.augmentation = DatasetAwareAugmentation(
            config,
            self.dataset_name,
            is_training=(split == 'train')
        )
|
| 472 |
+
|
| 473 |
+
|
| 474 |
+
class SCDDataset(DocTamperDataset):
    """SCD (Splicing Classification Dataset) loader - inherits from DocTamperDataset"""

    def __init__(self, config, split: str = 'train'):
        # Deliberately does NOT call DocTamperDataset.__init__: SCD keeps its
        # LMDB directly at the dataset root and needs no split-folder mapping.
        self.config = config
        self.split = split
        self.dataset_name = 'scd'

        # Resolve the LMDB location from the project configuration.
        ds_cfg = config.get_dataset_config(self.dataset_name)
        self.data_path = Path(ds_cfg['path'])
        self.lmdb_path = str(self.data_path)

        if not Path(self.lmdb_path).exists():
            raise FileNotFoundError(f"LMDB folder not found: {self.lmdb_path}")

        # Count samples: entries come in image/label pairs.
        counter_env = lmdb.open(self.lmdb_path, readonly=True, lock=False)
        try:
            with counter_env.begin() as txn:
                self.length = txn.stat()['entries'] // 2
        finally:
            counter_env.close()

        # Real env is opened lazily by the inherited `env` property.
        self._env = None

        # SCD is medium-sized, so the whole set is a single chunk.
        self.chunk_start, self.chunk_end = 0, self.length
        self.chunk_length = self.length

        print(f"SCD {split}: {self.length} samples")

        # Preprocessing / augmentation are dataset-aware.
        self.preprocessor = DocumentPreprocessor(config, self.dataset_name)
        self.augmentation = DatasetAwareAugmentation(
            config,
            self.dataset_name,
            is_training=(split == 'train')
        )
|
| 513 |
+
|
| 514 |
+
|
| 515 |
+
def get_dataset(config, dataset_name: str, split: str = 'train', **kwargs) -> Dataset:
    """
    Factory function to get a dataset instance.

    Args:
        config: Configuration object
        dataset_name: Dataset name
        split: Data split
        **kwargs: Additional arguments, only honoured by 'doctamper'
                  (e.g. chunk_start, chunk_end)

    Returns:
        Dataset instance

    Raises:
        ValueError: If the dataset name is not recognized.
    """
    # DocTamper is the only loader that accepts extra keyword arguments.
    if dataset_name == 'doctamper':
        return DocTamperDataset(config, split, **kwargs)

    registry = {
        'rtm': RTMDataset,
        'casia': CASIADataset,
        'receipts': ReceiptsDataset,
        'fcd': FCDDataset,
        'scd': SCDDataset,
    }

    dataset_cls = registry.get(dataset_name)
    if dataset_cls is None:
        raise ValueError(f"Unknown dataset: {dataset_name}")
    return dataset_cls(config, split)
|
src/data/preprocessing.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Dataset-aware preprocessing for document forgery detection
|
| 3 |
+
Implements Critical Fix #1: Dataset-Aware Preprocessing
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import cv2
|
| 7 |
+
import numpy as np
|
| 8 |
+
from typing import Tuple, Optional
|
| 9 |
+
import pywt
|
| 10 |
+
from scipy import ndimage
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class DocumentPreprocessor:
|
| 14 |
+
"""Dataset-aware document preprocessing"""
|
| 15 |
+
|
| 16 |
+
def __init__(self, config, dataset_name: str):
|
| 17 |
+
"""
|
| 18 |
+
Initialize preprocessor
|
| 19 |
+
|
| 20 |
+
Args:
|
| 21 |
+
config: Configuration object
|
| 22 |
+
dataset_name: Name of dataset (for dataset-aware processing)
|
| 23 |
+
"""
|
| 24 |
+
self.config = config
|
| 25 |
+
self.dataset_name = dataset_name
|
| 26 |
+
self.image_size = config.get('data.image_size', 384)
|
| 27 |
+
self.noise_threshold = config.get('preprocessing.noise_threshold', 15.0)
|
| 28 |
+
|
| 29 |
+
# Dataset-aware flags (Critical Fix #1)
|
| 30 |
+
self.skip_deskew = config.should_skip_deskew(dataset_name)
|
| 31 |
+
self.skip_denoising = config.should_skip_denoising(dataset_name)
|
| 32 |
+
|
| 33 |
+
def __call__(self, image: np.ndarray, mask: Optional[np.ndarray] = None) -> Tuple[np.ndarray, Optional[np.ndarray]]:
|
| 34 |
+
"""
|
| 35 |
+
Apply preprocessing pipeline
|
| 36 |
+
|
| 37 |
+
Args:
|
| 38 |
+
image: Input image (H, W, 3)
|
| 39 |
+
mask: Optional ground truth mask (H, W)
|
| 40 |
+
|
| 41 |
+
Returns:
|
| 42 |
+
Preprocessed image and mask
|
| 43 |
+
"""
|
| 44 |
+
# 1. Convert to RGB
|
| 45 |
+
if len(image.shape) == 2:
|
| 46 |
+
image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
|
| 47 |
+
elif image.shape[2] == 4:
|
| 48 |
+
image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGB)
|
| 49 |
+
elif image.shape[2] == 3:
|
| 50 |
+
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
| 51 |
+
|
| 52 |
+
# 2. Deskew (dataset-aware)
|
| 53 |
+
if not self.skip_deskew:
|
| 54 |
+
image, mask = self._deskew(image, mask)
|
| 55 |
+
|
| 56 |
+
# 3. Resize
|
| 57 |
+
image, mask = self._resize(image, mask)
|
| 58 |
+
|
| 59 |
+
# 4. Normalize
|
| 60 |
+
image = self._normalize(image)
|
| 61 |
+
|
| 62 |
+
# 5. Conditional denoising (dataset-aware)
|
| 63 |
+
if not self.skip_denoising:
|
| 64 |
+
noise_level = self._estimate_noise(image)
|
| 65 |
+
if noise_level > self.noise_threshold:
|
| 66 |
+
image = self._denoise(image)
|
| 67 |
+
|
| 68 |
+
return image, mask
|
| 69 |
+
|
| 70 |
+
def _deskew(self, image: np.ndarray, mask: Optional[np.ndarray] = None) -> Tuple[np.ndarray, Optional[np.ndarray]]:
|
| 71 |
+
"""
|
| 72 |
+
Deskew document image
|
| 73 |
+
|
| 74 |
+
Args:
|
| 75 |
+
image: Input image
|
| 76 |
+
mask: Optional mask
|
| 77 |
+
|
| 78 |
+
Returns:
|
| 79 |
+
Deskewed image and mask
|
| 80 |
+
"""
|
| 81 |
+
# Convert to grayscale for angle detection
|
| 82 |
+
gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
|
| 83 |
+
|
| 84 |
+
# Detect edges
|
| 85 |
+
edges = cv2.Canny(gray, 50, 150, apertureSize=3)
|
| 86 |
+
|
| 87 |
+
# Detect lines using Hough transform
|
| 88 |
+
lines = cv2.HoughLines(edges, 1, np.pi / 180, 200)
|
| 89 |
+
|
| 90 |
+
if lines is not None and len(lines) > 0:
|
| 91 |
+
# Calculate dominant angle
|
| 92 |
+
angles = []
|
| 93 |
+
for rho, theta in lines[:, 0]:
|
| 94 |
+
angle = (theta * 180 / np.pi) - 90
|
| 95 |
+
angles.append(angle)
|
| 96 |
+
|
| 97 |
+
# Use median angle
|
| 98 |
+
angle = np.median(angles)
|
| 99 |
+
|
| 100 |
+
# Only deskew if angle is significant (> 0.5 degrees)
|
| 101 |
+
if abs(angle) > 0.5:
|
| 102 |
+
# Get rotation matrix
|
| 103 |
+
h, w = image.shape[:2]
|
| 104 |
+
center = (w // 2, h // 2)
|
| 105 |
+
M = cv2.getRotationMatrix2D(center, angle, 1.0)
|
| 106 |
+
|
| 107 |
+
# Rotate image
|
| 108 |
+
image = cv2.warpAffine(image, M, (w, h),
|
| 109 |
+
flags=cv2.INTER_CUBIC,
|
| 110 |
+
borderMode=cv2.BORDER_REPLICATE)
|
| 111 |
+
|
| 112 |
+
# Rotate mask if provided
|
| 113 |
+
if mask is not None:
|
| 114 |
+
mask = cv2.warpAffine(mask, M, (w, h),
|
| 115 |
+
flags=cv2.INTER_NEAREST,
|
| 116 |
+
borderMode=cv2.BORDER_CONSTANT,
|
| 117 |
+
borderValue=0)
|
| 118 |
+
|
| 119 |
+
return image, mask
|
| 120 |
+
|
| 121 |
+
def _resize(self, image: np.ndarray, mask: Optional[np.ndarray] = None) -> Tuple[np.ndarray, Optional[np.ndarray]]:
|
| 122 |
+
"""
|
| 123 |
+
Resize image and mask to target size
|
| 124 |
+
|
| 125 |
+
Args:
|
| 126 |
+
image: Input image
|
| 127 |
+
mask: Optional mask
|
| 128 |
+
|
| 129 |
+
Returns:
|
| 130 |
+
Resized image and mask
|
| 131 |
+
"""
|
| 132 |
+
target_size = (self.image_size, self.image_size)
|
| 133 |
+
|
| 134 |
+
# Resize image
|
| 135 |
+
image = cv2.resize(image, target_size, interpolation=cv2.INTER_CUBIC)
|
| 136 |
+
|
| 137 |
+
# Resize mask if provided
|
| 138 |
+
if mask is not None:
|
| 139 |
+
mask = cv2.resize(mask, target_size, interpolation=cv2.INTER_NEAREST)
|
| 140 |
+
|
| 141 |
+
return image, mask
|
| 142 |
+
|
| 143 |
+
def _normalize(self, image: np.ndarray) -> np.ndarray:
|
| 144 |
+
"""
|
| 145 |
+
Normalize pixel values to [0, 1]
|
| 146 |
+
|
| 147 |
+
Args:
|
| 148 |
+
image: Input image
|
| 149 |
+
|
| 150 |
+
Returns:
|
| 151 |
+
Normalized image
|
| 152 |
+
"""
|
| 153 |
+
return image.astype(np.float32) / 255.0
|
| 154 |
+
|
| 155 |
+
def _estimate_noise(self, image: np.ndarray) -> float:
    """
    Estimate noise level using Laplacian variance and wavelet-based estimation

    Args:
        image: Input image (normalized to [0, 1])

    Returns:
        Estimated noise level (scalar; arithmetic mean of the two estimates)
    """
    # Convert to grayscale for noise estimation (noise estimators below
    # operate on a single channel)
    if len(image.shape) == 3:
        gray = cv2.cvtColor((image * 255).astype(np.uint8), cv2.COLOR_RGB2GRAY)
    else:
        gray = (image * 255).astype(np.uint8)

    # Method 1: Laplacian variance — variance of the second-derivative
    # response; responds to both noise and genuine texture/edges
    laplacian_var = cv2.Laplacian(gray, cv2.CV_64F).var()

    # Method 2: Wavelet-based noise estimation on the diagonal-detail
    # sub-band; 0.6745 is the standard MAD-to-sigma scaling constant
    coeffs = pywt.dwt2(gray, 'db1')
    _, (cH, cV, cD) = coeffs
    sigma = np.median(np.abs(cD)) / 0.6745

    # Combine both estimates
    # NOTE(review): laplacian_var (a variance, often in the hundreds) and
    # sigma (a std-dev, typically single digits) are on very different
    # scales, so this average is dominated by the Laplacian term — confirm
    # this weighting is intended
    noise_level = (laplacian_var + sigma) / 2.0

    return noise_level
|
| 183 |
+
|
| 184 |
+
def _denoise(self, image: np.ndarray) -> np.ndarray:
    """
    Apply conditional denoising: a 3x3 median pass followed by a light
    Gaussian blur (sigma 0.8).

    Args:
        image: Input image (normalized to [0, 1])

    Returns:
        Denoised image as float32 in [0, 1]
    """
    # OpenCV's smoothing filters work on 8-bit data, so round-trip
    # through uint8.
    as_bytes = (image * 255).astype(np.uint8)

    # Median first (suppresses impulse noise), then a gentle Gaussian
    # smooth with sigma <= 0.8.
    smoothed = cv2.GaussianBlur(cv2.medianBlur(as_bytes, 3), (3, 3), 0.8)

    # Return to the normalized float32 domain.
    return smoothed.astype(np.float32) / 255.0
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
def preprocess_image(image: np.ndarray,
                     mask: Optional[np.ndarray] = None,
                     config = None,
                     dataset_name: str = 'default') -> Tuple[np.ndarray, Optional[np.ndarray]]:
    """
    Convenience wrapper: build a one-shot DocumentPreprocessor and run it.

    Args:
        image: Input image
        mask: Optional mask
        config: Configuration object
        dataset_name: Dataset name

    Returns:
        Preprocessed image and mask
    """
    return DocumentPreprocessor(config, dataset_name)(image, mask)
|
src/features/__init__.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Features module"""
|
| 2 |
+
|
| 3 |
+
from .feature_extraction import (
|
| 4 |
+
DeepFeatureExtractor,
|
| 5 |
+
StatisticalFeatureExtractor,
|
| 6 |
+
FrequencyFeatureExtractor,
|
| 7 |
+
NoiseELAFeatureExtractor,
|
| 8 |
+
OCRFeatureExtractor,
|
| 9 |
+
HybridFeatureExtractor,
|
| 10 |
+
get_feature_extractor
|
| 11 |
+
)
|
| 12 |
+
|
| 13 |
+
from .region_extraction import (
|
| 14 |
+
MaskRefiner,
|
| 15 |
+
RegionExtractor,
|
| 16 |
+
get_mask_refiner,
|
| 17 |
+
get_region_extractor
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
__all__ = [
|
| 21 |
+
'DeepFeatureExtractor',
|
| 22 |
+
'StatisticalFeatureExtractor',
|
| 23 |
+
'FrequencyFeatureExtractor',
|
| 24 |
+
'NoiseELAFeatureExtractor',
|
| 25 |
+
'OCRFeatureExtractor',
|
| 26 |
+
'HybridFeatureExtractor',
|
| 27 |
+
'get_feature_extractor',
|
| 28 |
+
'MaskRefiner',
|
| 29 |
+
'RegionExtractor',
|
| 30 |
+
'get_mask_refiner',
|
| 31 |
+
'get_region_extractor'
|
| 32 |
+
]
|
src/features/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (691 Bytes). View file
|
|
|
src/features/__pycache__/feature_extraction.cpython-312.pyc
ADDED
|
Binary file (22.6 kB). View file
|
|
|
src/features/__pycache__/region_extraction.cpython-312.pyc
ADDED
|
Binary file (8.93 kB). View file
|
|
|
src/features/feature_extraction.py
ADDED
|
@@ -0,0 +1,485 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Hybrid feature extraction for forgery detection
|
| 3 |
+
Implements Critical Fix #5: Feature Group Gating
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import cv2
|
| 7 |
+
import numpy as np
|
| 8 |
+
import torch
|
| 9 |
+
import torch.nn.functional as F
|
| 10 |
+
from typing import Dict, List, Optional, Tuple
|
| 11 |
+
from scipy import ndimage
|
| 12 |
+
from scipy.fftpack import dct
|
| 13 |
+
import pywt
|
| 14 |
+
from skimage.measure import regionprops, label
|
| 15 |
+
from skimage.filters import sobel
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class DeepFeatureExtractor:
    """Extract deep features from decoder feature maps"""

    def __init__(self):
        """Initialize deep feature extractor"""
        pass

    def extract(self,
                decoder_features: List[torch.Tensor],
                region_mask: np.ndarray) -> np.ndarray:
        """
        Extract deep features via masked Global Average Pooling over each
        decoder feature map's channels.

        Args:
            decoder_features: List of decoder feature tensors
            region_mask: Binary region mask (H, W)

        Returns:
            Deep feature vector (one value per channel per feature map)
        """
        pooled = []

        for fmap in decoder_features:
            # Detach from autograd and move to host memory before numpy work.
            if isinstance(fmap, torch.Tensor):
                fmap = fmap.detach().cpu().numpy()

            # Drop batch dimension: (B, C, H, W) -> (C, H, W).
            if fmap.ndim == 4:
                fmap = fmap[0]

            # Scale the mask down to this feature map's spatial size.
            # cv2.resize takes (width, height).
            fh, fw = fmap.shape[1:]
            scaled_mask = cv2.resize(region_mask.astype(np.float32), (fw, fh)) > 0.5

            if scaled_mask.any():
                # Masked GAP: mean of each channel restricted to the region.
                pooled.extend(fmap[c][scaled_mask].mean() for c in range(fmap.shape[0]))
            else:
                # Region vanished at this resolution — fall back to full GAP.
                pooled.extend(fmap.mean(axis=(1, 2)).tolist())

        return np.array(pooled, dtype=np.float32)
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
class StatisticalFeatureExtractor:
    """Extract statistical and shape features from regions"""

    def __init__(self):
        """Initialize statistical feature extractor"""
        pass

    def extract(self,
                image: np.ndarray,
                region_mask: np.ndarray) -> np.ndarray:
        """
        Extract statistical and shape features.

        Produces a fixed 6-element vector:
        [area, perimeter, aspect_ratio, solidity, eccentricity, entropy].
        Only the first labelled component of the mask contributes shape
        properties (callers pass single-region masks), while entropy is
        computed over all masked pixels.

        Args:
            image: Input image (H, W, 3) normalized [0, 1]
            region_mask: Binary region mask (H, W)

        Returns:
            Statistical feature vector (6 values)
        """
        features = []

        # Label the mask and read shape properties of the first component.
        labeled_mask = label(region_mask)
        props = regionprops(labeled_mask)

        if len(props) > 0:
            prop = props[0]

            # Area and perimeter
            features.append(prop.area)
            features.append(prop.perimeter)

            # Aspect ratio (minor/major axis, 1.0 for degenerate regions)
            if prop.major_axis_length > 0:
                aspect_ratio = prop.minor_axis_length / prop.major_axis_length
            else:
                aspect_ratio = 1.0
            features.append(aspect_ratio)

            # Solidity (area / convex-hull area)
            features.append(prop.solidity)

            # Eccentricity
            features.append(prop.eccentricity)

            # Shannon entropy of the intensity histogram inside the mask
            if len(image.shape) == 3:
                gray = cv2.cvtColor((image * 255).astype(np.uint8), cv2.COLOR_RGB2GRAY)
            else:
                gray = (image * 255).astype(np.uint8)

            region_pixels = gray[region_mask > 0]
            if len(region_pixels) > 0:
                hist, _ = np.histogram(region_pixels, bins=256, range=(0, 256))
                # Fix: guard the DIVISOR with epsilon. The previous code
                # `hist / hist.sum() + 1e-8` added epsilon to every bin
                # after division (operator precedence), skewing the
                # probability mass instead of protecting against a zero sum.
                hist = hist / (hist.sum() + 1e-8)
                entropy = -np.sum(hist * np.log2(hist + 1e-8))
            else:
                entropy = 0.0
            features.append(entropy)
        else:
            # Default values for an empty mask (keeps vector length at 6)
            features.extend([0, 0, 1.0, 0, 0, 0])

        return np.array(features, dtype=np.float32)
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
class FrequencyFeatureExtractor:
    """Extract frequency-domain features"""

    # Fixed output length: 3 DCT features + 4 wavelet energies + 3 wavelet
    # entropies. The empty-region fallbacks below must match this so the
    # concatenated feature vector keeps a constant width for the classifier.
    NUM_FEATURES = 10

    def __init__(self):
        """Initialize frequency feature extractor"""
        pass

    def extract(self,
                image: np.ndarray,
                region_mask: np.ndarray) -> np.ndarray:
        """
        Extract frequency-domain features (DCT, wavelet).

        Args:
            image: Input image (H, W, 3) normalized [0, 1]
            region_mask: Binary region mask (H, W)

        Returns:
            Frequency feature vector (10 values):
            [dct_mean, dct_std, high_freq_energy,
             cA/cH/cV/cD energies, cH/cV/cD entropies]
        """
        features = []

        # Convert to grayscale
        if len(image.shape) == 3:
            gray = cv2.cvtColor((image * 255).astype(np.uint8), cv2.COLOR_RGB2GRAY)
        else:
            gray = (image * 255).astype(np.uint8)

        # Get region bounding box
        coords = np.where(region_mask > 0)
        if len(coords[0]) == 0:
            # Fix: was np.zeros(13) — the populated path always returns 10
            # features, so the empty fallback must match that length.
            return np.zeros(self.NUM_FEATURES, dtype=np.float32)

        y_min, y_max = coords[0].min(), coords[0].max()
        x_min, x_max = coords[1].min(), coords[1].max()

        # Crop region
        region = gray[y_min:y_max+1, x_min:x_max+1].astype(np.float32)

        if region.size == 0:
            # Fix: same length correction as above (was np.zeros(13)).
            return np.zeros(self.NUM_FEATURES, dtype=np.float32)

        # DCT coefficients (2-D separable DCT-II, orthonormal)
        try:
            dct_coeffs = dct(dct(region, axis=0, norm='ortho'), axis=1, norm='ortho')

            # Mean and std of DCT coefficients
            features.append(np.mean(np.abs(dct_coeffs)))
            features.append(np.std(dct_coeffs))

            # High-frequency energy (bottom-right quadrant)
            h, w = dct_coeffs.shape
            high_freq = dct_coeffs[h//2:, w//2:]
            features.append(np.sum(np.abs(high_freq)) / (high_freq.size + 1e-8))
        except Exception:
            features.extend([0, 0, 0])

        # Wavelet features (single-level Haar decomposition)
        try:
            coeffs = pywt.dwt2(region, 'db1')
            cA, (cH, cV, cD) = coeffs

            # Energy in each sub-band, normalized by sub-band size
            features.append(np.sum(cA ** 2) / (cA.size + 1e-8))
            features.append(np.sum(cH ** 2) / (cH.size + 1e-8))
            features.append(np.sum(cV ** 2) / (cV.size + 1e-8))
            features.append(np.sum(cD ** 2) / (cD.size + 1e-8))

            # Wavelet entropy of each detail sub-band
            for coeff in [cH, cV, cD]:
                coeff_flat = np.abs(coeff.flatten())
                if coeff_flat.sum() > 0:
                    coeff_norm = coeff_flat / coeff_flat.sum()
                    entropy = -np.sum(coeff_norm * np.log2(coeff_norm + 1e-8))
                else:
                    entropy = 0.0
                features.append(entropy)
        except Exception:
            features.extend([0, 0, 0, 0, 0, 0, 0])

        return np.array(features, dtype=np.float32)
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
class NoiseELAFeatureExtractor:
    """Extract noise and Error Level Analysis features"""

    def __init__(self, quality: int = 90):
        """
        Initialize noise/ELA extractor

        Args:
            quality: JPEG quality for ELA
        """
        # JPEG re-compression quality used for the ELA round-trip
        self.quality = quality

    def extract(self,
                image: np.ndarray,
                region_mask: np.ndarray) -> np.ndarray:
        """
        Extract noise and ELA features.

        Produces a fixed 5-element vector:
        [ela_mean, ela_var, ela_max, noise_residual_mean, noise_residual_var].

        Args:
            image: Input image (H, W, 3) normalized [0, 1]
            region_mask: Binary region mask (H, W)

        Returns:
            Noise/ELA feature vector (5 values)
        """
        features = []

        # Convert to uint8
        img_uint8 = (image * 255).astype(np.uint8)

        # Error Level Analysis
        # Compress and compute difference: regions that were previously
        # compressed differently leave a distinct residual
        encode_param = [cv2.IMWRITE_JPEG_QUALITY, self.quality]
        _, encoded = cv2.imencode('.jpg', img_uint8, encode_param)
        recompressed = cv2.imdecode(encoded, cv2.IMREAD_COLOR)

        # Per-pixel absolute difference between the original and its
        # JPEG round-trip (H, W, 3)
        ela = np.abs(img_uint8.astype(np.float32) - recompressed.astype(np.float32))

        # ELA features within region (boolean mask selects (n, 3) values)
        ela_region = ela[region_mask > 0]
        if len(ela_region) > 0:
            features.append(np.mean(ela_region))  # ELA mean
            features.append(np.var(ela_region))  # ELA variance
            features.append(np.max(ela_region))  # ELA max
        else:
            features.extend([0, 0, 0])

        # Noise residual (using median filter): difference between the
        # grayscale image and its 3x3 median approximates the noise field
        if len(image.shape) == 3:
            gray = cv2.cvtColor(img_uint8, cv2.COLOR_RGB2GRAY)
        else:
            gray = img_uint8

        median_filtered = cv2.medianBlur(gray, 3)
        noise_residual = np.abs(gray.astype(np.float32) - median_filtered.astype(np.float32))

        residual_region = noise_residual[region_mask > 0]
        if len(residual_region) > 0:
            features.append(np.mean(residual_region))
            features.append(np.var(residual_region))
        else:
            features.extend([0, 0])

        return np.array(features, dtype=np.float32)
|
| 281 |
+
|
| 282 |
+
|
| 283 |
+
class OCRFeatureExtractor:
    """
    Extract OCR-based consistency features
    Only for text documents (Feature Gating - Critical Fix #5)
    """

    def __init__(self):
        """Initialize OCR feature extractor; degrades gracefully when
        EasyOCR is not installed (extract() then returns zeros)."""
        # Flag consulted by extract(); stays False if the import fails
        self.ocr_available = False

        try:
            import easyocr
            # NOTE(review): gpu=True assumes CUDA availability — EasyOCR
            # may fall back or fail on CPU-only hosts; confirm deployment
            self.reader = easyocr.Reader(['en'], gpu=True)
            self.ocr_available = True
        except Exception:
            print("Warning: EasyOCR not available, OCR features disabled")

    def extract(self,
                image: np.ndarray,
                region_mask: np.ndarray) -> np.ndarray:
        """
        Extract OCR consistency features.

        Produces a fixed 6-element vector:
        [conf_mean, conf_std, spacing_irregularity, text_density,
         stroke_mean, stroke_std], or all zeros when OCR is unavailable,
        the mask is empty, or OCR fails.

        Args:
            image: Input image (H, W, 3) normalized [0, 1]
            region_mask: Binary region mask (H, W)

        Returns:
            OCR feature vector (or zeros if not text document)
        """
        features = []

        if not self.ocr_available:
            return np.zeros(6, dtype=np.float32)

        # Convert to uint8
        img_uint8 = (image * 255).astype(np.uint8)

        # Get region bounding box
        coords = np.where(region_mask > 0)
        if len(coords[0]) == 0:
            return np.zeros(6, dtype=np.float32)

        y_min, y_max = coords[0].min(), coords[0].max()
        x_min, x_max = coords[1].min(), coords[1].max()

        # Crop region
        region = img_uint8[y_min:y_max+1, x_min:x_max+1]

        try:
            # OCR on region; each result is (bbox_points, text, confidence)
            results = self.reader.readtext(region)

            if len(results) > 0:
                # Confidence statistics across detected text fragments
                confidences = [r[2] for r in results]
                features.append(np.mean(confidences))
                features.append(np.std(confidences))

                # Character spacing analysis: coefficient of variation of
                # bbox widths (top-right x minus top-left x per detection)
                bbox_widths = [abs(r[0][1][0] - r[0][0][0]) for r in results]
                if len(bbox_widths) > 1:
                    features.append(np.std(bbox_widths) / (np.mean(bbox_widths) + 1e-8))
                else:
                    features.append(0.0)

                # Text density: detections per region pixel
                features.append(len(results) / (region.shape[0] * region.shape[1] + 1e-8))

                # Stroke width variation (using edge detection)
                gray_region = cv2.cvtColor(region, cv2.COLOR_RGB2GRAY)
                edges = sobel(gray_region)
                features.append(np.mean(edges))
                features.append(np.std(edges))
            else:
                features.extend([0, 0, 0, 0, 0, 0])
        except Exception:
            # Any OCR failure yields the neutral zero vector
            features.extend([0, 0, 0, 0, 0, 0])

        return np.array(features, dtype=np.float32)
|
| 363 |
+
|
| 364 |
+
|
| 365 |
+
class HybridFeatureExtractor:
    """
    Complete hybrid feature extraction
    Implements Critical Fix #5: Feature Group Gating

    Concatenates deep, statistical, frequency, noise/ELA and (for text
    documents only) OCR feature groups, each individually toggleable via
    the `features.*.enabled` config keys.
    """

    def __init__(self, config, is_text_document: bool = True):
        """
        Initialize hybrid feature extractor

        Args:
            config: Configuration object
            is_text_document: Whether input is text document (for OCR gating)
        """
        self.config = config
        self.is_text_document = is_text_document

        # Initialize extractors
        self.deep_extractor = DeepFeatureExtractor()
        self.stat_extractor = StatisticalFeatureExtractor()
        self.freq_extractor = FrequencyFeatureExtractor()
        self.noise_extractor = NoiseELAFeatureExtractor()

        # Critical Fix #5: OCR only for text documents
        if is_text_document and config.get('features.ocr.enabled', True):
            self.ocr_extractor = OCRFeatureExtractor()
        else:
            # None disables the OCR group in extract()/get_feature_names()
            self.ocr_extractor = None

    def extract(self,
                image: np.ndarray,
                region_mask: np.ndarray,
                decoder_features: Optional[List[torch.Tensor]] = None) -> np.ndarray:
        """
        Extract all hybrid features for a region.

        Group order is fixed: deep, statistical, frequency, noise, OCR —
        get_feature_names() must stay in sync with this order.

        Args:
            image: Input image (H, W, 3) normalized [0, 1]
            region_mask: Binary region mask (H, W)
            decoder_features: Optional decoder features for deep feature extraction

        Returns:
            Concatenated feature vector (NaN/Inf replaced by 0)
        """
        all_features = []

        # Deep features (if available)
        if decoder_features is not None and self.config.get('features.deep.enabled', True):
            deep_feats = self.deep_extractor.extract(decoder_features, region_mask)
            all_features.append(deep_feats)

        # Statistical & shape features
        if self.config.get('features.statistical.enabled', True):
            stat_feats = self.stat_extractor.extract(image, region_mask)
            all_features.append(stat_feats)

        # Frequency-domain features
        if self.config.get('features.frequency.enabled', True):
            freq_feats = self.freq_extractor.extract(image, region_mask)
            all_features.append(freq_feats)

        # Noise & ELA features
        if self.config.get('features.noise.enabled', True):
            noise_feats = self.noise_extractor.extract(image, region_mask)
            all_features.append(noise_feats)

        # Critical Fix #5: OCR features only for text documents
        if self.ocr_extractor is not None:
            ocr_feats = self.ocr_extractor.extract(image, region_mask)
            all_features.append(ocr_feats)

        # Concatenate all features
        if len(all_features) > 0:
            features = np.concatenate(all_features)
        else:
            features = np.array([], dtype=np.float32)

        # Handle NaN/Inf so downstream classifiers never see non-finite values
        features = np.nan_to_num(features, nan=0.0, posinf=0.0, neginf=0.0)

        return features

    def get_feature_names(self) -> List[str]:
        """Get list of feature names for interpretability.

        NOTE(review): the deep-feature count (256) is declared approximate
        — the true count depends on decoder channel widths, so these names
        may not align 1:1 with extract()'s output when deep features are
        present; confirm before using for attribution.
        """
        names = []

        if self.config.get('features.deep.enabled', True):
            names.extend([f'deep_{i}' for i in range(256)])  # Approximate

        if self.config.get('features.statistical.enabled', True):
            names.extend(['area', 'perimeter', 'aspect_ratio',
                          'solidity', 'eccentricity', 'entropy'])

        if self.config.get('features.frequency.enabled', True):
            names.extend(['dct_mean', 'dct_std', 'high_freq_energy',
                          'wavelet_cA', 'wavelet_cH', 'wavelet_cV', 'wavelet_cD',
                          'wavelet_entropy_H', 'wavelet_entropy_V', 'wavelet_entropy_D'])

        if self.config.get('features.noise.enabled', True):
            names.extend(['ela_mean', 'ela_var', 'ela_max',
                          'noise_residual_mean', 'noise_residual_var'])

        if self.ocr_extractor is not None:
            names.extend(['ocr_conf_mean', 'ocr_conf_std', 'spacing_irregularity',
                          'text_density', 'stroke_mean', 'stroke_std'])

        return names
|
| 472 |
+
|
| 473 |
+
|
| 474 |
+
def get_feature_extractor(config, is_text_document: bool = True) -> HybridFeatureExtractor:
    """
    Build a HybridFeatureExtractor from the given configuration.

    Args:
        config: Configuration object
        is_text_document: Whether input is text document

    Returns:
        HybridFeatureExtractor instance
    """
    extractor = HybridFeatureExtractor(config, is_text_document)
    return extractor
|
src/features/region_extraction.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Mask refinement and region extraction
|
| 3 |
+
Implements Critical Fix #3: Adaptive Mask Refinement Thresholds
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import cv2
|
| 7 |
+
import numpy as np
|
| 8 |
+
from typing import List, Tuple, Dict, Optional
|
| 9 |
+
from scipy import ndimage
|
| 10 |
+
from skimage.measure import label, regionprops
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class MaskRefiner:
    """
    Mask refinement with adaptive thresholds
    Implements Critical Fix #3: Dataset-specific minimum region areas

    Pipeline: probability threshold -> morphological close -> open ->
    small-region removal -> optional resize back to the original size.
    """

    def __init__(self, config, dataset_name: str = 'default'):
        """
        Initialize mask refiner

        Args:
            config: Configuration object
            dataset_name: Dataset name for adaptive thresholds
        """
        self.config = config
        self.dataset_name = dataset_name

        # Get mask refinement parameters
        # Probability cutoff for binarization
        self.threshold = config.get('mask_refinement.threshold', 0.5)
        # Kernel side lengths (pixels) for the morphology passes
        self.closing_kernel = config.get('mask_refinement.morphology.closing_kernel', 5)
        self.opening_kernel = config.get('mask_refinement.morphology.opening_kernel', 3)

        # Critical Fix #3: Adaptive thresholds per dataset
        # (fraction of total image area, not pixel count)
        self.min_region_area = config.get_min_region_area(dataset_name)

        print(f"MaskRefiner initialized for {dataset_name}")
        print(f"Min region area: {self.min_region_area * 100:.2f}%")

    def refine(self,
               probability_map: np.ndarray,
               original_size: Tuple[int, int] = None) -> np.ndarray:
        """
        Refine probability map to binary mask

        Args:
            probability_map: Forgery probability map (H, W), values [0, 1]
            original_size: Optional (H, W) to resize mask back to original

        Returns:
            Refined binary mask (H, W), values {0, 1}
        """
        # Threshold to binary
        binary_mask = (probability_map > self.threshold).astype(np.uint8)

        # Morphological closing (fill broken strokes)
        closing_kernel = cv2.getStructuringElement(
            cv2.MORPH_RECT,
            (self.closing_kernel, self.closing_kernel)
        )
        binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, closing_kernel)

        # Morphological opening (remove isolated noise)
        opening_kernel = cv2.getStructuringElement(
            cv2.MORPH_RECT,
            (self.opening_kernel, self.opening_kernel)
        )
        binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_OPEN, opening_kernel)

        # Critical Fix #3: Remove small regions with adaptive threshold
        binary_mask = self._remove_small_regions(binary_mask)

        # Resize to original size if provided; nearest-neighbour keeps the
        # mask strictly binary
        if original_size is not None:
            binary_mask = cv2.resize(
                binary_mask,
                (original_size[1], original_size[0]),  # cv2 uses (W, H)
                interpolation=cv2.INTER_NEAREST
            )

        return binary_mask

    def _remove_small_regions(self, mask: np.ndarray) -> np.ndarray:
        """
        Remove regions smaller than minimum area threshold

        Args:
            mask: Binary mask (H, W)

        Returns:
            Filtered mask (same shape; only sufficiently large components kept)
        """
        # Calculate minimum pixel count from the fractional area threshold
        image_area = mask.shape[0] * mask.shape[1]
        min_pixels = int(image_area * self.min_region_area)

        # Label connected components
        labeled_mask, num_features = ndimage.label(mask)

        # Keep only large enough regions
        filtered_mask = np.zeros_like(mask)

        for region_id in range(1, num_features + 1):
            region_mask = (labeled_mask == region_id)
            region_area = region_mask.sum()

            if region_area >= min_pixels:
                filtered_mask[region_mask] = 1

        return filtered_mask
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
class RegionExtractor:
    """
    Extract individual regions from a binary mask.

    Implements Critical Fix #4: Region Confidence Aggregation — each region's
    confidence is the mean predicted probability over that region's own
    pixels, not a global statistic.
    """

    def __init__(self, config, dataset_name: str = 'default'):
        """
        Initialize region extractor.

        Args:
            config: Configuration object (must expose ``get_min_region_area``)
            dataset_name: Dataset name used for per-dataset settings
        """
        self.config = config
        self.dataset_name = dataset_name
        self.min_region_area = config.get_min_region_area(dataset_name)

    def extract(self,
                binary_mask: np.ndarray,
                probability_map: np.ndarray,
                original_image: np.ndarray) -> List[Dict]:
        """
        Extract regions from a binary mask via connected-component analysis.

        Args:
            binary_mask: Refined binary mask (H, W)
            probability_map: Original probability map (H, W), same size as mask
            original_image: Original image (H, W, 3)

        Returns:
            List of region dictionaries with bounding box, mask, image,
            confidence
        """
        regions = []

        # Connected component analysis (8-connectivity)
        labeled_mask = label(binary_mask, connectivity=2)
        props = regionprops(labeled_mask)

        for prop in props:
            # Use the component's actual label rather than assuming the
            # regionprops iteration order matches consecutive label ids.
            region_id = int(prop.label)

            # Bounding box (skimage order: min_row, min_col, max_row, max_col)
            y_min, x_min, y_max, x_max = prop.bbox

            # Full-size mask for this single component
            region_mask = (labeled_mask == region_id).astype(np.uint8)

            # Cropped region image and mask
            region_image = original_image[y_min:y_max, x_min:x_max].copy()
            region_mask_cropped = region_mask[y_min:y_max, x_min:x_max]

            # Critical Fix #4: Region-level confidence aggregation
            region_probs = probability_map[region_mask > 0]
            region_confidence = float(np.mean(region_probs)) if len(region_probs) > 0 else 0.0

            regions.append({
                'region_id': region_id,
                'bounding_box': [int(x_min), int(y_min),
                                 int(x_max - x_min), int(y_max - y_min)],
                # Cast to a plain int: skimage may return numpy scalar types,
                # which are not JSON-serializable downstream.
                'area': int(prop.area),
                'centroid': (int(prop.centroid[1]), int(prop.centroid[0])),
                'region_mask': region_mask,
                'region_mask_cropped': region_mask_cropped,
                'region_image': region_image,
                'confidence': region_confidence,
                'mask_probability_mean': region_confidence
            })

        return regions

    def extract_for_casia(self,
                          binary_mask: np.ndarray,
                          probability_map: np.ndarray,
                          original_image: np.ndarray) -> List[Dict]:
        """
        Critical Fix #6: CASIA handling — treat the entire image as one region.

        Args:
            binary_mask: Binary mask (may be empty for authentic images)
            probability_map: Probability map
            original_image: Original image

        Returns:
            Single region covering the entire image
        """
        h, w = original_image.shape[:2]

        # Single region covering the whole image
        region_mask = np.ones((h, w), dtype=np.uint8)

        # Overall confidence = mean of the full probability map
        overall_confidence = float(np.mean(probability_map))

        return [{
            'region_id': 1,
            'bounding_box': [0, 0, w, h],
            'area': h * w,
            'centroid': (w // 2, h // 2),
            'region_mask': region_mask,
            'region_mask_cropped': region_mask,
            'region_image': original_image,
            'confidence': overall_confidence,
            'mask_probability_mean': overall_confidence
        }]
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
def get_mask_refiner(config, dataset_name: str = 'default') -> MaskRefiner:
    """Build a MaskRefiner configured for the given dataset."""
    refiner = MaskRefiner(config, dataset_name)
    return refiner
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
def get_region_extractor(config, dataset_name: str = 'default') -> RegionExtractor:
    """Build a RegionExtractor configured for the given dataset."""
    extractor = RegionExtractor(config, dataset_name)
    return extractor
|
src/inference/__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Inference module"""
|
| 2 |
+
|
| 3 |
+
from .pipeline import ForgeryDetectionPipeline, get_pipeline
|
| 4 |
+
|
| 5 |
+
__all__ = ['ForgeryDetectionPipeline', 'get_pipeline']
|
src/inference/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (305 Bytes). View file
|
|
|
src/inference/__pycache__/pipeline.cpython-312.pyc
ADDED
|
Binary file (14.5 kB). View file
|
|
|
src/inference/pipeline.py
ADDED
|
@@ -0,0 +1,359 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Inference pipeline for document forgery detection
|
| 3 |
+
Complete pipeline: Image → Localization → Regions → Classification → Output
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import cv2
|
| 7 |
+
import numpy as np
|
| 8 |
+
import torch
|
| 9 |
+
from typing import Dict, List, Optional, Tuple
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
import json
|
| 12 |
+
from PIL import Image
|
| 13 |
+
import fitz # PyMuPDF
|
| 14 |
+
|
| 15 |
+
from ..config import get_config
|
| 16 |
+
from ..models import get_model
|
| 17 |
+
from ..features import (
|
| 18 |
+
get_feature_extractor,
|
| 19 |
+
get_mask_refiner,
|
| 20 |
+
get_region_extractor
|
| 21 |
+
)
|
| 22 |
+
from ..training.classifier import get_classifier
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class ForgeryDetectionPipeline:
    """
    Complete inference pipeline for document forgery detection

    Pipeline:
    1. Input handling (PDF/Image)
    2. Preprocessing
    3. Deep localization
    4. Mask refinement
    5. Region extraction
    6. Feature extraction
    7. Classification
    8. Post-processing
    9. Output generation
    """

    def __init__(self,
                 config,
                 model_path: str,
                 classifier_path: Optional[str] = None,
                 is_text_document: bool = True):
        """
        Initialize pipeline

        Args:
            config: Configuration object
            model_path: Path to localization model checkpoint
            classifier_path: Path to classifier (optional)
            is_text_document: Whether input is text document (for OCR features)
        """
        self.config = config
        self.is_text_document = is_text_document

        # Device: CUDA only when it is both available and requested in config
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() and config.get('system.device') == 'cuda'
            else 'cpu'
        )
        print(f"Inference device: {self.device}")

        # Load localization model
        self.model = get_model(config).to(self.device)
        self._load_model(model_path)
        self.model.eval()

        # Initialize mask refiner
        self.mask_refiner = get_mask_refiner(config, 'default')

        # Initialize region extractor
        self.region_extractor = get_region_extractor(config, 'default')

        # Initialize feature extractor
        self.feature_extractor = get_feature_extractor(config, is_text_document)

        # Load classifier if provided
        if classifier_path:
            self.classifier = get_classifier(config)
            self.classifier.load(classifier_path)
        else:
            self.classifier = None

        # Confidence threshold used by the post-processing filter
        self.confidence_threshold = config.get('classifier.confidence_threshold', 0.6)

        # Square input size expected by the localization model
        self.image_size = config.get('data.image_size', 384)

        print("Inference pipeline initialized")

    def _load_model(self, model_path: str):
        """Load model checkpoint (full training checkpoint or bare state dict)."""
        checkpoint = torch.load(model_path, map_location=self.device)

        if 'model_state_dict' in checkpoint:
            self.model.load_state_dict(checkpoint['model_state_dict'])
        else:
            self.model.load_state_dict(checkpoint)

        print(f"Loaded model from {model_path}")

    def _load_image(self, input_path: str) -> np.ndarray:
        """
        Load image from file or PDF (first page only for PDFs)

        Args:
            input_path: Path to image or PDF

        Returns:
            Image as numpy array (H, W, 3), RGB channel order

        Raises:
            FileNotFoundError: If a non-PDF image cannot be read
        """
        path = Path(input_path)

        if path.suffix.lower() == '.pdf':
            # Rasterize first page at 300 DPI (PDF native resolution is 72)
            doc = fitz.open(str(path))
            try:
                page = doc[0]
                mat = fitz.Matrix(300 / 72, 300 / 72)  # 300 DPI
                pix = page.get_pixmap(matrix=mat)
                image = np.frombuffer(pix.samples, dtype=np.uint8)
                image = image.reshape(pix.height, pix.width, pix.n)
                if pix.n == 4:
                    image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
            finally:
                # Always release the document handle, even on rasterize errors
                doc.close()
        else:
            image = cv2.imread(str(path))
            if image is None:
                # cv2.imread returns None (no exception) when the file is
                # missing or unreadable; fail loudly instead of crashing
                # later in cvtColor with an opaque error.
                raise FileNotFoundError(f"Could not read image: {input_path}")
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        return image

    def _preprocess(self, image: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Preprocess image for inference

        Args:
            image: Input image (H, W, 3)

        Returns:
            Preprocessed image (resized, float32 in [0, 1]) and original image
        """
        original = image.copy()

        # Resize to the model's square input size
        preprocessed = cv2.resize(image, (self.image_size, self.image_size))

        # Normalize to [0, 1]
        preprocessed = preprocessed.astype(np.float32) / 255.0

        return preprocessed, original

    def _to_tensor(self, image: np.ndarray) -> torch.Tensor:
        """Convert an (H, W, C) image to a (1, C, H, W) tensor on self.device."""
        # (H, W, C) -> (C, H, W)
        tensor = torch.from_numpy(image.transpose(2, 0, 1))
        tensor = tensor.unsqueeze(0)  # Add batch dimension
        return tensor.to(self.device)

    def run(self,
            input_path: str,
            output_dir: Optional[str] = None) -> Dict:
        """
        Run full inference pipeline

        Args:
            input_path: Path to input image or PDF
            output_dir: Optional output directory for mask/overlay/JSON files

        Returns:
            Dictionary with results (regions, tamper flag, optional file paths)
        """
        print(f"\n{'='*60}")
        print(f"Processing: {input_path}")
        print(f"{'='*60}")

        # 1. Load image
        image = self._load_image(input_path)
        original_size = image.shape[:2]
        print(f"Input size: {original_size}")

        # 2. Preprocess
        preprocessed, original = self._preprocess(image)
        tensor = self._to_tensor(preprocessed)

        # 3. Deep localization
        with torch.no_grad():
            logits, decoder_features = self.model(tensor)
            probability_map = torch.sigmoid(logits).cpu().numpy()[0, 0]

        print(f"Localization complete. Max prob: {probability_map.max():.3f}")

        # 4. Mask refinement (refiner also resizes back to original_size)
        binary_mask = self.mask_refiner.refine(probability_map, original_size)
        num_positive_pixels = binary_mask.sum()
        print(f"Mask refinement: {num_positive_pixels} positive pixels")

        # 5. Region extraction
        # Resize probability map to original size for confidence aggregation
        prob_resized = cv2.resize(probability_map, (original_size[1], original_size[0]))

        regions = self.region_extractor.extract(binary_mask, prob_resized, original)
        print(f"Regions extracted: {len(regions)}")

        # 6. Feature extraction & 7. Classification
        results = []

        for region in regions:
            # Extract hybrid features for this region (mask resized to model scale)
            features = self.feature_extractor.extract(
                preprocessed,
                cv2.resize(region['region_mask'], (self.image_size, self.image_size)),
                [f.cpu() for f in decoder_features]
            )

            # Classify if classifier available
            if self.classifier is not None:
                predictions, confidences, valid_mask = self.classifier.predict_with_filtering(
                    features.reshape(1, -1)
                )

                if valid_mask[0]:
                    region['forgery_type'] = self.classifier.get_class_name(predictions[0])
                    region['classification_confidence'] = float(confidences[0])
                else:
                    # Low classifier confidence - discard the region entirely
                    continue
            else:
                region['forgery_type'] = 'unknown'
                region['classification_confidence'] = region['confidence']

            # Keep only JSON-serializable fields (drop masks/image arrays)
            region_result = {
                'region_id': region['region_id'],
                'bounding_box': region['bounding_box'],
                'forgery_type': region['forgery_type'],
                'confidence': region['confidence'],
                'classification_confidence': region['classification_confidence'],
                'mask_probability_mean': region['mask_probability_mean'],
                # Cast defensively: upstream may hand back a numpy integer,
                # which json.dump cannot serialize.
                'area': int(region['area'])
            }
            results.append(region_result)

        print(f"Valid regions after filtering: {len(results)}")

        # 8. Post-processing - False positive removal
        results = self._post_process(results)

        # 9. Generate output
        output = {
            'input_path': str(input_path),
            'original_size': original_size,
            'num_regions': len(results),
            'regions': results,
            'is_tampered': len(results) > 0
        }

        # Save outputs if directory provided
        if output_dir:
            output_path = Path(output_dir)
            output_path.mkdir(parents=True, exist_ok=True)

            input_name = Path(input_path).stem

            # Save final mask (0/255 grayscale PNG)
            mask_path = output_path / f'{input_name}_mask.png'
            cv2.imwrite(str(mask_path), binary_mask * 255)

            # Save overlay visualization
            overlay = self._create_overlay(original, binary_mask, results)
            overlay_path = output_path / f'{input_name}_overlay.png'
            cv2.imwrite(str(overlay_path), cv2.cvtColor(overlay, cv2.COLOR_RGB2BGR))

            # Save JSON
            json_path = output_path / f'{input_name}_results.json'
            with open(json_path, 'w') as f:
                json.dump(output, f, indent=2)

            print(f"\nOutputs saved to: {output_path}")
            output['mask_path'] = str(mask_path)
            output['overlay_path'] = str(overlay_path)
            output['json_path'] = str(json_path)

        return output

    def _post_process(self, regions: List[Dict]) -> List[Dict]:
        """
        Post-process regions to remove false positives

        Args:
            regions: List of region dictionaries

        Returns:
            Regions whose confidence meets the configured threshold
        """
        filtered = []

        for region in regions:
            # Confidence filtering
            if region['confidence'] < self.confidence_threshold:
                continue

            filtered.append(region)

        return filtered

    def _create_overlay(self,
                        image: np.ndarray,
                        mask: np.ndarray,
                        regions: List[Dict]) -> np.ndarray:
        """
        Create visualization overlay

        Args:
            image: Original image
            mask: Binary mask
            regions: Detected regions

        Returns:
            Overlay image with tampered pixels tinted red and regions boxed
        """
        alpha = self.config.get('outputs.visualization.overlay_alpha', 0.5)

        # Resize the mask to the image's resolution FIRST, so both the
        # colored mask and the blend use the same shape (indexing an
        # image-sized array with an unresized mask breaks when sizes
        # differ). Nearest interpolation keeps the mask strictly binary.
        mask_resized = cv2.resize(mask, (image.shape[1], image.shape[0]),
                                  interpolation=cv2.INTER_NEAREST)

        # Create colored mask
        mask_colored = np.zeros_like(image)
        mask_colored[mask_resized > 0] = [255, 0, 0]  # Red for forgery

        # Blend tampered pixels with the red tint
        overlay = np.where(
            mask_resized[:, :, None] > 0,
            (1 - alpha) * image + alpha * mask_colored,
            image
        ).astype(np.uint8)

        # Draw bounding boxes and labels
        for region in regions:
            x, y, w, h = region['bounding_box']

            # Draw rectangle
            cv2.rectangle(overlay, (x, y), (x + w, y + h), (0, 255, 0), 2)

            # Draw label
            label = f"{region['forgery_type']} ({region['confidence']:.2f})"
            cv2.putText(overlay, label, (x, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        return overlay
|
| 352 |
+
|
| 353 |
+
|
| 354 |
+
def get_pipeline(config,
                 model_path: str,
                 classifier_path: Optional[str] = None,
                 is_text_document: bool = True) -> ForgeryDetectionPipeline:
    """Build a ForgeryDetectionPipeline from the given config and model paths."""
    return ForgeryDetectionPipeline(
        config,
        model_path,
        classifier_path=classifier_path,
        is_text_document=is_text_document,
    )
|
src/models/__init__.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Models module"""
|
| 2 |
+
|
| 3 |
+
from .encoder import MobileNetV3Encoder, get_encoder
|
| 4 |
+
from .decoder import UNetLiteDecoder, get_decoder
|
| 5 |
+
from .network import ForgeryLocalizationNetwork, get_model
|
| 6 |
+
from .losses import DiceLoss, CombinedLoss, DatasetAwareLoss, get_loss_function
|
| 7 |
+
|
| 8 |
+
__all__ = [
|
| 9 |
+
'MobileNetV3Encoder',
|
| 10 |
+
'get_encoder',
|
| 11 |
+
'UNetLiteDecoder',
|
| 12 |
+
'get_decoder',
|
| 13 |
+
'ForgeryLocalizationNetwork',
|
| 14 |
+
'get_model',
|
| 15 |
+
'DiceLoss',
|
| 16 |
+
'CombinedLoss',
|
| 17 |
+
'DatasetAwareLoss',
|
| 18 |
+
'get_loss_function'
|
| 19 |
+
]
|
src/models/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (600 Bytes). View file
|
|
|
src/models/__pycache__/decoder.cpython-312.pyc
ADDED
|
Binary file (7.65 kB). View file
|
|
|
src/models/__pycache__/encoder.cpython-312.pyc
ADDED
|
Binary file (2.91 kB). View file
|
|
|
src/models/__pycache__/losses.cpython-312.pyc
ADDED
|
Binary file (6.55 kB). View file
|
|
|
src/models/__pycache__/network.cpython-312.pyc
ADDED
|
Binary file (5.84 kB). View file
|
|
|
src/models/decoder.py
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
UNet-Lite Decoder for forgery localization
|
| 3 |
+
Lightweight decoder with skip connections, depthwise separable convolutions
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
import torch.nn as nn
|
| 8 |
+
import torch.nn.functional as F
|
| 9 |
+
from typing import List
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class DepthwiseSeparableConv(nn.Module):
    """Depthwise separable convolution: a per-channel spatial convolution
    followed by a 1x1 pointwise mix, then BatchNorm and ReLU."""

    def __init__(self, in_channels: int, out_channels: int, kernel_size: int = 3):
        super().__init__()

        # Depthwise stage: one spatial filter per input channel
        # (groups == in_channels), 'same' padding, no bias (BN follows).
        self.depthwise = nn.Conv2d(
            in_channels,
            in_channels,
            kernel_size=kernel_size,
            padding=kernel_size // 2,
            groups=in_channels,
            bias=False,
        )
        # Pointwise stage: 1x1 conv mixes channels to the target width
        self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply depthwise conv, pointwise conv, batch norm, then ReLU."""
        return self.relu(self.bn(self.pointwise(self.depthwise(x))))
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class DecoderBlock(nn.Module):
    """Single decoder stage: upsample, fuse the encoder skip, then refine."""

    def __init__(self, in_channels: int, skip_channels: int, out_channels: int):
        """
        Build one decoder block.

        Args:
            in_channels: Channels arriving from the previous decoder stage
            skip_channels: Channels of the encoder skip connection
            out_channels: Channels produced by this block
        """
        super().__init__()

        # After concatenation the tensor carries both feature streams
        fused_channels = in_channels + skip_channels

        self.conv1 = DepthwiseSeparableConv(fused_channels, out_channels)
        self.conv2 = DepthwiseSeparableConv(out_channels, out_channels)

    def forward(self, x: torch.Tensor, skip: torch.Tensor) -> torch.Tensor:
        """
        Upsample ``x`` to the skip's spatial size, concatenate along the
        channel axis, and refine with two depthwise-separable convolutions.

        Args:
            x: Features from the previous decoder stage
            skip: Skip-connection features from the encoder

        Returns:
            Decoded features at the skip connection's resolution
        """
        upsampled = F.interpolate(
            x, size=skip.shape[2:], mode='bilinear', align_corners=False
        )
        fused = torch.cat([upsampled, skip], dim=1)
        return self.conv2(self.conv1(fused))
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
class UNetLiteDecoder(nn.Module):
    """
    UNet-Lite decoder for forgery localization

    Features:
    - Skip connections from encoder stages
    - Bilinear upsampling
    - Depthwise separable convolutions for efficiency
    """

    def __init__(self,
                 encoder_channels: List[int],
                 decoder_channels: List[int] = None,
                 output_channels: int = 1):
        """
        Initialize decoder

        Args:
            encoder_channels: List of encoder feature channels [stage0, ..., stageN]
            decoder_channels: List of decoder output channels (defaults to
                [256, 128, 64, 32, 16] when None)
            output_channels: Number of output channels (1 for binary mask)
        """
        super().__init__()

        # Default decoder channels if not provided
        if decoder_channels is None:
            decoder_channels = [256, 128, 64, 32, 16]

        # Reverse encoder channels for decoder (bottom to top)
        encoder_channels = encoder_channels[::-1]

        # Initial convolution from deepest encoder features
        self.initial_conv = DepthwiseSeparableConv(encoder_channels[0], decoder_channels[0])

        # Decoder blocks, one per remaining encoder stage
        self.decoder_blocks = nn.ModuleList()

        for i in range(len(encoder_channels) - 1):
            in_ch = decoder_channels[i]
            skip_ch = encoder_channels[i + 1]
            # Clamp to the last decoder width if more encoder stages exist
            out_ch = decoder_channels[i + 1] if i + 1 < len(decoder_channels) else decoder_channels[-1]

            self.decoder_blocks.append(
                DecoderBlock(in_ch, skip_ch, out_ch)
            )

        # Final refinement + 1x1 projection to the output channel count
        self.final_upsample = nn.Sequential(
            DepthwiseSeparableConv(decoder_channels[-1], decoder_channels[-1]),
            nn.Conv2d(decoder_channels[-1], output_channels, kernel_size=1)
        )

        # Store decoder feature channels for feature extraction
        self.decoder_channels = decoder_channels

        print(f"UNet-Lite decoder initialized")
        print(f"Encoder channels: {encoder_channels[::-1]}")
        print(f"Decoder channels: {decoder_channels}")

    def forward(self, encoder_features: List[torch.Tensor]) -> tuple:
        """
        Forward pass

        Args:
            encoder_features: List of encoder features [stage0, ..., stageN]

        Returns:
            output: Forgery probability map logits (B, 1, H, W)
            decoder_features: List of decoder features for hybrid extraction
        """
        # Reverse for bottom-up decoding (deepest features first)
        features = encoder_features[::-1]

        # Initial convolution
        x = self.initial_conv(features[0])

        # Store decoder features for hybrid feature extraction
        decoder_features = [x]

        # Decoder blocks with skip connections
        for i, block in enumerate(self.decoder_blocks):
            x = block(x, features[i + 1])
            decoder_features.append(x)

        # Final upsampling back to the input resolution. The first encoder
        # stage is at 1/2 scale, so the target is twice its spatial size.
        # H and W are taken separately so non-square inputs are supported
        # (the previous code forced a square (H*2, H*2) target).
        target_h = encoder_features[0].shape[2] * 2
        target_w = encoder_features[0].shape[3] * 2
        x = F.interpolate(x, size=(target_h, target_w), mode='bilinear', align_corners=False)
        output = self.final_upsample(x)

        return output, decoder_features
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
def get_decoder(encoder_channels: List[int], config) -> UNetLiteDecoder:
    """
    Build a UNet-Lite decoder matched to the given encoder.

    Args:
        encoder_channels: Encoder feature channels
        config: Configuration object

    Returns:
        Decoder instance
    """
    n_out = config.get('model.output_channels', 1)
    return UNetLiteDecoder(encoder_channels, output_channels=n_out)
|
src/models/encoder.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
MobileNetV3-Small Encoder for forgery localization
|
| 3 |
+
ImageNet pretrained, feature extraction mode
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
import torch.nn as nn
|
| 8 |
+
import timm
|
| 9 |
+
from typing import List
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class MobileNetV3Encoder(nn.Module):
    """
    MobileNetV3-Small encoder for document forgery detection

    Chosen for:
    - Stroke-level and texture preservation
    - Robustness to compression and blur
    - Edge and CPU deployment efficiency
    """

    def __init__(self, pretrained: bool = True):
        """
        Build the encoder backbone.

        Args:
            pretrained: Whether to use ImageNet pretrained weights
        """
        super().__init__()

        # timm feature-extraction mode: forward() returns one tensor per
        # requested stage instead of classification logits.
        self.backbone = timm.create_model(
            'mobilenetv3_small_100',
            pretrained=pretrained,
            features_only=True,
            out_indices=(0, 1, 2, 3, 4)  # All feature stages
        )

        # Per-stage channel counts reported by timm
        # (MobileNetV3-Small: [16, 16, 24, 48, 576])
        self.feature_channels = self.backbone.feature_info.channels()

        print(f"MobileNetV3-Small encoder initialized")
        print(f"Feature channels: {self.feature_channels}")

    def forward(self, x: torch.Tensor) -> List[torch.Tensor]:
        """
        Extract multi-scale features.

        Args:
            x: Input tensor (B, 3, H, W)

        Returns:
            List of feature tensors, one per stage
        """
        return self.backbone(x)

    def get_feature_channels(self) -> List[int]:
        """Channel dimension of each feature stage, shallow to deep."""
        return self.feature_channels
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def get_encoder(config) -> MobileNetV3Encoder:
    """
    Build the encoder, honoring model.encoder.pretrained (default True).

    Args:
        config: Configuration object

    Returns:
        Encoder instance
    """
    use_pretrained = config.get('model.encoder.pretrained', True)
    return MobileNetV3Encoder(pretrained=use_pretrained)
|
src/models/losses.py
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Dataset-aware loss functions
|
| 3 |
+
Implements Critical Fix #2: Dataset-Aware Loss Function
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
import torch.nn as nn
|
| 8 |
+
import torch.nn.functional as F
|
| 9 |
+
from typing import Dict, Optional
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class DiceLoss(nn.Module):
    """Soft Dice loss for binary segmentation.

    Applies a sigmoid to the incoming logits and returns one minus the
    (smoothed) Dice coefficient computed over the whole batch.
    """

    def __init__(self, smooth: float = 1.0):
        """
        Args:
            smooth: Additive smoothing term that keeps the ratio finite
                when both prediction and target are empty.
        """
        super().__init__()
        self.smooth = smooth

    def forward(self, pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """Compute the Dice loss.

        Args:
            pred: Raw logits of shape (B, 1, H, W).
            target: Binary ground-truth mask of shape (B, 1, H, W).

        Returns:
            Scalar loss tensor (1 - Dice coefficient).
        """
        probs = torch.sigmoid(pred).reshape(-1)
        truth = target.reshape(-1)

        overlap = torch.sum(probs * truth)
        denom = torch.sum(probs) + torch.sum(truth) + self.smooth
        dice_coeff = (2.0 * overlap + self.smooth) / denom

        return 1 - dice_coeff
+
|
| 51 |
+
class CombinedLoss(nn.Module):
    """Weighted BCE + Dice loss for segmentation.

    Dataset-aware: the Dice term is only added when the batch comes from
    a dataset that provides pixel-level masks; otherwise only BCE is used.
    """

    def __init__(self,
                 bce_weight: float = 1.0,
                 dice_weight: float = 1.0):
        """
        Args:
            bce_weight: Multiplier for the BCE term.
            dice_weight: Multiplier for the Dice term.
        """
        super().__init__()

        self.bce_weight = bce_weight
        self.dice_weight = dice_weight

        self.bce_loss = nn.BCEWithLogitsLoss()
        self.dice_loss = DiceLoss()

    def forward(self,
                pred: torch.Tensor,
                target: torch.Tensor,
                has_pixel_mask: bool = True) -> Dict[str, torch.Tensor]:
        """Compute the (dataset-aware) combined loss.

        Critical Fix #2: the Dice term is skipped for datasets without
        pixel-level masks (e.g. CASIA), which contribute BCE only.

        Args:
            pred: Raw logits of shape (B, 1, H, W).
            target: Ground-truth mask of shape (B, 1, H, W).
            has_pixel_mask: True when the batch carries pixel annotations.

        Returns:
            Dict with 'total', 'bce' and, when applicable, 'dice' entries.
        """
        bce_term = self.bce_loss(pred, target)
        losses = {'bce': bce_term}

        total = self.bce_weight * bce_term
        if has_pixel_mask:
            dice_term = self.dice_loss(pred, target)
            losses['dice'] = dice_term
            total = total + self.dice_weight * dice_term
        losses['total'] = total

        return losses
+
|
| 111 |
+
class DatasetAwareLoss(nn.Module):
    """Loss wrapper that derives the appropriate terms from batch metadata."""

    def __init__(self, config):
        """
        Args:
            config: Configuration object providing ``get(key, default)``;
                reads 'loss.bce_weight' and 'loss.dice_weight'.
        """
        super().__init__()

        self.config = config
        self.combined_loss = CombinedLoss(
            bce_weight=config.get('loss.bce_weight', 1.0),
            dice_weight=config.get('loss.dice_weight', 1.0),
        )

    def forward(self,
                pred: torch.Tensor,
                target: torch.Tensor,
                metadata: Dict) -> Dict[str, torch.Tensor]:
        """Compute the combined loss, honoring per-sample mask availability.

        Args:
            pred: Raw logits (B, 1, H, W).
            target: Ground-truth mask (B, 1, H, W).
            metadata: Either a dict or a list of per-sample dicts; each may
                carry a boolean 'has_pixel_mask' flag (defaults to True).

        Returns:
            Dict with the individual loss components and their total.
        """
        if isinstance(metadata, list):
            # Dice is only valid when every sample in the batch has a mask.
            mask_available = all(item.get('has_pixel_mask', True) for item in metadata)
        else:
            mask_available = metadata.get('has_pixel_mask', True)

        return self.combined_loss(pred, target, mask_available)
| 157 |
+
|
| 158 |
+
def get_loss_function(config) -> DatasetAwareLoss:
    """Factory: build a ``DatasetAwareLoss`` from the given configuration.

    Args:
        config: Configuration object forwarded to the loss constructor.

    Returns:
        A ready-to-use loss module.
    """
    return DatasetAwareLoss(config)
src/models/network.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Complete Forgery Localization Network
|
| 3 |
+
MobileNetV3-Small Encoder + UNet-Lite Decoder
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
import torch.nn as nn
|
| 8 |
+
from typing import Tuple, List, Optional
|
| 9 |
+
|
| 10 |
+
from .encoder import MobileNetV3Encoder
|
| 11 |
+
from .decoder import UNetLiteDecoder
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class ForgeryLocalizationNetwork(nn.Module):
    """End-to-end forgery localization model.

    Architecture:
        - Encoder: MobileNetV3-Small (optionally ImageNet pretrained)
        - Decoder: UNet-Lite with skip connections
        - Output: single-channel forgery probability map (returned as logits)
    """

    def __init__(self, config):
        """
        Args:
            config: Configuration object providing ``get(key, default)``.
        """
        super().__init__()

        self.config = config

        # Backbone feature extractor.
        self.encoder = MobileNetV3Encoder(
            pretrained=config.get('model.encoder.pretrained', True)
        )

        # Decoder consumes the encoder's per-stage channel layout.
        self.decoder = UNetLiteDecoder(
            encoder_channels=self.encoder.get_feature_channels(),
            output_channels=config.get('model.output_channels', 1),
        )

        print("ForgeryLocalizationNetwork initialized")
        print(f"Total parameters: {self.count_parameters():,}")

    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, List[torch.Tensor]]:
        """Encode then decode.

        Args:
            x: Input image tensor (B, 3, H, W).

        Returns:
            Tuple of (forgery logit map of shape (B, 1, H, W),
            list of decoder features for hybrid feature extraction).
        """
        return self.decoder(self.encoder(x))

    def predict(self, x: torch.Tensor, threshold: float = 0.5) -> torch.Tensor:
        """Binarize the predicted probability map (no gradients).

        Args:
            x: Input image tensor (B, 3, H, W).
            threshold: Cutoff applied to the sigmoid probabilities.

        Returns:
            Binary float mask (B, 1, H, W).
        """
        with torch.no_grad():
            logits, _ = self.forward(x)
            return (torch.sigmoid(logits) > threshold).float()

    def get_probability_map(self, x: torch.Tensor) -> torch.Tensor:
        """Return sigmoid probabilities for the input batch (no gradients).

        Args:
            x: Input image tensor (B, 3, H, W).

        Returns:
            Probability map (B, 1, H, W).
        """
        with torch.no_grad():
            logits, _ = self.forward(x)
            return torch.sigmoid(logits)

    def count_parameters(self) -> int:
        """Total number of trainable parameters in the whole network."""
        return sum(p.numel() for p in self.parameters() if p.requires_grad)

    def get_decoder_features(self, x: torch.Tensor) -> List[torch.Tensor]:
        """Run the network and return only the decoder features (no gradients).

        Args:
            x: Input image tensor (B, 3, H, W).

        Returns:
            List of decoder feature tensors.
        """
        with torch.no_grad():
            _, decoder_feats = self.forward(x)
        return decoder_feats
+
|
| 123 |
+
def get_model(config) -> ForgeryLocalizationNetwork:
    """Factory: build the forgery localization network from configuration.

    Args:
        config: Configuration object forwarded to the model constructor.

    Returns:
        A ready-to-use model instance.
    """
    return ForgeryLocalizationNetwork(config)
src/training/__init__.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Training module"""
|
| 2 |
+
|
| 3 |
+
from .metrics import (
|
| 4 |
+
SegmentationMetrics,
|
| 5 |
+
ClassificationMetrics,
|
| 6 |
+
MetricsTracker,
|
| 7 |
+
EarlyStopping,
|
| 8 |
+
get_metrics_tracker
|
| 9 |
+
)
|
| 10 |
+
|
| 11 |
+
from .trainer import Trainer, get_trainer
|
| 12 |
+
from .classifier import ForgeryClassifier, get_classifier
|
| 13 |
+
|
| 14 |
+
__all__ = [
|
| 15 |
+
'SegmentationMetrics',
|
| 16 |
+
'ClassificationMetrics',
|
| 17 |
+
'MetricsTracker',
|
| 18 |
+
'EarlyStopping',
|
| 19 |
+
'get_metrics_tracker',
|
| 20 |
+
'Trainer',
|
| 21 |
+
'get_trainer',
|
| 22 |
+
'ForgeryClassifier',
|
| 23 |
+
'get_classifier'
|
| 24 |
+
]
|
src/training/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (568 Bytes). View file
|
|
|
src/training/__pycache__/classifier.cpython-312.pyc
ADDED
|
Binary file (11 kB). View file
|
|
|
src/training/__pycache__/metrics.cpython-312.pyc
ADDED
|
Binary file (12.5 kB). View file
|
|
|
src/training/__pycache__/trainer.cpython-312.pyc
ADDED
|
Binary file (18.8 kB). View file
|
|
|
src/training/classifier.py
ADDED
|
@@ -0,0 +1,282 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
LightGBM classifier for forgery type classification
|
| 3 |
+
Implements Critical Fix #8: Configurable Confidence Threshold
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
import lightgbm as lgb
|
| 8 |
+
from sklearn.preprocessing import StandardScaler
|
| 9 |
+
from sklearn.model_selection import train_test_split
|
| 10 |
+
from typing import Dict, List, Tuple, Optional
|
| 11 |
+
import joblib
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
import json
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class ForgeryClassifier:
    """
    LightGBM classifier for region-wise forgery type classification.

    Implements Critical Fix #8: configurable confidence threshold.

    Target classes:
        - 0: copy_move
        - 1: splicing
        - 2: text_substitution
    """

    CLASS_NAMES = ['copy_move', 'splicing', 'text_substitution']

    def __init__(self, config):
        """
        Initialize classifier.

        Args:
            config: Configuration object providing ``get(key, default)``.
        """
        self.config = config

        # LightGBM parameters (overridable via 'classifier.params').
        self.params = config.get('classifier.params', {
            'objective': 'multiclass',
            'num_class': 3,
            'boosting_type': 'gbdt',
            'num_leaves': 31,
            'learning_rate': 0.05,
            'n_estimators': 200,
            'max_depth': 7,
            'min_child_samples': 20,
            'subsample': 0.8,
            'colsample_bytree': 0.8,
            'reg_alpha': 0.1,
            'reg_lambda': 0.1,
            'random_state': 42,
            'verbose': -1
        })

        # Critical Fix #8: configurable confidence threshold.
        self.confidence_threshold = config.get('classifier.confidence_threshold', 0.6)

        # Populated by train() or load().
        self.model = None
        self.scaler = StandardScaler()

        # Feature-importance bookkeeping (filled in after training/loading).
        self.feature_importance = None
        self.feature_names = None

    @staticmethod
    def _sanitize(features: np.ndarray) -> np.ndarray:
        """Replace NaN/Inf so the scaler and booster never see them."""
        return np.nan_to_num(features, nan=0.0, posinf=0.0, neginf=0.0)

    def train(self,
              features: np.ndarray,
              labels: np.ndarray,
              feature_names: Optional[List[str]] = None,
              validation_split: float = 0.2) -> Dict:
        """
        Train the classifier.

        Args:
            features: Feature matrix (N, D).
            labels: Class labels (N,).
            feature_names: Optional feature names (used for importances).
            validation_split: Fraction of samples held out for validation.

        Returns:
            Dict of training metrics (accuracies, sizes, best iteration).
        """
        print("Training LightGBM classifier")
        print(f"Features shape: {features.shape}")
        print(f"Labels distribution: {np.bincount(labels)}")

        # Sanitize and normalize features.
        features_scaled = self.scaler.fit_transform(self._sanitize(features))

        # Split data (Critical Fix #7: image-level splitting should be done
        # upstream; this is a stratified row-level split).
        X_train, X_val, y_train, y_val = train_test_split(
            features_scaled, labels,
            test_size=validation_split,
            random_state=42,
            stratify=labels
        )

        train_data = lgb.Dataset(X_train, label=y_train)
        val_data = lgb.Dataset(X_val, label=y_val, reference=train_data)

        self.model = lgb.train(
            self.params,
            train_data,
            valid_sets=[train_data, val_data],
            valid_names=['train', 'val'],
            num_boost_round=self.params.get('n_estimators', 200),
            callbacks=[
                lgb.early_stopping(stopping_rounds=20),
                lgb.log_evaluation(period=10)
            ]
        )

        # Store feature importance for later inspection.
        self.feature_names = feature_names
        self.feature_importance = self.model.feature_importance(importance_type='gain')

        # Evaluate on both splits.
        train_acc = (self.model.predict(X_train).argmax(axis=1) == y_train).mean()
        val_acc = (self.model.predict(X_val).argmax(axis=1) == y_val).mean()

        metrics = {
            'train_accuracy': train_acc,
            'val_accuracy': val_acc,
            'num_features': features.shape[1],
            'num_samples': len(labels),
            'best_iteration': self.model.best_iteration
        }

        print("Training complete!")
        print(f"Train accuracy: {train_acc:.4f}")
        print(f"Val accuracy: {val_acc:.4f}")

        return metrics

    def predict(self, features: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Predict forgery types.

        Args:
            features: Feature matrix (N, D).

        Returns:
            predictions: Predicted class indices (N,).
            confidences: Max class probability per sample (N,).

        Raises:
            ValueError: If the model has not been trained or loaded.
        """
        if self.model is None:
            raise ValueError("Model not trained. Call train() first.")

        features_scaled = self.scaler.transform(self._sanitize(features))
        probabilities = self.model.predict(features_scaled)

        predictions = probabilities.argmax(axis=1)
        confidences = probabilities.max(axis=1)

        return predictions, confidences

    def predict_with_filtering(self,
                               features: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """
        Predict with confidence filtering.

        Args:
            features: Feature matrix (N, D).

        Returns:
            predictions: Predicted class indices (N,).
            confidences: Prediction confidences (N,).
            valid_mask: Boolean mask of predictions meeting the threshold (N,).
        """
        predictions, confidences = self.predict(features)

        # Critical Fix #8: apply configurable confidence threshold.
        valid_mask = confidences >= self.confidence_threshold

        return predictions, confidences, valid_mask

    def get_class_name(self, class_idx: int) -> str:
        """Get the human-readable class name for a class index."""
        return self.CLASS_NAMES[class_idx]

    def get_feature_importance(self, top_k: int = 20) -> List[Tuple[str, float]]:
        """
        Get the top-k most important features by gain.

        Args:
            top_k: Number of features to return.

        Returns:
            List of (feature_name, importance) tuples, empty when no
            importance information is available.
        """
        # Guard both "never set" and "loaded without importances".
        if self.feature_importance is None or len(self.feature_importance) == 0:
            return []

        ranked = np.argsort(self.feature_importance)[::-1][:top_k]

        result = []
        for idx in ranked:
            name = self.feature_names[idx] if self.feature_names else f'feature_{idx}'
            result.append((name, self.feature_importance[idx]))

        return result

    def save(self, save_dir: str):
        """
        Save model, scaler, and metadata.

        Args:
            save_dir: Directory to save into (created if missing).

        Raises:
            ValueError: If the model has not been trained yet.
        """
        # Fail with a clear message instead of AttributeError on save_model.
        if self.model is None:
            raise ValueError("Model not trained. Call train() first.")

        save_path = Path(save_dir)
        save_path.mkdir(parents=True, exist_ok=True)

        # Save LightGBM model.
        self.model.save_model(str(save_path / 'lightgbm_model.txt'))

        # Save fitted scaler.
        joblib.dump(self.scaler, str(save_path / 'scaler.joblib'))

        # Save metadata alongside the model.
        metadata = {
            'confidence_threshold': self.confidence_threshold,
            'class_names': self.CLASS_NAMES,
            'feature_names': self.feature_names,
            'feature_importance': (
                self.feature_importance.tolist()
                if self.feature_importance is not None else None
            )
        }
        with open(save_path / 'classifier_metadata.json', 'w') as f:
            json.dump(metadata, f, indent=2)

        print(f"Classifier saved to {save_path}")

    def load(self, load_dir: str):
        """
        Load model, scaler, and metadata.

        Args:
            load_dir: Directory to load from.
        """
        load_path = Path(load_dir)

        # Load LightGBM model.
        self.model = lgb.Booster(model_file=str(load_path / 'lightgbm_model.txt'))

        # Load fitted scaler.
        self.scaler = joblib.load(str(load_path / 'scaler.joblib'))

        # Load metadata.
        with open(load_path / 'classifier_metadata.json', 'r') as f:
            metadata = json.load(f)

        self.confidence_threshold = metadata.get('confidence_threshold', 0.6)
        self.feature_names = metadata.get('feature_names')

        # Fix: a saved null must map back to None, not np.array(None),
        # which is a 0-d object array that defeats the `is None` guard
        # in get_feature_importance and crashes argsort.
        raw_importance = metadata.get('feature_importance')
        self.feature_importance = (
            np.array(raw_importance) if raw_importance is not None else None
        )

        print(f"Classifier loaded from {load_path}")
| 279 |
+
|
| 280 |
+
def get_classifier(config) -> ForgeryClassifier:
    """Factory: build a ``ForgeryClassifier`` from the given configuration."""
    return ForgeryClassifier(config)
src/training/metrics.py
ADDED
|
@@ -0,0 +1,305 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Training utilities and metrics
|
| 3 |
+
Implements Critical Fix #9: Dataset-Aware Metric Computation
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
import numpy as np
|
| 8 |
+
from typing import Dict, List, Optional
|
| 9 |
+
from sklearn.metrics import (
|
| 10 |
+
accuracy_score, f1_score, precision_score, recall_score,
|
| 11 |
+
confusion_matrix
|
| 12 |
+
)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class SegmentationMetrics:
    """
    Running IoU / Dice / precision / recall for binary segmentation.

    Only updated for datasets that ship pixel-level masks
    (Critical Fix #9: dataset-aware metric computation).
    """

    def __init__(self):
        """Start with empty accumulators."""
        self.reset()

    def reset(self):
        """Clear all running totals."""
        self.intersection = 0
        self.union = 0
        self.pred_sum = 0
        self.target_sum = 0
        self.total_samples = 0

    def update(self,
               pred: torch.Tensor,
               target: torch.Tensor,
               has_pixel_mask: bool = True):
        """Accumulate statistics from one batch.

        Args:
            pred: Predicted probabilities (B, 1, H, W).
            target: Ground-truth masks (B, 1, H, W).
            has_pixel_mask: When False the batch is skipped entirely
                (Critical Fix #9).
        """
        if not has_pixel_mask:
            return

        # Hard predictions at the 0.5 operating point.
        hard_pred = (pred > 0.5).float()

        overlap = float((hard_pred * target).sum())
        pred_total = float(hard_pred.sum())
        target_total = float(target.sum())

        self.intersection += overlap
        self.union += pred_total + target_total - overlap
        self.pred_sum += pred_total
        self.target_sum += target_total
        self.total_samples += pred.shape[0]

    def compute(self) -> Dict[str, float]:
        """Compute final metrics from the accumulated totals.

        Returns:
            Dict with 'iou', 'dice', 'precision', 'recall'.
        """
        eps = 1e-8  # avoids division by zero on empty accumulators
        return {
            'iou': self.intersection / (self.union + eps),
            'dice': (2 * self.intersection) / (self.pred_sum + self.target_sum + eps),
            'precision': self.intersection / (self.pred_sum + eps),
            'recall': self.intersection / (self.target_sum + eps),
        }
| 87 |
+
|
| 88 |
+
class ClassificationMetrics:
    """Accumulates forgery-type predictions and scores them at the end."""

    def __init__(self, num_classes: int = 3):
        """
        Args:
            num_classes: Number of forgery types.
        """
        self.num_classes = num_classes
        self.reset()

    def reset(self):
        """Drop all accumulated predictions."""
        self.predictions = []
        self.targets = []
        self.confidences = []

    def update(self,
               pred: np.ndarray,
               target: np.ndarray,
               confidence: Optional[np.ndarray] = None):
        """Append a batch of predictions.

        Args:
            pred: Predicted class indices.
            target: Ground-truth class indices.
            confidence: Optional per-prediction confidences.
        """
        self.predictions += list(pred.tolist())
        self.targets += list(target.tolist())
        if confidence is not None:
            self.confidences += list(confidence.tolist())

    def compute(self) -> Dict[str, float]:
        """Compute final classification metrics.

        Returns:
            Dict with accuracy, macro/weighted F1, macro precision/recall,
            and (when data exists) the confusion matrix as nested lists.
        """
        if not self.predictions:
            # Nothing recorded yet: report zeros rather than failing.
            return {
                'accuracy': 0.0,
                'f1_macro': 0.0,
                'f1_weighted': 0.0,
                'precision': 0.0,
                'recall': 0.0
            }

        y_pred = np.array(self.predictions)
        y_true = np.array(self.targets)

        return {
            'accuracy': accuracy_score(y_true, y_pred),
            'f1_macro': f1_score(y_true, y_pred, average='macro', zero_division=0),
            'f1_weighted': f1_score(y_true, y_pred, average='weighted', zero_division=0),
            'precision': precision_score(y_true, y_pred, average='macro', zero_division=0),
            'recall': recall_score(y_true, y_pred, average='macro', zero_division=0),
            'confusion_matrix': confusion_matrix(
                y_true, y_pred, labels=range(self.num_classes)
            ).tolist(),
        }
| 166 |
+
|
| 167 |
+
class MetricsTracker:
    """Track all metrics during training.

    Wraps a SegmentationMetrics and a ClassificationMetrics instance and
    keeps a per-epoch history of losses and segmentation metrics, split by
    phase ('train' / 'val').
    """

    # Segmentation metric keys mirrored per-phase into self.history.
    _TRACKED_KEYS = ('iou', 'dice', 'precision', 'recall')

    def __init__(self, config):
        """
        Initialize metrics tracker.

        Args:
            config: Configuration object exposing .get(key, default) and
                .should_compute_localization_metrics(dataset_name)
        """
        self.config = config
        self.num_classes = config.get('data.num_classes', 3)

        self.seg_metrics = SegmentationMetrics()
        self.cls_metrics = ClassificationMetrics(self.num_classes)

        # Epoch-indexed history; one list per phase/metric combination.
        self.history = {
            'train_loss': [],
            'val_loss': [],
            'train_iou': [],
            'val_iou': [],
            'train_dice': [],
            'val_dice': [],
            'train_precision': [],
            'val_precision': [],
            'train_recall': [],
            'val_recall': []
        }

    def reset(self):
        """Reset accumulated metrics for a new epoch (history is kept)."""
        self.seg_metrics.reset()
        self.cls_metrics.reset()

    def update_segmentation(self,
                            pred: torch.Tensor,
                            target: torch.Tensor,
                            dataset_name: str):
        """Update segmentation metrics (dataset-aware).

        Asks the config whether this dataset has pixel-level ground truth,
        so localization metrics are only computed where they are meaningful.
        """
        has_pixel_mask = self.config.should_compute_localization_metrics(dataset_name)
        self.seg_metrics.update(pred, target, has_pixel_mask)

    def update_classification(self,
                              pred: np.ndarray,
                              target: np.ndarray,
                              confidence: Optional[np.ndarray] = None):
        """Update classification metrics with a batch of predictions."""
        self.cls_metrics.update(pred, target, confidence)

    def compute_all(self) -> Dict[str, float]:
        """Compute all metrics accumulated since the last reset().

        Classification metrics are included only when data was collected,
        prefixed with 'cls_' to avoid key collisions with segmentation keys.
        """
        seg = self.seg_metrics.compute()

        if len(self.cls_metrics.predictions) > 0:
            cls = self.cls_metrics.compute()
            cls_prefixed = {f'cls_{k}': v for k, v in cls.items()}
            return {**seg, **cls_prefixed}

        return seg

    def log_epoch(self, epoch: int, phase: str, loss: float, metrics: Dict):
        """Append this epoch's loss and metrics to the history.

        Args:
            epoch: Epoch index (not stored; history is positional)
            phase: 'train' or 'val'
            loss: Average loss for the epoch
            metrics: Metric dict from compute_all(); missing keys are skipped
        """
        # FIX: removed unused local `prefix`; replaced the repetitive
        # if-chain with a loop over the tracked metric keys.
        self.history[f'{phase}_loss'].append(loss)

        for key in self._TRACKED_KEYS:
            if key in metrics:
                self.history[f'{phase}_{key}'].append(metrics[key])

    def get_history(self) -> Dict:
        """Get the full training history dict (live reference, not a copy)."""
        return self.history
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
class EarlyStopping:
    """Early stopping to prevent overfitting.

    Call the instance once per epoch with the monitored value; it returns
    True once `patience` consecutive epochs pass without an improvement of
    more than `min_delta`.
    """

    def __init__(self,
                 patience: int = 10,
                 min_delta: float = 0.001,
                 mode: str = 'max'):
        """
        Initialize early stopping.

        Args:
            patience: Number of epochs to wait without improvement
            min_delta: Minimum improvement required to reset the counter
            mode: 'min' for loss-like values, 'max' for score-like metrics
        """
        self.patience = patience
        self.min_delta = min_delta
        self.mode = mode

        self.counter = 0
        self.best_value = None
        self.should_stop = False

    def __call__(self, value: float) -> bool:
        """
        Check if training should stop given the newest metric value.

        Args:
            value: Current metric value

        Returns:
            True if training should stop (sticky once triggered)
        """
        # First observation only seeds the baseline; never stop here.
        if self.best_value is None:
            self.best_value = value
            return False

        # Improvement must exceed min_delta in the monitored direction.
        if self.mode == 'max':
            improved = (value - self.best_value) > self.min_delta
        else:
            improved = (self.best_value - value) > self.min_delta

        if improved:
            self.best_value = value
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.should_stop = True

        return self.should_stop
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
def get_metrics_tracker(config) -> MetricsTracker:
    """Factory: build a MetricsTracker from the given configuration object."""
    tracker = MetricsTracker(config)
    return tracker
|
src/training/trainer.py
ADDED
|
@@ -0,0 +1,450 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Training loop for forgery localization network
|
| 3 |
+
Implements chunked training for RAM constraints
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import torch
|
| 8 |
+
import torch.nn as nn
|
| 9 |
+
import torch.optim as optim
|
| 10 |
+
from torch.utils.data import DataLoader
|
| 11 |
+
from torch.cuda.amp import autocast, GradScaler
|
| 12 |
+
from typing import Dict, Optional, Tuple
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
from tqdm import tqdm
|
| 15 |
+
import json
|
| 16 |
+
import csv
|
| 17 |
+
|
| 18 |
+
from ..models import get_model, get_loss_function
|
| 19 |
+
from ..data import get_dataset
|
| 20 |
+
from .metrics import MetricsTracker, EarlyStopping
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class Trainer:
    """
    Trainer for forgery localization network.

    Supports chunked training for large datasets (DocTamper): a chunk is a
    [chunk_start, chunk_end) ratio slice of the training set, so very large
    datasets can be trained piecewise under RAM constraints.
    """

    def __init__(self, config, dataset_name: str = 'doctamper'):
        """
        Initialize trainer.

        Args:
            config: Configuration object (dotted-key .get access)
            dataset_name: Dataset to train on ('doctamper', 'fcd', 'scd', ...)
        """
        self.config = config
        self.dataset_name = dataset_name

        # Device setup: use CUDA only if available AND requested by config.
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() and config.get('system.device') == 'cuda'
            else 'cpu'
        )
        print(f"Training on: {self.device}")

        # Initialize model
        self.model = get_model(config).to(self.device)

        # Loss function (dataset-aware)
        self.criterion = get_loss_function(config)

        # Optimizer
        lr = config.get('training.learning_rate', 0.001)
        weight_decay = config.get('training.weight_decay', 0.0001)
        self.optimizer = optim.AdamW(
            self.model.parameters(),
            lr=lr,
            weight_decay=weight_decay
        )

        # Learning rate scheduler.
        # NOTE(review): warmup_epochs only shortens the cosine restart period
        # here; no actual LR warmup phase is implemented.
        epochs = config.get('training.epochs', 50)
        warmup_epochs = config.get('training.scheduler.warmup_epochs', 5)
        min_lr = config.get('training.scheduler.min_lr', 1e-5)

        self.scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
            self.optimizer,
            T_0=epochs - warmup_epochs,
            T_mult=1,
            eta_min=min_lr
        )

        # Mixed precision training.
        # FIX: enable AMP only on CUDA -- GradScaler.scale() requires the
        # loss tensor to live on GPU, so an always-enabled scaler crashes
        # CPU training runs.
        self.use_amp = self.device.type == 'cuda'
        self.scaler = GradScaler(enabled=self.use_amp)

        # Metrics
        self.metrics_tracker = MetricsTracker(config)

        # Early stopping (monitors val Dice in train(); mode defaults to 'max')
        patience = config.get('training.early_stopping.patience', 10)
        min_delta = config.get('training.early_stopping.min_delta', 0.001)
        self.early_stopping = EarlyStopping(patience=patience, min_delta=min_delta)

        # Output directories
        self.checkpoint_dir = Path(config.get('outputs.checkpoints', 'outputs/checkpoints'))
        self.log_dir = Path(config.get('outputs.logs', 'outputs/logs'))
        self.checkpoint_dir.mkdir(parents=True, exist_ok=True)
        self.log_dir.mkdir(parents=True, exist_ok=True)

        # Training state
        self.current_epoch = 0
        self.best_metric = 0.0

    def create_dataloaders(self,
                           chunk_start: float = 0.0,
                           chunk_end: float = 1.0) -> Tuple[DataLoader, DataLoader]:
        """
        Create train and validation dataloaders.

        Args:
            chunk_start: Start ratio for chunked training (DocTamper only)
            chunk_end: End ratio for chunked training (DocTamper only)

        Returns:
            Train and validation dataloaders
        """
        batch_size = self.config.get('data.batch_size', 8)
        num_workers = self.config.get('system.num_workers', 4)

        # Training dataset (with chunking for DocTamper)
        if self.dataset_name == 'doctamper':
            train_dataset = get_dataset(
                self.config,
                self.dataset_name,
                split='train',
                chunk_start=chunk_start,
                chunk_end=chunk_end
            )
        else:
            train_dataset = get_dataset(
                self.config,
                self.dataset_name,
                split='train'
            )

        # Validation dataset (always full)
        # For FCD and SCD, validate on DocTamper TestingSet
        if self.dataset_name in ['fcd', 'scd']:
            val_dataset = get_dataset(
                self.config,
                'doctamper',  # Use DocTamper for validation
                split='val'
            )
        else:
            val_dataset = get_dataset(
                self.config,
                self.dataset_name,
                split='val' if self.dataset_name in ['doctamper', 'receipts'] else 'test'
            )

        train_loader = DataLoader(
            train_dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=num_workers,
            pin_memory=self.config.get('system.pin_memory', True),
            drop_last=True  # keep batch stats stable; avoids size-1 batches
        )

        val_loader = DataLoader(
            val_dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=num_workers,
            pin_memory=True
        )

        return train_loader, val_loader

    def train_epoch(self, dataloader: DataLoader) -> Tuple[float, Dict]:
        """
        Train for one epoch.

        Args:
            dataloader: Training dataloader yielding (images, masks, metadata)

        Returns:
            Average loss and metrics dict
        """
        self.model.train()
        self.metrics_tracker.reset()

        total_loss = 0.0
        num_batches = 0

        pbar = tqdm(dataloader, desc=f"Epoch {self.current_epoch} [Train]")

        for batch_idx, (images, masks, metadata) in enumerate(pbar):
            images = images.to(self.device)
            masks = masks.to(self.device)

            # Forward pass with mixed precision (disabled on CPU -- see __init__)
            self.optimizer.zero_grad()

            with autocast(enabled=self.use_amp):
                outputs, _ = self.model(images)

                # Dataset-aware loss: datasets without pixel masks get a
                # different loss composition inside combined_loss.
                has_pixel_mask = self.config.has_pixel_mask(self.dataset_name)
                losses = self.criterion.combined_loss(outputs, masks, has_pixel_mask)

            # Backward pass with gradient scaling (no-op scaler when AMP off)
            self.scaler.scale(losses['total']).backward()
            self.scaler.step(self.optimizer)
            self.scaler.update()

            # Update metrics
            with torch.no_grad():
                probs = torch.sigmoid(outputs)
                self.metrics_tracker.update_segmentation(
                    probs, masks, self.dataset_name
                )

            total_loss += losses['total'].item()
            num_batches += 1

            # Update progress bar
            pbar.set_postfix({
                'loss': f"{losses['total'].item():.4f}",
                'bce': f"{losses['bce'].item():.4f}"
            })

        avg_loss = total_loss / num_batches
        metrics = self.metrics_tracker.compute_all()

        return avg_loss, metrics

    def validate(self, dataloader: DataLoader) -> Tuple[float, Dict]:
        """
        Validate model.

        Args:
            dataloader: Validation dataloader

        Returns:
            Average loss and metrics dict
        """
        self.model.eval()
        self.metrics_tracker.reset()

        total_loss = 0.0
        num_batches = 0

        pbar = tqdm(dataloader, desc=f"Epoch {self.current_epoch} [Val]")

        with torch.no_grad():
            for images, masks, metadata in pbar:
                images = images.to(self.device)
                masks = masks.to(self.device)

                # Forward pass
                outputs, _ = self.model(images)

                # Dataset-aware loss
                has_pixel_mask = self.config.has_pixel_mask(self.dataset_name)
                losses = self.criterion.combined_loss(outputs, masks, has_pixel_mask)

                # Update metrics
                probs = torch.sigmoid(outputs)
                self.metrics_tracker.update_segmentation(
                    probs, masks, self.dataset_name
                )

                total_loss += losses['total'].item()
                num_batches += 1

                pbar.set_postfix({
                    'loss': f"{losses['total'].item():.4f}"
                })

        avg_loss = total_loss / num_batches
        metrics = self.metrics_tracker.compute_all()

        return avg_loss, metrics

    def save_checkpoint(self,
                        filename: str,
                        is_best: bool = False,
                        chunk_id: Optional[int] = None):
        """Save model checkpoint (and a 'best_<dataset>.pth' copy if is_best)."""
        checkpoint = {
            'epoch': self.current_epoch,
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'scheduler_state_dict': self.scheduler.state_dict(),
            'best_metric': self.best_metric,
            'dataset': self.dataset_name,
            'chunk_id': chunk_id
        }

        path = self.checkpoint_dir / filename
        torch.save(checkpoint, path)
        print(f"Saved checkpoint: {path}")

        if is_best:
            best_path = self.checkpoint_dir / f'best_{self.dataset_name}.pth'
            torch.save(checkpoint, best_path)
            print(f"Saved best model: {best_path}")

    def load_checkpoint(self, filename: str, reset_epoch: bool = False):
        """
        Load model checkpoint.

        Args:
            filename: Checkpoint filename (relative to checkpoint_dir)
            reset_epoch: If True, reset epoch counter to 0 (useful for chunked training)

        Returns:
            True on success, False when the checkpoint does not exist
        """
        path = self.checkpoint_dir / filename

        if not path.exists():
            print(f"Checkpoint not found: {path}")
            return False

        checkpoint = torch.load(path, map_location=self.device)

        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.scheduler.load_state_dict(checkpoint['scheduler_state_dict'])

        if reset_epoch:
            self.current_epoch = 0
            print(f"Loaded checkpoint: {path} (epoch counter reset to 0)")
        else:
            self.current_epoch = checkpoint['epoch'] + 1  # Continue from next epoch
            print(f"Loaded checkpoint: {path} (resuming from epoch {self.current_epoch})")

        self.best_metric = checkpoint.get('best_metric', 0.0)

        return True

    def train(self,
              epochs: Optional[int] = None,
              chunk_start: float = 0.0,
              chunk_end: float = 1.0,
              chunk_id: Optional[int] = None,
              resume_from: Optional[str] = None):
        """
        Main training loop.

        Args:
            epochs: Number of epochs (None uses config)
            chunk_start: Start ratio for chunked training
            chunk_end: End ratio for chunked training
            chunk_id: Chunk identifier for logging/checkpoint names
            resume_from: Checkpoint to resume from

        Returns:
            Training history dict from the metrics tracker
        """
        if epochs is None:
            epochs = self.config.get('training.epochs', 50)

        # Resume if specified
        if resume_from:
            self.load_checkpoint(resume_from)

        # Create dataloaders
        train_loader, val_loader = self.create_dataloaders(chunk_start, chunk_end)

        print(f"\n{'='*60}")
        print(f"Training: {self.dataset_name}")
        if chunk_id is not None:
            print(f"Chunk: {chunk_id} [{chunk_start*100:.0f}% - {chunk_end*100:.0f}%]")
        print(f"Epochs: {epochs}")
        print(f"Train samples: {len(train_loader.dataset)}")
        print(f"Val samples: {len(val_loader.dataset)}")
        print(f"{'='*60}\n")

        # Training log file (header written fresh; epochs appended below)
        log_file = self.log_dir / f'{self.dataset_name}_chunk{chunk_id or 0}_log.csv'
        with open(log_file, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(['epoch', 'train_loss', 'val_loss',
                             'train_iou', 'val_iou', 'train_dice', 'val_dice',
                             'train_precision', 'val_precision',
                             'train_recall', 'val_recall', 'lr'])

        for epoch in range(self.current_epoch, epochs):
            self.current_epoch = epoch

            # Train
            train_loss, train_metrics = self.train_epoch(train_loader)

            # Validate
            val_loss, val_metrics = self.validate(val_loader)

            # Update scheduler
            self.scheduler.step()
            current_lr = self.optimizer.param_groups[0]['lr']

            # Log metrics
            self.metrics_tracker.log_epoch(epoch, 'train', train_loss, train_metrics)
            self.metrics_tracker.log_epoch(epoch, 'val', val_loss, val_metrics)

            # Log to file
            with open(log_file, 'a', newline='') as f:
                writer = csv.writer(f)
                writer.writerow([
                    epoch,
                    f"{train_loss:.4f}",
                    f"{val_loss:.4f}",
                    f"{train_metrics.get('iou', 0):.4f}",
                    f"{val_metrics.get('iou', 0):.4f}",
                    f"{train_metrics.get('dice', 0):.4f}",
                    f"{val_metrics.get('dice', 0):.4f}",
                    f"{train_metrics.get('precision', 0):.4f}",
                    f"{val_metrics.get('precision', 0):.4f}",
                    f"{train_metrics.get('recall', 0):.4f}",
                    f"{val_metrics.get('recall', 0):.4f}",
                    f"{current_lr:.6f}"
                ])

            # Print summary
            print(f"\nEpoch {epoch}/{epochs-1}")
            print(f"  Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")
            print(f"  Train IoU: {train_metrics.get('iou', 0):.4f} | Val IoU: {val_metrics.get('iou', 0):.4f}")
            print(f"  Train Dice: {train_metrics.get('dice', 0):.4f} | Val Dice: {val_metrics.get('dice', 0):.4f}")
            print(f"  LR: {current_lr:.6f}")

            # Save periodic checkpoints
            if self.config.get('training.checkpoint.save_every', 5) > 0:
                if (epoch + 1) % self.config.get('training.checkpoint.save_every', 5) == 0:
                    self.save_checkpoint(
                        f'{self.dataset_name}_chunk{chunk_id or 0}_epoch{epoch}.pth',
                        chunk_id=chunk_id
                    )

            # Check for best model (monitors validation Dice)
            monitor_metric = val_metrics.get('dice', 0)
            if monitor_metric > self.best_metric:
                self.best_metric = monitor_metric
                self.save_checkpoint(
                    f'{self.dataset_name}_chunk{chunk_id or 0}_best.pth',
                    is_best=True,
                    chunk_id=chunk_id
                )

            # Early stopping
            if self.early_stopping(monitor_metric):
                print(f"\nEarly stopping triggered at epoch {epoch}")
                break

        # Save final checkpoint
        self.save_checkpoint(
            f'{self.dataset_name}_chunk{chunk_id or 0}_final.pth',
            chunk_id=chunk_id
        )

        # Save training history
        history_file = self.log_dir / f'{self.dataset_name}_chunk{chunk_id or 0}_history.json'
        with open(history_file, 'w') as f:
            json.dump(self.metrics_tracker.get_history(), f, indent=2)

        print(f"\nTraining complete!")
        print(f"Best Dice: {self.best_metric:.4f}")

        return self.metrics_tracker.get_history()
|
| 446 |
+
|
| 447 |
+
|
| 448 |
+
def get_trainer(config, dataset_name: str = 'doctamper') -> Trainer:
    """Factory: build a Trainer for the given config and dataset name."""
    trainer = Trainer(config, dataset_name)
    return trainer
|
src/utils/__init__.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Utilities module"""
|
| 2 |
+
|
| 3 |
+
from .plotting import (
|
| 4 |
+
plot_training_curves,
|
| 5 |
+
plot_confusion_matrix,
|
| 6 |
+
plot_feature_importance,
|
| 7 |
+
plot_dataset_comparison,
|
| 8 |
+
plot_chunked_training_progress,
|
| 9 |
+
generate_training_report
|
| 10 |
+
)
|
| 11 |
+
|
| 12 |
+
from .export import (
|
| 13 |
+
export_to_onnx,
|
| 14 |
+
export_to_torchscript,
|
| 15 |
+
quantize_model
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
__all__ = [
|
| 19 |
+
'plot_training_curves',
|
| 20 |
+
'plot_confusion_matrix',
|
| 21 |
+
'plot_feature_importance',
|
| 22 |
+
'plot_dataset_comparison',
|
| 23 |
+
'plot_chunked_training_progress',
|
| 24 |
+
'generate_training_report',
|
| 25 |
+
'export_to_onnx',
|
| 26 |
+
'export_to_torchscript',
|
| 27 |
+
'quantize_model'
|
| 28 |
+
]
|
src/utils/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (601 Bytes). View file
|
|
|