astrosbd committed · Commit 5c783e4 · 0 Parent(s)

Initial commit

.gitattributes ADDED
@@ -0,0 +1,36 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,51 @@
+ # Python
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+
+ # Virtual Environment
+ venv/
+ env/
+ ENV/
+
+ # IDE
+ .idea/
+ .vscode/
+ *.swp
+ *.swo
+
+ # OS
+ .DS_Store
+ Thumbs.db
+
+ # Logs
+ *.log
+
+ # Model files
+ *.pth
+ *.pt
+ *.ckpt
+ *.bin
+
+ # Config files
+ *.yaml
+ *.yml
+ !configs/*.yaml
+ !configs/*.yml
README.md ADDED
@@ -0,0 +1,60 @@
+ ---
+ title: Car Damage Insurance Fraud Detector
+ emoji: 🚗
+ colorFrom: gray
+ colorTo: pink
+ sdk: gradio
+ sdk_version: 3.50.0
+ app_file: app.py
+ pinned: false
+ license: apache-2.0
+ ---
+
+ # Car Damage Insurance Fraud Detector
+
+ An AI-powered system that detects car damage and potential insurance fraud using deep learning models.
+
+ ## Features
+
+ - Damage Detection: Identifies and localizes car damage using Detectron2
+ - Deepfake Detection: Analyzes images for potential manipulation
+ - User-friendly Interface: Built with Gradio for easy interaction
+ - Multi-device Support: Works on CPU, CUDA, and MPS (Apple Silicon)
+
+ ## Requirements
+
+ - Python 3.8+
+ - PyTorch
+ - OpenCV
+ - Gradio
+ - Detectron2 (optional, not available for macOS)
+
+ ## Installation
+
+ 1. Clone the repository
+ 2. Install dependencies:
+ ```bash
+ pip install -r requirements.txt
+ ```
+
+ ## Usage
+
+ 1. Run the application:
+ ```bash
+ python app.py
+ ```
+
+ 2. Open your browser and navigate to the provided local URL
+
+ ## Model Requirements
+
+ - Damage detection model (Detectron2 format)
+ - Deepfake detection model (custom format)
+
+ ## License
+
+ Apache 2.0
+
+ ## Note
+
+ This application requires pre-trained models for both damage detection and deepfake detection. Make sure to have the appropriate model files in the correct locations before running the application.
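Note that the installation step above references a requirements.txt that is not part of this initial commit. A minimal sketch, inferred from the imports in app.py and the helper modules (package names and the pinned gradio version are assumptions based on the README metadata, not a file from this commit):

```text
# Hypothetical requirements.txt - inferred from imports, not part of this commit
torch
torchvision
opencv-python
numpy
Pillow
gradio==3.50.0
scikit-learn
PyYAML
python-box
# detectron2 is optional and installed separately (no macOS wheels)
```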
app.py ADDED
@@ -0,0 +1,551 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ import os
+ import sys
+ import time
+ import cv2
+ import torch
+ import numpy as np
+ import gradio as gr
+ from PIL import Image
+ from torchvision import transforms
+
+ # Add current directory to path
+ if not os.getcwd() in sys.path:
+     sys.path.append(os.getcwd())
+
+ # Detectron2 imports - wrapped in try-except to make them optional
+ try:
+     from detectron2.engine import DefaultPredictor
+     from detectron2.config import get_cfg
+     from detectron2.utils.visualizer import Visualizer, ColorMode
+     from detectron2 import model_zoo
+     DETECTRON2_AVAILABLE = True
+ except ImportError:
+     print("Warning: Detectron2 is not installed. Damage detection will not be available.")
+     DETECTRON2_AVAILABLE = False
+
+ # Custom model imports - also optional
+ try:
+     from configs.get_config import load_config
+     from models import *
+     MODELS_IMPORTED = True
+ except ImportError:
+     print("Warning: Custom models couldn't be imported. Only damage detection will work.")
+     MODELS_IMPORTED = False
+
+ def setup_device(device_str):
+     """Set up the computation device based on user input and availability"""
+     if device_str == 'auto':
+         if torch.cuda.is_available():
+             return torch.device('cuda:0')
+         elif hasattr(torch, 'backends') and hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
+             return torch.device('mps')
+         else:
+             return torch.device('cpu')
+     elif device_str == 'cuda' and torch.cuda.is_available():
+         return torch.device('cuda:0')
+     elif device_str == 'mps' and hasattr(torch, 'backends') and hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
+         return torch.device('mps')
+     else:
+         print(f"Warning: Device {device_str} not available, using CPU instead.")
+         return torch.device('cpu')
+
+ def setup_damage_detector(model_path, threshold=0.7):
+     """Set up the damage detection model using Detectron2"""
+     if not DETECTRON2_AVAILABLE:
+         print("Detectron2 is not installed. Cannot set up damage detector.")
+         return None, None
+
+     if model_path is None or not os.path.exists(model_path):
+         print("No damage model specified or file not found. Skipping damage detection.")
+         return None, None
+
+     cfg = get_cfg()
+     cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
+     cfg.MODEL.WEIGHTS = model_path
+     cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # Only one class (damage)
+     cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = threshold
+
+     # Explicitly set to use CPU if on Mac (MPS); guard the attribute for older torch
+     if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
+         cfg.MODEL.DEVICE = "cpu"
+         print("Mac MPS detected - forcing Detectron2 to use CPU")
+
+     try:
+         predictor = DefaultPredictor(cfg)
+         return predictor, cfg
+     except Exception as e:
+         print(f"Error setting up damage detector: {e}")
+         return None, cfg
+
+ def load_deepfake_model(model_path, cfg_path, device):
+     """Load the deepfake detection model"""
+     if not MODELS_IMPORTED:
+         print("Custom models module not imported. Cannot load deepfake model.")
+         return None, None
+
+     if model_path is None or not os.path.exists(model_path):
+         print("No deepfake model specified or file not found. Skipping deepfake detection.")
+         return None, None
+
+     if cfg_path is None or not os.path.exists(cfg_path):
+         print("No deepfake config specified or file not found. Skipping deepfake detection.")
+         return None, None
+
+     try:
+         # Load config
+         cfg = load_config(cfg_path)
+
+         # Build model
+         model = build_model(cfg.MODEL, MODELS)
+
+         # Load weights
+         print(f"Loading deepfake model from: {model_path}")
+         checkpoint = torch.load(model_path, map_location='cpu')
+
+         if isinstance(checkpoint, dict) and 'state_dict' in checkpoint:
+             model.load_state_dict(checkpoint['state_dict'])
+         else:
+             model.load_state_dict(checkpoint)
+
+         # Move model to device and set to evaluation mode
+         model = model.to(device)
+         if hasattr(cfg.MODEL, 'precision') and cfg.MODEL.precision == 'fp64':
+             model = model.to(torch.float64)
+         model.eval()
+
+         return model, cfg
+     except Exception as e:
+         print(f"Error loading deepfake model: {e}")
+         import traceback
+         traceback.print_exc()
+         return None, None
+
+ def preprocess_for_deepfake(image, cfg, device):
+     """Preprocess an image for deepfake detection"""
+     try:
+         # Convert to RGB if needed
+         if len(image.shape) == 3 and image.shape[2] == 3:
+             if image.dtype != np.uint8:
+                 image = (image * 255).astype(np.uint8)
+             rgb_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+         else:
+             rgb_img = image
+
+         # Resize
+         img_resized = cv2.resize(rgb_img, (cfg.DATASET.IMAGE_SIZE[0], cfg.DATASET.IMAGE_SIZE[1]))
+
+         # Convert to PIL and apply transforms
+         transform = transforms.Compose([
+             transforms.ToTensor(),
+             transforms.Normalize(
+                 mean=cfg.DATASET.TRANSFORM.normalize.mean,
+                 std=cfg.DATASET.TRANSFORM.normalize.std
+             )
+         ])
+
+         img_tensor = transform(Image.fromarray(img_resized)).unsqueeze(0)  # Add batch dimension
+         img_tensor = img_tensor.to(device)
+
+         # Convert to correct precision
+         if hasattr(cfg.MODEL, 'precision') and cfg.MODEL.precision == 'fp64':
+             img_tensor = img_tensor.to(torch.float64)
+
+         return img_tensor
+     except Exception as e:
+         print(f"Error preprocessing image for deepfake detection: {e}")
+         import traceback
+         traceback.print_exc()
+         return None
+
+ def detect_damage(img, damage_detector):
+     """Detect damage in an image"""
+     try:
+         if img is None:
+             raise ValueError("Invalid image")
+
+         # If no damage detector available, return the whole image as region
+         if damage_detector is None:
+             print("No damage detector available. Using whole image as region.")
+             h, w = img.shape[:2]
+             damage_regions = [{
+                 "box": (0, 0, w, h),
+                 "score": 1.0,
+                 "mask": None
+             }]
+             return img, None, damage_regions
+
+         # Run inference
+         outputs = damage_detector(img)
+
+         # Get damage regions
+         instances = outputs["instances"].to("cpu")
+         boxes = instances.pred_boxes.tensor.numpy() if instances.has("pred_boxes") else []
+         scores = instances.scores.numpy() if instances.has("scores") else []
+         masks = instances.pred_masks.numpy() if instances.has("pred_masks") else []
+
+         damage_regions = []
+         for i in range(len(boxes)):
+             x1, y1, x2, y2 = map(int, boxes[i])
+             damage_regions.append({
+                 "box": (x1, y1, x2, y2),
+                 "score": float(scores[i]),
+                 "mask": masks[i] if len(masks) > i else None
+             })
+
+         if not damage_regions:
+             print("No damage detected. Using whole image.")
+             h, w = img.shape[:2]
+             damage_regions = [{
+                 "box": (0, 0, w, h),
+                 "score": 1.0,
+                 "mask": None
+             }]
+
+         return img, outputs, damage_regions
+     except Exception as e:
+         print(f"Error detecting damage: {e}")
+         # If error occurs, return the whole image as region
+         if 'img' in locals() and img is not None:
+             h, w = img.shape[:2]
+             damage_regions = [{
+                 "box": (0, 0, w, h),
+                 "score": 1.0,
+                 "mask": None
+             }]
+             return img, None, damage_regions
+         return None, None, []
+
+ def check_deepfake(image, damage_regions, deepfake_model, deepfake_cfg, device, threshold=0.5):
+     """Check if damage regions are deepfakes"""
+     results = []
+
+     if deepfake_model is None:
+         print("No deepfake model available. Skipping deepfake detection.")
+         return []
+
+     try:
+         # If no damage regions, check the entire image
+         if not damage_regions:
+             img_tensor = preprocess_for_deepfake(image, deepfake_cfg, device)
+             if img_tensor is None:
+                 return []
+
+             # Run inference
+             with torch.no_grad():
+                 outputs = deepfake_model(img_tensor)
+
+             # Extract outputs
+             if isinstance(outputs, list):
+                 outputs = outputs[0]
+
+             if isinstance(outputs, dict) and 'cls' in outputs:
+                 cls_outputs = outputs['cls']
+                 cls_prob = cls_outputs.sigmoid().cpu().numpy()
+             else:
+                 # Assuming the output is directly the classification probability
+                 cls_prob = outputs.sigmoid().cpu().numpy() if hasattr(outputs, 'sigmoid') else outputs.cpu().numpy()
+
+             if cls_prob.size > 0:
+                 is_fake = cls_prob[0][0] > threshold if cls_prob.ndim > 1 else cls_prob[0] > threshold
+                 confidence = cls_prob[0][0] if cls_prob.ndim > 1 else cls_prob[0]
+
+                 results.append({
+                     "region": "full_image",
+                     "deepfake_prob": float(confidence),
+                     "is_fake": bool(is_fake)
+                 })
+
+             return results
+
+         # Process each damage region
+         for i, region in enumerate(damage_regions):
+             x1, y1, x2, y2 = region["box"]
+             # Ensure coordinates are within image bounds
+             x1, y1 = max(0, x1), max(0, y1)
+             x2, y2 = min(image.shape[1], x2), min(image.shape[0], y2)
+
+             # Extract region and check if it's a deepfake
+             if x2 > x1 and y2 > y1:
+                 # Get ROI
+                 roi = image[y1:y2, x1:x2]
+
+                 # Preprocess
+                 img_tensor = preprocess_for_deepfake(roi, deepfake_cfg, device)
+                 if img_tensor is None:
+                     continue
+
+                 # Run inference
+                 with torch.no_grad():
+                     outputs = deepfake_model(img_tensor)
+
+                 # Extract outputs
+                 if isinstance(outputs, list):
+                     outputs = outputs[0]
+
+                 if isinstance(outputs, dict) and 'cls' in outputs:
+                     cls_outputs = outputs['cls']
+                     cls_prob = cls_outputs.sigmoid().cpu().numpy()
+                 else:
+                     # Assuming the output is directly the classification probability
+                     cls_prob = outputs.sigmoid().cpu().numpy() if hasattr(outputs, 'sigmoid') else outputs.cpu().numpy()
+
+                 if cls_prob.size > 0:
+                     is_fake = cls_prob[0][0] > threshold if cls_prob.ndim > 1 else cls_prob[0] > threshold
+                     confidence = cls_prob[0][0] if cls_prob.ndim > 1 else cls_prob[0]
+
+                     results.append({
+                         "region_id": i,
+                         "box": (x1, y1, x2, y2),
+                         "deepfake_prob": float(confidence),
+                         "is_fake": bool(is_fake)
+                     })
+
+         return results
+     except Exception as e:
+         print(f"Error in deepfake detection: {e}")
+         import traceback
+         traceback.print_exc()
+         return []
+
+ def visualize_results(image, damage_outputs, deepfake_results, damage_threshold):
+     """Create visualization of damage detection and deepfake verification"""
+     try:
+         # Create a copy for visualization
+         img_copy = image.copy()
+
+         # Draw damage detection results
+         if damage_outputs is not None and DETECTRON2_AVAILABLE:
+             try:
+                 v = Visualizer(img_copy[:, :, ::-1], scale=1.0, instance_mode=ColorMode.IMAGE_BW)
+                 v = v.draw_instance_predictions(damage_outputs["instances"].to("cpu"))
+                 result_img = v.get_image()[:, :, ::-1]
+
+                 # Convert to a standard numpy array to ensure compatibility with OpenCV
+                 result_img = np.array(result_img, dtype=np.uint8)
+             except Exception as e:
+                 print(f"Error visualizing damage detection: {e}")
+                 result_img = img_copy
+         else:
+             result_img = img_copy
+
+         # Add deepfake detection results
+         for result in deepfake_results:
+             try:
+                 if "box" in result:
+                     x1, y1, x2, y2 = result["box"]
+                     fake_prob = result["deepfake_prob"]
+                     is_fake = result["is_fake"]
+                     region_id = result.get("region_id", 0)
+
+                     # Text for the region
+                     text = f"R{region_id}: {'FAKE' if is_fake else 'REAL'} ({fake_prob*100:.1f}%)"
+
+                     # Different colors for fake/real
+                     color = (0, 0, 255) if is_fake else (0, 255, 0)  # Red for fake, green for real
+
+                     # Ensure we have a standard numpy array
+                     if not isinstance(result_img, np.ndarray):
+                         result_img = np.array(result_img, dtype=np.uint8)
+
+                     # Draw rectangle and text
+                     cv2.rectangle(result_img, (x1, y1), (x2, y2), color, 2)
+                     cv2.putText(result_img, text, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
+                 elif "region" in result and result["region"] == "full_image":
+                     fake_prob = result["deepfake_prob"]
+                     is_fake = result["is_fake"]
+
+                     # Text for the whole image
+                     text = f"Image: {'FAKE' if is_fake else 'REAL'} ({fake_prob*100:.1f}%)"
+
+                     # Different colors for fake/real
+                     color = (0, 0, 255) if is_fake else (0, 255, 0)  # Red for fake, green for real
+
+                     # Ensure we have a standard numpy array
+                     if not isinstance(result_img, np.ndarray):
+                         result_img = np.array(result_img, dtype=np.uint8)
+
+                     # Draw text
+                     cv2.putText(result_img, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)
+             except Exception as e:
+                 print(f"Error drawing result {result}: {e}")
+
+         return result_img
+     except Exception as e:
+         print(f"Error visualizing results: {e}")
+         import traceback
+         traceback.print_exc()
+         return np.array(image, dtype=np.uint8)  # Return the original image as a numpy array
+
+ def process_image(input_image, damage_model_path, deepfake_model_path, deepfake_cfg_path,
+                   damage_threshold, deepfake_threshold, skip_damage, device_str):
+     """Process an image through the car damage and deepfake detection pipeline"""
+     progress_info = []
+
+     # Convert Gradio image to numpy array
+     if isinstance(input_image, dict) and "path" in input_image:
+         img = cv2.imread(input_image["path"])
+     elif isinstance(input_image, str):
+         img = cv2.imread(input_image)
+     elif isinstance(input_image, np.ndarray):
+         # Make a copy to avoid modifying the original
+         img = input_image.copy()
+         # Convert from RGB to BGR (OpenCV format)
+         if len(img.shape) == 3 and img.shape[2] == 3:
+             img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
+     else:
+         return None, "Error: Unsupported image format"
+
+     if img is None:
+         return None, "Error: Could not read the image"
+
+     # Progress update
+     progress_info.append("Image loaded successfully")
+
+     # Setup device
+     device = setup_device(device_str)
+     progress_info.append(f"Using device: {device}")
+
+     # Initialize models
+     damage_detector = None
+     deepfake_model = None
+     deepfake_cfg = None
+
+     # Setup damage detector if not skipped
+     if not skip_damage and damage_model_path:
+         progress_info.append("Setting up damage detector...")
+         damage_detector, detector_cfg = setup_damage_detector(damage_model_path, float(damage_threshold))
+         if damage_detector is None:
+             progress_info.append("Failed to initialize damage detector")
+         else:
+             progress_info.append("Damage detector initialized successfully")
+
+     # Setup deepfake detector
+     if deepfake_model_path and deepfake_cfg_path:
+         progress_info.append("Setting up deepfake detector...")
+         deepfake_model, deepfake_cfg = load_deepfake_model(deepfake_model_path, deepfake_cfg_path, device)
+         if deepfake_model is None:
+             progress_info.append("Failed to initialize deepfake detector")
+         else:
+             progress_info.append("Deepfake detector initialized successfully")
+
+     # Ensure at least one detector is working
+     if damage_detector is None and deepfake_model is None:
+         return None, "Error: Neither damage nor deepfake detector is available"
+
+     # Step 1: Detect damage or use whole image
+     progress_info.append("Detecting damage regions...")
+     start_time = time.time()
+     img, damage_outputs, damage_regions = detect_damage(img, damage_detector)
+     damage_time = time.time() - start_time
+
+     if img is None:
+         return None, "Error: Failed to process image"
+
+     # Print damage detection results
+     if damage_detector is not None and damage_regions:
+         progress_info.append(f"Detected {len(damage_regions)} damage regions in {damage_time:.3f} seconds")
+     else:
+         progress_info.append("Using the whole image for analysis")
+
+     # Step 2: Check if damage is deepfake
+     deepfake_results = []
+     if deepfake_model is not None:
+         progress_info.append("Performing deepfake detection...")
+         start_time = time.time()
+         deepfake_results = check_deepfake(
+             img, damage_regions, deepfake_model, deepfake_cfg, device, float(deepfake_threshold)
+         )
+         deepfake_time = time.time() - start_time
+
+         if deepfake_results:
+             progress_info.append(f"Deepfake detection completed in {deepfake_time:.3f} seconds")
+
+             # Generate report
+             for result in deepfake_results:
+                 if "region_id" in result:
+                     region_id = result["region_id"]
+                     fake_prob = result["deepfake_prob"]
+                     is_fake = result["is_fake"]
+                     progress_info.append(f"Region {region_id}: {'FAKE' if is_fake else 'REAL'} (Probability: {fake_prob*100:.2f}%)")
+                 elif "region" in result and result["region"] == "full_image":
+                     fake_prob = result["deepfake_prob"]
+                     is_fake = result["is_fake"]
+                     progress_info.append(f"Whole image: {'FAKE' if is_fake else 'REAL'} (Probability: {fake_prob*100:.2f}%)")
+         else:
+             progress_info.append("No deepfake detection results")
+
+     # Step 3: Visualize final results
+     progress_info.append("Generating visualization...")
+     result_img = visualize_results(img, damage_outputs, deepfake_results, float(damage_threshold))
+
+     # Convert back to RGB for Gradio
+     if len(result_img.shape) == 3 and result_img.shape[2] == 3:
+         result_img = cv2.cvtColor(result_img, cv2.COLOR_BGR2RGB)
+
+     progress_info.append("Processing complete!")
+
+     return result_img, "\n".join(progress_info)
+
+ def create_gradio_interface():
+     with gr.Blocks(title="Car Damage & Deepfake Detection") as app:
+         gr.Markdown("# Car Damage Detection & Deepfake Verification")
+         gr.Markdown("Upload an image to detect car damage and check if it's a deepfake")
+
+         with gr.Tab("Basic Interface"):
+             with gr.Row():
+                 with gr.Column(scale=1):
+                     input_image = gr.Image(type="numpy", label="Input Image")
+
+                     # Simple controls
+                     skip_damage = gr.Checkbox(label="Skip Damage Detection", value=False)
+                     damage_threshold = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.05,
+                                                  label="Damage Detection Threshold")
+                     deepfake_threshold = gr.Slider(minimum=0.1, maximum=1.0, value=0.5, step=0.05,
+                                                    label="Deepfake Detection Threshold")
+                     device = gr.Dropdown(choices=["auto", "cuda", "cpu", "mps"], value="auto",
+                                          label="Computation Device")
+
+                     process_btn = gr.Button("Process Image", variant="primary")
+
+                 with gr.Column(scale=1):
+                     output_image = gr.Image(type="numpy", label="Result")
+                     output_text = gr.Textbox(label="Detection Results", lines=10)
+
+         with gr.Tab("Advanced Settings"):
+             with gr.Row():
+                 with gr.Column():
+                     damage_model_path = gr.Textbox(label="Damage Model Path",
+                                                    placeholder="Path to damage detection model (.pth)")
+                     deepfake_model_path = gr.Textbox(label="Deepfake Model Path",
+                                                      placeholder="Path to deepfake detection model (.pth)")
+                     deepfake_cfg_path = gr.Textbox(label="Deepfake Config Path",
+                                                    placeholder="Path to deepfake model config (.yaml)")
+
+         # Connect the process function
+         process_btn.click(
+             fn=process_image,
+             inputs=[
+                 input_image,
+                 damage_model_path,
+                 deepfake_model_path,
+                 deepfake_cfg_path,
+                 damage_threshold,
+                 deepfake_threshold,
+                 skip_damage,
+                 device
+             ],
+             outputs=[output_image, output_text]
+         )
+
+         # Examples
+         gr.Markdown("## Examples")
+         gr.Markdown("Note: Examples will only work if you have the appropriate models installed.")
+
+     return app
+
+ if __name__ == "__main__":
+     # Create and launch the Gradio interface
+     app = create_gradio_interface()
+     app.launch(share=True)  # Set share=False in production
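The closing comment suggests disabling the public share link in production. With Gradio's standard launch() options, a production-style launch might look like the sketch below (the host and port shown are assumptions, not values from this commit):

```python
# Hypothetical production launch: no public share link, bound to a fixed port
app = create_gradio_interface()
app.launch(share=False, server_name="0.0.0.0", server_port=7860)
```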
configs/.ipynb_checkpoints/__init__-checkpoint.py ADDED
@@ -0,0 +1,7 @@
+ import os
+ import sys
+ current_file_path = os.path.abspath(__file__)
+ parent_dir = os.path.dirname(os.path.dirname(current_file_path))
+ project_root_dir = os.path.dirname(parent_dir)
+ sys.path.append(parent_dir)
+ sys.path.append(project_root_dir)
configs/__init__.py ADDED
@@ -0,0 +1,7 @@
+ import os
+ import sys
+ current_file_path = os.path.abspath(__file__)
+ parent_dir = os.path.dirname(os.path.dirname(current_file_path))
+ project_root_dir = os.path.dirname(parent_dir)
+ sys.path.append(parent_dir)
+ sys.path.append(project_root_dir)
configs/get_config.py ADDED
@@ -0,0 +1,16 @@
+ #-*- coding: utf-8 -*-
+ import os
+
+ from yaml import load, dump
+ try:
+     from yaml import CLoader as Loader, CDumper as Dumper
+ except ImportError:
+     from yaml import Loader, Dumper
+ from box import Box as edict
+
+
+ def load_config(cfg):
+     with open(cfg) as f:
+         config = load(f, Loader=Loader)
+
+     return edict(config)
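Since load_config wraps the parsed YAML in a Box, nested keys can be read with attribute access, which is how app.py uses cfg.DATASET.IMAGE_SIZE and cfg.MODEL.precision. A minimal sketch using the test config shipped in this commit:

```python
from configs.get_config import load_config

cfg = load_config("configs/test_config.yaml")
print(cfg.mode)                 # "test", via Box attribute access
print(cfg.label_dict["FF-DF"])  # 1 (keys containing '-' need item access)
```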
configs/test_config.yaml ADDED
@@ -0,0 +1,38 @@
+ mode: test
+ lmdb: False
+ rgb_dir: '/ssd_scratch/deep_fake_dataset/'
+ lmdb_dir: '/ssd_scratch/deep_fake_dataset/datasets_lmdbs/'
+ dataset_json_folder: './preprocessing/dataset_json_v6/'
+ label_dict:
+   # DFD
+   DFD_fake: 1
+   DFD_real: 0
+   # FF++ + FaceShifter(FF-real+FF-FH)
+   FF-SH: 1
+   FF-F2F: 1
+   FF-DF: 1
+   FF-FS: 1
+   FF-NT: 1
+   FF-FH: 1
+   FF-real: 0
+   # CelebDF
+   CelebDFv1_real: 0
+   CelebDFv1_fake: 1
+   CelebDFv2_real: 0
+   CelebDFv2_fake: 1
+   # DFDCP
+   DFDCP_Real: 0
+   DFDCP_FakeA: 1
+   DFDCP_FakeB: 1
+   # DFDC
+   DFDC_Fake: 1
+   DFDC_Real: 0
+   # DeeperForensics-1.0
+   DF_fake: 1
+   DF_real: 0
+   # UADFV
+   UADFV_Fake: 1
+   UADFV_Real: 0
+   # Roop
+   roop_Real: 0
+   roop_Fake: 1
configs/train_config copie.yaml ADDED
@@ -0,0 +1,43 @@
+ mode: train
+ lmdb: False
+ dry_run: False
+ rgb_dir: '/ssd_scratch/deep_fake_dataset/'
+ lmdb_dir: '/ssd_scratch/deep_fake_dataset/datasets_lmdbs/'
+ dataset_json_folder: './preprocessing/dataset_json_v6/'
+ SWA: False
+ save_avg: True
+ log_dir: ./logs/training/
+ # label settings
+ label_dict:
+   # DFD
+   DFD_fake: 1
+   DFD_real: 0
+   # FF++ + FaceShifter(FF-real+FF-FH)
+   FF-SH: 1
+   FF-F2F: 1
+   FF-DF: 1
+   FF-FS: 1
+   FF-NT: 1
+   FF-FH: 1
+   FF-real: 0
+   # CelebDF
+   CelebDFv1_real: 0
+   CelebDFv1_fake: 1
+   CelebDFv2_real: 0
+   CelebDFv2_fake: 1
+   # DFDCP
+   DFDCP_Real: 0
+   DFDCP_FakeA: 1
+   DFDCP_FakeB: 1
+   # DFDC
+   DFDC_Fake: 1
+   DFDC_Real: 0
+   # DeeperForensics-1.0
+   DF_fake: 1
+   DF_real: 0
+   # UADFV
+   UADFV_Fake: 1
+   UADFV_Real: 0
+   # Roop
+   roop_Real: 0
+   roop_Fake: 1
configs/train_config.yaml ADDED
@@ -0,0 +1,46 @@
+ mode: train
+ lmdb: False
+ dry_run: False
+ rgb_dir: '/ssd_scratch/deep_fake_dataset/'
+ lmdb_dir: '/ssd_scratch/deep_fake_dataset/datasets_lmdbs/'
+ dataset_json_folder: './preprocessing/dataset_json_v6/'
+ SWA: False
+ save_avg: True
+ log_dir: ./logs/training/
+ # label settings
+ label_dict:
+   # iFakeFaceDB labels
+   real: 0
+   fake: 1
+   # DFD
+   DFD_fake: 1
+   DFD_real: 0
+   # FF++ + FaceShifter(FF-real+FF-FH)
+   FF-SH: 1
+   FF-F2F: 1
+   FF-DF: 1
+   FF-FS: 1
+   FF-NT: 1
+   FF-FH: 1
+   FF-real: 0
+   # CelebDF
+   CelebDFv1_real: 0
+   CelebDFv1_fake: 1
+   CelebDFv2_real: 0
+   CelebDFv2_fake: 1
+   # DFDCP
+   DFDCP_Real: 0
+   DFDCP_FakeA: 1
+   DFDCP_FakeB: 1
+   # DFDC
+   DFDC_Fake: 1
+   DFDC_Real: 0
+   # DeeperForensics-1.0
+   DF_fake: 1
+   DF_real: 0
+   # UADFV
+   UADFV_Fake: 1
+   UADFV_Real: 0
+   # Roop
+   roop_Real: 0
+   roop_Fake: 1
loss/__init__.py ADDED
@@ -0,0 +1,11 @@
+ import os
+ import sys
+ current_file_path = os.path.abspath(__file__)
+ parent_dir = os.path.dirname(os.path.dirname(current_file_path))
+ project_root_dir = os.path.dirname(parent_dir)
+ sys.path.append(parent_dir)
+ sys.path.append(project_root_dir)
+
+ from metrics.registry import LOSSFUNC
+
+ from .cross_entropy_loss import CrossEntropyLoss
loss/abstract_loss_func.py ADDED
@@ -0,0 +1,17 @@
+ import torch.nn as nn
+
+ class AbstractLossClass(nn.Module):
+     """Abstract class for loss functions."""
+     def __init__(self):
+         super(AbstractLossClass, self).__init__()
+
+     def forward(self, pred, label):
+         """
+         Args:
+             pred: prediction of the model
+             label: ground truth label
+
+         Return:
+             loss: loss value
+         """
+         raise NotImplementedError('Each subclass should implement the forward method.')
loss/cross_entropy_loss.py ADDED
@@ -0,0 +1,26 @@
+ import torch.nn as nn
+ from .abstract_loss_func import AbstractLossClass
+ from metrics.registry import LOSSFUNC
+
+
+ @LOSSFUNC.register_module(module_name="cross_entropy")
+ class CrossEntropyLoss(AbstractLossClass):
+     def __init__(self):
+         super().__init__()
+         self.loss_fn = nn.CrossEntropyLoss()
+
+     def forward(self, inputs, targets):
+         """
+         Computes the cross-entropy loss.
+
+         Args:
+             inputs: A PyTorch tensor of size (batch_size, num_classes) containing the predicted scores.
+             targets: A PyTorch tensor of size (batch_size) containing the ground-truth class indices.
+
+         Returns:
+             A scalar tensor representing the cross-entropy loss.
+         """
+         # Compute the cross-entropy loss
+         loss = self.loss_fn(inputs, targets)
+
+         return loss
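Because the @LOSSFUNC.register_module decorator runs at import time (loss/__init__.py imports CrossEntropyLoss), the loss can then be looked up by name. A minimal sketch, assuming the project root is on sys.path:

```python
import torch
import loss  # importing the package registers "cross_entropy" in LOSSFUNC
from metrics.registry import LOSSFUNC

loss_fn = LOSSFUNC["cross_entropy"]()
logits = torch.randn(4, 2)            # (batch_size, num_classes)
targets = torch.tensor([0, 1, 1, 0])  # ground-truth class indices
print(loss_fn(logits, targets))       # scalar cross-entropy loss
```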
metrics/__init__.py ADDED
@@ -0,0 +1,7 @@
+ import os
+ import sys
+ current_file_path = os.path.abspath(__file__)
+ parent_dir = os.path.dirname(os.path.dirname(current_file_path))
+ project_root_dir = os.path.dirname(parent_dir)
+ sys.path.append(parent_dir)
+ sys.path.append(project_root_dir)
metrics/base_metrics_class.py ADDED
@@ -0,0 +1,204 @@
+ import numpy as np
+ from sklearn import metrics
+ import torch
+ import torch.nn as nn
+
+
+ def get_accracy(output, label):
+     _, prediction = torch.max(output, 1)  # argmax
+     correct = (prediction == label).sum().item()
+     accuracy = correct / prediction.size(0)
+     return accuracy
+
+
+ def get_prediction(output, label):
+     prob = nn.functional.softmax(output, dim=1)[:, 1]
+     prob = prob.view(prob.size(0), 1)
+     label = label.view(label.size(0), 1)
+     # print(prob.size(), label.size())
+     datas = torch.cat((prob, label.float()), dim=1)
+     return datas
+
+
+ def calculate_metrics_for_train(label, output):
+     if output.size(1) == 2:
+         prob = torch.softmax(output, dim=1)[:, 1]
+     else:
+         prob = output
+
+     # Accuracy
+     _, prediction = torch.max(output, 1)
+     correct = (prediction == label).sum().item()
+     accuracy = correct / prediction.size(0)
+
+     # Average Precision
+     y_true = label.cpu().detach().numpy()
+     y_pred = prob.cpu().detach().numpy()
+     ap = metrics.average_precision_score(y_true, y_pred)
+
+     # AUC and EER
+     try:
+         fpr, tpr, thresholds = metrics.roc_curve(label.squeeze().cpu().numpy(),
+                                                  prob.squeeze().cpu().numpy(),
+                                                  pos_label=1)
+     except Exception:
+         # for the case when we only have one sample
+         return None, None, accuracy, ap
+
+     if np.isnan(fpr[0]) or np.isnan(tpr[0]):
+         # for the case when all the samples within a batch are fake/real
+         auc, eer = None, None
+     else:
+         auc = metrics.auc(fpr, tpr)
+         fnr = 1 - tpr
+         eer = fpr[np.nanargmin(np.absolute((fnr - fpr)))]
+
+     return auc, eer, accuracy, ap
+
+
+ # ------------ compute average metrics of batches ---------------------
+ class Metrics_batch():
+     def __init__(self):
+         self.tprs = []
+         self.mean_fpr = np.linspace(0, 1, 100)
+         self.aucs = []
+         self.eers = []
+         self.aps = []
+
+         self.correct = 0
+         self.total = 0
+         self.losses = []
+
+     def update(self, label, output):
+         acc = self._update_acc(label, output)
+         if output.size(1) == 2:
+             prob = torch.softmax(output, dim=1)[:, 1]
+         else:
+             prob = output
+         # label = 1-label
+         # prob = torch.softmax(output, dim=1)[:, 1]
+         auc, eer = self._update_auc(label, prob)
+         ap = self._update_ap(label, prob)
+
+         return acc, auc, eer, ap
+
+     def _update_auc(self, lab, prob):
+         fpr, tpr, thresholds = metrics.roc_curve(lab.squeeze().cpu().numpy(),
+                                                  prob.squeeze().cpu().numpy(),
+                                                  pos_label=1)
+         if np.isnan(fpr[0]) or np.isnan(tpr[0]):
+             return -1, -1
+
+         auc = metrics.auc(fpr, tpr)
+         interp_tpr = np.interp(self.mean_fpr, fpr, tpr)
+         interp_tpr[0] = 0.0
+         self.tprs.append(interp_tpr)
+         self.aucs.append(auc)
+
+         # return auc
+
+         # EER
+         fnr = 1 - tpr
+         eer = fpr[np.nanargmin(np.absolute((fnr - fpr)))]
+         self.eers.append(eer)
+
+         return auc, eer
+
+     def _update_acc(self, lab, output):
+         _, prediction = torch.max(output, 1)  # argmax
+         correct = (prediction == lab).sum().item()
+         accuracy = correct / prediction.size(0)
+         # self.accs.append(accuracy)
+         self.correct = self.correct + correct
+         self.total = self.total + lab.size(0)
+         return accuracy
+
+     def _update_ap(self, label, prob):
+         y_true = label.cpu().detach().numpy()
+         y_pred = prob.cpu().detach().numpy()
+         ap = metrics.average_precision_score(y_true, y_pred)
+         self.aps.append(ap)
+
+         return np.mean(ap)
+
+     def get_mean_metrics(self):
+         mean_acc, std_acc = self.correct / self.total, 0
+         mean_auc, std_auc = self._mean_auc()
+         mean_err, std_err = np.mean(self.eers), np.std(self.eers)
+         mean_ap, std_ap = np.mean(self.aps), np.std(self.aps)
+
+         return {'acc': mean_acc, 'auc': mean_auc, 'eer': mean_err, 'ap': mean_ap}
+
+     def _mean_auc(self):
+         mean_tpr = np.mean(self.tprs, axis=0)
+         mean_tpr[-1] = 1.0
+         mean_auc = metrics.auc(self.mean_fpr, mean_tpr)
+         std_auc = np.std(self.aucs)
+         return mean_auc, std_auc
+
+     def clear(self):
+         self.tprs.clear()
+         self.aucs.clear()
+         # self.accs.clear()
+         self.correct = 0
+         self.total = 0
+         self.eers.clear()
+         self.aps.clear()
+         self.losses.clear()
+
+
+ # ------------ compute average metrics of all data ---------------------
+ class Metrics_all():
+     def __init__(self):
+         self.probs = []
+         self.labels = []
+         self.correct = 0
+         self.total = 0
+
+     def store(self, label, output):
+         prob = torch.softmax(output, dim=1)[:, 1]
+         _, prediction = torch.max(output, 1)  # argmax
+         correct = (prediction == label).sum().item()
+         self.correct += correct
+         self.total += label.size(0)
+         self.labels.append(label.squeeze().cpu().numpy())
+         self.probs.append(prob.squeeze().cpu().numpy())
+
+     def get_metrics(self):
+         y_pred = np.concatenate(self.probs)
+         y_true = np.concatenate(self.labels)
+         # auc
+         fpr, tpr, thresholds = metrics.roc_curve(y_true, y_pred, pos_label=1)
+         auc = metrics.auc(fpr, tpr)
+         # eer
+         fnr = 1 - tpr
+         eer = fpr[np.nanargmin(np.absolute((fnr - fpr)))]
+         # ap
+         ap = metrics.average_precision_score(y_true, y_pred)
+         # acc
+         acc = self.correct / self.total
+         return {'acc': acc, 'auc': auc, 'eer': eer, 'ap': ap}
+
+     def clear(self):
+         self.probs.clear()
+         self.labels.clear()
+         self.correct = 0
+         self.total = 0
+
+
+ # only used to record a series of scalar values
+ class Recorder:
+     def __init__(self):
+         self.sum = 0
+         self.num = 0
+     def update(self, item, num=1):
+         if item is not None:
+             self.sum += item * num
+             self.num += num
+     def average(self):
+         if self.num == 0:
+             return None
+         return self.sum / self.num
+     def clear(self):
+         self.sum = 0
+         self.num = 0
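The Recorder class above keeps a weighted running average, which is how scalar values such as per-batch losses are typically aggregated. A minimal sketch:

```python
rec = Recorder()
rec.update(0.7, num=32)  # batch of 32 samples with mean loss 0.7
rec.update(0.5, num=32)
print(rec.average())     # 0.6, the sample-weighted mean
rec.clear()
```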
metrics/registry.py ADDED
@@ -0,0 +1,19 @@
+ class Registry(object):
+     def __init__(self):
+         self.data = {}
+
+     def register_module(self, module_name=None):
+         def _register(cls):
+             name = module_name
+             if module_name is None:
+                 name = cls.__name__
+             self.data[name] = cls
+             return cls
+         return _register
+
+     def __getitem__(self, key):
+         return self.data[key]
+
+ DETECTOR = Registry()
+ TRAINER = Registry()
+ LOSSFUNC = Registry()
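register_module acts both as a registering decorator and, via __getitem__, a lookup table; this is exactly how cross_entropy_loss.py registers itself with LOSSFUNC. A minimal sketch with a hypothetical class:

```python
from metrics.registry import DETECTOR

@DETECTOR.register_module(module_name="dummy")  # hypothetical example class
class DummyDetector:
    pass

assert DETECTOR["dummy"] is DummyDetector  # lookup by registered name
```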
metrics/utils.py ADDED
@@ -0,0 +1,92 @@
+ from sklearn import metrics
+ import numpy as np
+
+ def parse_metric_for_print(metric_dict):
+     if metric_dict is None:
+         return "\n"
+     s = "\n"
+     s += "================================ Each dataset best metric ================================ \n"
+     for key, value in metric_dict.items():
+         if key != 'avg':
+             s = s + f"| {key}: "
+             for k, v in value.items():
+                 s = s + f" {k}={v} "
+             s = s + "| \n"
+         else:
+             s += "============================================================================================= \n"
+             s += "================================== Average best metric ====================================== \n"
+             avg_dict = value
+             for avg_key, avg_value in avg_dict.items():
+                 if avg_key == 'dataset_dict':
+                     for key, value in avg_value.items():
+                         s = s + f"| {key}: {value} | \n"
+                 else:
+                     s = s + f"| avg {avg_key}: {avg_value} | \n"
+     s += "============================================================================================="
+     return s
+
+
+ def get_test_metrics(y_pred, y_true, img_names):
+     def get_video_metrics(image, pred, label):
+         result_dict = {}
+         new_label = []
+         new_pred = []
+         # print(image[0])
+         # print(pred.shape)
+         # print(label.shape)
+         for item in np.transpose(np.stack((image, pred, label)), (1, 0)):
+
+             s = item[0]
+             if '\\' in s:
+                 parts = s.split('\\')
+             else:
+                 parts = s.split('/')
+             a = parts[-2]
+             b = parts[-1]
+
+             if a not in result_dict:
+                 result_dict[a] = []
+
+             result_dict[a].append(item)
+         image_arr = list(result_dict.values())
+
+         for video in image_arr:
+             pred_sum = 0
+             label_sum = 0
+             leng = 0
+             for frame in video:
+                 pred_sum += float(frame[1])
+                 label_sum += int(frame[2])
+                 leng += 1
+             new_pred.append(pred_sum / leng)
+             new_label.append(int(label_sum / leng))
+         fpr, tpr, thresholds = metrics.roc_curve(new_label, new_pred)
+         v_auc = metrics.auc(fpr, tpr)
+         fnr = 1 - tpr
+         v_eer = fpr[np.nanargmin(np.absolute((fnr - fpr)))]
+         return v_auc, v_eer
+
+
+     y_pred = y_pred.squeeze()
+     # For UCF, where labels for different manipulations are not consistent.
+     y_true[y_true >= 1] = 1
+     # auc
+     fpr, tpr, thresholds = metrics.roc_curve(y_true, y_pred, pos_label=1)
+     auc = metrics.auc(fpr, tpr)
+     # eer
+     fnr = 1 - tpr
+     eer = fpr[np.nanargmin(np.absolute((fnr - fpr)))]
+     # ap
+     ap = metrics.average_precision_score(y_true, y_pred)
+     # acc
+     prediction_class = (y_pred > 0.5).astype(int)
+     correct = (prediction_class == np.clip(y_true, a_min=0, a_max=1)).sum().item()
+     acc = correct / len(prediction_class)
+     if type(img_names[0]) is not list:
+         # calculate video-level auc for the frame-level methods.
+         v_auc, _ = get_video_metrics(img_names, y_pred, y_true)
+     else:
+         # video-level methods
+         v_auc = auc
+
+     return {'acc': acc, 'auc': auc, 'eer': eer, 'ap': ap, 'pred': y_pred, 'video_auc': v_auc, 'label': y_true}
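get_test_metrics aggregates frame-level scores into a video-level AUC by grouping image paths on their parent directory. A minimal sketch with hypothetical paths and scores (two frames for each of two videos):

```python
import numpy as np
from metrics.utils import get_test_metrics

y_pred = np.array([0.9, 0.8, 0.2, 0.1])  # frame-level fake probabilities
y_true = np.array([1, 1, 0, 0])          # frame-level labels
img_names = ["vidA/f0.png", "vidA/f1.png", "vidB/f0.png", "vidB/f1.png"]

m = get_test_metrics(y_pred, y_true, img_names)
print(m["acc"], m["auc"], m["video_auc"])
```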
models/__init__.py ADDED
@@ -0,0 +1,29 @@
+ #-*- coding: utf-8 -*-
+ from .builder import MODELS, build_model
+ from .networks.arcface import (
+     SimpleClassificationDF,
+ )
+ from .networks.mrsa_resnet import (
+     PoseResNet, resnet_spec, Bottleneck
+ )
+ from .networks.pose_hrnet import (
+     PoseHighResolutionNet
+ )
+ from .networks.xception import (
+     Xception
+ )
+ from .networks.pose_efficientNet import (
+     PoseEfficientNet
+ )
+ from .networks.common import *
+ from .utils import (
+     load_pretrained, freeze_backbone,
+     load_model, save_model, unfreeze_backbone,
+     preset_model,
+ )
+
+
+ __all__ = ['SimpleClassificationDF', 'PoseResNet', 'MODELS', 'build_model',
+            'load_pretrained', 'freeze_backbone', 'resnet_spec',
+            'load_model', 'save_model', 'unfreeze_backbone', 'Bottleneck',
+            'preset_model', 'PoseHighResolutionNet', 'Xception', 'PoseEfficientNet']
models/builder.py ADDED
@@ -0,0 +1,45 @@
+ #-*- coding: utf-8 -*-
+ from typing import Dict, Any, Optional
+
+ import os
+ import sys
+ if not os.getcwd() in sys.path:
+     sys.path.append(os.getcwd())
+
+ from torch.nn import Sequential
+
+ from register.register import Registry, build_from_cfg
+
+
+ def build_model_from_cfg(cfg, registry, default_args=None):
+     """Build a PyTorch model from config dict(s). Different from
+     ``build_from_cfg``, if cfg is a list, a ``nn.Sequential`` will be built.
+     Args:
+         cfg (dict, list[dict]): The config of modules; it is either a config
+             dict or a list of config dicts. If cfg is a list,
+             the built modules will be wrapped with ``nn.Sequential``.
+         registry (:obj:`Registry`): A registry the module belongs to.
+         default_args (dict, optional): Default arguments to build the module.
+             Defaults to None.
+     Returns:
+         nn.Module: A built nn module.
+     """
+     if isinstance(cfg, list):
+         modules = [
+             build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg
+         ]
+         return Sequential(*modules)
+     else:
+         return build_from_cfg(cfg, registry, default_args)
+
+
+ MODELS = Registry('model', build_func=build_model_from_cfg)
+ HEADS = MODELS
+ BACKBONES = MODELS
+
+
+ def build_model(cfg: Dict,
+                 model: Registry,
+                 build_func=build_model_from_cfg,
+                 default_args: Optional[Dict] = None) -> Any:
+     return build_func(cfg, model, default_args)
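build_model delegates to build_from_cfg from register/register.py, which is not part of this commit; assuming it follows the usual mmcv-style convention (pop 'type' from the dict, look the class up in the registry, pass the remaining keys as kwargs), construction would look like this sketch:

```python
# Sketch under the assumption above; 'ResNet' is the name the backbone in
# models/networks/arcface.py registers itself under
from models.builder import MODELS, build_model

cfg = dict(type='ResNet', num_layers=50, drop_ratio=0.6, mode='ir')
backbone = build_model(cfg, MODELS)
```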
models/networks/arcface.py ADDED
@@ -0,0 +1,384 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #-*- coding: utf-8 -*-
2
+ import os
3
+ import math
4
+ from collections import namedtuple
5
+
6
+ from torch.nn import (Linear, Conv2d, BatchNorm1d, Softmax,
7
+ BatchNorm2d, PReLU, ReLU, Sigmoid,
8
+ Dropout2d, Dropout, AvgPool2d, MaxPool2d,
9
+ AdaptiveAvgPool2d, Sequential, Module, Parameter)
10
+ import torch.nn.functional as F
11
+ import torch
12
+
13
+ from ..builder import (
14
+ MODELS, HEADS, BACKBONES,
15
+ build_model,
16
+ )
17
+
18
+
19
+ ################################## Original Arcface Model #############################################################
20
+
21
+
22
+ class Flatten(Module):
23
+ def forward(self, input):
24
+ return input.view(input.size(0), -1)
25
+
26
+
27
+ def l2_norm(input,axis=1):
28
+ norm = torch.norm(input, 2, axis, True)
29
+ output = torch.div(input, norm)
30
+ return output
31
+
32
+
33
+ class SEModule(Module):
34
+ def __init__(self, channels, reduction):
35
+ super(SEModule, self).__init__()
36
+ self.avg_pool = AdaptiveAvgPool2d(1)
37
+ self.fc1 = Conv2d(
38
+ channels, channels // reduction, kernel_size=1, padding=0 ,bias=False)
39
+ self.relu = ReLU(inplace=True)
40
+ self.fc2 = Conv2d(
41
+ channels // reduction, channels, kernel_size=1, padding=0 ,bias=False)
42
+ self.sigmoid = Sigmoid()
43
+
44
+ def forward(self, x):
45
+ module_input = x
46
+ x = self.avg_pool(x)
47
+ x = self.fc1(x)
48
+ x = self.relu(x)
49
+ x = self.fc2(x)
50
+ x = self.sigmoid(x)
51
+ return module_input * x
52
+
53
+
54
+ class bottleneck_IR(Module):
55
+ def __init__(self, in_channel, depth, stride):
56
+ super(bottleneck_IR, self).__init__()
57
+ if in_channel == depth:
58
+ self.shortcut_layer = MaxPool2d(1, stride)
59
+ else:
60
+ self.shortcut_layer = Sequential(
61
+ Conv2d(in_channel, depth, (1, 1), stride ,bias=False), BatchNorm2d(depth))
62
+ self.res_layer = Sequential(
63
+ BatchNorm2d(in_channel),
64
+ Conv2d(in_channel, depth, (3, 3), (1, 1), 1 ,bias=False), PReLU(depth),
65
+ Conv2d(depth, depth, (3, 3), stride, 1 ,bias=False), BatchNorm2d(depth))
66
+
67
+ def forward(self, x):
68
+ shortcut = self.shortcut_layer(x)
69
+ res = self.res_layer(x)
70
+ return res + shortcut
71
+
72
+
73
+ class bottleneck_IR_SE(Module):
74
+ def __init__(self, in_channel, depth, stride):
75
+ super(bottleneck_IR_SE, self).__init__()
76
+ if in_channel == depth:
77
+ self.shortcut_layer = MaxPool2d(1, stride)
78
+ else:
79
+ self.shortcut_layer = Sequential(
80
+ Conv2d(in_channel, depth, (1, 1), stride ,bias=False),
81
+ BatchNorm2d(depth))
82
+ self.res_layer = Sequential(
83
+ BatchNorm2d(in_channel),
84
+ Conv2d(in_channel, depth, (3,3), (1,1),1 ,bias=False),
85
+ PReLU(depth),
86
+ Conv2d(depth, depth, (3,3), stride, 1 ,bias=False),
87
+ BatchNorm2d(depth),
88
+ SEModule(depth,16)
89
+ )
90
+
91
+ def forward(self,x):
92
+ shortcut = self.shortcut_layer(x)
93
+ res = self.res_layer(x)
94
+ return res + shortcut
95
+
96
+
97
+ class Bottleneck(namedtuple('Block', ['in_channel', 'depth', 'stride'])):
98
+ '''A named tuple describing a ResNet block.'''
99
+
100
+
101
+ def get_block(in_channel, depth, num_units, stride = 2):
102
+ return [Bottleneck(in_channel, depth, stride)] + [Bottleneck(depth, depth, 1) for i in range(num_units-1)]
103
+
104
+
105
+ def get_blocks(num_layers):
106
+ if num_layers == 50:
107
+ blocks = [
108
+ get_block(in_channel=64, depth=64, num_units = 3),
109
+ get_block(in_channel=64, depth=128, num_units=4),
110
+ get_block(in_channel=128, depth=256, num_units=14),
111
+ get_block(in_channel=256, depth=512, num_units=3)
112
+ ]
113
+ elif num_layers == 100:
114
+ blocks = [
115
+ get_block(in_channel=64, depth=64, num_units=3),
116
+ get_block(in_channel=64, depth=128, num_units=13),
117
+ get_block(in_channel=128, depth=256, num_units=30),
118
+ get_block(in_channel=256, depth=512, num_units=3)
119
+ ]
120
+ elif num_layers == 152:
121
+ blocks = [
122
+ get_block(in_channel=64, depth=64, num_units=3),
123
+ get_block(in_channel=64, depth=128, num_units=8),
124
+ get_block(in_channel=128, depth=256, num_units=36),
125
+ get_block(in_channel=256, depth=512, num_units=3)
126
+ ]
127
+ return blocks
128
+
129
+
130
+ @BACKBONES.register_module()
131
+ class ResNet(Module):
132
+ def __init__(self, num_layers=50, drop_ratio=0.6, mode='ir', **kwargs):
133
+ """
134
+ Implementation for ResNet 50, 101, 152 with/out SE module
135
+ """
136
+ super(ResNet, self).__init__()
137
+ assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152'
138
+ assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se'
139
+ blocks = get_blocks(num_layers)
140
+ if mode == 'ir':
141
+ unit_module = bottleneck_IR
142
+ elif mode == 'ir_se':
143
+ unit_module = bottleneck_IR_SE
144
+ self.input_layer = Sequential(Conv2d(3, 64, (3, 3), 1, 1 ,bias=False),
145
+ BatchNorm2d(64),
146
+ PReLU(64))
147
+ self.output_layer = Sequential(BatchNorm2d(512),
148
+ Dropout(drop_ratio),
149
+ Flatten(),
150
+ Linear(512 * 7 * 7, 512),
151
+ BatchNorm1d(512))
152
+ modules = []
153
+ for block in blocks:
154
+ for bottleneck in block:
155
+ modules.append(
156
+ unit_module(bottleneck.in_channel,
157
+ bottleneck.depth,
158
+ bottleneck.stride))
159
+ self.body = Sequential(*modules)
160
+
161
+ def forward(self,x):
162
+ x = self.input_layer(x)
163
+ x = self.body(x)
164
+ x = self.output_layer(x)
165
+ x = l2_norm(x)
166
+ return x
167
+
168
+
169
+ @HEADS.register_module()
170
+ class SimpleClassificationHead(Module):
171
+ def __init__(self, drop_ratio=0.6, in_planes=512, **kwargs):
172
+ super(SimpleClassificationHead, self).__init__()
173
+ self.classification_head = Sequential(Dropout(drop_ratio),
174
+ Linear(in_planes, 256),
175
+ BatchNorm1d(256),
176
+ Dropout(drop_ratio),
177
+ Linear(256, 128),
178
+ BatchNorm1d(128),
179
+ Dropout(drop_ratio),
180
+ Linear(128, 64),
181
+ BatchNorm1d(64),
182
+ Dropout(drop_ratio),
183
+ Linear(64, 32),
184
+ BatchNorm1d(32),
185
+ # Dropout(drop_ratio),
186
+ Linear(32, 1),
187
+ Sigmoid())
188
+
189
+ def forward(self, x):
190
+ x = self.classification_head(x)
191
+ return x
192
+
193
+
194
+ @MODELS.register_module()
195
+ class SimpleClassificationDF(Module):
196
+ def __init__(self, cfg: dict, **kwargs):
197
+ super(SimpleClassificationDF, self).__init__()
198
+ assert 'backbone' in cfg, 'Config for Backbones is mandatory!'
199
+ assert 'head' in cfg, 'Config for Heads is mandatory!'
200
+
201
+ self.backbone = BACKBONES.get(cfg.backbone.type)(**cfg.backbone)
202
+ self.head = HEADS.get(cfg.head.type)(**cfg.head)
203
+ self.model = Sequential(*[self.backbone,
204
+ self.head])
205
+
206
+ def forward(self, x):
207
+ x = self.model(x)
208
+ return x
209
+
210
+
211
+ ################################## MobileFaceNet #############################################################
212
+
213
+
214
+ class Conv_block(Module):
215
+ def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
216
+ super(Conv_block, self).__init__()
217
+ self.conv = Conv2d(in_c, out_channels=out_c, kernel_size=kernel, groups=groups, stride=stride, padding=padding, bias=False)
218
+ self.bn = BatchNorm2d(out_c)
219
+ self.prelu = PReLU(out_c)
220
+
221
+ def forward(self, x):
222
+ x = self.conv(x)
223
+ x = self.bn(x)
224
+ x = self.prelu(x)
225
+ return x
226
+
227
+
228
+ class Linear_block(Module):
229
+ def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
230
+ super(Linear_block, self).__init__()
231
+ self.conv = Conv2d(in_c, out_channels=out_c, kernel_size=kernel, groups=groups, stride=stride, padding=padding, bias=False)
232
+ self.bn = BatchNorm2d(out_c)
233
+
234
+ def forward(self, x):
235
+ x = self.conv(x)
236
+ x = self.bn(x)
237
+ return x
238
+
239
+
240
+ class Depth_Wise(Module):
241
+ def __init__(self, in_c, out_c, residual = False, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=1):
242
+ super(Depth_Wise, self).__init__()
243
+ self.conv = Conv_block(in_c, out_c=groups, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
244
+ self.conv_dw = Conv_block(groups, groups, groups=groups, kernel=kernel, padding=padding, stride=stride)
245
+ self.project = Linear_block(groups, out_c, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
246
+ self.residual = residual
247
+
248
+ def forward(self, x):
249
+ if self.residual:
250
+ short_cut = x
251
+ x = self.conv(x)
252
+ x = self.conv_dw(x)
253
+ x = self.project(x)
254
+ if self.residual:
255
+ output = short_cut + x
256
+ else:
257
+ output = x
258
+ return output
259
+
260
+
261
+ class Residual(Module):
262
+ def __init__(self, c, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1)):
263
+ super(Residual, self).__init__()
264
+ modules = []
265
+ for _ in range(num_block):
266
+ modules.append(Depth_Wise(c, c, residual=True, kernel=kernel, padding=padding, stride=stride, groups=groups))
267
+ self.model = Sequential(*modules)
268
+
269
+ def forward(self, x):
270
+ return self.model(x)
271
+
272
+
273
+ class MobileFaceNet(Module):
274
+ def __init__(self, embedding_size):
275
+ super(MobileFaceNet, self).__init__()
276
+ self.conv1 = Conv_block(3, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1))
277
+ self.conv2_dw = Conv_block(64, 64, kernel=(3, 3), stride=(1, 1), padding=(1, 1), groups=64)
278
+ self.conv_23 = Depth_Wise(64, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=128)
279
+ self.conv_3 = Residual(64, num_block=4, groups=128, kernel=(3, 3), stride=(1, 1), padding=(1, 1))
280
+ self.conv_34 = Depth_Wise(64, 128, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=256)
281
+ self.conv_4 = Residual(128, num_block=6, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1))
282
+ self.conv_45 = Depth_Wise(128, 128, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=512)
283
+ self.conv_5 = Residual(128, num_block=2, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1))
284
+ self.conv_6_sep = Conv_block(128, 512, kernel=(1, 1), stride=(1, 1), padding=(0, 0))
285
+ self.conv_6_dw = Linear_block(512, 512, groups=512, kernel=(7,7), stride=(1, 1), padding=(0, 0))
286
+ self.conv_6_flatten = Flatten()
287
+ self.linear = Linear(512, embedding_size, bias=False)
288
+ self.bn = BatchNorm1d(embedding_size)
289
+
290
+ def forward(self, x):
291
+ out = self.conv1(x)
292
+ out = self.conv2_dw(out)
293
+ out = self.conv_23(out)
294
+ out = self.conv_3(out)
295
+ out = self.conv_34(out)
296
+ out = self.conv_4(out)
297
+ out = self.conv_45(out)
298
+ out = self.conv_5(out)
299
+ out = self.conv_6_sep(out)
300
+ out = self.conv_6_dw(out)
301
+ out = self.conv_6_flatten(out)
302
+ out = self.linear(out)
303
+ out = self.bn(out)
304
+
305
+ return l2_norm(out)
306
+
307
+
308
+ ################################## Arcface head #############################################################
309
+
310
+
311
+ class Arcface(Module):
312
+ # implementation of additive margin softmax loss in https://arxiv.org/abs/1801.05599
313
+ def __init__(self, embedding_size=512, classnum=51332, s=64., m=0.5):
314
+ super(Arcface, self).__init__()
315
+ self.classnum = classnum
316
+ self.kernel = Parameter(torch.Tensor(embedding_size,classnum))
317
+ # initialize the class-weight kernel
318
+ self.kernel.data.uniform_(-1, 1).renorm_(2,1,1e-5).mul_(1e5)
319
+ self.m = m # the margin value, default is 0.5
320
+ self.s = s # scalar value default is 64, see normface https://arxiv.org/abs/1704.06369
321
+ self.cos_m = math.cos(m)
322
+ self.sin_m = math.sin(m)
323
+ self.mm = self.sin_m * m # sin(m) * m, used by the fallback below (original code notes "issue 1")
324
+ self.threshold = math.cos(math.pi - m)
325
+
326
+ def forward(self, embbedings, label):
327
+ # weights norm
328
+ nB = len(embbedings)
329
+ kernel_norm = l2_norm(self.kernel,axis=0)
330
+ # cos(theta+m)
331
+ cos_theta = torch.mm(embbedings,kernel_norm)
332
+ # output = torch.mm(embbedings,kernel_norm)
333
+ cos_theta = cos_theta.clamp(-1,1) # for numerical stability
334
+ cos_theta_2 = torch.pow(cos_theta, 2)
335
+ sin_theta_2 = 1 - cos_theta_2
336
+ sin_theta = torch.sqrt(sin_theta_2)
337
+ cos_theta_m = (cos_theta * self.cos_m - sin_theta * self.sin_m)
338
+ # this condition keeps theta+m within the range [0, pi]:
339
+ # 0<=theta+m<=pi
340
+ # -m<=theta<=pi-m
341
+ cond_v = cos_theta - self.threshold
342
+ cond_mask = cond_v <= 0
343
+ keep_val = (cos_theta - self.mm) # when theta+m falls outside [0, pi], fall back to the CosFace-style margin
344
+ cos_theta_m[cond_mask] = keep_val[cond_mask]
345
+ output = cos_theta * 1.0 # a little bit hacky way to prevent in_place operation on cos_theta
346
+ idx_ = torch.arange(0, nB, dtype=torch.long)
347
+ output[idx_, label] = cos_theta_m[idx_, label]
348
+ output *= self.s # scale up in order to make softmax work, first introduced in normface
349
+ return output
350
+
351
+
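A hedged training-step sketch for the head above (batch size and class count are illustrative): the margin-adjusted logits feed a plain cross-entropy loss, and gradients flow into both the embeddings and the head's kernel.

```python
import torch
import torch.nn.functional as F

head = Arcface(embedding_size=512, classnum=1000, s=64., m=0.5)
embeddings = l2_norm(torch.randn(8, 512))  # assumes l2_norm(input, axis=1) defined earlier in this file
labels = torch.randint(0, 1000, (8,))
logits = head(embeddings, labels)          # margin applied only to target-class logits
loss = F.cross_entropy(logits, labels)
loss.backward()
```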
352
+ ################################## Cosface head #############################################################
353
+
354
+
355
+ class Am_softmax(Module):
356
+ # implementation of additive margin softmax loss in https://arxiv.org/abs/1801.05599
357
+ def __init__(self,embedding_size=512,classnum=51332):
358
+ super(Am_softmax, self).__init__()
359
+ self.classnum = classnum
360
+ self.kernel = Parameter(torch.Tensor(embedding_size,classnum))
361
+ # initial kernel
362
+ self.kernel.data.uniform_(-1, 1).renorm_(2,1,1e-5).mul_(1e5)
363
+ self.m = 0.35 # additive margin recommended by the paper
364
+ self.s = 30. # see normface https://arxiv.org/abs/1704.06369
365
+
366
+ def forward(self,embbedings,label):
367
+ kernel_norm = l2_norm(self.kernel,axis=0)
368
+ cos_theta = torch.mm(embbedings,kernel_norm)
369
+ cos_theta = cos_theta.clamp(-1,1) # for numerical stability
370
+ phi = cos_theta - self.m
371
+ label = label.view(-1,1) #size=(B,1)
372
+ index = cos_theta.data * 0.0 #size=(B,Classnum)
373
+ index.scatter_(1,label.data.view(-1,1),1)
374
+ index = index.bool() # boolean mask; byte-tensor indexing is deprecated in modern PyTorch
375
+ output = cos_theta * 1.0
376
+ output[index] = phi[index] # only modify the target-class logits
377
+ output *= self.s # scale up in order to make softmax work, first introduced in normface
378
+ return output
379
+
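The two heads place the margin differently. With s the scale, m the margin, and theta_y the angle between the embedding and the target-class weight, the target logit becomes:

```latex
\text{Arcface: } \operatorname{logit}_y = s\,\cos(\theta_y + m)
\qquad
\text{Am\_softmax: } \operatorname{logit}_y = s\,(\cos\theta_y - m)
```

Non-target logits are left at s cos(theta_j) in both cases.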
380
+
381
+ if __name__ == "__main__":
382
+ cfg = dict(num_layers=50, drop_ratio=0.6, mode='ir', type='Backbone')
383
+ backbone = MODELS.build(cfg)
384
+ print(backbone)
models/networks/common.py ADDED
@@ -0,0 +1,75 @@
1
+ #-*- coding: utf-8 -*-
2
+ import torch
3
+ import torch.nn as nn
4
+
5
+
6
+ BN_MOMENTUM = 0.1
7
+
8
+
9
+ def point_wise_block(inplanes, outplanes):
10
+ return nn.Sequential(
11
+ nn.Conv2d(in_channels=inplanes, out_channels=outplanes, kernel_size=1, padding=0, stride=1, bias=False),
12
+ nn.BatchNorm2d(outplanes, momentum=BN_MOMENTUM),
13
+ nn.ReLU(inplace=True),
14
+ )
15
+
16
+
17
+ def conv_block(inplanes, outplanes, kernel_size, stride=1, padding=0):
18
+ return nn.Sequential(
19
+ nn.Conv2d(in_channels=inplanes, out_channels=outplanes, kernel_size=kernel_size, padding=padding, stride=stride, bias=False),
20
+ nn.BatchNorm2d(outplanes, momentum=BN_MOMENTUM),
21
+ nn.ReLU(inplace=True)
22
+ )
23
+
24
+
25
+ def conv3x3(in_planes, out_planes, stride=1):
26
+ """3x3 convolution with padding"""
27
+ return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
28
+ padding=1, bias=False)
29
+
30
+
31
+ class InceptionBlock(nn.Module):
32
+ def __init__(self, inplanes, outplanes, stride=1, pool_size=3):
33
+ self.inplanes = inplanes
34
+ self.outplanes = outplanes
35
+ self.stride = stride
36
+ self.pool_size = pool_size
37
+ super(InceptionBlock, self).__init__()
38
+
39
+ self.pw_block = point_wise_block(self.inplanes, self.outplanes//4)
40
+ self.mp_layer = nn.MaxPool2d(kernel_size=self.pool_size, stride=stride, padding=1)
41
+ self.conv3_block = conv_block(self.outplanes//4, self.outplanes//4, kernel_size=3, stride=1, padding=1)
42
+ self.conv5_block = conv_block(self.outplanes//4, self.outplanes//4, kernel_size=5, stride=1, padding=2)
43
+
44
+ def forward(self, x):
45
+ x1 = self.pw_block(x)
46
+
47
+ x2 = self.pw_block(x)
48
+ x2 = self.conv3_block(x2)
49
+
50
+ x3 = self.pw_block(x)
51
+ x3 = self.conv5_block(x3)
52
+
53
+ x4 = self.mp_layer(x)
54
+ x4 = self.pw_block(x4)
55
+
56
+ x = torch.cat((x1, x2, x3, x4), dim=1)
57
+ return x
58
+
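A hedged shape check for the block above: the four parallel branches each emit outplanes // 4 channels, so the concatenation restores outplanes. Note that self.pw_block is a single module reused across branches, so those 1x1 convolutions share weights.

```python
import torch

blk = InceptionBlock(inplanes=64, outplanes=128, stride=1, pool_size=3)
print(blk(torch.randn(1, 64, 32, 32)).shape)  # torch.Size([1, 128, 32, 32])
```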
59
+
60
+ class SELayer(nn.Module):
61
+ def __init__(self, channel, reduction=16):
62
+ super(SELayer, self).__init__()
63
+ self.avg_pool = nn.AdaptiveAvgPool2d(1)
64
+ self.fc = nn.Sequential(
65
+ nn.Linear(channel, channel // reduction, bias=False),
66
+ nn.ReLU(inplace=True),
67
+ nn.Linear(channel // reduction, channel, bias=False),
68
+ nn.Sigmoid()
69
+ )
70
+
71
+ def forward(self, x):
72
+ b, c, _, _ = x.size()
73
+ y = self.avg_pool(x).view(b, c)
74
+ y = self.fc(y).view(b, c, 1, 1)
75
+ return x * y.expand_as(x)
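SELayer is standard squeeze-and-excitation channel gating: global average pooling, a two-layer bottleneck MLP, and a sigmoid gate that rescales each channel. A hedged usage example:

```python
import torch

se = SELayer(channel=64, reduction=16)
x = torch.randn(2, 64, 16, 16)
print(se(x).shape)  # torch.Size([2, 64, 16, 16]) - same shape, channels reweighted
```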
models/networks/efficientNet.py ADDED
@@ -0,0 +1,490 @@
1
+ #-*- coding: utf-8 -*-
2
+ import math
3
+ import re
4
+ import collections
5
+ from functools import partial
6
+
7
+ import torch
8
+ import torch.nn as nn
9
+ from torch.nn import functional as F
10
+ from torch.utils import model_zoo
11
+
12
+
13
+ # Parameters for the entire model (stem, all blocks, and head)
14
+ GlobalParams = collections.namedtuple('GlobalParams', [
15
+ 'width_coefficient', 'depth_coefficient', 'image_size', 'dropout_rate',
16
+ 'num_classes', 'batch_norm_momentum', 'batch_norm_epsilon',
17
+ 'drop_connect_rate', 'depth_divisor', 'min_depth', 'include_top',
18
+ 'include_hm_decoder', 'head_conv', 'heads', 'num_layers', 'INIT_WEIGHTS',
19
+ 'use_c2', 'use_c3', 'use_c4', 'use_c51', 'efpn', 'se_layer', 'tfpn'])
20
+
21
+ # Parameters for an individual model block
22
+ BlockArgs = collections.namedtuple('BlockArgs', [
23
+ 'num_repeat', 'kernel_size', 'stride', 'expand_ratio',
24
+ 'input_filters', 'output_filters', 'se_ratio', 'id_skip'])
25
+
26
+ # Set GlobalParams and BlockArgs's defaults
27
+ GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields)
28
+ BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields)
29
+
30
+
31
+ # Swish activation function
32
+ if hasattr(nn, 'SiLU'):
33
+ Swish = nn.SiLU
34
+ else:
35
+ # For compatibility with old PyTorch versions
36
+ class Swish(nn.Module):
37
+ def forward(self, x):
38
+ return x * torch.sigmoid(x)
39
+
40
+
41
+ def round_filters(filters, global_params):
42
+ """Calculate and round number of filters based on width multiplier.
43
+ Use width_coefficient, depth_divisor and min_depth of global_params.
44
+ Args:
45
+ filters (int): Filters number to be calculated.
46
+ global_params (namedtuple): Global params of the model.
47
+ Returns:
48
+ new_filters: New filters number after calculating.
49
+ """
50
+ multiplier = global_params.width_coefficient
51
+ if not multiplier:
52
+ return filters
53
+ # TODO: modify the params names.
54
+ # maybe the names (width_divisor,min_width)
55
+ # are more suitable than (depth_divisor,min_depth).
56
+ divisor = global_params.depth_divisor
57
+ min_depth = global_params.min_depth
58
+ filters *= multiplier
59
+ min_depth = min_depth or divisor # pay attention to this line when using min_depth
60
+ # follow the formula transferred from official TensorFlow implementation
61
+ new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor)
62
+ if new_filters < 0.9 * filters: # prevent rounding by more than 10%
63
+ new_filters += divisor
64
+ return int(new_filters)
65
+
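A hedged worked example of the rounding rule (the 1.4 multiplier is the b4 width coefficient from efficientnet_params below): 32 x 1.4 = 44.8 rounds to the nearest multiple of 8, i.e. 48, and the 10% rule prevents rounding down too far.

```python
gp = GlobalParams(width_coefficient=1.4, depth_divisor=8, min_depth=None)
print(round_filters(32, gp))  # 48
```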
66
+
67
+ def round_repeats(repeats, global_params):
68
+ """Calculate module's repeat number of a block based on depth multiplier.
69
+ Use depth_coefficient of global_params.
70
+ Args:
71
+ repeats (int): num_repeat to be calculated.
72
+ global_params (namedtuple): Global params of the model.
73
+ Returns:
74
+ new repeat: New repeat number after calculating.
75
+ """
76
+ multiplier = global_params.depth_coefficient
77
+ if not multiplier:
78
+ return repeats
79
+ # follow the formula transferred from official TensorFlow implementation
80
+ return int(math.ceil(multiplier * repeats))
81
+
82
+
83
+ def drop_connect(inputs, p, training):
84
+ """Drop connect.
85
+ Args:
86
+ inputs (tensor: BCHW): Input of this structure.
87
+ p (float: 0.0~1.0): Probability of drop connection.
88
+ training (bool): The running mode.
89
+ Returns:
90
+ output: Output after drop connection.
91
+ """
92
+ assert 0 <= p <= 1, 'p must be in range of [0,1]'
93
+
94
+ if not training:
95
+ return inputs
96
+
97
+ batch_size = inputs.shape[0]
98
+ keep_prob = 1 - p
99
+
100
+ # generate binary_tensor mask according to probability (p for 0, 1-p for 1)
101
+ random_tensor = keep_prob
102
+ random_tensor += torch.rand([batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device)
103
+ binary_tensor = torch.floor(random_tensor)
104
+
105
+ output = inputs / keep_prob * binary_tensor
106
+ return output
107
+
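A hedged illustration of the stochastic-depth behaviour above: in training mode whole samples are zeroed with probability p and survivors are rescaled by 1 / (1 - p), preserving the expectation; in eval mode the function is the identity.

```python
import torch

x = torch.ones(4, 1, 1, 1)
out = drop_connect(x, p=0.5, training=True)
print(out.view(-1))  # each entry is 0.0 (dropped) or 2.0 (kept and rescaled)
```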
108
+
109
+ def get_same_padding_conv2d(image_size=None):
110
+ """Chooses static padding if you have specified an image size, and dynamic padding otherwise.
111
+ Static padding is necessary for ONNX exporting of models.
112
+ Args:
113
+ image_size (int or tuple): Size of the image.
114
+ Returns:
115
+ Conv2dDynamicSamePadding or Conv2dStaticSamePadding.
116
+ """
117
+ if image_size is None:
118
+ return Conv2dDynamicSamePadding
119
+ else:
120
+ return partial(Conv2dStaticSamePadding, image_size=image_size)
121
+
122
+
123
+ class Conv2dDynamicSamePadding(nn.Conv2d):
124
+ """2D Convolutions like TensorFlow, for a dynamic image size.
125
+ The padding is operated in forward function by calculating dynamically.
126
+ """
127
+
128
+ # Tips for 'SAME' mode padding.
129
+ # Given the following:
130
+ # i: width or height
131
+ # s: stride
132
+ # k: kernel size
133
+ # d: dilation
134
+ # p: padding
135
+ # Output after Conv2d:
136
+ # o = floor((i+p-((k-1)*d+1))/s+1)
137
+ # If o equals i, i = floor((i+p-((k-1)*d+1))/s+1),
138
+ # => p = (i-1)*s+((k-1)*d+1)-i
139
+
140
+ def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True):
141
+ super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias)
142
+ self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2
143
+
144
+ def forward(self, x):
145
+ ih, iw = x.size()[-2:]
146
+ kh, kw = self.weight.size()[-2:]
147
+ sh, sw = self.stride
148
+ oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) # change the output size according to stride ! ! !
149
+ pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
150
+ pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
151
+ if pad_h > 0 or pad_w > 0:
152
+ x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2])
153
+ return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
154
+
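A hedged check of the 'SAME' behaviour derived in the comment above: the output spatial size is ceil(input / stride), independent of the kernel size.

```python
import torch

conv = Conv2dDynamicSamePadding(3, 8, kernel_size=5, stride=2)
print(conv(torch.randn(1, 3, 15, 15)).shape)  # torch.Size([1, 8, 8, 8]); ceil(15 / 2) == 8
```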
155
+
156
+ class Conv2dStaticSamePadding(nn.Conv2d):
157
+ """2D Convolutions like TensorFlow's 'SAME' mode, with the given input image size.
158
+ The padding module is computed in the constructor, then applied in forward.
159
+ """
160
+
161
+ # With the same calculation as Conv2dDynamicSamePadding
162
+
163
+ def __init__(self, in_channels, out_channels, kernel_size, stride=1, image_size=None, **kwargs):
164
+ super().__init__(in_channels, out_channels, kernel_size, stride, **kwargs)
165
+ self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2
166
+
167
+ # Calculate padding based on image size and save it
168
+ assert image_size is not None
169
+ ih, iw = (image_size, image_size) if isinstance(image_size, int) else image_size
170
+ kh, kw = self.weight.size()[-2:]
171
+ sh, sw = self.stride
172
+ oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
173
+ pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
174
+ pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
175
+ if pad_h > 0 or pad_w > 0:
176
+ self.static_padding = nn.ZeroPad2d((pad_w // 2, pad_w - pad_w // 2,
177
+ pad_h // 2, pad_h - pad_h // 2))
178
+ else:
179
+ self.static_padding = nn.Identity()
180
+
181
+ def forward(self, x):
182
+ x = self.static_padding(x)
183
+ x = F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
184
+ return x
185
+
186
+
187
+ def get_model_params(model_name, override_params):
188
+ """Get the block args and global params for a given model name.
189
+ Args:
190
+ model_name (str): Model's name.
191
+ override_params (dict): A dict to modify global_params.
192
+ Returns:
193
+ blocks_args, global_params
194
+ """
195
+ if model_name.startswith('efficientnet'):
196
+ w, d, s, p = efficientnet_params(model_name)
197
+ # note: all models have drop connect rate = 0.2
198
+ blocks_args, global_params = efficientnet(
199
+ width_coefficient=w, depth_coefficient=d, dropout_rate=p, image_size=s)
200
+ else:
201
+ raise NotImplementedError('model name is not pre-defined: {}'.format(model_name))
202
+ if override_params:
203
+ # ValueError will be raised here if override_params has fields not included in global_params.
204
+ global_params = global_params._replace(**override_params)
205
+ return blocks_args, global_params
206
+
207
+
208
+ def efficientnet_params(model_name):
209
+ """Map EfficientNet model name to parameter coefficients.
210
+ Args:
211
+ model_name (str): Model name to be queried.
212
+ Returns:
213
+ params_dict[model_name]: A (width,depth,res,dropout) tuple.
214
+ """
215
+ params_dict = {
216
+ # Coefficients: width,depth,res,dropout
217
+ 'efficientnet-b0': (1.0, 1.0, 224, 0.2),
218
+ 'efficientnet-b1': (1.0, 1.1, 240, 0.2),
219
+ 'efficientnet-b2': (1.1, 1.2, 260, 0.3),
220
+ 'efficientnet-b3': (1.2, 1.4, 300, 0.3),
221
+ 'efficientnet-b4': (1.4, 1.8, 380, 0.4),
222
+ 'efficientnet-b5': (1.6, 2.2, 456, 0.4),
223
+ 'efficientnet-b6': (1.8, 2.6, 528, 0.5),
224
+ 'efficientnet-b7': (2.0, 3.1, 600, 0.5),
225
+ 'efficientnet-b8': (2.2, 3.6, 672, 0.5),
226
+ 'efficientnet-l2': (4.3, 5.3, 800, 0.5),
227
+ }
228
+ return params_dict[model_name]
229
+
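For example (values taken from the table above):

```python
w, d, s, p = efficientnet_params('efficientnet-b4')
print(w, d, s, p)  # 1.4 1.8 380 0.4
```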
230
+
231
+ def efficientnet(width_coefficient=None, depth_coefficient=None, image_size=None,
232
+ dropout_rate=0.2, drop_connect_rate=0.2, num_classes=1000,
233
+ include_top=True, include_hm_decoder=False, head_conv=None,
234
+ heads=None, use_c2=False, use_c3=False, use_c4=False, use_c51=False,
235
+ num_layers=None, INIT_WEIGHTS=None, efpn=False, se_layer=False, tfpn=False):
236
+ """Create BlockArgs and GlobalParams for efficientnet model.
237
+ Args:
238
+ width_coefficient (float)
239
+ depth_coefficient (float)
240
+ image_size (int)
241
+ dropout_rate (float)
242
+ drop_connect_rate (float)
243
+ num_classes (int)
244
+ Meaning as the name suggests.
245
+ Returns:
246
+ blocks_args, global_params.
247
+ """
248
+
249
+ # Block args for the whole model (efficientnet-b0 by default)
250
+ # They are modified in the EfficientNet constructor according to the chosen model
251
+ blocks_args = [
252
+ 'r1_k3_s11_e1_i32_o16_se0.25',
253
+ 'r2_k3_s22_e6_i16_o24_se0.25',
254
+ 'r2_k5_s22_e6_i24_o40_se0.25',
255
+ 'r3_k3_s22_e6_i40_o80_se0.25',
256
+ 'r3_k5_s11_e6_i80_o112_se0.25',
257
+ 'r4_k5_s22_e6_i112_o192_se0.25',
258
+ 'r1_k3_s11_e6_i192_o320_se0.25',
259
+ ]
260
+ blocks_args = BlockDecoder.decode(blocks_args)
261
+
262
+ global_params = GlobalParams(
263
+ width_coefficient=width_coefficient,
264
+ depth_coefficient=depth_coefficient,
265
+ image_size=image_size,
266
+ dropout_rate=dropout_rate,
267
+
268
+ num_classes=num_classes,
269
+ batch_norm_momentum=0.99,
270
+ batch_norm_epsilon=1e-3,
271
+ drop_connect_rate=drop_connect_rate,
272
+ depth_divisor=8,
273
+ min_depth=None,
274
+ include_top=include_top,
275
+ include_hm_decoder=include_hm_decoder,
276
+ head_conv=head_conv,
277
+ heads=heads,
278
+ use_c2=use_c2,
279
+ use_c3=use_c3,
280
+ use_c4=use_c4,
281
+ use_c51=use_c51,
282
+ efpn=efpn,
283
+ tfpn=tfpn,
284
+ se_layer=se_layer,
285
+ num_layers=num_layers,
286
+ INIT_WEIGHTS=INIT_WEIGHTS
287
+ )
288
+
289
+ return blocks_args, global_params
290
+
291
+
292
+ class BlockDecoder(object):
293
+ """Block Decoder for readability,
294
+ straight from the official TensorFlow repository.
295
+ """
296
+
297
+ @staticmethod
298
+ def _decode_block_string(block_string):
299
+ """Get a block through a string notation of arguments.
300
+ Args:
301
+ block_string (str): A string notation of arguments.
302
+ Examples: 'r1_k3_s11_e1_i32_o16_se0.25_noskip'.
303
+ Returns:
304
+ BlockArgs: The namedtuple defined at the top of this file.
305
+ """
306
+ assert isinstance(block_string, str)
307
+
308
+ ops = block_string.split('_')
309
+ options = {}
310
+ for op in ops:
311
+ splits = re.split(r'(\d.*)', op)
312
+ if len(splits) >= 2:
313
+ key, value = splits[:2]
314
+ options[key] = value
315
+
316
+ # Check stride
317
+ assert (('s' in options and len(options['s']) == 1) or
318
+ (len(options['s']) == 2 and options['s'][0] == options['s'][1]))
319
+
320
+ return BlockArgs(
321
+ num_repeat=int(options['r']),
322
+ kernel_size=int(options['k']),
323
+ stride=[int(options['s'][0])],
324
+ expand_ratio=int(options['e']),
325
+ input_filters=int(options['i']),
326
+ output_filters=int(options['o']),
327
+ se_ratio=float(options['se']) if 'se' in options else None,
328
+ id_skip=('noskip' not in block_string))
329
+
330
+ @staticmethod
331
+ def _encode_block_string(block):
332
+ """Encode a block to a string.
333
+ Args:
334
+ block (namedtuple): A BlockArgs type argument.
335
+ Returns:
336
+ block_string: A String form of BlockArgs.
337
+ """
338
+ args = [
339
+ 'r%d' % block.num_repeat,
340
+ 'k%d' % block.kernel_size,
341
+ 's%d%d' % (block.stride[0], block.stride[0]), # decode() stores a one-element stride list
342
+ 'e%s' % block.expand_ratio,
343
+ 'i%d' % block.input_filters,
344
+ 'o%d' % block.output_filters
345
+ ]
346
+ if block.se_ratio is not None and 0 < block.se_ratio <= 1: # guard: se_ratio may be None
347
+ args.append('se%s' % block.se_ratio)
348
+ if block.id_skip is False:
349
+ args.append('noskip')
350
+ return '_'.join(args)
351
+
352
+ @staticmethod
353
+ def decode(string_list):
354
+ """Decode a list of string notations to specify blocks inside the network.
355
+ Args:
356
+ string_list (list[str]): A list of strings, each string is a notation of block.
357
+ Returns:
358
+ blocks_args: A list of BlockArgs namedtuples of block args.
359
+ """
360
+ assert isinstance(string_list, list)
361
+ blocks_args = []
362
+ for block_string in string_list:
363
+ blocks_args.append(BlockDecoder._decode_block_string(block_string))
364
+ return blocks_args
365
+
366
+ @staticmethod
367
+ def encode(blocks_args):
368
+ """Encode a list of BlockArgs to a list of strings.
369
+ Args:
370
+ blocks_args (list[namedtuples]): A list of BlockArgs namedtuples of block args.
371
+ Returns:
372
+ block_strings: A list of strings, each string is a notation of block.
373
+ """
374
+ block_strings = []
375
+ for block in blocks_args:
376
+ block_strings.append(BlockDecoder._encode_block_string(block))
377
+ return block_strings
378
+
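A hedged example of the string notation: repeat 1, 3x3 kernel, stride 1, expand ratio 1, 32 -> 16 filters, SE ratio 0.25.

```python
args = BlockDecoder.decode(['r1_k3_s11_e1_i32_o16_se0.25'])[0]
print(args.kernel_size, args.stride, args.se_ratio)  # 3 [1] 0.25
```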
379
+
380
+ class SwishImplementation(torch.autograd.Function):
381
+ @staticmethod
382
+ def forward(ctx, i):
383
+ result = i * torch.sigmoid(i)
384
+ ctx.save_for_backward(i)
385
+ return result
386
+
387
+ @staticmethod
388
+ def backward(ctx, grad_output):
389
+ i = ctx.saved_tensors[0]
390
+ sigmoid_i = torch.sigmoid(i)
391
+ return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i)))
392
+
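The custom backward implements the closed-form Swish derivative; for f(x) = x sigma(x):

```latex
f'(x) = \sigma(x) + x\,\sigma(x)\bigl(1 - \sigma(x)\bigr)
      = \sigma(x)\bigl(1 + x\,(1 - \sigma(x))\bigr)
```

which is exactly the expression returned in backward().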
393
+
394
+ def get_width_and_height_from_size(x):
395
+ """Obtain height and width from x.
396
+ Args:
397
+ x (int, tuple or list): Data size.
398
+ Returns:
399
+ size: A tuple or list (H,W).
400
+ """
401
+ if isinstance(x, int):
402
+ return x, x
403
+ if isinstance(x, list) or isinstance(x, tuple):
404
+ return x
405
+ else:
406
+ raise TypeError()
407
+
408
+
409
+ def calculate_output_image_size(input_image_size, stride):
410
+ """Calculates the output image size when using Conv2dSamePadding with a stride.
411
+ Necessary for static padding. Thanks to mannatsingh for pointing this out.
412
+ Args:
413
+ input_image_size (int, tuple or list): Size of input image.
414
+ stride (int, tuple or list): Conv2d operation's stride.
415
+ Returns:
416
+ output_image_size: A list [H,W].
417
+ """
418
+ if input_image_size is None:
419
+ return None
420
+ image_height, image_width = get_width_and_height_from_size(input_image_size)
421
+ stride = stride if isinstance(stride, int) else stride[0]
422
+ image_height = int(math.ceil(image_height / stride))
423
+ image_width = int(math.ceil(image_width / stride))
424
+ return [image_height, image_width]
425
+
426
+
427
+ class MemoryEfficientSwish(nn.Module):
428
+ def forward(self, x):
429
+ return SwishImplementation.apply(x)
430
+
431
+
432
+ url_map_advprop = {
433
+ 'efficientnet-b0': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b0-b64d5a18.pth',
434
+ 'efficientnet-b1': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b1-0f3ce85a.pth',
435
+ 'efficientnet-b2': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b2-6e9d97e5.pth',
436
+ 'efficientnet-b3': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b3-cdd7c0f4.pth',
437
+ 'efficientnet-b4': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b4-44fb3a87.pth',
438
+ 'efficientnet-b5': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b5-86493f6b.pth',
439
+ 'efficientnet-b6': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b6-ac80338e.pth',
440
+ 'efficientnet-b7': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b7-4652b6dd.pth',
441
+ 'efficientnet-b8': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b8-22a8fe65.pth',
442
+ }
443
+
444
+
445
+ url_map = {
446
+ 'efficientnet-b0': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth',
447
+ 'efficientnet-b1': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b1-f1951068.pth',
448
+ 'efficientnet-b2': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b2-8bb594d6.pth',
449
+ 'efficientnet-b3': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b3-5fb5a3c3.pth',
450
+ 'efficientnet-b4': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b4-6ed6700e.pth',
451
+ 'efficientnet-b5': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b5-b6417697.pth',
452
+ 'efficientnet-b6': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b6-c76e70fd.pth',
453
+ 'efficientnet-b7': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b7-dcc49843.pth',
454
+ }
455
+
456
+
457
+ def load_pretrained_weights(model, model_name, weights_path=None, load_fc=True, advprop=False, verbose=True):
458
+ """Loads pretrained weights from weights path or download using url.
459
+ Args:
460
+ model (Module): The whole model of efficientnet.
461
+ model_name (str): Model name of efficientnet.
462
+ weights_path (None or str):
463
+ str: path to pretrained weights file on the local disk.
464
+ None: use pretrained weights downloaded from the Internet.
465
+ load_fc (bool): Whether to load pretrained weights for fc layer at the end of the model.
466
+ advprop (bool): Whether to load pretrained weights
467
+ trained with advprop (valid when weights_path is None).
468
+ """
469
+ if isinstance(weights_path, str):
470
+ state_dict = torch.load(weights_path)
471
+ else:
472
+ # AutoAugment or Advprop (different preprocessing)
473
+ url_map_ = url_map_advprop if advprop else url_map
474
+ state_dict = model_zoo.load_url(url_map_[model_name])
475
+
476
+ if load_fc:
477
+ ret = model.load_state_dict(state_dict, strict=False)
478
+ assert not ret.missing_keys, 'Missing keys when loading pretrained weights: {}'.format(ret.missing_keys)
479
+ else:
480
+ state_dict.pop('_fc.weight')
481
+ state_dict.pop('_fc.bias')
482
+ ret = model.load_state_dict(state_dict, strict=False)
483
+
484
+ # if len(ret.missing_keys):
485
+ # assert set(ret.missing_keys) == set(
486
+ # ['_fc.weight', '_fc.bias']), 'Missing keys when loading pretrained weights: {}'.format(ret.missing_keys)
487
+ assert not ret.unexpected_keys, 'Unexpected keys when loading pretrained weights: {}'.format(ret.unexpected_keys)
488
+
489
+ if verbose:
490
+ print('Loaded pretrained weights for {}'.format(model_name))
models/networks/mrsa_resnet.py ADDED
@@ -0,0 +1,464 @@
1
+ #-*- coding: utf-8 -*-
2
+ from __future__ import absolute_import
3
+ from __future__ import division
4
+ from __future__ import print_function
5
+
6
+ import os
7
+ import math
8
+
9
+ import torch
10
+ import torch.nn as nn
11
+ from torch.nn.modules.activation import ReLU
12
+ from torch.nn.modules.batchnorm import BatchNorm2d
13
+ from torch.nn.modules.pooling import MaxPool2d
14
+ import torch.utils.model_zoo as model_zoo
15
+
16
+ from ..builder import MODELS, build_model
17
+ from .common import (
18
+ BN_MOMENTUM,
19
+ conv_block,
20
+ point_wise_block,
21
+ InceptionBlock,
22
+ )
23
+
24
+
25
+ model_urls = {
26
+ 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
27
+ 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
28
+ 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
29
+ 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
30
+ 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
31
+ }
32
+
33
+
34
+ def conv3x3(in_planes, out_planes, stride=1):
35
+ """3x3 convolution with padding"""
36
+ return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
37
+ padding=1, bias=False)
38
+
39
+
40
+ class BasicBlock(nn.Module):
41
+ expansion = 1
42
+
43
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
44
+ super(BasicBlock, self).__init__()
45
+ self.conv1 = conv3x3(inplanes, planes, stride)
46
+ self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
47
+ self.relu = nn.ReLU(inplace=True)
48
+ self.conv2 = conv3x3(planes, planes)
49
+ self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
50
+ self.downsample = downsample
51
+ self.stride = stride
52
+
53
+ def forward(self, x):
54
+ residual = x
55
+
56
+ out = self.conv1(x)
57
+ out = self.bn1(out)
58
+ out = self.relu(out)
59
+
60
+ out = self.conv2(out)
61
+ out = self.bn2(out)
62
+
63
+ if self.downsample is not None:
64
+ residual = self.downsample(x)
65
+
66
+ out += residual
67
+ out = self.relu(out)
68
+
69
+ return out
70
+
71
+ # class-level name used by PoseResNet to map config strings to block classes
+ @staticmethod
72
+ def __repr__():
73
+ return 'BasicBlock'
74
+
75
+
76
+ class Bottleneck(nn.Module):
77
+ expansion = 4
78
+
79
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
80
+ super(Bottleneck, self).__init__()
81
+ self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
82
+ self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
83
+ self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
84
+ padding=1, bias=False)
85
+ self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
86
+ self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
87
+ bias=False)
88
+ self.bn3 = nn.BatchNorm2d(planes * self.expansion,
89
+ momentum=BN_MOMENTUM)
90
+ self.relu = nn.ReLU(inplace=True)
91
+ self.downsample = downsample
92
+ self.stride = stride
93
+
94
+ def forward(self, x):
95
+ residual = x
96
+
97
+ out = self.conv1(x)
98
+ out = self.bn1(out)
99
+ out = self.relu(out)
100
+
101
+ out = self.conv2(out)
102
+ out = self.bn2(out)
103
+ out = self.relu(out)
104
+
105
+ out = self.conv3(out)
106
+ out = self.bn3(out)
107
+
108
+ if self.downsample is not None:
109
+ residual = self.downsample(x)
110
+
111
+ out += residual
112
+ out = self.relu(out)
113
+
114
+ return out
115
+
116
+ @staticmethod
117
+ def __repr__():
118
+ return 'Bottleneck'
119
+
120
+
121
+ @MODELS.register_module()
122
+ class PoseResNet(nn.Module):
123
+ def __init__(self,
124
+ block,
125
+ layers,
126
+ heads,
127
+ head_conv,
128
+ dropout_prob,
129
+ fpn=False,
130
+ cls_based_hm=True,
131
+ use_c2=False,
132
+ **kwargs):
133
+ self.inplanes = 64
134
+ self.deconv_with_bias = False
135
+ self.heads = heads
136
+ self.fpn = fpn
137
+ self.cls_based_hm = cls_based_hm
138
+ self.use_c2 = use_c2
139
+
140
+ # Convert block class name (string) into the class object
141
+ if isinstance(block, str):
142
+ for bl in [BasicBlock, Bottleneck]:
143
+ if block == bl.__repr__():
144
+ block = bl
145
+
146
+ for k, v in kwargs.items():
147
+ if v is None:
148
+ raise ValueError(f'The {k} argument received a None value, please check!')
149
+ self.__setattr__(k, v)
150
+
151
+ super(PoseResNet, self).__init__()
152
+ self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
153
+ bias=False)
154
+ self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
155
+ self.relu = nn.ReLU(inplace=True)
156
+ self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
157
+ self.layer1 = self._make_layer(block, 64, layers[0])
158
+ self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
159
+ self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
160
+ self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
161
+
162
+ # Custom dropout layer
163
+ self.dropout_layer = nn.Dropout(dropout_prob)
164
+
165
+ if self.fpn:
166
+ # Adding sigmoid layer
167
+ self.sigmoid_layer = nn.Sigmoid()
168
+
169
+ # Adding pointwise block
170
+ self.pw_block_1 = self._point_wise_block(2048, 1024)
171
+
172
+ # used for deconv layers
173
+ deconv_filters = [256, 128, 256] if self.fpn else [256, 256, 256]
174
+ self.deconv_layers = self._make_deconv_layer(
175
+ 3,
176
+ deconv_filters,
177
+ [4, 4, 4],
178
+ )
179
+
180
+ # Adding inception block
181
+ if self.fpn:
182
+ for idx, deconv_layer in enumerate(self.deconv_layers):
183
+ self.__setattr__(f'deconv_layer_{idx}', nn.Sequential(deconv_layer))
184
+ self.pw_block_2 = self._point_wise_block(512, 512)
185
+ if self.use_c2:
186
+ self.pw_block_3 = self._point_wise_block(512, 256)
187
+ self.pw_block_c3 = self._point_wise_block(1024, 256)
188
+ self.pw_block_c2 = self._point_wise_block(512, 128)
189
+ self.inception_block = InceptionBlock(256, 256, stride=1, pool_size=3)
190
+
191
+ for head in sorted(self.heads):
192
+ num_output = self.heads[head]
193
+ if head_conv > 0:
194
+ if head != 'cls':
195
+ fc = nn.Sequential(
196
+ nn.Conv2d(256, head_conv,
197
+ kernel_size=3, padding=1, bias=True),
198
+ nn.BatchNorm2d(head_conv),
199
+ nn.ReLU(inplace=True),
200
+ nn.Conv2d(head_conv, num_output,
201
+ kernel_size=1, stride=1, padding=0)
202
+ )
203
+ else:
204
+ if self.cls_based_hm:
205
+ fc = nn.Sequential(
206
+ nn.AdaptiveMaxPool2d(head_conv//4),
207
+ nn.Flatten(),
208
+ nn.Linear(num_output*((head_conv//4)**2), head_conv, bias=True),
209
+ nn.BatchNorm1d(head_conv, momentum=BN_MOMENTUM),
210
+ nn.ReLU(inplace=True),
211
+ nn.Linear(head_conv, 1, bias=True),
212
+ nn.Sigmoid()
213
+ )
214
+ else:
215
+ fc = nn.Sequential(
216
+ nn.Conv2d(256, head_conv, kernel_size=3,
217
+ padding=1, bias=True),
218
+ nn.BatchNorm2d(head_conv, momentum=BN_MOMENTUM),
219
+ nn.ReLU(inplace=True),
220
+ # nn.Conv2d(head_conv, num_output, kernel_size=1,
221
+ # stride=1, padding=0, bias=True),
222
+ # nn.BatchNorm2d(num_output),
223
+ # nn.ReLU(inplace=True),
224
+ # nn.AdaptiveMaxPool2d(head_conv//4),
225
+ nn.AdaptiveAvgPool2d(1),
226
+ nn.Flatten(),
227
+ # nn.Linear((head_conv//4)**2, head_conv, bias=True),
228
+ # nn.BatchNorm1d(head_conv, momentum=BN_MOMENTUM),
229
+ # nn.ReLU(inplace=True),
230
+ nn.Linear(head_conv, 1, bias=True),
231
+ # nn.Sigmoid()
232
+ )
233
+ else:
234
+ fc = nn.Conv2d(
235
+ in_channels=256,
236
+ out_channels=num_output,
237
+ kernel_size=1,
238
+ stride=1,
239
+ padding=0
240
+ )
241
+ self.__setattr__(head, fc)
242
+
243
+ def _point_wise_block(self, inplanes, outplanes):
244
+ self.inplanes = outplanes
245
+ module = point_wise_block(inplanes, outplanes)
246
+ return module
247
+
248
+ def _conv_block(self, inplanes, outplanes, kernel_size, stride=1):
249
+ self.inplanes = outplanes
250
+ module = conv_block(inplanes, outplanes, kernel_size=kernel_size, stride=stride)
251
+ return module
252
+
253
+ def _make_layer(self, block, planes, blocks, stride=1):
254
+ downsample = None
255
+ if stride != 1 or self.inplanes != planes * block.expansion:
256
+ downsample = nn.Sequential(
257
+ nn.Conv2d(self.inplanes, planes * block.expansion,
258
+ kernel_size=1, stride=stride, bias=False),
259
+ nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
260
+ )
261
+
262
+ layers = []
263
+ layers.append(block(self.inplanes, planes, stride, downsample))
264
+ self.inplanes = planes * block.expansion
265
+ for i in range(1, blocks):
266
+ layers.append(block(self.inplanes, planes))
267
+
268
+ return nn.Sequential(*layers)
269
+
270
+ def _get_deconv_cfg(self, deconv_kernel, index):
271
+ if deconv_kernel == 4:
272
+ padding = 1
273
+ output_padding = 0
274
+ elif deconv_kernel == 3:
275
+ padding = 1
276
+ output_padding = 1
277
+ elif deconv_kernel == 2:
278
+ padding = 0
279
+ output_padding = 0
280
+
281
+ return deconv_kernel, padding, output_padding
282
+
283
+ def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
284
+ assert num_layers == len(num_filters), \
285
+ 'ERROR: num_deconv_layers is different from len(num_deconv_filters)'
286
+ assert num_layers == len(num_kernels), \
287
+ 'ERROR: num_deconv_layers is different from len(num_deconv_kernels)'
288
+
289
+ layers = []
290
+ for i in range(num_layers):
291
+ kernel, padding, output_padding = \
292
+ self._get_deconv_cfg(num_kernels[i], i)
293
+
294
+ planes = num_filters[i]
295
+ layers.append(nn.Sequential(
296
+ nn.ConvTranspose2d(
297
+ in_channels=self.inplanes,
298
+ out_channels=planes,
299
+ kernel_size=kernel,
300
+ stride=2,
301
+ padding=padding,
302
+ output_padding=output_padding,
303
+ bias=self.deconv_with_bias),
304
+ nn.BatchNorm2d(planes, momentum=BN_MOMENTUM))
305
+ )
306
+ if (not self.fpn):
307
+ layers.append(nn.ReLU(inplace=True))
308
+
309
+ self.inplanes = planes if not self.fpn else planes * 2
310
+
311
+ if self.fpn:
312
+ return layers
313
+ else:
314
+ return nn.Sequential(*layers)
315
+
316
+ def forward(self, x):
317
+ x = self.conv1(x)
318
+ x = self.bn1(x)
319
+ x = self.relu(x)
320
+ x = self.maxpool(x)
321
+
322
+ x1 = self.layer1(x) #256 x 64 x 64
323
+ x2 = self.layer2(x1) #512 x 32 x 32
324
+ x3 = self.layer3(x2) #1024 x 16 x 16
325
+ x4 = self.layer4(x3) #2048 x 8 x 8
326
+
327
+ # Custom dropout layer
328
+ x = self.dropout_layer(x4) #B x 8 x 8 x 2048
329
+ x3 = self.dropout_layer(x3)
330
+ x2 = self.dropout_layer(x2)
331
+ x1 = self.dropout_layer(x1)
332
+
333
+ # Custom FPN
334
+ if self.fpn:
335
+ assert isinstance(self.deconv_layers, list), "For the custom FPN, the deconv layers must be kept as a list!"
336
+ x = self.pw_block_1(x) # B x 1024 x 8 x 8
337
+ x = self.deconv_layer_0(x) # B x 256 x 16 x 16
338
+ # x = self.relu(x) # B x 256 x 16 x 16
339
+
340
+ x_weighted = self.sigmoid_layer(x) # B x 256 x 16 x 16
341
+ x_inverse = torch.sub(1, x_weighted, alpha=1) # B x 256 x 16 x 16
342
+ x3 = self.pw_block_c3(x3) #B x 256 x 16 x 16
343
+ x3_ = torch.multiply(x3, x_inverse) #B x 256 x 16 x 16
344
+ x = torch.cat((x, x3_), dim=1) #B x 512 x 16 x 16
345
+
346
+ x = self.pw_block_2(x) #B x 512 x 16 x 16
347
+ x = self.deconv_layer_1(x) #B x 128 x 32 x 32
348
+ # x = self.relu(x) #B x 128 x 32 x 32
349
+
350
+ x_weighted = self.sigmoid_layer(x) #B x 128 x 32 x 32
351
+ x_inverse = torch.sub(1, x_weighted, alpha=1) #B x 128 x 32 x 32
352
+ x2 = self.pw_block_c2(x2)
353
+ x2_ = torch.multiply(x2, x_inverse) #B x 128 x 32 x 32
354
+ x = torch.cat((x, x2_), dim=1) #B x 256 x 32 x 32
355
+
356
+ x = self.inception_block(x) #B x 256 x 64 x 64
357
+ x = self.deconv_layer_2(x) #B x 256 x 64 x 64
358
+
359
+ if self.use_c2:
360
+ x_weighted = self.sigmoid_layer(x)
361
+ x_inverse = torch.sub(1, x_weighted, alpha=1)
362
+ x1_ = torch.multiply(x1, x_inverse)
363
+ x = torch.cat((x, x1_), dim=1)
364
+ x = self.pw_block_3(x)
365
+ else:
366
+ x = self.relu(x) #B x 256 x 64 x 64
367
+ else:
368
+ assert isinstance(self.deconv_layers, nn.Module), "Deconv layers must be an nn.Module when FPN is disabled!"
369
+ x = self.deconv_layers(x)
370
+
371
+ ret = {}
372
+ x1_hm = None
373
+ for head in self.heads:
374
+ if self.cls_based_hm and head == 'cls' and x1_hm is not None:
375
+ x = x1_hm
376
+ elif head == 'hm':
377
+ x1_hm = x
378
+
379
+ ret[head] = self.__getattr__(head)(x)
380
+
381
+ return [ret]
382
+
383
+ def init_weights(self, pretrained=True, **kwargs):
384
+ num_layers = kwargs.get('num_layers')
385
+ if pretrained:
386
+ if self.fpn:
387
+ for bl in [self.pw_block_1, self.pw_block_2]:
388
+ for _, l in bl.named_parameters():
389
+ if isinstance(l, nn.Conv2d):
390
+ nn.init.normal_(l.weight, std=0.001)
391
+ nn.init.constant_(l.bias, 0)
392
+
393
+ for _, l in self.inception_block.named_parameters():
394
+ if isinstance(l, nn.Conv2d):
395
+ nn.init.normal_(l.weight, std=0.001)
396
+ nn.init.constant_(l.bias, 0)
397
+
398
+ # print('=> init resnet deconv weights from normal distribution')
399
+ if isinstance(self.deconv_layers, nn.Module):
400
+ for _, m in self.deconv_layers.named_modules():
401
+ if isinstance(m, nn.ConvTranspose2d):
402
+ # print('=> init {}.weight as normal(0, 0.001)'.format(name))
403
+ # print('=> init {}.bias as 0'.format(name))
404
+ nn.init.normal_(m.weight, std=0.001)
405
+ if self.deconv_with_bias:
406
+ nn.init.constant_(m.bias, 0)
407
+ elif isinstance(m, nn.BatchNorm2d):
408
+ # print('=> init {}.weight as 1'.format(name))
409
+ # print('=> init {}.bias as 0'.format(name))
410
+ nn.init.constant_(m.weight, 1)
411
+ nn.init.constant_(m.bias, 0)
412
+ else:
413
+ for layer in [self.deconv_layer_0, self.deconv_layer_1, self.deconv_layer_2]:
414
+ for _, m in layer.named_modules():
415
+ if isinstance(m, nn.ConvTranspose2d):
416
+ # print('=> init {}.weight as normal(0, 0.001)'.format(name))
417
+ # print('=> init {}.bias as 0'.format(name))
418
+ nn.init.normal_(m.weight, std=0.001)
419
+ if self.deconv_with_bias:
420
+ nn.init.constant_(m.bias, 0)
421
+ elif isinstance(m, nn.BatchNorm2d):
422
+ # print('=> init {}.weight as 1'.format(name))
423
+ # print('=> init {}.bias as 0'.format(name))
424
+ nn.init.constant_(m.weight, 1)
425
+ nn.init.constant_(m.bias, 0)
426
+
427
+ # print('=> init final conv weights from normal distribution')
428
+ for head in self.heads:
429
+ final_layer = self.__getattr__(head)
430
+ for i, m in enumerate(final_layer.modules()):
431
+ if isinstance(m, nn.Conv2d):
432
+ # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
433
+ # print('=> init {}.weight as normal(0, 0.001)'.format(name))
434
+ # print('=> init {}.bias as 0'.format(name))
435
+ if m.weight.shape[0] == self.heads[head]:
436
+ if 'hm' in head:
437
+ nn.init.constant_(m.bias, -2.19)
438
+ else:
439
+ nn.init.normal_(m.weight, std=0.001)
440
+ nn.init.constant_(m.bias, 0)
441
+ # if isinstance(m, nn.Linear):
442
+ # if m.weight.shape[0] == self.heads[head]:
443
+ # prior = 1/71
444
+ # nn.init.constant_(m.bias, -math.log((1-prior)/prior))
445
+ # else:
446
+ # nn.init.normal_(m.weight, std=0.001)
447
+ # nn.init.constant_(m.bias, 0)
448
+
449
+ #pretrained_state_dict = torch.load(pretrained)
450
+ url = model_urls['resnet{}'.format(num_layers)]
451
+ pretrained_state_dict = model_zoo.load_url(url)
452
+ print('=> loading pretrained model {}'.format(url))
453
+ self.load_state_dict(pretrained_state_dict, strict=False)
454
+ else:
455
+ print('=> imagenet pretrained model does not exist')
456
+ print('=> please download it first')
457
+ raise ValueError('imagenet pretrained model does not exist')
458
+
459
+
460
+ resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]),
461
+ 34: (BasicBlock, [3, 4, 6, 3]),
462
+ 50: (Bottleneck, [3, 4, 6, 3]),
463
+ 101: (Bottleneck, [3, 4, 23, 3]),
464
+ 152: (Bottleneck, [3, 8, 36, 3])}
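A hedged construction sketch using resnet_spec (the heads and head_conv values are illustrative, not from a repo config):

```python
block, layers = resnet_spec[50]
net = PoseResNet(block, layers,
                 heads={'hm': 71, 'cls': 1},  # illustrative head spec
                 head_conv=64,
                 dropout_prob=0.2,
                 fpn=False)
net.init_weights(pretrained=True, num_layers=50)  # downloads the ImageNet resnet50 weights
```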
models/networks/pose_efficientNet.py ADDED
@@ -0,0 +1,788 @@
1
+ #-*- coding: utf-8 -*-
2
+ import math
3
+ import sys
4
+ import os
5
+ if os.getcwd() not in sys.path:
6
+ sys.path.append(os.getcwd())
7
+
8
+ import torch
9
+ from torch import nn
10
+ from torch.nn import functional as F
11
+ from torch.utils import model_zoo
12
+
13
+ from ..builder import MODELS, build_model
14
+ from .efficientNet import (
15
+ round_filters,
16
+ round_repeats,
17
+ drop_connect,
18
+ get_same_padding_conv2d,
19
+ get_model_params,
20
+ efficientnet_params,
21
+ load_pretrained_weights,
22
+ Swish,
23
+ MemoryEfficientSwish,
24
+ calculate_output_image_size,
25
+ url_map_advprop,
26
+ url_map
27
+ )
28
+ from .common import (
29
+ InceptionBlock,
30
+ conv_block,
31
+ BN_MOMENTUM,
32
+ SELayer
33
+ )
34
+
35
+
36
+ VALID_MODELS = (
37
+ 'efficientnet-b0', 'efficientnet-b1', 'efficientnet-b2', 'efficientnet-b3',
38
+ 'efficientnet-b4', 'efficientnet-b5', 'efficientnet-b6', 'efficientnet-b7',
39
+ 'efficientnet-b8',
40
+
41
+ # Support the construction of 'efficientnet-l2' without pretrained weights
42
+ 'efficientnet-l2'
43
+ )
44
+
45
+
46
+ class MBConvBlock(nn.Module):
47
+ """Mobile Inverted Residual Bottleneck Block.
48
+ Args:
49
+ block_args (namedtuple): BlockArgs, defined in utils.py.
50
+ global_params (namedtuple): GlobalParam, defined in utils.py.
51
+ image_size (tuple or list): [image_height, image_width].
52
+ References:
53
+ [1] https://arxiv.org/abs/1704.04861 (MobileNet v1)
54
+ [2] https://arxiv.org/abs/1801.04381 (MobileNet v2)
55
+ [3] https://arxiv.org/abs/1905.02244 (MobileNet v3)
56
+ """
57
+
58
+ def __init__(self, block_args, global_params, image_size=None):
59
+ super().__init__()
60
+ self._block_args = block_args
61
+ self._bn_mom = 1 - global_params.batch_norm_momentum # pytorch's difference from tensorflow
62
+ self._bn_eps = global_params.batch_norm_epsilon
63
+ self.has_se = (self._block_args.se_ratio is not None) and (0 < self._block_args.se_ratio <= 1)
64
+ self.id_skip = block_args.id_skip # whether to use skip connection and drop connect
65
+
66
+ # Expansion phase (Inverted Bottleneck)
67
+ inp = self._block_args.input_filters # number of input channels
68
+ oup = self._block_args.input_filters * self._block_args.expand_ratio # number of output channels
69
+ if self._block_args.expand_ratio != 1:
70
+ Conv2d = get_same_padding_conv2d(image_size=image_size)
71
+ self._expand_conv = Conv2d(in_channels=inp, out_channels=oup, kernel_size=1, bias=False)
72
+ self._bn0 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps)
73
+ # image_size = calculate_output_image_size(image_size, 1) <-- this wouldn't modify image_size
74
+
75
+ # Depthwise convolution phase
76
+ k = self._block_args.kernel_size
77
+ s = self._block_args.stride
78
+ Conv2d = get_same_padding_conv2d(image_size=image_size)
79
+ self._depthwise_conv = Conv2d(
80
+ in_channels=oup, out_channels=oup, groups=oup, # groups makes it depthwise
81
+ kernel_size=k, stride=s, bias=False)
82
+ self._bn1 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps)
83
+ image_size = calculate_output_image_size(image_size, s)
84
+
85
+ # Squeeze and Excitation layer, if desired
86
+ if self.has_se:
87
+ Conv2d = get_same_padding_conv2d(image_size=(1, 1))
88
+ num_squeezed_channels = max(1, int(self._block_args.input_filters * self._block_args.se_ratio))
89
+ self._se_reduce = Conv2d(in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1)
90
+ self._se_expand = Conv2d(in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1)
91
+
92
+ # Pointwise convolution phase
93
+ final_oup = self._block_args.output_filters
94
+ Conv2d = get_same_padding_conv2d(image_size=image_size)
95
+ self._project_conv = Conv2d(in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False)
96
+ self._bn2 = nn.BatchNorm2d(num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps)
97
+ self._swish = MemoryEfficientSwish()
98
+
99
+ def forward(self, inputs, drop_connect_rate=None):
100
+ """MBConvBlock's forward function.
101
+ Args:
102
+ inputs (tensor): Input tensor.
103
+ drop_connect_rate (float): Drop connect rate, between 0 and 1.
104
+ Returns:
105
+ Output of this block after processing.
106
+ """
107
+
108
+ # Expansion and Depthwise Convolution
109
+ x = inputs
110
+ if self._block_args.expand_ratio != 1:
111
+ x = self._expand_conv(inputs)
112
+ x = self._bn0(x)
113
+ x = self._swish(x)
114
+
115
+ x = self._depthwise_conv(x)
116
+ x = self._bn1(x)
117
+ x = self._swish(x)
118
+
119
+ # Squeeze and Excitation
120
+ if self.has_se:
121
+ x_squeezed = F.adaptive_avg_pool2d(x, 1)
122
+ x_squeezed = self._se_reduce(x_squeezed)
123
+ x_squeezed = self._swish(x_squeezed)
124
+ x_squeezed = self._se_expand(x_squeezed)
125
+ x = torch.sigmoid(x_squeezed) * x
126
+
127
+ # Pointwise Convolution
128
+ x = self._project_conv(x)
129
+ x = self._bn2(x)
130
+
131
+ # Skip connection and drop connect
132
+ input_filters, output_filters = self._block_args.input_filters, self._block_args.output_filters
133
+ if self.id_skip and self._block_args.stride == 1 and input_filters == output_filters:
134
+ # The combination of skip connection and drop connect brings about stochastic depth.
135
+ if drop_connect_rate:
136
+ x = drop_connect(x, p=drop_connect_rate, training=self.training)
137
+ x = x + inputs # skip connection
138
+ return x
139
+
140
+ def set_swish(self, memory_efficient=True):
141
+ """Sets swish function as memory efficient (for training) or standard (for export).
142
+ Args:
143
+ memory_efficient (bool): Whether to use memory-efficient version of swish.
144
+ """
145
+ self._swish = MemoryEfficientSwish() if memory_efficient else Swish()
146
+
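A hedged shape check for a single MBConv block; the import path mirrors this repo's layout (an assumption) and the block args mirror the first b0 stage (expand ratio 1, 3x3 kernel, stride 1, 32 -> 16 channels).

```python
import torch
from models.networks.efficientNet import BlockArgs, efficientnet  # assumed import path

ba = BlockArgs(num_repeat=1, kernel_size=3, stride=[1], expand_ratio=1,
               input_filters=32, output_filters=16, se_ratio=0.25, id_skip=True)
_, gp = efficientnet()  # default (b0-style) global params
blk = MBConvBlock(ba, gp, image_size=112).eval()
print(blk(torch.randn(1, 32, 112, 112)).shape)  # torch.Size([1, 16, 112, 112])
```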
147
+
148
+ @MODELS.register_module()
149
+ class EfficientNet(nn.Module):
150
+ """EfficientNet model.
151
+ Most easily loaded with the .from_name or .from_pretrained methods.
152
+ Args:
153
+ blocks_args (list[namedtuple]): A list of BlockArgs to construct blocks.
154
+ global_params (namedtuple): A set of GlobalParams shared between blocks.
155
+ References:
156
+ [1] https://arxiv.org/abs/1905.11946 (EfficientNet)
157
+ Example:
158
+ >>> import torch
159
+ >>> from efficientnet.model import EfficientNet
160
+ >>> inputs = torch.rand(1, 3, 224, 224)
161
+ >>> model = EfficientNet.from_pretrained('efficientnet-b0')
162
+ >>> model.eval()
163
+ >>> outputs = model(inputs)
164
+ """
165
+
166
+ def __init__(self, blocks_args=None, global_params=None):
167
+ super().__init__()
168
+ assert isinstance(blocks_args, list), 'blocks_args should be a list'
169
+ assert len(blocks_args) > 0, 'block args must be greater than 0'
170
+ self._global_params = global_params
171
+ self._blocks_args = blocks_args
172
+
173
+ # Batch norm parameters
174
+ bn_mom = 1 - self._global_params.batch_norm_momentum
175
+ bn_eps = self._global_params.batch_norm_epsilon
176
+
177
+ # Get stem static or dynamic convolution depending on image size
178
+ image_size = global_params.image_size
179
+ Conv2d = get_same_padding_conv2d(image_size=image_size)
180
+
181
+ # Stem
182
+ in_channels = 3 # rgb
183
+ out_channels = round_filters(32, self._global_params) # number of output channels
184
+ self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
185
+ self._bn0 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)
186
+ image_size = calculate_output_image_size(image_size, 2)
187
+
188
+ # Build blocks
189
+ self._blocks = nn.ModuleList([])
190
+ for block_args in self._blocks_args:
191
+
192
+ # Update block input and output filters based on depth multiplier.
193
+ block_args = block_args._replace(
194
+ input_filters=round_filters(block_args.input_filters, self._global_params),
195
+ output_filters=round_filters(block_args.output_filters, self._global_params),
196
+ num_repeat=round_repeats(block_args.num_repeat, self._global_params)
197
+ )
198
+
199
+ # The first block needs to take care of stride and filter size increase.
200
+ self._blocks.append(MBConvBlock(block_args, self._global_params, image_size=image_size))
201
+ image_size = calculate_output_image_size(image_size, block_args.stride)
202
+ if block_args.num_repeat > 1: # modify block_args to keep same output size
203
+ block_args = block_args._replace(input_filters=block_args.output_filters, stride=1)
204
+ for _ in range(block_args.num_repeat - 1):
205
+ self._blocks.append(MBConvBlock(block_args, self._global_params, image_size=image_size))
206
+ # image_size = calculate_output_image_size(image_size, block_args.stride) # stride = 1
207
+
208
+ # Head
209
+ in_channels = block_args.output_filters # output of final block
210
+ out_channels = round_filters(1280, self._global_params)
211
+ Conv2d = get_same_padding_conv2d(image_size=image_size)
212
+ self._conv_head = Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
213
+ self._bn1 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)
214
+
215
+ # Final linear layer
216
+ self._avg_pooling = nn.AdaptiveAvgPool2d(1)
217
+ if self._global_params.include_top:
218
+ self._dropout = nn.Dropout(self._global_params.dropout_rate)
219
+ self._fc = nn.Linear(out_channels, self._global_params.num_classes)
220
+
221
+ # Heatmap Decoder Construction
222
+ if self._global_params.include_hm_decoder:
223
+ print("Constructing the heatmap Decoder!")
224
+ self.efpn = self._global_params.efpn
225
+ self.tfpn = self._global_params.tfpn
226
+
227
+ assert not (self.efpn and self.tfpn), "Only one of E-FPN or T-FPN can be integrated!"
228
+
229
+ self.se_layer = self._global_params.se_layer
230
+ # self.hm_decoder_filters = [1792, 448, 160, 56] if self.fpn else [1792, 256, 256, 128]
231
+ self.hm_decoder_filters = [1792, 448, 160, 56]
232
+ num_kernels = [4, 4, 4, 4] if (self.efpn or self.tfpn) else [4, 4, 4]
233
+ self._dropout = nn.Dropout(self._global_params.dropout_rate)
234
+ self._sigmoid = nn.Sigmoid()
235
+ self._relu = nn.ReLU(inplace=True)
236
+ self._relu1 = nn.ReLU(inplace=False)
237
+ self.deconv_with_bias = False
238
+ if self._global_params.use_c3:
239
+ self.inception_block = InceptionBlock(112, 112, stride=1, pool_size=3)
240
+ else:
241
+ self.inception_block = InceptionBlock(56, 56, stride=1, pool_size=3)
242
+ self.heads = self._global_params.heads
243
+ n_deconv = len(self.hm_decoder_filters)
244
+ self.fpn_layers = [self._global_params.use_c51, self._global_params.use_c4, self._global_params.use_c3]
245
+
246
+ if self.efpn or self.tfpn:
247
+ for idx in range(n_deconv):
248
+ in_decod_filters = self.hm_decoder_filters[idx]
249
+
250
+ if idx == 0:
251
+ out_decod_filters = self.hm_decoder_filters[idx+1]
252
+ deconv = nn.Sequential(
253
+ conv_block(in_decod_filters, out_decod_filters, (3,3), stride=1, padding=1),
254
+ )
255
+ else:
256
+ in_decod_filters = in_decod_filters*2 if self.fpn_layers[idx-1] else in_decod_filters
257
+ kernel, padding, output_padding = self._get_deconv_cfg(num_kernels[idx])
258
+
259
+ if idx+1 < n_deconv:
260
+ out_decod_filters = self.hm_decoder_filters[idx+1]
261
+ deconv = nn.Sequential(
262
+ conv_block(in_decod_filters, out_decod_filters, (3,3), stride=1, padding=1),
263
+ nn.ConvTranspose2d(
264
+ in_channels=out_decod_filters,
265
+ out_channels=out_decod_filters,
266
+ kernel_size=kernel,
267
+ stride=2,
268
+ padding=padding,
269
+ output_padding=output_padding,
270
+ bias=self.deconv_with_bias),
271
+ nn.BatchNorm2d(out_decod_filters, momentum=BN_MOMENTUM),
272
+ )
273
+ else:
274
+ out_decod_filters = in_decod_filters
275
+ deconv = nn.Sequential(
276
+ self.inception_block,
277
+ nn.ConvTranspose2d(
278
+ in_channels=out_decod_filters,
279
+ out_channels=out_decod_filters,
280
+ kernel_size=kernel,
281
+ stride=2,
282
+ padding=padding,
283
+ output_padding=output_padding,
284
+ bias=self.deconv_with_bias),
285
+ nn.BatchNorm2d(out_decod_filters, momentum=BN_MOMENTUM),
286
+ )
287
+
288
+ # In case of using C2, this conv to apply to C2 features to get the same filters of the last deconv
289
+ if self._global_params.use_c2:
290
+ self.conv_c2 = conv_block(32, out_decod_filters, (3,3), stride=1, padding=1)
291
+ if self.se_layer:
292
+ se = SELayer(channel=out_decod_filters*2)
293
+ self.__setattr__(f'se_layer_{idx+1}', se)
294
+
295
+ self.__setattr__(f'deconv_{idx+1}', deconv)
296
+ else:
297
+ self.deconv_layers = self._make_deconv_layer(
298
+ len(num_kernels),
299
+ self.hm_decoder_filters,
300
+ num_kernels,
301
+ )
302
+
303
+ for head, num_output in self.heads.items():
304
+ head_conv = int(self._global_params.head_conv)
305
+ num_output = int(num_output)
306
+ if self._global_params.use_c2:
307
+ assert self._global_params.efpn or self._global_params.tfpn, "An FPN design (E-FPN or T-FPN) must be active!"
308
+ assert self._global_params.use_c3, "C3 must be utilized for FPN integration of C2"
309
+ in_head_filters = self.hm_decoder_filters[-1]*4
310
+ elif self._global_params.use_c3:
311
+ in_head_filters = self.hm_decoder_filters[-1]*2
312
+ else:
313
+ in_head_filters = self.hm_decoder_filters[-1]
314
+
315
+ if head_conv > 0:
316
+ if head != 'cls':
317
+ fc = nn.Sequential(
318
+ nn.Conv2d(in_head_filters, head_conv,
319
+ kernel_size=3, padding=1, bias=True),
320
+ nn.BatchNorm2d(head_conv),
321
+ nn.ReLU(inplace=True),
322
+ nn.Conv2d(head_conv, num_output,
323
+ kernel_size=1, stride=1, padding=0)
324
+ )
325
+ else:
326
+ fc = nn.Sequential(
327
+ nn.Conv2d(in_head_filters, head_conv, kernel_size=3,
328
+ padding=1, bias=True),
329
+ nn.BatchNorm2d(head_conv, momentum=BN_MOMENTUM),
330
+ nn.ReLU(inplace=True),
331
+ # nn.Conv2d(head_conv, num_output, kernel_size=1,
332
+ # stride=1, padding=0, bias=True),
333
+ # nn.BatchNorm2d(num_output),
334
+ # nn.ReLU(inplace=True),
335
+ # nn.AdaptiveMaxPool2d(head_conv//4),
336
+ nn.AdaptiveAvgPool2d(1),
337
+ nn.Flatten(),
338
+ # nn.Linear((head_conv//4)**2, head_conv, bias=True),
339
+ # nn.BatchNorm1d(head_conv, momentum=BN_MOMENTUM),
340
+ # nn.ReLU(inplace=True),
341
+ nn.Linear(head_conv, num_output, bias=True),
342
+ # nn.Sigmoid(),
343
+ # nn.Softmax(dim=-1)
344
+ )
345
+ else:
346
+ fc = nn.Conv2d(
347
+ in_channels=in_head_filters,
348
+ out_channels=num_output,
349
+ kernel_size=1,
350
+ stride=1,
351
+ padding=0
352
+ )
353
+ self.__setattr__(head, fc)
354
+
355
+ # set activation to memory efficient swish by default
356
+ self._swish = MemoryEfficientSwish()
357
+
358
+ def _get_deconv_cfg(self, deconv_kernel):
359
+ if deconv_kernel == 4:
360
+ padding = 1
361
+ output_padding = 0
362
+ elif deconv_kernel == 3:
363
+ padding = 1
364
+ output_padding = 1
365
+ elif deconv_kernel == 2:
366
+ padding = 0
367
+ output_padding = 0
+ else:
+ # Fail fast instead of returning unbound padding values
+ raise ValueError('Unsupported deconv_kernel: {}'.format(deconv_kernel))
368
+
369
+ return deconv_kernel, padding, output_padding
370
+
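+ # Sanity check on the configs above (a sketch, assuming stride 2 everywhere):
+ # out = (in - 1)*2 - 2*padding + kernel + output_padding, so kernel=4 (pad=1),
+ # kernel=3 (pad=1, outpad=1) and kernel=2 (pad=0) all give out = 2*in.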
371
+ def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
372
+ assert num_layers == (len(num_filters) - 1), \
373
+ 'ERROR: num_layers must equal len(num_filters) - 1'
374
+ assert num_layers == len(num_kernels), \
375
+ 'ERROR: num_layers must equal len(num_kernels)'
376
+
377
+ layers = []
378
+ for i in range(num_layers):
379
+ kernel, padding, output_padding = \
380
+ self._get_deconv_cfg(num_kernels[i])
381
+
382
+ in_planes = num_filters[i]
383
+ out_planes = num_filters[i+1]
384
+
385
+ layers.append(nn.Sequential(
386
+ nn.ConvTranspose2d(
387
+ in_channels=in_planes,
388
+ out_channels=out_planes,
389
+ kernel_size=kernel,
390
+ stride=2,
391
+ padding=padding,
392
+ output_padding=output_padding,
393
+ bias=self.deconv_with_bias),
394
+ nn.BatchNorm2d(out_planes, momentum=BN_MOMENTUM),
395
+ nn.ReLU(inplace=True))
396
+ )
397
+
398
+ return nn.Sequential(*layers)
399
+
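+ # Shape sketch for the plain (non-FPN) decoder, assuming the b4 config from
+ # __main__ with 384x384 inputs: reduction_6 is 12x12x1792 (stride 32), and the
+ # three stride-2 deconvs give 24x24x448 -> 48x48x160 -> 96x96x56 (stride 4).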
400
+ def set_swish(self, memory_efficient=True):
401
+ """Sets swish function as memory efficient (for training) or standard (for export).
402
+ Args:
403
+ memory_efficient (bool): Whether to use memory-efficient version of swish.
404
+ """
405
+ self._swish = MemoryEfficientSwish() if memory_efficient else Swish()
406
+ for block in self._blocks:
407
+ block.set_swish(memory_efficient)
408
+
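+ # Hypothetical usage: call model.set_swish(memory_efficient=False) before tracing
+ # or ONNX export, since the autograd-based MemoryEfficientSwish cannot be traced.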
409
+ def extract_endpoints(self, inputs):
410
+ """Use convolution layer to extract features
411
+ from reduction levels i in [1, 2, 3, 4, 5].
412
+ Args:
413
+ inputs (tensor): Input tensor.
414
+ Returns:
415
+ Dictionary of last intermediate features
416
+ with reduction levels i in [1, 2, 3, 4, 5, 6].
417
+ Example:
418
+ >>> import torch
419
+ >>> from efficientnet.model import EfficientNet
420
+ >>> inputs = torch.rand(1, 3, 224, 224)
421
+ >>> model = EfficientNet.from_pretrained('efficientnet-b0')
422
+ >>> endpoints = model.extract_endpoints(inputs)
423
+ >>> print(endpoints['reduction_1'].shape) # torch.Size([1, 16, 112, 112])
424
+ >>> print(endpoints['reduction_2'].shape) # torch.Size([1, 24, 56, 56])
425
+ >>> print(endpoints['reduction_3'].shape) # torch.Size([1, 40, 28, 28])
426
+ >>> print(endpoints['reduction_4'].shape) # torch.Size([1, 112, 14, 14])
427
+ >>> print(endpoints['reduction_5'].shape) # torch.Size([1, 320, 7, 7])
428
+ >>> print(endpoints['reduction_6'].shape) # torch.Size([1, 1280, 7, 7])
429
+ """
430
+ endpoints = dict()
431
+
432
+ # Stem
433
+ x = self._swish(self._bn0(self._conv_stem(inputs)))
434
+ prev_x = x
435
+
436
+ # Blocks
437
+ for idx, block in enumerate(self._blocks):
438
+ drop_connect_rate = self._global_params.drop_connect_rate
439
+ if drop_connect_rate:
440
+ drop_connect_rate *= float(idx) / len(self._blocks) # scale drop_connect_rate linearly over depth
441
+ x = block(x, drop_connect_rate=drop_connect_rate)
442
+ # print('Prev', prev_x.size())
443
+ # print('X', x.size())
444
+ if prev_x.size(2) > x.size(2):
445
+ endpoints['reduction_{}'.format(len(endpoints) + 1)] = prev_x
446
+ elif idx == len(self._blocks) - 1:
447
+ endpoints['reduction_{}'.format(len(endpoints) + 1)] = x
448
+ prev_x = x
449
+
450
+ # Head
451
+ x = self._swish(self._bn1(self._conv_head(x)))
452
+ endpoints['reduction_{}'.format(len(endpoints) + 1)] = x
453
+
454
+ return endpoints
455
+
456
+ def extract_features(self, inputs):
457
+ """use convolution layer to extract feature .
458
+ Args:
459
+ inputs (tensor): Input tensor.
460
+ Returns:
461
+ Output of the final convolution
462
+ layer in the efficientnet model.
463
+ """
464
+ # Stem
465
+ x = self._swish(self._bn0(self._conv_stem(inputs)))
466
+
467
+ # Blocks
468
+ for idx, block in enumerate(self._blocks):
469
+ drop_connect_rate = self._global_params.drop_connect_rate
470
+ if drop_connect_rate:
471
+ drop_connect_rate *= float(idx) / len(self._blocks) # scale drop_connect_rate linearly over depth
472
+ x = block(x, drop_connect_rate=drop_connect_rate)
473
+
474
+ # Head
475
+ x = self._swish(self._bn1(self._conv_head(x)))
476
+
477
+ return x
478
+
479
+ def forward(self, inputs):
480
+ """EfficientNet's forward function.
481
+ Calls extract_features to extract features, applies final linear layer, and returns logits.
482
+ Args:
483
+ inputs (tensor): Input tensor.
484
+ Returns:
485
+ Output of this model after processing.
486
+ """
487
+ # Convolution layers
488
+ # x = self.extract_features(inputs)
489
+ endpoints = self.extract_endpoints(inputs)
490
+ x1 = endpoints['reduction_6']
491
+ x2 = endpoints['reduction_5']
492
+ x3 = endpoints['reduction_4']
493
+ x4 = endpoints['reduction_3']
494
+ x5 = endpoints['reduction_2']
495
+ x = x1
496
+
497
+ if self._global_params.include_top:
498
+ # Pooling and final linear layer
499
+ x = self._avg_pooling(x)
500
+
501
+ x = x.flatten(start_dim=1)
502
+ x = self._dropout(x)
503
+ x = self._fc(x)
504
+ return x
505
+
506
+ if self._global_params.include_hm_decoder:
507
+ x1 = self._dropout(x1)
508
+ x2 = self._dropout(x2)
509
+ x3 = self._dropout(x3)
510
+ x4 = self._dropout(x4)
511
+
512
+ if self.efpn:
513
+ assert self._global_params.use_c51, "C51 must be utilized for FPN integration"
514
+
515
+ x = self.__getattr__('deconv_1')(x1)
516
+
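+ # E-FPN fusion sketch: x_inv = 1 - sigmoid(x) down-weights the skip features where
+ # the decoder is already confident, and the gated skip is concatenated channel-wise.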
517
+ if self._global_params.use_c51:
518
+ x_weighted = self._sigmoid(x)
519
+ x_inv = torch.sub(1, x_weighted, alpha=1)
520
+ x2_ = torch.multiply(x_inv, x2)
521
+ x = torch.cat([x, x2_], dim=1)
522
+
523
+ if self.se_layer:
524
+ x = self.__getattr__('se_layer_1')(x)
525
+ else:
526
+ x = self._relu(x)
527
+
528
+ x = self.__getattr__('deconv_2')(x)
529
+
530
+ if self._global_params.use_c4:
531
+ x_weighted = self._sigmoid(x)
532
+ x_inv = torch.sub(1, x_weighted, alpha=1)
533
+ x3_ = torch.multiply(x_inv, x3)
534
+ x = torch.cat([x, x3_], dim=1)
535
+
536
+ if self.se_layer:
537
+ x = self.__getattr__('se_layer_2')(x)
538
+ else:
539
+ x = self._relu(x)
540
+
541
+ x = self.__getattr__('deconv_3')(x)
542
+
543
+ if self._global_params.use_c3:
544
+ assert self._global_params.use_c4, "C4 must be utilized for FPN integration of C3"
545
+
546
+ x_weighted = self._sigmoid(x)
547
+ x_inv = torch.sub(1, x_weighted, alpha=1)
548
+ x4_ = torch.multiply(x_inv, x4)
549
+ x = torch.cat([x, x4_], dim=1)
550
+
551
+ if self.se_layer:
552
+ x = self.__getattr__('se_layer_3')(x)
553
+ else:
554
+ x = self._relu(x)
555
+
556
+ x = self.__getattr__('deconv_4')(x)
557
+
558
+ if not self._global_params.use_c2:
559
+ x = self._relu(x)
560
+ else:
561
+ assert self._global_params.use_c3, "C3 must be utilized for FPN integration of C2"
562
+
563
+ x5 = self._dropout(x5)
564
+ x5_ = self.conv_c2(x5)
565
+ x_weighted = self._sigmoid(x)
566
+ x_inv = torch.sub(1, x_weighted, alpha=1)
567
+ x5_ = torch.multiply(x_inv, x5_)
568
+ x = torch.cat([x, x5_], dim=1)
569
+
570
+ if self.se_layer:
571
+ x = self.__getattr__('se_layer_4')(x)
572
+ elif self.tfpn:
573
+ assert self._global_params.use_c51, "C51 must be utilized for FPN integration"
574
+ x = self.__getattr__('deconv_1')(x1)
575
+ x = self._relu1(x)
576
+ x = torch.cat([x, x2], dim=1)
577
+
578
+ x = self.__getattr__('deconv_2')(x)
579
+ if not self._global_params.use_c4:
580
+ x = self._relu1(x)
581
+ else:
582
+ x = torch.cat([x, x3], dim=1)
583
+
584
+ x = self.__getattr__('deconv_3')(x)
585
+ if not self._global_params.use_c3:
586
+ x = self._relu1(x)
587
+ else:
588
+ assert self._global_params.use_c4, "C4 must be utilized for FPN integration of C3"
589
+ x = torch.cat([x, x4], dim=1)
590
+
591
+ x = self.__getattr__('deconv_4')(x)
592
+ if not self._global_params.use_c2:
593
+ x = self._relu(x)
594
+ else:
595
+ assert self._global_params.use_c3, "C3 must be utilized for FPN integration of C2"
596
+ x5 = self._dropout(x5)
597
+ x5 = self.conv_c2(x5)
598
+ x = self._relu1(x)
599
+ x = torch.cat([x, x5], dim=1)
600
+ else:
601
+ x = self.deconv_layers(x1)
602
+
603
+ ret = {}
604
+ for head in self.heads:
605
+ ret[head] = self.__getattr__(head)(x)
606
+
607
+ return [ret]
608
+
609
+ @classmethod
610
+ def from_name(cls, model_name, in_channels=3, **override_params):
611
+ """Create an efficientnet model according to name.
612
+ Args:
613
+ model_name (str): Name for efficientnet.
614
+ in_channels (int): Input data's channel number.
615
+ override_params (other key word params):
616
+ Params to override model's global_params.
617
+ Optional key:
618
+ 'width_coefficient', 'depth_coefficient',
619
+ 'image_size', 'dropout_rate',
620
+ 'num_classes', 'batch_norm_momentum',
621
+ 'batch_norm_epsilon', 'drop_connect_rate',
622
+ 'depth_divisor', 'min_depth'
623
+ Returns:
624
+ An efficientnet model.
625
+ """
626
+ cls._check_model_name_is_valid(model_name)
627
+ blocks_args, global_params = get_model_params(model_name, override_params)
628
+ model = cls(blocks_args, global_params)
629
+ model._change_in_channels(in_channels)
630
+ return model
631
+
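+ # A minimal usage sketch (values are illustrative):
+ # model = EfficientNet.from_name('efficientnet-b0', in_channels=1, num_classes=10)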
632
+ @classmethod
633
+ def from_pretrained(cls, model_name, weights_path=None, advprop=False,
634
+ in_channels=3, num_classes=1000, **override_params):
635
+ """Create an efficientnet model according to name.
636
+ Args:
637
+ model_name (str): Name for efficientnet.
638
+ weights_path (None or str):
639
+ str: path to pretrained weights file on the local disk.
640
+ None: use pretrained weights downloaded from the Internet.
641
+ advprop (bool):
642
+ Whether to load pretrained weights
643
+ trained with advprop (valid when weights_path is None).
644
+ in_channels (int): Input data's channel number.
645
+ num_classes (int):
646
+ Number of categories for classification.
647
+ It controls the output size for final linear layer.
648
+ override_params (other key word params):
649
+ Params to override model's global_params.
650
+ Optional key:
651
+ 'width_coefficient', 'depth_coefficient',
652
+ 'image_size', 'dropout_rate',
653
+ 'batch_norm_momentum',
654
+ 'batch_norm_epsilon', 'drop_connect_rate',
655
+ 'depth_divisor', 'min_depth'
656
+ Returns:
657
+ A pretrained efficientnet model.
658
+ """
659
+ model = cls.from_name(model_name, num_classes=num_classes, **override_params)
660
+ load_pretrained_weights(model, model_name, weights_path=weights_path,
661
+ load_fc=((num_classes == 1000) and (model._global_params.include_top)), advprop=advprop)
662
+ model._change_in_channels(in_channels)
663
+ return model
664
+
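+ # A minimal usage sketch (values are illustrative):
+ # model = EfficientNet.from_pretrained('efficientnet-b0', num_classes=2)
+ # Note: the final FC weights are only loaded when num_classes == 1000 and include_top is set.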
665
+ @classmethod
666
+ def get_image_size(cls, model_name):
667
+ """Get the input image size for a given efficientnet model.
668
+ Args:
669
+ model_name (str): Name for efficientnet.
670
+ Returns:
671
+ Input image size (resolution).
672
+ """
673
+ cls._check_model_name_is_valid(model_name)
674
+ _, _, res, _ = efficientnet_params(model_name)
675
+ return res
676
+
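+ # e.g. EfficientNet.get_image_size('efficientnet-b4') returns the native training
+ # resolution from efficientnet_params (380 for b4).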
677
+ @classmethod
678
+ def _check_model_name_is_valid(cls, model_name):
679
+ """Validates model name.
680
+ Args:
681
+ model_name (str): Name for efficientnet.
682
+ Returns:
683
+ bool: Is a valid name or not.
684
+ """
685
+ if model_name not in VALID_MODELS:
686
+ raise ValueError('model_name should be one of: ' + ', '.join(VALID_MODELS))
687
+
688
+ def _change_in_channels(self, in_channels):
689
+ """Adjust model's first convolution layer to in_channels, if in_channels not equals 3.
690
+ Args:
691
+ in_channels (int): Input data's channel number.
692
+ """
693
+ if in_channels != 3:
694
+ Conv2d = get_same_padding_conv2d(image_size=self._global_params.image_size)
695
+ out_channels = round_filters(32, self._global_params)
696
+ self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
697
+
698
+
699
+ @MODELS.register_module()
700
+ class PoseEfficientNet(EfficientNet):
701
+ def __init__(self, model_name, in_channels=3, **override_params):
702
+ self.model_name = model_name
703
+ self.in_channels = in_channels
704
+
705
+ # Initialize Parent Class
706
+ super()._check_model_name_is_valid(model_name)
707
+ blocks_args, global_params = get_model_params(model_name, override_params)
708
+ super().__init__(blocks_args, global_params)
709
+
710
+ @classmethod
711
+ def from_name(cls, model_name, in_channels, **override_params):
712
+ return NotImplemented
713
+
714
+ @classmethod
715
+ def from_pretrained(cls, model_name, weights_path, advprop, in_channels, num_classes, **override_params):
716
+ return NotImplemented
717
+
718
+ def _change_in_channels(self, in_channels):
719
+ return NotImplemented
720
+
721
+ def init_weights(self, pretrained=False, advprop=False, verbose=True):
722
+ if pretrained:
723
+ url_map_ = url_map_advprop if advprop else url_map
724
+ state_dict = model_zoo.load_url(url_map_[self.model_name])
725
+ self.load_state_dict(state_dict, strict=False)
726
+
727
+ # Initialize weights for Deconvolution Layer
728
+ if self._global_params.include_hm_decoder:
729
+ if self.efpn or self.tfpn:
730
+ deconv_layers = [self.deconv_1, self.deconv_2, self.deconv_3, self.deconv_4]
731
+ else:
732
+ deconv_layers = self.deconv_layers
733
+
734
+ for layer in deconv_layers:
735
+ for _, m in layer.named_modules():
736
+ if isinstance(m, nn.ConvTranspose2d):
737
+ n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
738
+ m.weight.data.normal_(0, math.sqrt(2. / n))
739
+ if self.deconv_with_bias:
740
+ nn.init.constant_(m.bias, 0)
741
+ elif isinstance(m, nn.BatchNorm2d):
742
+ nn.init.constant_(m.weight, 1)
743
+ nn.init.constant_(m.bias, 0)
744
+
745
+ # Init head parameters
746
+ for head in self.heads:
747
+ final_layer = self.__getattr__(head)
748
+ for i, m in enumerate(final_layer.modules()):
749
+ if isinstance(m, nn.Conv2d):
750
+ if m.weight.shape[0] == self.heads[head]:
751
+ if 'hm' in head:
752
+ nn.init.constant_(m.bias, -2.19)
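+ # -2.19 ~= -log((1 - 0.1) / 0.1): biases the heatmap logits toward a low
+ # foreground prior at init (the focal-loss bias trick), so early training
+ # is not swamped by easy negatives.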
753
+ else:
754
+ # nn.init.normal_(m.weight, std=0.001)
755
+ n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
756
+ m.weight.data.normal_(0, math.sqrt(2. / n))
757
+ nn.init.constant_(m.bias, 0)
758
+
759
+ self._change_in_channels(in_channels=self.in_channels)
760
+ if pretrained and verbose:
761
+ print('Loaded pretrained weights for {}'.format(self.model_name))
762
+
763
+
764
+ if __name__ == '__main__':
765
+ cfg = dict(type='PoseEfficientNet',
766
+ model_name='efficientnet-b4',
767
+ include_top=False,
768
+ include_hm_decoder=True,
769
+ head_conv=64,
770
+ heads={'hm':1, 'cls':1, 'cstency':256},
771
+ use_c2=True)
772
+ model = build_model(cfg, MODELS)
773
+ model.init_weights(pretrained=True)
774
+ model.eval()
775
+ inputs = torch.rand((1, 3, 384, 384))
776
+
777
+ for i, (n, p) in enumerate(model.named_parameters()):
778
+ print(i, n)
779
+
780
+ # Show the output shapes of the whole pose EFN model
781
+ x = model(inputs)[0]
782
+ for head in x.keys():
783
+ print(f'{head} shape is --- {x[head].shape}')
784
+
785
+ # Show the endpoint feature shapes
786
+ # endpoints = model.extract_endpoints(inputs)
787
+ # for k in endpoints.keys():
788
+ # print(endpoints[k].shape)
models/networks/pose_hrnet.py ADDED
@@ -0,0 +1,515 @@
1
+ #-*- coding: utf-8 -*-
2
+ from __future__ import absolute_import
3
+ from __future__ import division
4
+ from __future__ import print_function
5
+
6
+ import os
7
+ import logging
8
+ import re
9
+
10
+ import torch
11
+ import torch.nn as nn
12
+ from ..builder import MODELS
13
+
14
+ from .common import conv3x3, BN_MOMENTUM
15
+
16
+
17
+ class BasicBlock(nn.Module):
18
+ expansion = 1
19
+
20
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
21
+ super(BasicBlock, self).__init__()
22
+ self.conv1 = conv3x3(inplanes, planes, stride)
23
+ self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
24
+ self.relu = nn.ReLU(inplace=True)
25
+ self.conv2 = conv3x3(planes, planes)
26
+ self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
27
+ self.downsample = downsample
28
+ self.stride = stride
29
+
30
+ def forward(self, x):
31
+ residual = x
32
+
33
+ out = self.conv1(x)
34
+ out = self.bn1(out)
35
+ out = self.relu(out)
36
+
37
+ out = self.conv2(out)
38
+ out = self.bn2(out)
39
+
40
+ if self.downsample is not None:
41
+ residual = self.downsample(x)
42
+
43
+ out += residual
44
+ out = self.relu(out)
45
+
46
+ return out
47
+
48
+
49
+ class Bottleneck(nn.Module):
50
+ expansion = 4
51
+
52
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
53
+ super(Bottleneck, self).__init__()
54
+ self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
55
+ self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
56
+ self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
57
+ padding=1, bias=False)
58
+ self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
59
+ self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
60
+ bias=False)
61
+ self.bn3 = nn.BatchNorm2d(planes * self.expansion,
62
+ momentum=BN_MOMENTUM)
63
+ self.relu = nn.ReLU(inplace=True)
64
+ self.downsample = downsample
65
+ self.stride = stride
66
+
67
+ def forward(self, x):
68
+ residual = x
69
+
70
+ out = self.conv1(x)
71
+ out = self.bn1(out)
72
+ out = self.relu(out)
73
+
74
+ out = self.conv2(out)
75
+ out = self.bn2(out)
76
+ out = self.relu(out)
77
+
78
+ out = self.conv3(out)
79
+ out = self.bn3(out)
80
+
81
+ if self.downsample is not None:
82
+ residual = self.downsample(x)
83
+
84
+ out += residual
85
+ out = self.relu(out)
86
+
87
+ return out
88
+
89
+
90
+ class HighResolutionModule(nn.Module):
91
+ def __init__(self, num_branches, blocks, num_blocks, num_inchannels,
92
+ num_channels, fuse_method, multi_scale_output=True):
93
+ super(HighResolutionModule, self).__init__()
94
+ self._check_branches(
95
+ num_branches, blocks, num_blocks, num_inchannels, num_channels)
96
+
97
+ self.num_inchannels = num_inchannels
98
+ self.fuse_method = fuse_method
99
+ self.num_branches = num_branches
100
+
101
+ self.multi_scale_output = multi_scale_output
102
+
103
+ self.branches = self._make_branches(
104
+ num_branches, blocks, num_blocks, num_channels)
105
+ self.fuse_layers = self._make_fuse_layers()
106
+ self.relu = nn.ReLU(True)
107
+
108
+ def _check_branches(self, num_branches, blocks, num_blocks,
109
+ num_inchannels, num_channels):
110
+ if num_branches != len(num_blocks):
111
+ error_msg = 'NUM_BRANCHES({}) <> NUM_BLOCKS({})'.format(
112
+ num_branches, len(num_blocks))
113
+ # logger.error(error_msg)
114
+ raise ValueError(error_msg)
115
+
116
+ if num_branches != len(num_channels):
117
+ error_msg = 'NUM_BRANCHES({}) <> NUM_CHANNELS({})'.format(
118
+ num_branches, len(num_channels))
119
+ # logger.error(error_msg)
120
+ raise ValueError(error_msg)
121
+
122
+ if num_branches != len(num_inchannels):
123
+ error_msg = 'NUM_BRANCHES({}) <> NUM_INCHANNELS({})'.format(
124
+ num_branches, len(num_inchannels))
125
+ # logger.error(error_msg)
126
+ raise ValueError(error_msg)
127
+
128
+ def _make_one_branch(self, branch_index, block, num_blocks, num_channels,
129
+ stride=1):
130
+ downsample = None
131
+ if stride != 1 or \
132
+ self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion:
133
+ downsample = nn.Sequential(
134
+ nn.Conv2d(
135
+ self.num_inchannels[branch_index],
136
+ num_channels[branch_index] * block.expansion,
137
+ kernel_size=1, stride=stride, bias=False
138
+ ),
139
+ nn.BatchNorm2d(
140
+ num_channels[branch_index] * block.expansion,
141
+ momentum=BN_MOMENTUM
142
+ ),
143
+ )
144
+
145
+ layers = []
146
+ layers.append(
147
+ block(
148
+ self.num_inchannels[branch_index],
149
+ num_channels[branch_index],
150
+ stride,
151
+ downsample
152
+ )
153
+ )
154
+ self.num_inchannels[branch_index] = \
155
+ num_channels[branch_index] * block.expansion
156
+ for i in range(1, num_blocks[branch_index]):
157
+ layers.append(
158
+ block(
159
+ self.num_inchannels[branch_index],
160
+ num_channels[branch_index]
161
+ )
162
+ )
163
+
164
+ return nn.Sequential(*layers)
165
+
166
+ def _make_branches(self, num_branches, block, num_blocks, num_channels):
167
+ branches = []
168
+
169
+ for i in range(num_branches):
170
+ branches.append(
171
+ self._make_one_branch(i, block, num_blocks, num_channels)
172
+ )
173
+
174
+ return nn.ModuleList(branches)
175
+
176
+ def _make_fuse_layers(self):
177
+ if self.num_branches == 1:
178
+ return None
179
+
180
+ num_branches = self.num_branches
181
+ num_inchannels = self.num_inchannels
182
+ fuse_layers = []
183
+ for i in range(num_branches if self.multi_scale_output else 1):
184
+ fuse_layer = []
185
+ for j in range(num_branches):
186
+ if j > i:
187
+ fuse_layer.append(
188
+ nn.Sequential(
189
+ nn.Conv2d(
190
+ num_inchannels[j],
191
+ num_inchannels[i],
192
+ 1, 1, 0, bias=False
193
+ ),
194
+ nn.BatchNorm2d(num_inchannels[i]),
195
+ nn.Upsample(scale_factor=2**(j-i), mode='nearest')
196
+ )
197
+ )
198
+ elif j == i:
199
+ fuse_layer.append(None)
200
+ else:
201
+ conv3x3s = []
202
+ for k in range(i-j):
203
+ if k == i - j - 1:
204
+ num_outchannels_conv3x3 = num_inchannels[i]
205
+ conv3x3s.append(
206
+ nn.Sequential(
207
+ nn.Conv2d(
208
+ num_inchannels[j],
209
+ num_outchannels_conv3x3,
210
+ 3, 2, 1, bias=False
211
+ ),
212
+ nn.BatchNorm2d(num_outchannels_conv3x3)
213
+ )
214
+ )
215
+ else:
216
+ num_outchannels_conv3x3 = num_inchannels[j]
217
+ conv3x3s.append(
218
+ nn.Sequential(
219
+ nn.Conv2d(
220
+ num_inchannels[j],
221
+ num_outchannels_conv3x3,
222
+ 3, 2, 1, bias=False
223
+ ),
224
+ nn.BatchNorm2d(num_outchannels_conv3x3),
225
+ nn.ReLU(True)
226
+ )
227
+ )
228
+ fuse_layer.append(nn.Sequential(*conv3x3s))
229
+ fuse_layers.append(nn.ModuleList(fuse_layer))
230
+
231
+ return nn.ModuleList(fuse_layers)
232
+
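+ # Fusion sketch: for output branch i and input branch j, j > i is upsampled by
+ # 2^(j-i) (1x1 conv + nearest upsample), j < i is downsampled by (i-j) stride-2
+ # 3x3 convs, and j == i passes through; forward() sums the aligned maps, then ReLU.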
233
+ def get_num_inchannels(self):
234
+ return self.num_inchannels
235
+
236
+ def forward(self, x):
237
+ if self.num_branches == 1:
238
+ return [self.branches[0](x[0])]
239
+
240
+ for i in range(self.num_branches):
241
+ x[i] = self.branches[i](x[i])
242
+
243
+ x_fuse = []
244
+
245
+ for i in range(len(self.fuse_layers)):
246
+ y = x[0] if i == 0 else self.fuse_layers[i][0](x[0])
247
+ for j in range(1, self.num_branches):
248
+ if i == j:
249
+ y = y + x[j]
250
+ else:
251
+ y = y + self.fuse_layers[i][j](x[j])
252
+ x_fuse.append(self.relu(y))
253
+
254
+ return x_fuse
255
+
256
+
257
+ blocks_dict = {
258
+ 'BASIC': BasicBlock,
259
+ 'BOTTLENECK': Bottleneck
260
+ }
261
+
262
+
263
+ @MODELS.register_module()
264
+ class PoseHighResolutionNet(nn.Module):
265
+ def __init__(self,
266
+ cfg,
267
+ **kwargs):
268
+ self.inplanes = 64
269
+ extra = cfg.MODEL.EXTRA
270
+ self.cls_based_hm = cfg.MODEL.cls_based_hm
271
+ self.heads = cfg.MODEL.heads
272
+ super(PoseHighResolutionNet, self).__init__()
273
+
274
+ # stem net
275
+ self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1,
276
+ bias=False)
277
+ self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
278
+ self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1,
279
+ bias=False)
280
+ self.bn2 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
281
+ self.relu = nn.ReLU(inplace=True)
282
+ self.layer1 = self._make_layer(Bottleneck, 64, 4)
283
+
284
+ self.stage2_cfg = cfg['MODEL']['EXTRA']['STAGE2']
285
+ num_channels = self.stage2_cfg['NUM_CHANNELS']
286
+ block = blocks_dict[self.stage2_cfg['BLOCK']]
287
+ num_channels = [
288
+ num_channels[i] * block.expansion for i in range(len(num_channels))
289
+ ]
290
+ self.transition1 = self._make_transition_layer([256], num_channels)
291
+ self.stage2, pre_stage_channels = self._make_stage(
292
+ self.stage2_cfg, num_channels)
293
+
294
+ self.stage3_cfg = cfg['MODEL']['EXTRA']['STAGE3']
295
+ num_channels = self.stage3_cfg['NUM_CHANNELS']
296
+ block = blocks_dict[self.stage3_cfg['BLOCK']]
297
+ num_channels = [
298
+ num_channels[i] * block.expansion for i in range(len(num_channels))
299
+ ]
300
+ self.transition2 = self._make_transition_layer(
301
+ pre_stage_channels, num_channels)
302
+ self.stage3, pre_stage_channels = self._make_stage(
303
+ self.stage3_cfg, num_channels)
304
+
305
+ self.stage4_cfg = cfg['MODEL']['EXTRA']['STAGE4']
306
+ num_channels = self.stage4_cfg['NUM_CHANNELS']
307
+ block = blocks_dict[self.stage4_cfg['BLOCK']]
308
+ num_channels = [
309
+ num_channels[i] * block.expansion for i in range(len(num_channels))
310
+ ]
311
+ self.transition3 = self._make_transition_layer(
312
+ pre_stage_channels, num_channels)
313
+ self.stage4, pre_stage_channels = self._make_stage(
314
+ self.stage4_cfg, num_channels, multi_scale_output=False)
315
+
316
+ self.final_layer = nn.Conv2d(
317
+ in_channels=pre_stage_channels[0],
318
+ out_channels=cfg.MODEL.NUM_JOINTS,
319
+ kernel_size=extra.FINAL_CONV_KERNEL,
320
+ stride=1,
321
+ padding=1 if extra.FINAL_CONV_KERNEL == 3 else 0
322
+ )
323
+
324
+ self.final_layer_cls = nn.Sequential(
325
+ nn.BatchNorm2d(cfg.MODEL.NUM_JOINTS, momentum=BN_MOMENTUM),
326
+ nn.AdaptiveMaxPool2d(cfg.MODEL.HEATMAP_SIZE[0]//4),
327
+ nn.Flatten(),
328
+ nn.Linear((cfg.MODEL.HEATMAP_SIZE[0]//4)**2, cfg.MODEL.NUM_JOINTS, bias=True),
329
+ nn.Sigmoid()
330
+ )
331
+
332
+ self.pretrained_layers = cfg['MODEL']['EXTRA']['PRETRAINED_LAYERS']
333
+
334
+ def _make_transition_layer(
335
+ self, num_channels_pre_layer, num_channels_cur_layer):
336
+ num_branches_cur = len(num_channels_cur_layer)
337
+ num_branches_pre = len(num_channels_pre_layer)
338
+
339
+ transition_layers = []
340
+ for i in range(num_branches_cur):
341
+ if i < num_branches_pre:
342
+ if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
343
+ transition_layers.append(
344
+ nn.Sequential(
345
+ nn.Conv2d(
346
+ num_channels_pre_layer[i],
347
+ num_channels_cur_layer[i],
348
+ 3, 1, 1, bias=False
349
+ ),
350
+ nn.BatchNorm2d(num_channels_cur_layer[i]),
351
+ nn.ReLU(inplace=True)
352
+ )
353
+ )
354
+ else:
355
+ transition_layers.append(None)
356
+ else:
357
+ conv3x3s = []
358
+ for j in range(i+1-num_branches_pre):
359
+ inchannels = num_channels_pre_layer[-1]
360
+ outchannels = num_channels_cur_layer[i] \
361
+ if j == i-num_branches_pre else inchannels
362
+ conv3x3s.append(
363
+ nn.Sequential(
364
+ nn.Conv2d(
365
+ inchannels, outchannels, 3, 2, 1, bias=False
366
+ ),
367
+ nn.BatchNorm2d(outchannels),
368
+ nn.ReLU(inplace=True)
369
+ )
370
+ )
371
+ transition_layers.append(nn.Sequential(*conv3x3s))
372
+
373
+ return nn.ModuleList(transition_layers)
374
+
375
+ def _make_layer(self, block, planes, blocks, stride=1):
376
+ downsample = None
377
+ if stride != 1 or self.inplanes != planes * block.expansion:
378
+ downsample = nn.Sequential(
379
+ nn.Conv2d(
380
+ self.inplanes, planes * block.expansion,
381
+ kernel_size=1, stride=stride, bias=False
382
+ ),
383
+ nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
384
+ )
385
+
386
+ layers = []
387
+ layers.append(block(self.inplanes, planes, stride, downsample))
388
+ self.inplanes = planes * block.expansion
389
+ for i in range(1, blocks):
390
+ layers.append(block(self.inplanes, planes))
391
+
392
+ return nn.Sequential(*layers)
393
+
394
+ def _make_stage(self, layer_config, num_inchannels,
395
+ multi_scale_output=True):
396
+ num_modules = layer_config['NUM_MODULES']
397
+ num_branches = layer_config['NUM_BRANCHES']
398
+ num_blocks = layer_config['NUM_BLOCKS']
399
+ num_channels = layer_config['NUM_CHANNELS']
400
+ block = blocks_dict[layer_config['BLOCK']]
401
+ fuse_method = layer_config['FUSE_METHOD']
402
+
403
+ modules = []
404
+ for i in range(num_modules):
405
+ # multi_scale_output is only used in the last module
406
+ if not multi_scale_output and i == num_modules - 1:
407
+ reset_multi_scale_output = False
408
+ else:
409
+ reset_multi_scale_output = True
410
+
411
+ modules.append(
412
+ HighResolutionModule(
413
+ num_branches,
414
+ block,
415
+ num_blocks,
416
+ num_inchannels,
417
+ num_channels,
418
+ fuse_method,
419
+ reset_multi_scale_output
420
+ )
421
+ )
422
+ num_inchannels = modules[-1].get_num_inchannels()
423
+
424
+ return nn.Sequential(*modules), num_inchannels
425
+
426
+ def forward(self, x):
427
+ x = self.conv1(x)
428
+ x = self.bn1(x)
429
+ x = self.relu(x)
430
+ x = self.conv2(x)
431
+ x = self.bn2(x)
432
+ x = self.relu(x)
433
+ x = self.layer1(x)
434
+
435
+ x_list = []
436
+ for i in range(self.stage2_cfg['NUM_BRANCHES']):
437
+ if self.transition1[i] is not None:
438
+ x_list.append(self.transition1[i](x))
439
+ else:
440
+ x_list.append(x)
441
+ y_list = self.stage2(x_list)
442
+
443
+ x_list = []
444
+ for i in range(self.stage3_cfg['NUM_BRANCHES']):
445
+ if self.transition2[i] is not None:
446
+ x_list.append(self.transition2[i](y_list[-1]))
447
+ else:
448
+ x_list.append(y_list[i])
449
+ y_list = self.stage3(x_list)
450
+
451
+ x_list = []
452
+ for i in range(self.stage4_cfg['NUM_BRANCHES']):
453
+ if self.transition3[i] is not None:
454
+ x_list.append(self.transition3[i](y_list[-1]))
455
+ else:
456
+ x_list.append(y_list[i])
457
+ y_list = self.stage4(x_list)
458
+
459
+ x = self.final_layer(y_list[0])
460
+
461
+ ret = {}
462
+ for head in self.heads.keys():
463
+ if head == 'hm':
464
+ ret[head] = x
465
+ else:
466
+ x1 = self.final_layer_cls(x)
467
+ ret[head] = x1
468
+ return [ret]
469
+
470
+ def init_weights(self, pretrained='', **kwargs):
471
+ for m in self.modules():
472
+ if isinstance(m, nn.Conv2d):
473
+ # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
474
+ nn.init.normal_(m.weight, std=0.001)
475
+ for name, _ in m.named_parameters():
476
+ if name in ['bias']:
477
+ nn.init.constant_(m.bias, 0)
478
+ elif isinstance(m, nn.BatchNorm2d):
479
+ nn.init.constant_(m.weight, 1)
480
+ nn.init.constant_(m.bias, 0)
481
+ elif isinstance(m, nn.ConvTranspose2d):
482
+ nn.init.normal_(m.weight, std=0.001)
483
+ for name, _ in m.named_parameters():
484
+ if name in ['bias']:
485
+ nn.init.constant_(m.bias, 0)
486
+
487
+ if os.path.isfile(pretrained):
488
+ pretrained_state_dict = torch.load(pretrained)
489
+
490
+ need_init_state_dict = {}
491
+ for name, m in pretrained_state_dict.items():
492
+ if name.split('.')[0] in self.pretrained_layers \
493
+ or self.pretrained_layers[0] == '*':
494
+ need_init_state_dict[name] = m
495
+ self.load_state_dict(need_init_state_dict, strict=False)
496
+ elif pretrained:
497
+ raise ValueError('{} does not exist!'.format(pretrained))
498
+
499
+
500
+ def get_pose_net(cfg, is_train, **kwargs):
501
+ model = PoseHighResolutionNet(cfg, **kwargs)
502
+
503
+ if is_train and cfg.MODEL.INIT_WEIGHTS:
504
+ model.init_weights(cfg.MODEL.PRETRAINED)
505
+
506
+ return model
507
+
508
+
509
+ if __name__ == "__main__":
510
+ from configs.get_config import load_config
511
+ from builder import build_model
512
+ cfg = load_config("configs/hrnet_sbi.yaml")
513
+
514
+ hrnet = build_model(cfg.MODEL, MODELS, default_args=dict(cfg=cfg))
515
+ print(hrnet)
models/networks/xception.py ADDED
@@ -0,0 +1,338 @@
1
+ """
2
+ Creates an Xception Model as defined in:
3
+
4
+ Francois Chollet
5
+ Xception: Deep Learning with Depthwise Separable Convolutions
6
+ https://arxiv.org/pdf/1610.02357.pdf
7
+
8
+ These weights were ported from the Keras implementation. They achieve the following performance on the validation set:
9
+
10
+ Loss:0.9173 Prec@1:78.892 Prec@5:94.292
11
+
12
+ REMEMBER to set your image size to 3x299x299 for both test and validation
13
+
14
+ normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5],
15
+ std=[0.5, 0.5, 0.5])
16
+
17
+ The resize parameter of the validation transform should be 333, and make sure to center crop at 299x299
18
+ """
19
+ import math
20
+
21
+ import torch.nn as nn
22
+ import torch.nn.functional as F
23
+ import torch.utils.model_zoo as model_zoo
24
+ from torch.nn import init
25
+ import torch
26
+
27
+ from ..builder import MODELS
28
+ from .common import conv_block, BN_MOMENTUM
29
+
30
+
31
+ model_urls = {
32
+ 'xception':'https://www.dropbox.com/s/1hplpzet9d7dv29/xception-c0a72b38.pth.tar?dl=1'
33
+ }
34
+
35
+
36
+ class SeparableConv2d(nn.Module):
37
+ def __init__(self,in_channels,out_channels,kernel_size=1,stride=1,padding=0,dilation=1,bias=False):
38
+ super(SeparableConv2d,self).__init__()
39
+
40
+ self.conv1 = nn.Conv2d(in_channels,in_channels,kernel_size,stride,padding,dilation,groups=in_channels,bias=bias)
41
+ self.pointwise = nn.Conv2d(in_channels,out_channels,1,1,0,1,1,bias=bias)
42
+
43
+ def forward(self,x):
44
+ x = self.conv1(x)
45
+ x = self.pointwise(x)
46
+ return x
47
+
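+ # Parameter sketch: the depthwise 3x3 (groups=in_channels) plus 1x1 pointwise pair
+ # costs in*9 + in*out weights versus in*out*9 for a dense 3x3 conv, roughly a 9x
+ # saving when out is large; this factorization is the core idea of Xception.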
48
+
49
+ class Block(nn.Module):
50
+ def __init__(self,in_filters,out_filters,reps,strides=1,start_with_relu=True,grow_first=True):
51
+ super(Block, self).__init__()
52
+
53
+ if out_filters != in_filters or strides!=1:
54
+ self.skip = nn.Conv2d(in_filters,out_filters,1,stride=strides, bias=False)
55
+ self.skipbn = nn.BatchNorm2d(out_filters)
56
+ else:
57
+ self.skip=None
58
+
59
+ self.relu = nn.ReLU(inplace=True)
60
+ rep=[]
61
+
62
+ filters=in_filters
63
+ if grow_first:
64
+ rep.append(self.relu)
65
+ rep.append(SeparableConv2d(in_filters,out_filters,3,stride=1,padding=1,bias=False))
66
+ rep.append(nn.BatchNorm2d(out_filters))
67
+ filters = out_filters
68
+
69
+ for i in range(reps-1):
70
+ rep.append(self.relu)
71
+ rep.append(SeparableConv2d(filters,filters,3,stride=1,padding=1,bias=False))
72
+ rep.append(nn.BatchNorm2d(filters))
73
+
74
+ if not grow_first:
75
+ rep.append(self.relu)
76
+ rep.append(SeparableConv2d(in_filters,out_filters,3,stride=1,padding=1,bias=False))
77
+ rep.append(nn.BatchNorm2d(out_filters))
78
+
79
+ if not start_with_relu:
80
+ rep = rep[1:]
81
+ else:
82
+ rep[0] = nn.ReLU(inplace=False)
83
+
84
+ if strides != 1:
85
+ rep.append(nn.MaxPool2d(3,strides,1))
86
+ self.rep = nn.Sequential(*rep)
87
+
88
+ def forward(self,inp):
89
+ x = self.rep(inp)
90
+
91
+ if self.skip is not None:
92
+ skip = self.skip(inp)
93
+ skip = self.skipbn(skip)
94
+ else:
95
+ skip = inp
96
+
97
+ x+=skip
98
+ return x
99
+
100
+
101
+ @MODELS.register_module()
102
+ class Xception(nn.Module):
103
+ """
104
+ Xception optimized for the ImageNet dataset, as specified in
105
+ https://arxiv.org/pdf/1610.02357.pdf
106
+ """
107
+ def __init__(self,
108
+ heads,
109
+ head_conv=64,
110
+ cls_based_hm=True,
111
+ dropout_prob=0.5,
112
+ **kwargs):
113
+ """ Constructor
114
+ Args:
115
+ num_classes: number of classes
116
+ """
117
+ self.heads = heads
118
+ self.head_conv = head_conv
119
+ self.cls_based_hm = cls_based_hm
120
+ self.dropout_prob = dropout_prob
121
+ super(Xception, self).__init__()
122
+
123
+ self.conv1 = nn.Conv2d(3, 32, 3,2, 0, bias=False)
124
+ self.bn1 = nn.BatchNorm2d(32)
125
+ self.relu = nn.ReLU(inplace=True)
126
+
127
+ self.conv2 = nn.Conv2d(32,64,3,bias=False)
128
+ self.bn2 = nn.BatchNorm2d(64)
129
+ #do relu here
130
+
131
+ self.block1=Block(64,128,2,2,start_with_relu=False,grow_first=True)
132
+ self.block2=Block(128,256,2,2,start_with_relu=True,grow_first=True)
133
+ self.block3=Block(256,728,2,2,start_with_relu=True,grow_first=True)
134
+
135
+ self.block4=Block(728,728,3,1,start_with_relu=True,grow_first=True)
136
+ self.block5=Block(728,728,3,1,start_with_relu=True,grow_first=True)
137
+ self.block6=Block(728,728,3,1,start_with_relu=True,grow_first=True)
138
+ self.block7=Block(728,728,3,1,start_with_relu=True,grow_first=True)
139
+
140
+ self.block8=Block(728,728,3,1,start_with_relu=True,grow_first=True)
141
+ self.block9=Block(728,728,3,1,start_with_relu=True,grow_first=True)
142
+ self.block10=Block(728,728,3,1,start_with_relu=True,grow_first=True)
143
+ self.block11=Block(728,728,3,1,start_with_relu=True,grow_first=True)
144
+
145
+ self.block12=Block(728,1024,2,2,start_with_relu=True,grow_first=False)
146
+
147
+ self.conv3 = SeparableConv2d(1024,1536,3,1,1)
148
+ self.bn3 = nn.BatchNorm2d(1536)
149
+
150
+ #do relu here
151
+ self.conv4 = SeparableConv2d(1536,2048,3,1,1)
152
+ self.bn4 = nn.BatchNorm2d(2048)
153
+
154
+ self.dropout = nn.Dropout2d(p=self.dropout_prob)
155
+
156
+ self.conv_block_1 = conv_block(2048, 256, (3,3), padding=1)
157
+ self.deconv_1 = nn.Sequential(
158
+ nn.ConvTranspose2d(
159
+ in_channels=256,
160
+ out_channels=256,
161
+ kernel_size=(4,4),
162
+ stride=2,
163
+ padding=1,
164
+ output_padding=0,
165
+ bias=False),
166
+ nn.BatchNorm2d(256, momentum=BN_MOMENTUM),
167
+ nn.ReLU(inplace=True)
168
+ )
169
+
170
+ self.conv_block_2 = conv_block(256, 256, (3,3), padding=1)
171
+ self.deconv_2 = nn.Sequential(
172
+ nn.ConvTranspose2d(
173
+ in_channels=256,
174
+ out_channels=128,
175
+ kernel_size=(4,4),
176
+ stride=2,
177
+ padding=1,
178
+ output_padding=0,
179
+ bias=False),
180
+ nn.BatchNorm2d(128, momentum=BN_MOMENTUM),
181
+ nn.ReLU(inplace=True)
182
+ )
183
+
184
+ self.conv_block_3 = conv_block(128, 128, (3,3), padding=1)
185
+ self.deconv_3 = nn.Sequential(
186
+ nn.ConvTranspose2d(
187
+ in_channels=128,
188
+ out_channels=64,
189
+ kernel_size=(4,4),
190
+ stride=2,
191
+ padding=1,
192
+ output_padding=0,
193
+ bias=False),
194
+ nn.BatchNorm2d(64, momentum=BN_MOMENTUM),
195
+ nn.ReLU(inplace=True)
196
+ )
197
+
198
+ for head in sorted(self.heads):
199
+ num_output = self.heads[head]
200
+ if self.head_conv > 0:
201
+ if head != 'cls':
202
+ fc = nn.Sequential(
203
+ nn.Conv2d(64, self.head_conv,
204
+ kernel_size=3, padding=1, bias=False),
205
+ nn.BatchNorm2d(self.head_conv),
206
+ nn.ReLU(inplace=True),
207
+ nn.Conv2d(self.head_conv, num_output,
208
+ kernel_size=1, stride=1, padding=0)
209
+ )
210
+ else:
211
+ if self.cls_based_hm:
212
+ fc = nn.Sequential(
213
+ nn.AdaptiveAvgPool2d(head_conv//4),
214
+ nn.Flatten(),
215
+ nn.Linear((head_conv//4)**2, head_conv, bias=False),
216
+ nn.BatchNorm1d(head_conv, momentum=BN_MOMENTUM),
217
+ nn.ReLU(inplace=True),
218
+ nn.Linear(head_conv, num_output, bias=True),
219
+ nn.Sigmoid()
220
+ )
221
+ else:
222
+ fc = nn.Sequential(
223
+ nn.Conv2d(64, head_conv, kernel_size=3,
224
+ padding=1, bias=False),
225
+ nn.BatchNorm2d(head_conv, momentum=BN_MOMENTUM),
226
+ nn.ReLU(inplace=True),
227
+ nn.Conv2d(head_conv, num_output, kernel_size=1,
228
+ stride=1, padding=0, bias=False),
229
+ nn.BatchNorm2d(num_output),
230
+ # nn.ReLU(inplace=True),
231
+ nn.AdaptiveAvgPool2d(head_conv//4),
232
+ nn.Flatten(),
233
+ nn.Linear((head_conv//4)**2, head_conv, bias=False),
234
+ nn.BatchNorm1d(head_conv, momentum=BN_MOMENTUM),
235
+ nn.ReLU(inplace=True),
236
+ nn.Linear(head_conv, num_output, bias=True),
237
+ nn.Sigmoid()
238
+ )
239
+ else:
240
+ fc = nn.Conv2d(
241
+ in_channels=64,
242
+ out_channels=num_output,
243
+ kernel_size=1,
244
+ stride=1,
245
+ padding=0
246
+ )
247
+ self.__setattr__(head, fc)
248
+
249
+ def forward(self, x):
250
+ x = self.conv1(x)
251
+ x = self.bn1(x)
252
+ x = self.relu(x)
253
+
254
+ x = self.conv2(x)
255
+ x = self.bn2(x)
256
+ x = self.relu(x)
257
+
258
+ x = self.block1(x)
259
+ x = self.block2(x)
260
+ x = self.block3(x)
261
+ x = self.block4(x)
262
+ x = self.block5(x)
263
+ x = self.block6(x)
264
+ x = self.block7(x)
265
+ x = self.block8(x)
266
+ x = self.block9(x)
267
+ x = self.block10(x)
268
+ x = self.block11(x)
269
+ x = self.block12(x)
270
+
271
+ x = self.conv3(x)
272
+ x = self.bn3(x)
273
+ x = self.relu(x)
274
+
275
+ x = self.conv4(x)
276
+ x = self.bn4(x)
277
+ x = self.relu(x)
278
+
279
+ x = self.dropout(x)
280
+
281
+ x = self.conv_block_1(x)
282
+ x = self.deconv_1(x)
283
+
284
+ x = self.conv_block_2(x)
285
+ x = self.deconv_2(x)
286
+
287
+ x = self.conv_block_3(x)
288
+ x = self.deconv_3(x)
289
+
290
+ ret = {}
291
+ x1_hm = None
292
+ for head in self.heads:
293
+ if not self.cls_based_hm or head != 'cls':
294
+ ret[head] = self.__getattr__(head)(x)
295
+ if head == 'hm':
296
+ x1_hm = ret[head]
297
+ else:
298
+ assert 'hm' in ret.keys(), "The cls head consumes heatmap features, so 'hm' must be computed first!"
299
+ ret[head] = self.__getattr__(head)(x1_hm)
300
+ return [ret]
301
+
302
+ def init_weights(self, pretrained=False):
303
+ if not pretrained:
304
+ for m in self.modules():
305
+ if isinstance(m, nn.Conv2d):
306
+ n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
307
+ m.weight.data.normal_(0, math.sqrt(2. / n))
308
+ elif isinstance(m, nn.BatchNorm2d):
309
+ m.weight.data.fill_(1)
310
+ m.bias.data.zero_()
311
+ elif isinstance(m, nn.ConvTranspose2d):
312
+ n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
313
+ m.weight.data.normal_(0, math.sqrt(2. / n))
314
+ if m.bias is not None: # Xception never defines deconv_with_bias; guard on the bias itself
315
+ nn.init.constant_(m.bias, 0)
316
+ else:
317
+ self.load_state_dict(model_zoo.load_url(model_urls['xception']), strict=False)
318
+
319
+ # Init head parameters
320
+ for head in self.heads:
321
+ final_layer = self.__getattr__(head)
322
+ for i, m in enumerate(final_layer.modules()):
323
+ prior = 1/71
324
+ # if isinstance(m, nn.Conv2d):
325
+ # if m.weight.shape[0] == self.heads[head]:
326
+ # if 'hm' in head:
327
+ # # nn.init.constant_(m.bias, -2.19)
328
+ # nn.init.constant_(m.bias, -math.log((1-prior)/prior))
329
+ # else:
330
+ # nn.init.normal_(m.weight, std=0.001)
331
+ # # nn.init.constant_(m.bias, 0)
332
+ if isinstance(m, nn.Linear):
333
+ if m.weight.shape[0] == self.heads[head]:
334
+ nn.init.constant_(m.bias, -math.log((1-prior)/prior))
335
+ # else:
336
+ # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
337
+ # m.weight.data.normal_(0, math.sqrt(2. / n))
338
+ # # nn.init.constant_(m.bias, 0)
models/utils.py ADDED
@@ -0,0 +1,138 @@
1
+ #-*- coding: utf-8 -*-
2
+ from __future__ import absolute_import
3
+ from __future__ import division
4
+ from __future__ import print_function
5
+
6
+ import os
7
+
8
+ import torch
9
+ import torch.nn as nn
10
+
11
+
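+ # Index of the last backbone parameter (in named_parameters() order) to freeze
+ # during warm-up; freeze_backbone falls back to this table when the model has
+ # no .backbone attribute.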
12
+ layers_position = {
13
+ 'PoseResNet_50': 158,
14
+ 'PoseResNet_101': 311,
15
+ 'PoseEfficientNet_B4': 415,
16
+ }
17
+
18
+
19
+ def preset_model(cfg, model, optimizer=None):
20
+ # Load the model from config; make sure the pretrained path matches the model name
21
+ start_epoch = 0
22
+ if 'pretrained' in cfg.TRAIN and os.path.isfile(cfg.TRAIN.pretrained):
23
+ model, optimizer, start_epoch = load_model(model,
24
+ cfg.TRAIN.pretrained,
25
+ optimizer=optimizer,
26
+ resume=cfg.TRAIN.resume,
27
+ lr=cfg.TRAIN.lr,
28
+ lr_step=cfg.TRAIN.lr_scheduler.milestones,
29
+ gamma=cfg.TRAIN.lr_scheduler.gamma)
30
+ else:
31
+ model.init_weights(**cfg.MODEL.INIT_WEIGHTS)
32
+ print('Loading model successfully -- {}'.format(cfg.MODEL.type))
33
+
34
+ # Freeze the backbone if start_epoch < warm-up epochs
35
+ if cfg.TRAIN.freeze_backbone and start_epoch < cfg.TRAIN.warm_up:
36
+ freeze_backbone(cfg.MODEL, model)
37
+
38
+ print('Number of parameters', sum(p.numel() for p in model.parameters()))
39
+ print('Number of trainable parameters', sum(p.numel() for p in model.parameters() if p.requires_grad))
40
+ return model, optimizer, start_epoch
41
+
42
+
43
+ def load_pretrained(model, weight_path):
44
+ '''
45
+ This function only loads the model's state dict.
46
+ For optimizer state and resume logic, please refer to @load_model.
47
+ '''
48
+ state_dict = torch.load(weight_path)['state_dict']
49
+ model.load_state_dict(state_dict, strict=True)
50
+ return model
51
+
52
+
53
+ def freeze_backbone(cfg, model):
54
+ '''
55
+ Freeze specific backbone layers to warm up the model.
56
+ '''
57
+ if hasattr(model, 'backbone'):
58
+ backbone = model.backbone
59
+ for param in backbone.parameters():
60
+ param.requires_grad = False
61
+ else:
62
+ for i, (n, p) in enumerate(model.named_parameters()):
63
+ if (i <= layers_position[f'{cfg.type}_{cfg.num_layers}']):
64
+ p.requires_grad = False
65
+
66
+
67
+ def unfreeze_backbone(model):
68
+ '''
69
+ Unfreeze all model layers.
70
+ '''
71
+ for param in model.parameters():
72
+ if not param.requires_grad:
73
+ param.requires_grad = True
74
+
75
+
76
+ def load_model(model, model_path, optimizer=None, resume=False,
77
+ lr=None, lr_step=None, gamma=None):
78
+ start_epoch = 0
79
+ checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
80
+ print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch']))
81
+ state_dict_ = checkpoint['state_dict']
82
+ state_dict = {}
83
+
84
+ # convert DataParallel ('module.'-prefixed) keys to plain model keys
85
+ for k in state_dict_:
86
+ if k.startswith('module') and not k.startswith('module_list'):
87
+ state_dict[k[7:]] = state_dict_[k]
88
+ else:
89
+ state_dict[k] = state_dict_[k]
90
+ model_state_dict = model.state_dict()
91
+
92
+ # check loaded parameters and created model parameters
93
+ msg = 'If you see this, your model does not fully load the ' + \
94
+ 'pre-trained weight. Please make sure ' + \
95
+ 'you have correctly specified --arch xxx ' + \
96
+ 'or set the correct --num_classes for your own dataset.'
97
+ for k in state_dict:
98
+ if k in model_state_dict:
99
+ if state_dict[k].shape != model_state_dict[k].shape:
100
+ print('Skip loading parameter {}, required shape {}, '\
101
+ 'loaded shape {}. {}'.format(
102
+ k, model_state_dict[k].shape, state_dict[k].shape, msg))
103
+ state_dict[k] = model_state_dict[k]
104
+ else:
105
+ print('Drop parameter {}.'.format(k) + msg)
106
+ for k in model_state_dict:
107
+ if not (k in state_dict):
108
+ print('No param {}.'.format(k) + msg)
109
+ state_dict[k] = model_state_dict[k]
110
+ model.load_state_dict(state_dict, strict=False)
111
+
112
+ # resume optimizer parameters
113
+ if optimizer is not None and resume:
114
+ if 'optimizer' in checkpoint:
115
+ optimizer.load_state_dict(checkpoint['optimizer'])
116
+ start_epoch = checkpoint['epoch'] + 1
117
+ start_lr = lr
118
+ for step in lr_step:
119
+ if start_epoch >= step:
120
+ start_lr *= gamma
121
+ for param_group in optimizer.param_groups:
122
+ param_group['lr'] = start_lr
123
+ print('Resumed optimizer with start lr', start_lr)
124
+ else:
125
+ print('No optimizer parameters in checkpoint.')
126
+ return model, optimizer, start_epoch
127
+
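+ # A minimal usage sketch (paths and hyperparameters are illustrative):
+ # model, optimizer, start_epoch = load_model(model, 'ckpt.pth', optimizer=opt,
+ # resume=True, lr=1e-3, lr_step=[90, 120], gamma=0.1)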
128
+
129
+ def save_model(path, epoch, model, optimizer=None):
130
+ if isinstance(model, torch.nn.DataParallel):
131
+ state_dict = model.module.state_dict()
132
+ else:
133
+ state_dict = model.state_dict()
134
+ data = {'epoch': epoch,
135
+ 'state_dict': state_dict}
136
+ if not (optimizer is None):
137
+ data['optimizer'] = optimizer.state_dict()
138
+ torch.save(data, path)
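+ # e.g. save_model('epoch_10.pth', 10, model, optimizer) stores the epoch, the
+ # state_dict (unwrapped from DataParallel if needed) and the optimizer state.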
requirements.txt ADDED
@@ -0,0 +1,10 @@
1
+ torch>=2.0.0
2
+ torchvision>=0.15.0
3
+ opencv-python>=4.8.0
4
+ numpy>=1.24.0
5
+ Pillow>=10.0.0
6
+ gradio>=3.50.0
7
+ detectron2>=0.6.0; platform_system!="Darwin" # Detectron2 not available for macOS
8
+ fvcore>=0.1.5.post20221221; platform_system!="Darwin" # Required for detectron2
9
+ iopath>=0.1.9; platform_system!="Darwin" # Required for detectron2
10
+ pycocotools>=2.0.6; platform_system!="Darwin" # Required for detectron2