diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..96a9cfc72a6352a7ee1fc9c834a9da22081d8f33 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +test3.png filter=lfs diff=lfs merge=lfs -text +test5.png filter=lfs diff=lfs merge=lfs -text diff --git a/configs/.DS_Store b/configs/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..ac21e0737d6994085a22e432323db18c3229ee7f Binary files /dev/null and b/configs/.DS_Store differ diff --git a/configs/.ipynb_checkpoints/__init__-checkpoint.py b/configs/.ipynb_checkpoints/__init__-checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..676145d777810e4a51bdaf59fdec4f5358aae349 --- /dev/null +++ b/configs/.ipynb_checkpoints/__init__-checkpoint.py @@ -0,0 +1,7 @@ +import os +import sys +current_file_path = os.path.abspath(__file__) +parent_dir = os.path.dirname(os.path.dirname(current_file_path)) +project_root_dir = os.path.dirname(parent_dir) +sys.path.append(parent_dir) +sys.path.append(project_root_dir) diff --git a/configs/__init__.py b/configs/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..676145d777810e4a51bdaf59fdec4f5358aae349 --- /dev/null +++ b/configs/__init__.py @@ -0,0 +1,7 @@ +import os +import sys +current_file_path = os.path.abspath(__file__) +parent_dir = os.path.dirname(os.path.dirname(current_file_path)) +project_root_dir = os.path.dirname(parent_dir) +sys.path.append(parent_dir) +sys.path.append(project_root_dir) diff --git a/configs/__pycache__/__init__.cpython-310.pyc b/configs/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..da93cbecef8bd3ab49e2cc68a2e095866cb758f9 Binary files /dev/null and b/configs/__pycache__/__init__.cpython-310.pyc differ diff --git a/configs/__pycache__/__init__.cpython-311.pyc b/configs/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..17fd72f555c2271c9d0716368f1f3f822733456e Binary files /dev/null and b/configs/__pycache__/__init__.cpython-311.pyc differ diff --git a/configs/__pycache__/__init__.cpython-312.pyc b/configs/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6babc064d0fcd01d8ec0ff8ff392ab8e18d7a597 Binary files /dev/null and b/configs/__pycache__/__init__.cpython-312.pyc differ diff --git a/configs/__pycache__/get_config.cpython-310.pyc b/configs/__pycache__/get_config.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b45b915741424d64060cfe3022542a8e7dc77955 Binary files /dev/null and b/configs/__pycache__/get_config.cpython-310.pyc differ diff --git a/configs/__pycache__/get_config.cpython-311.pyc b/configs/__pycache__/get_config.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9fc67e19cfd2e4e1d8534955c149a8e13b3f6735 Binary files /dev/null and b/configs/__pycache__/get_config.cpython-311.pyc differ diff --git a/configs/__pycache__/get_config.cpython-312.pyc b/configs/__pycache__/get_config.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5b808edb5e25d7f4e82ba59903979e0405149fcc Binary files /dev/null and b/configs/__pycache__/get_config.cpython-312.pyc differ diff --git 
a/configs/detector/.DS_Store b/configs/detector/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..ce4a21ac33d56b794b4e240803e428b48f9ec43e Binary files /dev/null and b/configs/detector/.DS_Store differ diff --git a/configs/detector/.ipynb_checkpoints/detector-checkpoint.yaml b/configs/detector/.ipynb_checkpoints/detector-checkpoint.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6ce9ea89c706965d8a104ecb70128b720e43e7ce --- /dev/null +++ b/configs/detector/.ipynb_checkpoints/detector-checkpoint.yaml @@ -0,0 +1,86 @@ +# log dir +log_dir: ./logs_debug/clip_wavelet + +# model setting +pretrained: ./training/weights/clip_wavelet_best.pth # path to a pre-trained model, if using one +model_name: clip_wavelet # model name +backbone_name: vit # backbone name + +#backbone setting +backbone_config: + mode: original + num_classes: 2 + inc: 3 + dropout: false + +# dataset +all_dataset: [FaceShifter, Celeb-DF-v1, Celeb-DF-v2] +train_dataset: [iFakeFaceDB] +test_dataset: [Celeb-DF-v1, Celeb-DF-v2, FaceShifter] + +compression: c23 # compression-level for videos +train_batchSize: 64 # training batch size +test_batchSize: 64 # test batch size +workers: 8 # number of data loading workers +frame_num: {'train': 300, 'test': 300} # number of frames to use per video in training and testing +resolution: 224 # resolution of output image to network +with_mask: false # whether to include mask information in the input +with_landmark: false # whether to include facial landmark information in the input + + +# data augmentation +use_data_augmentation: true # Add this flag to enable/disable data augmentation +data_aug: + flip_prob: 0.5 + rotate_prob: 0.5 + rotate_limit: [-10, 10] + blur_prob: 0.5 + blur_limit: [3, 7] + brightness_prob: 0.5 + brightness_limit: [-0.1, 0.1] + contrast_limit: [-0.1, 0.1] + quality_lower: 40 + quality_upper: 100 + +# mean and std for normalization +mean: [0.5, 0.5, 0.5] +std: [0.5, 0.5, 0.5] + +# optimizer config +optimizer: + # choose between 'adam' and 'sgd' + type: adam + adam: + lr: 0.00001 # learning rate + beta1: 0.9 # beta1 for Adam optimizer + beta2: 0.999 # beta2 for Adam optimizer + eps: 0.00000001 # epsilon for Adam optimizer + weight_decay: 0.0005 # weight decay for regularization + amsgrad: false + sgd: + lr: 0.0002 # learning rate + momentum: 0.9 # momentum for SGD optimizer + weight_decay: 0.0005 # weight decay for regularization + +# training config +lr_scheduler: null # learning rate scheduler +nEpochs: 50 # number of epochs to train for +start_epoch: 0 # manual epoch number (useful for restarts) +save_epoch: 1 # interval epochs for saving models +rec_iter: 100 # interval iterations for recording +logdir: ./logs # folder to output images and logs +manualSeed: 1024 # manual seed for random number generation +save_ckpt: true # whether to save checkpoint +save_feat: true # whether to save features + +# loss function +loss_func: cross_entropy # loss function to use +losstype: null + +# metric +metric_scoring: auc # metric for evaluation (auc, acc, eer, ap) + +# cuda + +cuda: true # whether to use CUDA acceleration +cudnn: true # whether to use CuDNN for convolution operations diff --git a/configs/detector/.ipynb_checkpoints/detector2-checkpoint.yaml b/configs/detector/.ipynb_checkpoints/detector2-checkpoint.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3f568ba0df6d5232970c3362d52118ba9f76e1a2 --- /dev/null +++ b/configs/detector/.ipynb_checkpoints/detector2-checkpoint.yaml @@ -0,0 +1,140 @@ +TASK: 
custom_laanet +PRECISION: float64 +METRICS_BASE: binary +SEED: 317 +DATA_RELOAD: False + +MODEL: + # PRETRAINED_PATH: '' + type: PoseEfficientNet + model_name: efficientnet-b4 + num_layers: B4 + include_top: False + include_hm_decoder: True + head_conv: 64 + use_c2: False + use_c3: True + use_c4: True + use_c51: True + efpn: True + tfpn: False + se_layer: False + heads: + hm: 1 + cls: 1 + cstency: 256 + INIT_WEIGHTS: + pretrained: True + advprop: True + +DATASET: + type: BinaryFaceForensic + FROM_FILE: False + PIN_MEMORY: True + NUM_WORKERS: 7 + COLOR_NORM: 'simple' + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + IMAGE_SUFFIX: png + COMPRESSION: c0 + IMAGE_SUFFIX: png + IMAGE_SIZE: [384, 384] + HEATMAP_SIZE: [96, 96] #[IMAGE_SIZE//4, IMAGE_SIZE//4] + SIGMA: 2 + ADAPTIVE_SIGMA: True + HEATMAP_TYPE: gaussian + SPLIT_IMAGE: False + DATA: + TYPE: frames + SAMPLES_PER_VIDEO: + ACTIVE: True + TRAIN: 8 + VAL: 8 + TEST: 32 + TRAIN: + NAME: custom_dataset + ROOT: ./datasets/train/ + FROM_FILE: False + FAKETYPE: [fake] + LABEL_FOLDER: [real, fake] + + VAL: + NAME: custom_dataset + ROOT: ./datasets/test/ + FROM_FILE: False + FAKETYPE: [fake] + LABEL_FOLDER: [real, fake] + TEST: + NAME: custom_dataset + ROOT: ./datasets/test/ + FROM_FILE: False + FAKETYPE: [fake] + LABEL_FOLDER: [real, fake] + TRANSFORM: + geometry: + type: GeometryTransform + resize: [384, 384, 0] #h, w, p=probability. If no affine transform, set p=1 + normalize: 0 + horizontal_flip: 0.5 + cropping: [0.15, 0.5] #Format: [crop_limit, prob] + scale: [0.15, 0.5] #Format: [scale_limit, prob] + rand_erasing: [0.5, 1] #Format: [p, max_count] + color: + type: ColorJitterTransform + clahe: 0.0 + colorjitter: 0.3 + gaussianblur: 0.3 + gaussnoise: 0.3 + jpegcompression: [0.5, 40, 100] # prob, lower and upper quality respectively + rgbshift: 0.3 + randomcontrast: 0.0 + randomgamma: 0.5 + randombrightness: 1 + huesat: 1 + normalize: + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + +TRAIN: + resume: True + gpus: [0] + pretrained_model: './training/weights/final_model.pth' + batch_size: 8 + lr: 0.00005 + epochs: 100 + begin_epoch: 48 + warm_up: 6 + every_val_epochs: 1 + loss: + type: CombinedFocalLoss + use_target_weight: False + cls_lmda: 1 + dst_hm_cls_lmda: 0 + offset_lmda: 0 + hm_lmda: 100 + cstency_lmda: 100 + mse_reduction: sum + ce_reduction: mean + optimizer: SAM + distributed: False + tensorboard: False + resume: True + lr_scheduler: + # type: MultiStepLR + milestones: [5, 15, 20, 25] + gamma: 0.5 + freeze_backbone: True + debug: + active: False + save_hm_gt: True + save_hm_pred: True + +TEST: + gpus: [0] + subtask: 'eval' + test_file: '' + vis_hm: True + threshold: 0.5 + flip_test: True + video_level: True + pretrained: './training/weights/final_model.pth' \ No newline at end of file diff --git a/configs/detector/.ipynb_checkpoints/efn4_fpn_hm_adv-checkpoint.yaml b/configs/detector/.ipynb_checkpoints/efn4_fpn_hm_adv-checkpoint.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1fa9a014fc19c4567dd066c86bbf38c4383674e2 --- /dev/null +++ b/configs/detector/.ipynb_checkpoints/efn4_fpn_hm_adv-checkpoint.yaml @@ -0,0 +1,159 @@ +TASK: EFN_hm10_EFPN_NoBasedCLS_Focal_C2_256Cst100_32FXRayv2_SAM(Adam)_ADV_Era3_OutSoftMax_5e5_div4_FZ +PRECISION: float64 +METRICS_BASE: binary +SEED: 317 +DATA_RELOAD: True +DATASET: + type: HeatmapFaceForensic + TRAIN: True #Switch to True for training mode, False for testing mode + COMPRESSION: c0 + IMAGE_SUFFIX: png + NUM_WORKERS: 7 + PIN_MEMORY: True + IMAGE_SIZE: [384, 384] + 
HEATMAP_SIZE: [96, 96] #[IMAGE_SIZE//4, IMAGE_SIZE//4] + SIGMA: 2 + ADAPTIVE_SIGMA: True + HEATMAP_TYPE: gaussian + SPLIT_IMAGE: False + DATA: + TYPE: frames + SAMPLES_PER_VIDEO: + ACTIVE: True + TRAIN: 32 # Dynamically random number of frames in each epoch + VAL: 32 + TEST: 32 + TRAIN: + NAME: FF++ # This field to define datasets that can be used to train/in-dataset/cross-dataset evaluation + ROOT: /home/users//data/FaceForensics++/c0/ + # ROOT: /data/deepfake_cluster/datasets_df/FaceForensics++/c0/ + FROM_FILE: True + FAKETYPE: [Deepfakes, Face2Face, FaceSwap, NeuralTextures] + # ANNO_FILE: train/frames/FaceXRay/train_FF_FaceXRay.json + ANNO_FILE: processed_data/c0/dynamic_trainBI_FFv2.json + LABEL_FOLDER: [real, fake] + VAL: + NAME: FF++ # This field to define datasets that can be used to train/in-dataset/cross-dataset evaluation + ROOT: /Deep/datasets/ + FROM_FILE: True + FAKETYPE: [Deepfakes, Face2Face, FaceSwap, NeuralTextures] # Choosing Deepfake techniques to be loaded for dataloader + # ANNO_FILE: val/frames/FaceXRay/val_FF_FaceXRay.json + ANNO_FILE: processed_data/c0/dynamic_valBI_FFv2.json + LABEL_FOLDER: [real, fake] + TEST: + NAME: DFW + # ROOT: /home/users//data/FaceForensics++/c0/ + # ROOT: /home/users//data/Celeb-DFv1/ + # ROOT: /data/deepfake_cluster/datasets_df/Celeb-DFv1/ + # ROOT: /home/users//data/Celeb-DFv2/ + # ROOT: /home/users//data/DFDCP/ + # ROOT: /home/users//data/DFDC/ + # ROOT: /home/users//data/DFD/ + ROOT: /home/users//data/DFW/ + FROM_FILE: False + # FAKETYPE: [original, Deepfakes] + # FAKETYPE: [original, Deepfakes, Face2Face, FaceSwap, NeuralTextures] + # FAKETYPE: [Celeb-real, Celeb-synthesis, YouTube-real] + # FAKETYPE: [method_A, method_B, original_videos] + # FAKETYPE: [fake, real] + # FAKETYPE: [DeepFakeDetection_original, DeepFakeDetection] + FAKETYPE: [real_test, fake_test] + ANNO_FILE: FaceXRay/test/test_FF_Xray.json + LABEL_FOLDER: [real, fake] + TRANSFORM: + geometry: + type: GeometryTransform + resize: [384, 384, 0] #h, w, p=probability. 
If no affine transform, set p=1 + normalize: 0 + horizontal_flip: 0.5 + cropping: [0.15, 0.5] #Format: [crop_limit, prob] + scale: [0.15, 0.5] #Format: [scale_limit, prob] + rand_erasing: [0.5, 1] #Format: [p, max_count] + color: + type: ColorJitterTransform + clahe: 0.0 + colorjitter: 0.3 + gaussianblur: 0.3 + gaussnoise: 0.3 + jpegcompression: [0.5, 40, 100] # prob, lower and upper quality respectively + rgbshift: 0.3 + randomcontrast: 0.0 + randomgamma: 0.5 + randombrightness: 1 + huesat: 1 + normalize: + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + DEBUG: False + DYNAMIC_FXRAY: True +MODEL: + type: PoseEfficientNet + model_name: efficientnet-b4 + num_layers: B4 + include_top: False + include_hm_decoder: True + head_conv: 64 + use_c2: False + use_c3: True + use_c4: True + use_c51: True + efpn: True + tfpn: False + se_layer: False + heads: + hm: 1 + cls: 1 + cstency: 256 + INIT_WEIGHTS: + pretrained: True + advprop: True +TRAIN: + gpus: [0] + batch_size: 16 + lr: 0.00005 + epochs: 100 + begin_epoch: -1 + warm_up: 6 + every_val_epochs: 1 + loss: + type: CombinedFocalLoss + use_target_weight: False + cls_lmda: 1 + dst_hm_cls_lmda: 0 + offset_lmda: 0 + hm_lmda: 10 + cstency_lmda: 100 + mse_reduction: sum + ce_reduction: mean + optimizer: SAM + distributed: False + pretrained: '~/Deep/training/weights/LALALAND1.pth' + tensorboard: True + resume: True + lr_scheduler: + # type: MultiStepLR + milestones: [5, 15, 20, 25] + gamma: 0.5 + freeze_backbone: True + debug: + active: False + save_hm_gt: True + save_hm_pred: True +TEST: + gpus: [0] + subtask: 'eval' + test_file: '' + vis_hm: True + threshold: 0.5 + flip_test: True + video_level: True + pretrained: '~/Deep/training/weights/LALALAND1.pth' +PREPROCESSING: + DATASET: FaceForensics + SPLIT: train + ROOT: ~/Deep/datasets/ + FAKETYPE: [Deepfakes, Face2Face, FaceSwap, NeuralTextures] + IMAGE_SUFFIX: jpg + DATA_TYPE: images + LABEL: [real, fake] + DEBUG: False diff --git a/configs/detector/detector.yaml b/configs/detector/detector.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6ce9ea89c706965d8a104ecb70128b720e43e7ce --- /dev/null +++ b/configs/detector/detector.yaml @@ -0,0 +1,86 @@ +# log dir +log_dir: ./logs_debug/clip_wavelet + +# model setting +pretrained: ./training/weights/clip_wavelet_best.pth # path to a pre-trained model, if using one +model_name: clip_wavelet # model name +backbone_name: vit # backbone name + +#backbone setting +backbone_config: + mode: original + num_classes: 2 + inc: 3 + dropout: false + +# dataset +all_dataset: [FaceShifter, Celeb-DF-v1, Celeb-DF-v2] +train_dataset: [iFakeFaceDB] +test_dataset: [Celeb-DF-v1, Celeb-DF-v2, FaceShifter] + +compression: c23 # compression-level for videos +train_batchSize: 64 # training batch size +test_batchSize: 64 # test batch size +workers: 8 # number of data loading workers +frame_num: {'train': 300, 'test': 300} # number of frames to use per video in training and testing +resolution: 224 # resolution of output image to network +with_mask: false # whether to include mask information in the input +with_landmark: false # whether to include facial landmark information in the input + + +# data augmentation +use_data_augmentation: true # Add this flag to enable/disable data augmentation +data_aug: + flip_prob: 0.5 + rotate_prob: 0.5 + rotate_limit: [-10, 10] + blur_prob: 0.5 + blur_limit: [3, 7] + brightness_prob: 0.5 + brightness_limit: [-0.1, 0.1] + contrast_limit: [-0.1, 0.1] + quality_lower: 40 + quality_upper: 100 + +# mean and std for normalization 
+mean: [0.5, 0.5, 0.5] +std: [0.5, 0.5, 0.5] + +# optimizer config +optimizer: + # choose between 'adam' and 'sgd' + type: adam + adam: + lr: 0.00001 # learning rate + beta1: 0.9 # beta1 for Adam optimizer + beta2: 0.999 # beta2 for Adam optimizer + eps: 0.00000001 # epsilon for Adam optimizer + weight_decay: 0.0005 # weight decay for regularization + amsgrad: false + sgd: + lr: 0.0002 # learning rate + momentum: 0.9 # momentum for SGD optimizer + weight_decay: 0.0005 # weight decay for regularization + +# training config +lr_scheduler: null # learning rate scheduler +nEpochs: 50 # number of epochs to train for +start_epoch: 0 # manual epoch number (useful for restarts) +save_epoch: 1 # interval epochs for saving models +rec_iter: 100 # interval iterations for recording +logdir: ./logs # folder to output images and logs +manualSeed: 1024 # manual seed for random number generation +save_ckpt: true # whether to save checkpoint +save_feat: true # whether to save features + +# loss function +loss_func: cross_entropy # loss function to use +losstype: null + +# metric +metric_scoring: auc # metric for evaluation (auc, acc, eer, ap) + +# cuda + +cuda: true # whether to use CUDA acceleration +cudnn: true # whether to use CuDNN for convolution operations diff --git a/configs/detector/detector2 copie.yaml b/configs/detector/detector2 copie.yaml new file mode 100644 index 0000000000000000000000000000000000000000..050275df30864a8d247a57a0d7ea18786855fadc --- /dev/null +++ b/configs/detector/detector2 copie.yaml @@ -0,0 +1,142 @@ +TASK: custom_laanet +PRECISION: float32 +METRICS_BASE: binary +SEED: 317 +DATA_RELOAD: False +Resume: True +begin_epoch: 100 + +MODEL: + # PRETRAINED_PATH: '' + type: PoseEfficientNet + model_name: efficientnet-b4 + num_layers: B4 + include_top: False + include_hm_decoder: True + head_conv: 64 + use_c2: False + use_c3: True + use_c4: True + use_c51: True + efpn: True + tfpn: False + se_layer: False + heads: + hm: 1 + cls: 1 + cstency: 256 + INIT_WEIGHTS: + pretrained: True + advprop: True + +DATASET: + type: BinaryFaceForensic + FROM_FILE: False + PIN_MEMORY: True + NUM_WORKERS: 7 + COLOR_NORM: 'simple' + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + IMAGE_SUFFIX: png + COMPRESSION: c0 + IMAGE_SUFFIX: png + IMAGE_SIZE: [384, 384] + HEATMAP_SIZE: [96, 96] #[IMAGE_SIZE//4, IMAGE_SIZE//4] + SIGMA: 2 + ADAPTIVE_SIGMA: True + HEATMAP_TYPE: gaussian + SPLIT_IMAGE: False + DATA: + TYPE: frames + SAMPLES_PER_VIDEO: + ACTIVE: True + TRAIN: 8 + VAL: 8 + TEST: 32 + TRAIN: + NAME: custom_dataset + ROOT: ./datasets/train/ + FROM_FILE: False + FAKETYPE: [fake] + LABEL_FOLDER: [real, fake] + + VAL: + NAME: custom_dataset + ROOT: ./datasets/test/ + FROM_FILE: False + FAKETYPE: [fake] + LABEL_FOLDER: [real, fake] + TEST: + NAME: custom_dataset + ROOT: ./datasets/test/ + FROM_FILE: False + FAKETYPE: [fake] + LABEL_FOLDER: [real, fake] + TRANSFORM: + geometry: + type: GeometryTransform + resize: [384, 384, 0] #h, w, p=probability. 
If no affine transform, set p=1 + normalize: 0 + horizontal_flip: 0.5 + cropping: [0.15, 0.5] #Format: [crop_limit, prob] + scale: [0.15, 0.5] #Format: [scale_limit, prob] + rand_erasing: [0.5, 1] #Format: [p, max_count] + color: + type: ColorJitterTransform + clahe: 0.0 + colorjitter: 0.3 + gaussianblur: 0.3 + gaussnoise: 0.3 + jpegcompression: [0.5, 40, 100] # prob, lower and upper quality respectively + rgbshift: 0.3 + randomcontrast: 0.0 + randomgamma: 0.5 + randombrightness: 1 + huesat: 1 + normalize: + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + +TRAIN: + resume: True + gpus: [0] + pretrained_model: './logs/27-03-2025/PoseEfficientNet_custom_laanet_model_final.pth' + batch_size: 32 + lr: 0.00005 + epochs: 150 + begin_epoch: 100 + warm_up: 6 + every_val_epochs: 1 + loss: + type: CombinedFocalLoss + use_target_weight: False + cls_lmda: 1 + dst_hm_cls_lmda: 0 + offset_lmda: 0 + hm_lmda: 100 + cstency_lmda: 100 + mse_reduction: sum + ce_reduction: mean + optimizer: SAM + distributed: False + tensorboard: False + resume: True + lr_scheduler: + # type: MultiStepLR + milestones: [5, 15, 20, 25] + gamma: 0.5 + freeze_backbone: True + debug: + active: False + save_hm_gt: True + save_hm_pred: True + +TEST: + gpus: [0] + subtask: 'eval' + test_file: '' + vis_hm: True + threshold: 0.5 + flip_test: True + video_level: True + pretrained: './training/weights/final_model.pth' diff --git a/configs/detector/detector2.yaml b/configs/detector/detector2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..050275df30864a8d247a57a0d7ea18786855fadc --- /dev/null +++ b/configs/detector/detector2.yaml @@ -0,0 +1,140 @@ +TASK: custom_laanet +PRECISION: float32 +METRICS_BASE: binary +SEED: 317 +DATA_RELOAD: False +Resume: True +begin_epoch: 100 + +MODEL: + # PRETRAINED_PATH: '' + type: PoseEfficientNet + model_name: efficientnet-b4 + num_layers: B4 + include_top: False + include_hm_decoder: True + head_conv: 64 + use_c2: False + use_c3: True + use_c4: True + use_c51: True + efpn: True + tfpn: False + se_layer: False + heads: + hm: 1 + cls: 1 + cstency: 256 + INIT_WEIGHTS: + pretrained: True + advprop: True + +DATASET: + type: BinaryFaceForensic + FROM_FILE: False + PIN_MEMORY: True + NUM_WORKERS: 7 + COLOR_NORM: 'simple' + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + IMAGE_SUFFIX: png + COMPRESSION: c0 + IMAGE_SIZE: [384, 384] + HEATMAP_SIZE: [96, 96] #[IMAGE_SIZE//4, IMAGE_SIZE//4] + SIGMA: 2 + ADAPTIVE_SIGMA: True + HEATMAP_TYPE: gaussian + SPLIT_IMAGE: False + DATA: + TYPE: frames + SAMPLES_PER_VIDEO: + ACTIVE: True + TRAIN: 8 + VAL: 8 + TEST: 32 + TRAIN: + NAME: custom_dataset + ROOT: ./datasets/train/ + FROM_FILE: False + FAKETYPE: [fake] + LABEL_FOLDER: [real, fake] + + VAL: + NAME: custom_dataset + ROOT: ./datasets/test/ + FROM_FILE: False + FAKETYPE: [fake] + LABEL_FOLDER: [real, fake] + TEST: + NAME: custom_dataset + ROOT: ./datasets/test/ + FROM_FILE: False + FAKETYPE: [fake] + LABEL_FOLDER: [real, fake] + TRANSFORM: + geometry: + type: GeometryTransform + resize: [384, 384, 0] #h, w, p=probability.
If no affine transform, set p=1 + normalize: 0 + horizontal_flip: 0.5 + cropping: [0.15, 0.5] #Format: [crop_limit, prob] + scale: [0.15, 0.5] #Format: [scale_limit, prob] + rand_erasing: [0.5, 1] #Format: [p, max_count] + color: + type: ColorJitterTransform + clahe: 0.0 + colorjitter: 0.3 + gaussianblur: 0.3 + gaussnoise: 0.3 + jpegcompression: [0.5, 40, 100] # prob, lower and upper quality respectively + rgbshift: 0.3 + randomcontrast: 0.0 + randomgamma: 0.5 + randombrightness: 1 + huesat: 1 + normalize: + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + +TRAIN: + resume: True + gpus: [0] + pretrained_model: './logs/27-03-2025/PoseEfficientNet_custom_laanet_model_final.pth' + batch_size: 32 + lr: 0.00005 + epochs: 150 + begin_epoch: 100 + warm_up: 6 + every_val_epochs: 1 + loss: + type: CombinedFocalLoss + use_target_weight: False + cls_lmda: 1 + dst_hm_cls_lmda: 0 + offset_lmda: 0 + hm_lmda: 100 + cstency_lmda: 100 + mse_reduction: sum + ce_reduction: mean + optimizer: SAM + distributed: False + tensorboard: False + lr_scheduler: + # type: MultiStepLR + milestones: [5, 15, 20, 25] + gamma: 0.5 + freeze_backbone: True + debug: + active: False + save_hm_gt: True + save_hm_pred: True + +TEST: + gpus: [0] + subtask: 'eval' + test_file: '' + vis_hm: True + threshold: 0.5 + flip_test: True + video_level: True + pretrained: './training/weights/final_model.pth' diff --git a/configs/detector/efn4_fpn_hm_adv.yaml b/configs/detector/efn4_fpn_hm_adv.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1fa9a014fc19c4567dd066c86bbf38c4383674e2 --- /dev/null +++ b/configs/detector/efn4_fpn_hm_adv.yaml @@ -0,0 +1,159 @@ +TASK: EFN_hm10_EFPN_NoBasedCLS_Focal_C2_256Cst100_32FXRayv2_SAM(Adam)_ADV_Era3_OutSoftMax_5e5_div4_FZ +PRECISION: float64 +METRICS_BASE: binary +SEED: 317 +DATA_RELOAD: True +DATASET: + type: HeatmapFaceForensic + TRAIN: True #Switch to True for training mode, False for testing mode + COMPRESSION: c0 + IMAGE_SUFFIX: png + NUM_WORKERS: 7 + PIN_MEMORY: True + IMAGE_SIZE: [384, 384] + HEATMAP_SIZE: [96, 96] #[IMAGE_SIZE//4, IMAGE_SIZE//4] + SIGMA: 2 + ADAPTIVE_SIGMA: True + HEATMAP_TYPE: gaussian + SPLIT_IMAGE: False + DATA: + TYPE: frames + SAMPLES_PER_VIDEO: + ACTIVE: True + TRAIN: 32 # Frames per video, randomly re-sampled each epoch + VAL: 32 + TEST: 32 + TRAIN: + NAME: FF++ # Datasets usable for training / in-dataset / cross-dataset evaluation + ROOT: /home/users//data/FaceForensics++/c0/ + # ROOT: /data/deepfake_cluster/datasets_df/FaceForensics++/c0/ + FROM_FILE: True + FAKETYPE: [Deepfakes, Face2Face, FaceSwap, NeuralTextures] + # ANNO_FILE: train/frames/FaceXRay/train_FF_FaceXRay.json + ANNO_FILE: processed_data/c0/dynamic_trainBI_FFv2.json + LABEL_FOLDER: [real, fake] + VAL: + NAME: FF++ # Datasets usable for training / in-dataset / cross-dataset evaluation + ROOT: /Deep/datasets/ + FROM_FILE: True + FAKETYPE: [Deepfakes, Face2Face, FaceSwap, NeuralTextures] # Deepfake techniques to load in the dataloader + # ANNO_FILE: val/frames/FaceXRay/val_FF_FaceXRay.json + ANNO_FILE: processed_data/c0/dynamic_valBI_FFv2.json + LABEL_FOLDER: [real, fake] + TEST: + NAME: DFW + # ROOT: /home/users//data/FaceForensics++/c0/ + # ROOT: /home/users//data/Celeb-DFv1/ + # ROOT: /data/deepfake_cluster/datasets_df/Celeb-DFv1/ + # ROOT: /home/users//data/Celeb-DFv2/ + # ROOT: /home/users//data/DFDCP/ + # ROOT: /home/users//data/DFDC/ + # ROOT: /home/users//data/DFD/ + ROOT: /home/users//data/DFW/ + FROM_FILE:
False + # FAKETYPE: [original, Deepfakes] + # FAKETYPE: [original, Deepfakes, Face2Face, FaceSwap, NeuralTextures] + # FAKETYPE: [Celeb-real, Celeb-synthesis, YouTube-real] + # FAKETYPE: [method_A, method_B, original_videos] + # FAKETYPE: [fake, real] + # FAKETYPE: [DeepFakeDetection_original, DeepFakeDetection] + FAKETYPE: [real_test, fake_test] + ANNO_FILE: FaceXRay/test/test_FF_Xray.json + LABEL_FOLDER: [real, fake] + TRANSFORM: + geometry: + type: GeometryTransform + resize: [384, 384, 0] #h, w, p=probability. If no affine transform, set p=1 + normalize: 0 + horizontal_flip: 0.5 + cropping: [0.15, 0.5] #Format: [crop_limit, prob] + scale: [0.15, 0.5] #Format: [scale_limit, prob] + rand_erasing: [0.5, 1] #Format: [p, max_count] + color: + type: ColorJitterTransform + clahe: 0.0 + colorjitter: 0.3 + gaussianblur: 0.3 + gaussnoise: 0.3 + jpegcompression: [0.5, 40, 100] # prob, lower and upper quality respectively + rgbshift: 0.3 + randomcontrast: 0.0 + randomgamma: 0.5 + randombrightness: 1 + huesat: 1 + normalize: + mean: [0.5, 0.5, 0.5] + std: [0.5, 0.5, 0.5] + DEBUG: False + DYNAMIC_FXRAY: True +MODEL: + type: PoseEfficientNet + model_name: efficientnet-b4 + num_layers: B4 + include_top: False + include_hm_decoder: True + head_conv: 64 + use_c2: False + use_c3: True + use_c4: True + use_c51: True + efpn: True + tfpn: False + se_layer: False + heads: + hm: 1 + cls: 1 + cstency: 256 + INIT_WEIGHTS: + pretrained: True + advprop: True +TRAIN: + gpus: [0] + batch_size: 16 + lr: 0.00005 + epochs: 100 + begin_epoch: -1 + warm_up: 6 + every_val_epochs: 1 + loss: + type: CombinedFocalLoss + use_target_weight: False + cls_lmda: 1 + dst_hm_cls_lmda: 0 + offset_lmda: 0 + hm_lmda: 10 + cstency_lmda: 100 + mse_reduction: sum + ce_reduction: mean + optimizer: SAM + distributed: False + pretrained: '~/Deep/training/weights/LALALAND1.pth' + tensorboard: True + resume: True + lr_scheduler: + # type: MultiStepLR + milestones: [5, 15, 20, 25] + gamma: 0.5 + freeze_backbone: True + debug: + active: False + save_hm_gt: True + save_hm_pred: True +TEST: + gpus: [0] + subtask: 'eval' + test_file: '' + vis_hm: True + threshold: 0.5 + flip_test: True + video_level: True + pretrained: '~/Deep/training/weights/LALALAND1.pth' +PREPROCESSING: + DATASET: FaceForensics + SPLIT: train + ROOT: ~/Deep/datasets/ + FAKETYPE: [Deepfakes, Face2Face, FaceSwap, NeuralTextures] + IMAGE_SUFFIX: jpg + DATA_TYPE: images + LABEL: [real, fake] + DEBUG: False diff --git a/configs/get_config.py b/configs/get_config.py new file mode 100644 index 0000000000000000000000000000000000000000..386388084c5572ba252131021d214fbeabcd238b --- /dev/null +++ b/configs/get_config.py @@ -0,0 +1,16 @@ +#-*- coding: utf-8 -*- +import os + +from yaml import load, dump +try: + from yaml import CLoader as Loader, CDumper as Dumper +except ImportError: + from yaml import Loader, Dumper +from box import Box as edict + + +def load_config(cfg): + with open(cfg) as f: + config = load(f, Loader=Loader) + + return edict(config) diff --git a/configs/test_config.yaml b/configs/test_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6ea8ddc7960bfa196f1e41850b6b59f992d233aa --- /dev/null +++ b/configs/test_config.yaml @@ -0,0 +1,38 @@ +mode: test +lmdb: False +rgb_dir: '/ssd_scratch/deep_fake_dataset/' +lmdb_dir: '/ssd_scratch/deep_fake_dataset/datasets_lmdbs/' +dataset_json_folder: './preprocessing/dataset_json_v6/' +label_dict: + # DFD + DFD_fake: 1 + DFD_real: 0 + # FF++ + FaceShifter(FF-real+FF-FH) + FF-SH: 1 + FF-F2F: 1 + FF-DF: 1 + 
FF-FS: 1 + FF-NT: 1 + FF-FH: 1 + FF-real: 0 + # CelebDF + CelebDFv1_real: 0 + CelebDFv1_fake: 1 + CelebDFv2_real: 0 + CelebDFv2_fake: 1 + # DFDCP + DFDCP_Real: 0 + DFDCP_FakeA: 1 + DFDCP_FakeB: 1 + # DFDC + DFDC_Fake: 1 + DFDC_Real: 0 + # DeeperForensics-1.0 + DF_fake: 1 + DF_real: 0 + # UADFV + UADFV_Fake: 1 + UADFV_Real: 0 + # Roop + roop_Real: 0 + roop_Fake: 1 diff --git a/configs/train_config copie.yaml b/configs/train_config copie.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b9d1f53b1521b6d24f7d8de857c0ab6872241572 --- /dev/null +++ b/configs/train_config copie.yaml @@ -0,0 +1,43 @@ +mode: train +lmdb: False +dry_run: False +rgb_dir: '/ssd_scratch/deep_fake_dataset/' +lmdb_dir: '/ssd_scratch/deep_fake_dataset/datasets_lmdbs/' +dataset_json_folder: './preprocessing/dataset_json_v6/' +SWA: False +save_avg: True +log_dir: ./logs/training/ +# label settings +label_dict: + # DFD + DFD_fake: 1 + DFD_real: 0 + # FF++ + FaceShifter(FF-real+FF-FH) + FF-SH: 1 + FF-F2F: 1 + FF-DF: 1 + FF-FS: 1 + FF-NT: 1 + FF-FH: 1 + FF-real: 0 + # CelebDF + CelebDFv1_real: 0 + CelebDFv1_fake: 1 + CelebDFv2_real: 0 + CelebDFv2_fake: 1 + # DFDCP + DFDCP_Real: 0 + DFDCP_FakeA: 1 + DFDCP_FakeB: 1 + # DFDC + DFDC_Fake: 1 + DFDC_Real: 0 + # DeeperForensics-1.0 + DF_fake: 1 + DF_real: 0 + # UADFV + UADFV_Fake: 1 + UADFV_Real: 0 + # Roop + roop_Real: 0 + roop_Fake: 1 diff --git a/configs/train_config.yaml b/configs/train_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..02e4a2af68449d67d336ba4ee5ba8a1b4ec367ea --- /dev/null +++ b/configs/train_config.yaml @@ -0,0 +1,46 @@ +mode: train +lmdb: False +dry_run: False +rgb_dir: '/ssd_scratch/deep_fake_dataset/' +lmdb_dir: '/ssd_scratch/deep_fake_dataset/datasets_lmdbs/' +dataset_json_folder: './preprocessing/dataset_json_v6/' +SWA: False +save_avg: True +log_dir: ./logs/training/ +# label settings +label_dict: + # iFakeFaceDB labels + real: 0 + fake: 1 + # DFD + DFD_fake: 1 + DFD_real: 0 + # FF++ + FaceShifter(FF-real+FF-FH) + FF-SH: 1 + FF-F2F: 1 + FF-DF: 1 + FF-FS: 1 + FF-NT: 1 + FF-FH: 1 + FF-real: 0 + # CelebDF + CelebDFv1_real: 0 + CelebDFv1_fake: 1 + CelebDFv2_real: 0 + CelebDFv2_fake: 1 + # DFDCP + DFDCP_Real: 0 + DFDCP_FakeA: 1 + DFDCP_FakeB: 1 + # DFDC + DFDC_Fake: 1 + DFDC_Real: 0 + # DeeperForensics-1.0 + DF_fake: 1 + DF_real: 0 + # UADFV + UADFV_Fake: 1 + UADFV_Real: 0 + # Roop + roop_Real: 0 + roop_Fake: 1 diff --git a/loss/__init__.py b/loss/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..35db7f48eaf2934ec917019df03f8ba65b10c5df --- /dev/null +++ b/loss/__init__.py @@ -0,0 +1,11 @@ +import os +import sys +current_file_path = os.path.abspath(__file__) +parent_dir = os.path.dirname(os.path.dirname(current_file_path)) +project_root_dir = os.path.dirname(parent_dir) +sys.path.append(parent_dir) +sys.path.append(project_root_dir) + +from metrics.registry import LOSSFUNC + +from .cross_entropy_loss import CrossEntropyLoss diff --git a/loss/__pycache__/__init__.cpython-312.pyc b/loss/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a8dfeb279ab0ece3db9acdc7f1a6a489da9f7d35 Binary files /dev/null and b/loss/__pycache__/__init__.cpython-312.pyc differ diff --git a/loss/__pycache__/abstract_loss_func.cpython-312.pyc b/loss/__pycache__/abstract_loss_func.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..000c682e9a40cebcea8e7ebe6851f8d3ea6c2cc4 Binary files /dev/null and 
b/loss/__pycache__/abstract_loss_func.cpython-312.pyc differ diff --git a/loss/__pycache__/cross_entropy_loss.cpython-312.pyc b/loss/__pycache__/cross_entropy_loss.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9a71613e80e310d3a2ca2a51766fef0cb44ed26b Binary files /dev/null and b/loss/__pycache__/cross_entropy_loss.cpython-312.pyc differ diff --git a/loss/abstract_loss_func.py b/loss/abstract_loss_func.py new file mode 100644 index 0000000000000000000000000000000000000000..45d3324ed53be4310867b326e9eaabd265634138 --- /dev/null +++ b/loss/abstract_loss_func.py @@ -0,0 +1,17 @@ +import torch.nn as nn + +class AbstractLossClass(nn.Module): + """Abstract class for loss functions.""" + def __init__(self): + super(AbstractLossClass, self).__init__() + + def forward(self, pred, label): + """ + Args: + pred: prediction of the model + label: ground truth label + + Return: + loss: loss value + """ + raise NotImplementedError('Each subclass should implement the forward method.') diff --git a/loss/cross_entropy_loss.py b/loss/cross_entropy_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..efa7123ed0ee0516743fa41d43b53e063c21a460 --- /dev/null +++ b/loss/cross_entropy_loss.py @@ -0,0 +1,26 @@ +import torch.nn as nn +from .abstract_loss_func import AbstractLossClass +from metrics.registry import LOSSFUNC + + +@LOSSFUNC.register_module(module_name="cross_entropy") +class CrossEntropyLoss(AbstractLossClass): + def __init__(self): + super().__init__() + self.loss_fn = nn.CrossEntropyLoss() + + def forward(self, inputs, targets): + """ + Computes the cross-entropy loss. + + Args: + inputs: A PyTorch tensor of size (batch_size, num_classes) containing the predicted scores. + targets: A PyTorch tensor of size (batch_size) containing the ground-truth class indices. + + Returns: + A scalar tensor representing the cross-entropy loss. 
+ """ + # Compute the cross-entropy loss + loss = self.loss_fn(inputs, targets) + + return loss \ No newline at end of file diff --git a/metrics/__init__.py b/metrics/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..676145d777810e4a51bdaf59fdec4f5358aae349 --- /dev/null +++ b/metrics/__init__.py @@ -0,0 +1,7 @@ +import os +import sys +current_file_path = os.path.abspath(__file__) +parent_dir = os.path.dirname(os.path.dirname(current_file_path)) +project_root_dir = os.path.dirname(parent_dir) +sys.path.append(parent_dir) +sys.path.append(project_root_dir) diff --git a/metrics/__pycache__/__init__.cpython-312.pyc b/metrics/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eb3899dc675c252eac1b136f3bd52509d4ae9f66 Binary files /dev/null and b/metrics/__pycache__/__init__.cpython-312.pyc differ diff --git a/metrics/__pycache__/base_metrics_class.cpython-312.pyc b/metrics/__pycache__/base_metrics_class.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..17ca0d2a33164f756031e944018abce4b1d2ac88 Binary files /dev/null and b/metrics/__pycache__/base_metrics_class.cpython-312.pyc differ diff --git a/metrics/__pycache__/registry.cpython-312.pyc b/metrics/__pycache__/registry.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..87f6724f3392d7670d3d37485a155e54073afc66 Binary files /dev/null and b/metrics/__pycache__/registry.cpython-312.pyc differ diff --git a/metrics/__pycache__/utils.cpython-312.pyc b/metrics/__pycache__/utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0c8e9e6661c6e26905af9c86c4bcdfde5cd5e663 Binary files /dev/null and b/metrics/__pycache__/utils.cpython-312.pyc differ diff --git a/metrics/base_metrics_class.py b/metrics/base_metrics_class.py new file mode 100644 index 0000000000000000000000000000000000000000..1e77a4f7df880ae22933722e5c692dcb9343be07 --- /dev/null +++ b/metrics/base_metrics_class.py @@ -0,0 +1,204 @@ +import numpy as np +from sklearn import metrics +import torch +import torch.nn as nn + + +def get_accracy(output, label): + _, prediction = torch.max(output, 1) # argmax + correct = (prediction == label).sum().item() + accuracy = correct / prediction.size(0) + return accuracy + + +def get_prediction(output, label): + prob = nn.functional.softmax(output, dim=1)[:, 1] + prob = prob.view(prob.size(0), 1) + label = label.view(label.size(0), 1) + #print(prob.size(), label.size()) + datas = torch.cat((prob, label.float()), dim=1) + return datas + + +def calculate_metrics_for_train(label, output): + if output.size(1) == 2: + prob = torch.softmax(output, dim=1)[:, 1] + else: + prob = output + + # Accuracy + _, prediction = torch.max(output, 1) + correct = (prediction == label).sum().item() + accuracy = correct / prediction.size(0) + + # Average Precision + y_true = label.cpu().detach().numpy() + y_pred = prob.cpu().detach().numpy() + ap = metrics.average_precision_score(y_true, y_pred) + + # AUC and EER + try: + fpr, tpr, thresholds = metrics.roc_curve(label.squeeze().cpu().numpy(), + prob.squeeze().cpu().numpy(), + pos_label=1) + except: + # for the case when we only have one sample + return None, None, accuracy, ap + + if np.isnan(fpr[0]) or np.isnan(tpr[0]): + # for the case when all the samples within a batch is fake/real + auc, eer = None, None + else: + auc = metrics.auc(fpr, tpr) + fnr = 1 - tpr + eer = fpr[np.nanargmin(np.absolute((fnr - fpr)))] + + return auc, eer, accuracy, 
ap + + +# ------------ compute average metrics of batches--------------------- +class Metrics_batch(): + def __init__(self): + self.tprs = [] + self.mean_fpr = np.linspace(0, 1, 100) + self.aucs = [] + self.eers = [] + self.aps = [] + + self.correct = 0 + self.total = 0 + self.losses = [] + + def update(self, label, output): + acc = self._update_acc(label, output) + if output.size(1) == 2: + prob = torch.softmax(output, dim=1)[:, 1] + else: + prob = output + #label = 1-label + #prob = torch.softmax(output, dim=1)[:, 1] + auc, eer = self._update_auc(label, prob) + ap = self._update_ap(label, prob) + + return acc, auc, eer, ap + + def _update_auc(self, lab, prob): + fpr, tpr, thresholds = metrics.roc_curve(lab.squeeze().cpu().numpy(), + prob.squeeze().cpu().numpy(), + pos_label=1) + if np.isnan(fpr[0]) or np.isnan(tpr[0]): + return -1, -1 + + auc = metrics.auc(fpr, tpr) + interp_tpr = np.interp(self.mean_fpr, fpr, tpr) + interp_tpr[0] = 0.0 + self.tprs.append(interp_tpr) + self.aucs.append(auc) + + # return auc + + # EER + fnr = 1 - tpr + eer = fpr[np.nanargmin(np.absolute((fnr - fpr)))] + self.eers.append(eer) + + return auc, eer + + def _update_acc(self, lab, output): + _, prediction = torch.max(output, 1) # argmax + correct = (prediction == lab).sum().item() + accuracy = correct / prediction.size(0) + # self.accs.append(accuracy) + self.correct = self.correct+correct + self.total = self.total+lab.size(0) + return accuracy + + def _update_ap(self, label, prob): + y_true = label.cpu().detach().numpy() + y_pred = prob.cpu().detach().numpy() + ap = metrics.average_precision_score(y_true,y_pred) + self.aps.append(ap) + + return np.mean(ap) + + def get_mean_metrics(self): + mean_acc, std_acc = self.correct/self.total, 0 + mean_auc, std_auc = self._mean_auc() + mean_err, std_err = np.mean(self.eers), np.std(self.eers) + mean_ap, std_ap = np.mean(self.aps), np.std(self.aps) + + return {'acc':mean_acc, 'auc':mean_auc, 'eer':mean_err, 'ap':mean_ap} + + def _mean_auc(self): + mean_tpr = np.mean(self.tprs, axis=0) + mean_tpr[-1] = 1.0 + mean_auc = metrics.auc(self.mean_fpr, mean_tpr) + std_auc = np.std(self.aucs) + return mean_auc, std_auc + + def clear(self): + self.tprs.clear() + self.aucs.clear() + # self.accs.clear() + self.correct=0 + self.total=0 + self.eers.clear() + self.aps.clear() + self.losses.clear() + + +# ------------ compute average metrics of all data --------------------- +class Metrics_all(): + def __init__(self): + self.probs = [] + self.labels = [] + self.correct = 0 + self.total = 0 + + def store(self, label, output): + prob = torch.softmax(output, dim=1)[:, 1] + _, prediction = torch.max(output, 1) # argmax + correct = (prediction == label).sum().item() + self.correct += correct + self.total += label.size(0) + self.labels.append(label.squeeze().cpu().numpy()) + self.probs.append(prob.squeeze().cpu().numpy()) + + def get_metrics(self): + y_pred = np.concatenate(self.probs) + y_true = np.concatenate(self.labels) + # auc + fpr, tpr, thresholds = metrics.roc_curve(y_true,y_pred,pos_label=1) + auc = metrics.auc(fpr, tpr) + # eer + fnr = 1 - tpr + eer = fpr[np.nanargmin(np.absolute((fnr - fpr)))] + # ap + ap = metrics.average_precision_score(y_true,y_pred) + # acc + acc = self.correct / self.total + return {'acc':acc, 'auc':auc, 'eer':eer, 'ap':ap} + + def clear(self): + self.probs.clear() + self.labels.clear() + self.correct = 0 + self.total = 0 + + +# only used to record a series of scalar value +class Recorder: + def __init__(self): + self.sum = 0 + self.num = 0 + def update(self, 
item, num=1): + if item is not None: + self.sum += item * num + self.num += num + def average(self): + if self.num == 0: + return None + return self.sum/self.num + def clear(self): + self.sum = 0 + self.num = 0 diff --git a/metrics/registry.py b/metrics/registry.py new file mode 100644 index 0000000000000000000000000000000000000000..1608e4513f5144dfc22d0f29d15a926d693f939d --- /dev/null +++ b/metrics/registry.py @@ -0,0 +1,19 @@ +class Registry(object): + def __init__(self): + self.data = {} + + def register_module(self, module_name=None): + def _register(cls): + name = module_name + if module_name is None: + name = cls.__name__ + self.data[name] = cls + return cls + return _register + + def __getitem__(self, key): + return self.data[key] + +DETECTOR = Registry() +TRAINER = Registry() +LOSSFUNC = Registry() diff --git a/metrics/utils.py b/metrics/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..bb08c6a08f0867afc094402f333154c124ac88a4 --- /dev/null +++ b/metrics/utils.py @@ -0,0 +1,92 @@ +from sklearn import metrics +import numpy as np + +def parse_metric_for_print(metric_dict): + if metric_dict is None: + return "\n" + str = "\n" + str += "================================ Each dataset best metric ================================ \n" + for key, value in metric_dict.items(): + if key != 'avg': + str= str+ f"| {key}: " + for k,v in value.items(): + str = str + f" {k}={v} " + str= str+ "| \n" + else: + str += "============================================================================================= \n" + str += "================================== Average best metric ====================================== \n" + avg_dict = value + for avg_key, avg_value in avg_dict.items(): + if avg_key == 'dataset_dict': + for key,value in avg_value.items(): + str = str + f"| {key}: {value} | \n" + else: + str = str + f"| avg {avg_key}: {avg_value} | \n" + str += "=============================================================================================" + return str + + +def get_test_metrics(y_pred, y_true, img_names): + def get_video_metrics(image, pred, label): + result_dict = {} + new_label = [] + new_pred = [] + # print(image[0]) + # print(pred.shape) + # print(label.shape) + for item in np.transpose(np.stack((image, pred, label)), (1, 0)): + + s = item[0] + if '\\' in s: + parts = s.split('\\') + else: + parts = s.split('/') + a = parts[-2] + b = parts[-1] + + if a not in result_dict: + result_dict[a] = [] + + result_dict[a].append(item) + image_arr = list(result_dict.values()) + + for video in image_arr: + pred_sum = 0 + label_sum = 0 + leng = 0 + for frame in video: + pred_sum += float(frame[1]) + label_sum += int(frame[2]) + leng += 1 + new_pred.append(pred_sum / leng) + new_label.append(int(label_sum / leng)) + fpr, tpr, thresholds = metrics.roc_curve(new_label, new_pred) + v_auc = metrics.auc(fpr, tpr) + fnr = 1 - tpr + v_eer = fpr[np.nanargmin(np.absolute((fnr - fpr)))] + return v_auc, v_eer + + + y_pred = y_pred.squeeze() + # For UCF, where labels for different manipulations are not consistent. 
+ y_true[y_true >= 1] = 1 + # auc + fpr, tpr, thresholds = metrics.roc_curve(y_true, y_pred, pos_label=1) + auc = metrics.auc(fpr, tpr) + # eer + fnr = 1 - tpr + eer = fpr[np.nanargmin(np.absolute((fnr - fpr)))] + # ap + ap = metrics.average_precision_score(y_true, y_pred) + # acc + prediction_class = (y_pred > 0.5).astype(int) + correct = (prediction_class == np.clip(y_true, a_min=0, a_max=1)).sum().item() + acc = correct / len(prediction_class) + if type(img_names[0]) is not list: + # calculate video-level auc for the frame-level methods. + v_auc, _ = get_video_metrics(img_names, y_pred, y_true) + else: + # video-level methods + v_auc = auc + + return {'acc': acc, 'auc': auc, 'eer': eer, 'ap': ap, 'pred': y_pred, 'video_auc': v_auc, 'label': y_true} diff --git a/models/.DS_Store b/models/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..95b9267f9e928fab38455455f1493fff55c85cce Binary files /dev/null and b/models/.DS_Store differ diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d8fd823836c9502b9dc8f1c46078bf56642740ec --- /dev/null +++ b/models/__init__.py @@ -0,0 +1,29 @@ +#-*- coding: utf-8 -*- +from .builder import MODELS, build_model +from .networks.arcface import ( + SimpleClassificationDF, +) +from .networks.mrsa_resnet import ( + PoseResNet, resnet_spec, Bottleneck +) +from .networks.pose_hrnet import ( + PoseHighResolutionNet +) +from .networks.xception import ( + Xception +) +from .networks.pose_efficientNet import ( + PoseEfficientNet +) +from .networks.common import * +from .utils import ( + load_pretrained, freeze_backbone, + load_model, save_model, unfreeze_backbone, + preset_model, +) + + +__all__ = ['SimpleClassificationDF', 'PoseResNet', 'MODELS', 'build_model', + 'load_pretrained', 'freeze_backbone', 'resnet_spec', + 'load_model', 'save_model', 'unfreeze_backbone', 'Bottleneck', + 'preset_model', 'PoseHighResolutionNet', 'Xception', 'PoseEfficientNet'] diff --git a/models/__pycache__/__init__.cpython-310.pyc b/models/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6dcd95eb4c6cba7bf39d236676e13e85d067030d Binary files /dev/null and b/models/__pycache__/__init__.cpython-310.pyc differ diff --git a/models/__pycache__/__init__.cpython-312.pyc b/models/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..57a9942c7f871682adc468658840a5f1d6c52e1f Binary files /dev/null and b/models/__pycache__/__init__.cpython-312.pyc differ diff --git a/models/__pycache__/builder.cpython-310.pyc b/models/__pycache__/builder.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9f3f626a49f1b4450358cb3315fa8135282417c3 Binary files /dev/null and b/models/__pycache__/builder.cpython-310.pyc differ diff --git a/models/__pycache__/builder.cpython-312.pyc b/models/__pycache__/builder.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e1c9732f1dd31d58fddbd7592258b5e54e6426d7 Binary files /dev/null and b/models/__pycache__/builder.cpython-312.pyc differ diff --git a/models/__pycache__/utils.cpython-310.pyc b/models/__pycache__/utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f755ed67c03b4ae1bb6d67c14f1dc3b83e374b30 Binary files /dev/null and b/models/__pycache__/utils.cpython-310.pyc differ diff --git a/models/__pycache__/utils.cpython-312.pyc b/models/__pycache__/utils.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..89d8b21291624bcc73166eb72a92fcf5da3a4a92 Binary files /dev/null and b/models/__pycache__/utils.cpython-312.pyc differ diff --git a/models/builder.py b/models/builder.py new file mode 100644 index 0000000000000000000000000000000000000000..e5fb85d80da3ab748047f1cfc9a8e4d41456db3e --- /dev/null +++ b/models/builder.py @@ -0,0 +1,45 @@ +#-*- coding: utf-8 -*- +from typing import Dict, Any, Optional + +import os +import sys +if os.getcwd() not in sys.path: + sys.path.append(os.getcwd()) + +from torch.nn import Sequential + +from register.register import Registry, build_from_cfg + + +def build_model_from_cfg(cfg, registry, default_args=None): + """Build a PyTorch model from config dict(s). Different from + ``build_from_cfg``, if cfg is a list, a ``nn.Sequential`` will be built. + Args: + cfg (dict, list[dict]): The config of modules; it is either a config + dict or a list of config dicts. If cfg is a list, + the built modules will be wrapped with ``nn.Sequential``. + registry (:obj:`Registry`): A registry the module belongs to. + default_args (dict, optional): Default arguments to build the module. + Defaults to None. + Returns: + nn.Module: A built nn module. + """ + if isinstance(cfg, list): + modules = [ + build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg + ] + return Sequential(*modules) + else: + return build_from_cfg(cfg, registry, default_args) + + +MODELS = Registry('model', build_func=build_model_from_cfg) +HEADS = MODELS +BACKBONES = MODELS + + +def build_model(cfg: Dict, + model: Registry, + build_func=build_model_from_cfg, + default_args: Optional[Dict] = None) -> Any: + return build_func(cfg, model, default_args) diff --git a/models/networks/__pycache__/arcface.cpython-310.pyc b/models/networks/__pycache__/arcface.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f585f9cb59d029242b6286bb94394d93d75f5b27 Binary files /dev/null and b/models/networks/__pycache__/arcface.cpython-310.pyc differ diff --git a/models/networks/__pycache__/arcface.cpython-312.pyc b/models/networks/__pycache__/arcface.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..91d8dea6a4b13ccd377969fb4a450a35d6e0a2d9 Binary files /dev/null and b/models/networks/__pycache__/arcface.cpython-312.pyc differ diff --git a/models/networks/__pycache__/common.cpython-310.pyc b/models/networks/__pycache__/common.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..27c3af178582067ac014e819a7a638d2817de0c5 Binary files /dev/null and b/models/networks/__pycache__/common.cpython-310.pyc differ diff --git a/models/networks/__pycache__/common.cpython-312.pyc b/models/networks/__pycache__/common.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f4ed32f3aebd93c50afe71da560239af3ea2a8b5 Binary files /dev/null and b/models/networks/__pycache__/common.cpython-312.pyc differ diff --git a/models/networks/__pycache__/efficientNet.cpython-310.pyc b/models/networks/__pycache__/efficientNet.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cfb3419e5e7530acdfac08d3d2ed398da5a8ab4f Binary files /dev/null and b/models/networks/__pycache__/efficientNet.cpython-310.pyc differ diff --git a/models/networks/__pycache__/efficientNet.cpython-312.pyc b/models/networks/__pycache__/efficientNet.cpython-312.pyc new file mode 100644 index
0000000000000000000000000000000000000000..ca4061d8c546927380c883b6dcc33c7996695da6 Binary files /dev/null and b/models/networks/__pycache__/efficientNet.cpython-312.pyc differ diff --git a/models/networks/__pycache__/mrsa_resnet.cpython-310.pyc b/models/networks/__pycache__/mrsa_resnet.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5b7260fc81bffbeeb040d5442e9de8f0ebed6dff Binary files /dev/null and b/models/networks/__pycache__/mrsa_resnet.cpython-310.pyc differ diff --git a/models/networks/__pycache__/mrsa_resnet.cpython-312.pyc b/models/networks/__pycache__/mrsa_resnet.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7b90acb210b955e5547b53df81eb57baf6eab30d Binary files /dev/null and b/models/networks/__pycache__/mrsa_resnet.cpython-312.pyc differ diff --git a/models/networks/__pycache__/pose_efficientNet.cpython-310.pyc b/models/networks/__pycache__/pose_efficientNet.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b9e87102e8fe7c7b05e9e651d8a82804a52029b7 Binary files /dev/null and b/models/networks/__pycache__/pose_efficientNet.cpython-310.pyc differ diff --git a/models/networks/__pycache__/pose_efficientNet.cpython-312.pyc b/models/networks/__pycache__/pose_efficientNet.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7cc69b0813a1945867f7b1c815b54a5d381fc495 Binary files /dev/null and b/models/networks/__pycache__/pose_efficientNet.cpython-312.pyc differ diff --git a/models/networks/__pycache__/pose_hrnet.cpython-310.pyc b/models/networks/__pycache__/pose_hrnet.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f366985d674d48ecd026b859aa2758b38697d2ec Binary files /dev/null and b/models/networks/__pycache__/pose_hrnet.cpython-310.pyc differ diff --git a/models/networks/__pycache__/pose_hrnet.cpython-312.pyc b/models/networks/__pycache__/pose_hrnet.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ace8e21a246ea696dd41b16a75302d13ac7a318d Binary files /dev/null and b/models/networks/__pycache__/pose_hrnet.cpython-312.pyc differ diff --git a/models/networks/__pycache__/xception.cpython-310.pyc b/models/networks/__pycache__/xception.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..863a9f309acc59e473b734b44518d6910862ab83 Binary files /dev/null and b/models/networks/__pycache__/xception.cpython-310.pyc differ diff --git a/models/networks/__pycache__/xception.cpython-312.pyc b/models/networks/__pycache__/xception.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5770cee462869dba41158c927a092ef66176fdb6 Binary files /dev/null and b/models/networks/__pycache__/xception.cpython-312.pyc differ diff --git a/models/networks/arcface.py b/models/networks/arcface.py new file mode 100644 index 0000000000000000000000000000000000000000..1613d6021974bb4b122db085fd8e8d6a4d3828d9 --- /dev/null +++ b/models/networks/arcface.py @@ -0,0 +1,384 @@ +#-*- coding: utf-8 -*- +import os +import math +from collections import namedtuple + +from torch.nn import (Linear, Conv2d, BatchNorm1d, Softmax, + BatchNorm2d, PReLU, ReLU, Sigmoid, + Dropout2d, Dropout, AvgPool2d, MaxPool2d, + AdaptiveAvgPool2d, Sequential, Module, Parameter) +import torch.nn.functional as F +import torch + +from ..builder import ( + MODELS, HEADS, BACKBONES, + build_model, +) + + +################################## Original Arcface Model 
############################################################# + + +class Flatten(Module): + def forward(self, input): + return input.view(input.size(0), -1) + + +def l2_norm(input,axis=1): + norm = torch.norm(input, 2, axis, True) + output = torch.div(input, norm) + return output + + +class SEModule(Module): + def __init__(self, channels, reduction): + super(SEModule, self).__init__() + self.avg_pool = AdaptiveAvgPool2d(1) + self.fc1 = Conv2d( + channels, channels // reduction, kernel_size=1, padding=0 ,bias=False) + self.relu = ReLU(inplace=True) + self.fc2 = Conv2d( + channels // reduction, channels, kernel_size=1, padding=0 ,bias=False) + self.sigmoid = Sigmoid() + + def forward(self, x): + module_input = x + x = self.avg_pool(x) + x = self.fc1(x) + x = self.relu(x) + x = self.fc2(x) + x = self.sigmoid(x) + return module_input * x + + +class bottleneck_IR(Module): + def __init__(self, in_channel, depth, stride): + super(bottleneck_IR, self).__init__() + if in_channel == depth: + self.shortcut_layer = MaxPool2d(1, stride) + else: + self.shortcut_layer = Sequential( + Conv2d(in_channel, depth, (1, 1), stride ,bias=False), BatchNorm2d(depth)) + self.res_layer = Sequential( + BatchNorm2d(in_channel), + Conv2d(in_channel, depth, (3, 3), (1, 1), 1 ,bias=False), PReLU(depth), + Conv2d(depth, depth, (3, 3), stride, 1 ,bias=False), BatchNorm2d(depth)) + + def forward(self, x): + shortcut = self.shortcut_layer(x) + res = self.res_layer(x) + return res + shortcut + + +class bottleneck_IR_SE(Module): + def __init__(self, in_channel, depth, stride): + super(bottleneck_IR_SE, self).__init__() + if in_channel == depth: + self.shortcut_layer = MaxPool2d(1, stride) + else: + self.shortcut_layer = Sequential( + Conv2d(in_channel, depth, (1, 1), stride ,bias=False), + BatchNorm2d(depth)) + self.res_layer = Sequential( + BatchNorm2d(in_channel), + Conv2d(in_channel, depth, (3,3), (1,1),1 ,bias=False), + PReLU(depth), + Conv2d(depth, depth, (3,3), stride, 1 ,bias=False), + BatchNorm2d(depth), + SEModule(depth,16) + ) + + def forward(self,x): + shortcut = self.shortcut_layer(x) + res = self.res_layer(x) + return res + shortcut + + +class Bottleneck(namedtuple('Block', ['in_channel', 'depth', 'stride'])): + '''A named tuple describing a ResNet block.''' + + +def get_block(in_channel, depth, num_units, stride = 2): + return [Bottleneck(in_channel, depth, stride)] + [Bottleneck(depth, depth, 1) for i in range(num_units-1)] + + +def get_blocks(num_layers): + if num_layers == 50: + blocks = [ + get_block(in_channel=64, depth=64, num_units = 3), + get_block(in_channel=64, depth=128, num_units=4), + get_block(in_channel=128, depth=256, num_units=14), + get_block(in_channel=256, depth=512, num_units=3) + ] + elif num_layers == 100: + blocks = [ + get_block(in_channel=64, depth=64, num_units=3), + get_block(in_channel=64, depth=128, num_units=13), + get_block(in_channel=128, depth=256, num_units=30), + get_block(in_channel=256, depth=512, num_units=3) + ] + elif num_layers == 152: + blocks = [ + get_block(in_channel=64, depth=64, num_units=3), + get_block(in_channel=64, depth=128, num_units=8), + get_block(in_channel=128, depth=256, num_units=36), + get_block(in_channel=256, depth=512, num_units=3) + ] + return blocks + + +@BACKBONES.register_module() +class ResNet(Module): + def __init__(self, num_layers=50, drop_ratio=0.6, mode='ir', **kwargs): + """ + Implementation for ResNet 50, 101, 152 with/out SE module + """ + super(ResNet, self).__init__() + assert num_layers in [50, 100, 152], 'num_layers should be 50,100, 
or 152' + assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se' + blocks = get_blocks(num_layers) + if mode == 'ir': + unit_module = bottleneck_IR + elif mode == 'ir_se': + unit_module = bottleneck_IR_SE + self.input_layer = Sequential(Conv2d(3, 64, (3, 3), 1, 1 ,bias=False), + BatchNorm2d(64), + PReLU(64)) + self.output_layer = Sequential(BatchNorm2d(512), + Dropout(drop_ratio), + Flatten(), + Linear(512 * 7 * 7, 512), + BatchNorm1d(512)) + modules = [] + for block in blocks: + for bottleneck in block: + modules.append( + unit_module(bottleneck.in_channel, + bottleneck.depth, + bottleneck.stride)) + self.body = Sequential(*modules) + + def forward(self,x): + x = self.input_layer(x) + x = self.body(x) + x = self.output_layer(x) + x = l2_norm(x) + return x + + +@HEADS.register_module() +class SimpleClassificationHead(Module): + def __init__(self, drop_ratio=0.6, in_planes=512, **kwargs): + super(SimpleClassificationHead, self).__init__() + self.classification_head = Sequential(Dropout(drop_ratio), + Linear(in_planes, 256), + BatchNorm1d(256), + Dropout(drop_ratio), + Linear(256, 128), + BatchNorm1d(128), + Dropout(drop_ratio), + Linear(128, 64), + BatchNorm1d(64), + Dropout(drop_ratio), + Linear(64, 32), + BatchNorm1d(32), + # Dropout(drop_ratio), + Linear(32, 1), + Sigmoid()) + + def forward(self, x): + x = self.classification_head(x) + return x + + +@MODELS.register_module() +class SimpleClassificationDF(Module): + def __init__(self, cfg: dict, **kwargs): + super(SimpleClassificationDF, self).__init__() + assert 'backbone' in cfg, 'Config for Backbones is mandatory!' + assert 'head' in cfg, 'Config for Heads is mandatory!' + + self.backbone = BACKBONES.get(cfg.backbone.type)(**cfg.backbone) + self.head = HEADS.get(cfg.head.type)(**cfg.head) + self.model = Sequential(*[self.backbone, + self.head]) + + def forward(self, x): + x = self.model(x) + return x + + +################################## MobileFaceNet ############################################################# + + +class Conv_block(Module): + def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1): + super(Conv_block, self).__init__() + self.conv = Conv2d(in_c, out_channels=out_c, kernel_size=kernel, groups=groups, stride=stride, padding=padding, bias=False) + self.bn = BatchNorm2d(out_c) + self.prelu = PReLU(out_c) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.prelu(x) + return x + + +class Linear_block(Module): + def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1): + super(Linear_block, self).__init__() + self.conv = Conv2d(in_c, out_channels=out_c, kernel_size=kernel, groups=groups, stride=stride, padding=padding, bias=False) + self.bn = BatchNorm2d(out_c) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + return x + + +class Depth_Wise(Module): + def __init__(self, in_c, out_c, residual = False, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=1): + super(Depth_Wise, self).__init__() + self.conv = Conv_block(in_c, out_c=groups, kernel=(1, 1), padding=(0, 0), stride=(1, 1)) + self.conv_dw = Conv_block(groups, groups, groups=groups, kernel=kernel, padding=padding, stride=stride) + self.project = Linear_block(groups, out_c, kernel=(1, 1), padding=(0, 0), stride=(1, 1)) + self.residual = residual + + def forward(self, x): + if self.residual: + short_cut = x + x = self.conv(x) + x = self.conv_dw(x) + x = self.project(x) + if self.residual: + output = short_cut + x + else: + output = x + return output + + +class 
Residual(Module): + def __init__(self, c, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1)): + super(Residual, self).__init__() + modules = [] + for _ in range(num_block): + modules.append(Depth_Wise(c, c, residual=True, kernel=kernel, padding=padding, stride=stride, groups=groups)) + self.model = Sequential(*modules) + + def forward(self, x): + return self.model(x) + + +class MobileFaceNet(Module): + def __init__(self, embedding_size): + super(MobileFaceNet, self).__init__() + self.conv1 = Conv_block(3, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1)) + self.conv2_dw = Conv_block(64, 64, kernel=(3, 3), stride=(1, 1), padding=(1, 1), groups=64) + self.conv_23 = Depth_Wise(64, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=128) + self.conv_3 = Residual(64, num_block=4, groups=128, kernel=(3, 3), stride=(1, 1), padding=(1, 1)) + self.conv_34 = Depth_Wise(64, 128, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=256) + self.conv_4 = Residual(128, num_block=6, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1)) + self.conv_45 = Depth_Wise(128, 128, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=512) + self.conv_5 = Residual(128, num_block=2, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1)) + self.conv_6_sep = Conv_block(128, 512, kernel=(1, 1), stride=(1, 1), padding=(0, 0)) + self.conv_6_dw = Linear_block(512, 512, groups=512, kernel=(7,7), stride=(1, 1), padding=(0, 0)) + self.conv_6_flatten = Flatten() + self.linear = Linear(512, embedding_size, bias=False) + self.bn = BatchNorm1d(embedding_size) + + def forward(self, x): + out = self.conv1(x) + out = self.conv2_dw(out) + out = self.conv_23(out) + out = self.conv_3(out) + out = self.conv_34(out) + out = self.conv_4(out) + out = self.conv_45(out) + out = self.conv_5(out) + out = self.conv_6_sep(out) + out = self.conv_6_dw(out) + out = self.conv_6_flatten(out) + out = self.linear(out) + out = self.bn(out) + + return l2_norm(out) + + +################################## Arcface head ############################################################# + + +class Arcface(Module): + # implementation of the ArcFace additive angular margin loss in https://arxiv.org/abs/1801.07698 + def __init__(self, embedding_size=512, classnum=51332, s=64., m=0.5): + super(Arcface, self).__init__() + self.classnum = classnum + self.kernel = Parameter(torch.Tensor(embedding_size,classnum)) + # initial kernel + self.kernel.data.uniform_(-1, 1).renorm_(2,1,1e-5).mul_(1e5) + self.m = m # the margin value, default is 0.5 + self.s = s # scale value, default is 64, see normface https://arxiv.org/abs/1704.06369 + self.cos_m = math.cos(m) + self.sin_m = math.sin(m) + self.mm = self.sin_m * m # issue 1 + self.threshold = math.cos(math.pi - m) + + def forward(self, embbedings, label): + # weights norm + nB = len(embbedings) + kernel_norm = l2_norm(self.kernel,axis=0) + # cos(theta+m) + cos_theta = torch.mm(embbedings,kernel_norm) +# output = torch.mm(embbedings,kernel_norm) + cos_theta = cos_theta.clamp(-1,1) # for numerical stability + cos_theta_2 = torch.pow(cos_theta, 2) + sin_theta_2 = 1 - cos_theta_2 + sin_theta = torch.sqrt(sin_theta_2) + cos_theta_m = (cos_theta * self.cos_m - sin_theta * self.sin_m) + # this condition keeps theta+m in the range [0, pi] + # 0<=theta+m<=pi + # -m<=theta<=pi-m + cond_v = cos_theta - self.threshold + cond_mask = cond_v <= 0 + keep_val = (cos_theta - self.mm) # when theta not in [0,pi], use cosface instead + cos_theta_m[cond_mask] = keep_val[cond_mask] + output = cos_theta * 1.0 # a little bit 
hacky way to prevent in_place operation on cos_theta + idx_ = torch.arange(0, nB, dtype=torch.long) + output[idx_, label] = cos_theta_m[idx_, label] + output *= self.s # scale up in order to make softmax work, first introduced in normface + return output + + +################################## Cosface head ############################################################# + + +class Am_softmax(Module): + # implementation of additive margin softmax loss in https://arxiv.org/abs/1801.05599 + def __init__(self,embedding_size=512,classnum=51332): + super(Am_softmax, self).__init__() + self.classnum = classnum + self.kernel = Parameter(torch.Tensor(embedding_size,classnum)) + # initial kernel + self.kernel.data.uniform_(-1, 1).renorm_(2,1,1e-5).mul_(1e5) + self.m = 0.35 # additive margin recommended by the paper + self.s = 30. # see normface https://arxiv.org/abs/1704.06369 + + def forward(self,embbedings,label): + kernel_norm = l2_norm(self.kernel,axis=0) + cos_theta = torch.mm(embbedings,kernel_norm) + cos_theta = cos_theta.clamp(-1,1) # for numerical stability + phi = cos_theta - self.m + label = label.view(-1,1) #size=(B,1) + index = cos_theta.data * 0.0 #size=(B,Classnum) + index.scatter_(1,label.data.view(-1,1),1) + index = index.byte() + output = cos_theta * 1.0 + output[index] = phi[index] #only change the correct predicted output + output *= self.s # scale up in order to make softmax work, first introduced in normface + return output + + +if __name__ == "__main__": + cfg = dict(num_layers=50, drop_ratio=0.6, mode='ir', type='Backbone') + backbone = MODELS.build(cfg) + print(backbone) diff --git a/models/networks/common.py b/models/networks/common.py new file mode 100644 index 0000000000000000000000000000000000000000..aad5ef1e0c4cbe45e5d1a249c21292a7e6b2b630 --- /dev/null +++ b/models/networks/common.py @@ -0,0 +1,75 @@ +#-*- coding: utf-8 -*- +import torch +import torch.nn as nn + + +BN_MOMENTUM = 0.1 + + +def point_wise_block(inplanes, outplanes): + return nn.Sequential( + nn.Conv2d(in_channels=inplanes, out_channels=outplanes, kernel_size=1, padding=0, stride=1, bias=False), + nn.BatchNorm2d(outplanes, momentum=BN_MOMENTUM), + nn.ReLU(inplace=True), + ) + + +def conv_block(inplanes, outplanes, kernel_size, stride=1, padding=0): + return nn.Sequential( + nn.Conv2d(in_channels=inplanes, out_channels=outplanes, kernel_size=kernel_size, padding=padding, stride=stride, bias=False), + nn.BatchNorm2d(outplanes, momentum=BN_MOMENTUM), + nn.ReLU(inplace=True) + ) + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +class InceptionBlock(nn.Module): + def __init__(self, inplanes, outplanes, stride=1, pool_size=3): + self.inplanes = inplanes + self.outplanes = outplanes + self.stride = stride + self.pool_size = pool_size + super(InceptionBlock, self).__init__() + + self.pw_block = point_wise_block(self.inplanes, self.outplanes//4) + self.mp_layer = nn.MaxPool2d(kernel_size=self.pool_size, stride=stride, padding=1) + self.conv3_block = conv_block(self.outplanes//4, self.outplanes//4, kernel_size=3, stride=1, padding=1) + self.conv5_block = conv_block(self.outplanes//4, self.outplanes//4, kernel_size=5, stride=1, padding=2) + + def forward(self, x): + x1 = self.pw_block(x) + + x2 = self.pw_block(x) + x2 = self.conv3_block(x2) + + x3 = self.pw_block(x) + x3 = self.conv5_block(x3) + + x4 = self.mp_layer(x) + x4 = self.pw_block(x4) + + x = torch.cat((x1, x2, x3, x4), 
dim=1) + return x + + +class SELayer(nn.Module): + def __init__(self, channel, reduction=16): + super(SELayer, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Sequential( + nn.Linear(channel, channel // reduction, bias=False), + nn.ReLU(inplace=True), + nn.Linear(channel // reduction, channel, bias=False), + nn.Sigmoid() + ) + + def forward(self, x): + b, c, _, _ = x.size() + y = self.avg_pool(x).view(b, c) + y = self.fc(y).view(b, c, 1, 1) + return x * y.expand_as(x) diff --git a/models/networks/efficientNet.py b/models/networks/efficientNet.py new file mode 100644 index 0000000000000000000000000000000000000000..4badc358620a7566e440fa6f4acc6e037cf45749 --- /dev/null +++ b/models/networks/efficientNet.py @@ -0,0 +1,490 @@ +#-*- coding: utf-8 -*- +import math +import re +import collections +from functools import partial + +import torch +import torch.nn as nn +from torch.nn import functional as F +from torch.utils import model_zoo + + +# Parameters for the entire model (stem, all blocks, and head) +GlobalParams = collections.namedtuple('GlobalParams', [ + 'width_coefficient', 'depth_coefficient', 'image_size', 'dropout_rate', + 'num_classes', 'batch_norm_momentum', 'batch_norm_epsilon', + 'drop_connect_rate', 'depth_divisor', 'min_depth', 'include_top', + 'include_hm_decoder', 'head_conv', 'heads', 'num_layers', 'INIT_WEIGHTS', + 'use_c2', 'use_c3', 'use_c4', 'use_c51', 'efpn', 'se_layer', 'tfpn']) + +# Parameters for an individual model block +BlockArgs = collections.namedtuple('BlockArgs', [ + 'num_repeat', 'kernel_size', 'stride', 'expand_ratio', + 'input_filters', 'output_filters', 'se_ratio', 'id_skip']) + +# Set GlobalParams and BlockArgs's defaults +GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields) +BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields) + + +# Swish activation function +if hasattr(nn, 'SiLU'): + Swish = nn.SiLU +else: + # For compatibility with old PyTorch versions + class Swish(nn.Module): + def forward(self, x): + return x * torch.sigmoid(x) + + +def round_filters(filters, global_params): + """Calculate and round number of filters based on width multiplier. + Use width_coefficient, depth_divisor and min_depth of global_params. + Args: + filters (int): Filters number to be calculated. + global_params (namedtuple): Global params of the model. + Returns: + new_filters: New filters number after calculating. + """ + multiplier = global_params.width_coefficient + if not multiplier: + return filters + # TODO: modify the params names. + # maybe the names (width_divisor,min_width) + # are more suitable than (depth_divisor,min_depth). + divisor = global_params.depth_divisor + min_depth = global_params.min_depth + filters *= multiplier + min_depth = min_depth or divisor # pay attention to this line when using min_depth + # follow the formula transferred from official TensorFlow implementation + new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) + if new_filters < 0.9 * filters: # prevent rounding by more than 10% + new_filters += divisor + return int(new_filters) + + +def round_repeats(repeats, global_params): + """Calculate module's repeat number of a block based on depth multiplier. + Use depth_coefficient of global_params. + Args: + repeats (int): num_repeat to be calculated. + global_params (namedtuple): Global params of the model. + Returns: + new repeat: New repeat number after calculating. 
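+ Example (illustrative): with depth_coefficient=1.2, a stage with
+ num_repeat=3 is scaled to int(math.ceil(1.2 * 3)) = 4 repeats.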
+ """ + multiplier = global_params.depth_coefficient + if not multiplier: + return repeats + # follow the formula transferred from official TensorFlow implementation + return int(math.ceil(multiplier * repeats)) + + +def drop_connect(inputs, p, training): + """Drop connect. + Args: + input (tensor: BCWH): Input of this structure. + p (float: 0.0~1.0): Probability of drop connection. + training (bool): The running mode. + Returns: + output: Output after drop connection. + """ + assert 0 <= p <= 1, 'p must be in range of [0,1]' + + if not training: + return inputs + + batch_size = inputs.shape[0] + keep_prob = 1 - p + + # generate binary_tensor mask according to probability (p for 0, 1-p for 1) + random_tensor = keep_prob + random_tensor += torch.rand([batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device) + binary_tensor = torch.floor(random_tensor) + + output = inputs / keep_prob * binary_tensor + return output + + +def get_same_padding_conv2d(image_size=None): + """Chooses static padding if you have specified an image size, and dynamic padding otherwise. + Static padding is necessary for ONNX exporting of models. + Args: + image_size (int or tuple): Size of the image. + Returns: + Conv2dDynamicSamePadding or Conv2dStaticSamePadding. + """ + if image_size is None: + return Conv2dDynamicSamePadding + else: + return partial(Conv2dStaticSamePadding, image_size=image_size) + + +class Conv2dDynamicSamePadding(nn.Conv2d): + """2D Convolutions like TensorFlow, for a dynamic image size. + The padding is operated in forward function by calculating dynamically. + """ + + # Tips for 'SAME' mode padding. + # Given the following: + # i: width or height + # s: stride + # k: kernel size + # d: dilation + # p: padding + # Output after Conv2d: + # o = floor((i+p-((k-1)*d+1))/s+1) + # If o equals i, i = floor((i+p-((k-1)*d+1))/s+1), + # => p = (i-1)*s+((k-1)*d+1)-i + + def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True): + super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias) + self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 + + def forward(self, x): + ih, iw = x.size()[-2:] + kh, kw = self.weight.size()[-2:] + sh, sw = self.stride + oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) # change the output size according to stride ! ! ! + pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) + pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) + if pad_h > 0 or pad_w > 0: + x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]) + return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) + + +class Conv2dStaticSamePadding(nn.Conv2d): + """2D Convolutions like TensorFlow's 'SAME' mode, with the given input image size. + The padding mudule is calculated in construction function, then used in forward. 
+ """ + + # With the same calculation as Conv2dDynamicSamePadding + + def __init__(self, in_channels, out_channels, kernel_size, stride=1, image_size=None, **kwargs): + super().__init__(in_channels, out_channels, kernel_size, stride, **kwargs) + self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 + + # Calculate padding based on image size and save it + assert image_size is not None + ih, iw = (image_size, image_size) if isinstance(image_size, int) else image_size + kh, kw = self.weight.size()[-2:] + sh, sw = self.stride + oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) + pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) + pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) + if pad_h > 0 or pad_w > 0: + self.static_padding = nn.ZeroPad2d((pad_w // 2, pad_w - pad_w // 2, + pad_h // 2, pad_h - pad_h // 2)) + else: + self.static_padding = nn.Identity() + + def forward(self, x): + x = self.static_padding(x) + x = F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) + return x + + +def get_model_params(model_name, override_params): + """Get the block args and global params for a given model name. + Args: + model_name (str): Model's name. + override_params (dict): A dict to modify global_params. + Returns: + blocks_args, global_params + """ + if model_name.startswith('efficientnet'): + w, d, s, p = efficientnet_params(model_name) + # note: all models have drop connect rate = 0.2 + blocks_args, global_params = efficientnet( + width_coefficient=w, depth_coefficient=d, dropout_rate=p, image_size=s) + else: + raise NotImplementedError('model name is not pre-defined: {}'.format(model_name)) + if override_params: + # ValueError will be raised here if override_params has fields not included in global_params. + global_params = global_params._replace(**override_params) + return blocks_args, global_params + + +def efficientnet_params(model_name): + """Map EfficientNet model name to parameter coefficients. + Args: + model_name (str): Model name to be queried. + Returns: + params_dict[model_name]: A (width,depth,res,dropout) tuple. + """ + params_dict = { + # Coefficients: width,depth,res,dropout + 'efficientnet-b0': (1.0, 1.0, 224, 0.2), + 'efficientnet-b1': (1.0, 1.1, 240, 0.2), + 'efficientnet-b2': (1.1, 1.2, 260, 0.3), + 'efficientnet-b3': (1.2, 1.4, 300, 0.3), + 'efficientnet-b4': (1.4, 1.8, 380, 0.4), + 'efficientnet-b5': (1.6, 2.2, 456, 0.4), + 'efficientnet-b6': (1.8, 2.6, 528, 0.5), + 'efficientnet-b7': (2.0, 3.1, 600, 0.5), + 'efficientnet-b8': (2.2, 3.6, 672, 0.5), + 'efficientnet-l2': (4.3, 5.3, 800, 0.5), + } + return params_dict[model_name] + + +def efficientnet(width_coefficient=None, depth_coefficient=None, image_size=None, + dropout_rate=0.2, drop_connect_rate=0.2, num_classes=1000, + include_top=True, include_hm_decoder=False, head_conv=None, + heads=None, use_c2=False, use_c3=False, use_c4=False, use_c51=False, + num_layers=None, INIT_WEIGHTS=None, efpn=False, se_layer=False, tfpn=False): + """Create BlockArgs and GlobalParams for efficientnet model. + Args: + width_coefficient (float) + depth_coefficient (float) + image_size (int) + dropout_rate (float) + drop_connect_rate (float) + num_classes (int) + Meaning as the name suggests. + Returns: + blocks_args, global_params. 
+ """ + + # Blocks args for the whole model(efficientnet-b0 by default) + # It will be modified in the construction of EfficientNet Class according to model + blocks_args = [ + 'r1_k3_s11_e1_i32_o16_se0.25', + 'r2_k3_s22_e6_i16_o24_se0.25', + 'r2_k5_s22_e6_i24_o40_se0.25', + 'r3_k3_s22_e6_i40_o80_se0.25', + 'r3_k5_s11_e6_i80_o112_se0.25', + 'r4_k5_s22_e6_i112_o192_se0.25', + 'r1_k3_s11_e6_i192_o320_se0.25', + ] + blocks_args = BlockDecoder.decode(blocks_args) + + global_params = GlobalParams( + width_coefficient=width_coefficient, + depth_coefficient=depth_coefficient, + image_size=image_size, + dropout_rate=dropout_rate, + + num_classes=num_classes, + batch_norm_momentum=0.99, + batch_norm_epsilon=1e-3, + drop_connect_rate=drop_connect_rate, + depth_divisor=8, + min_depth=None, + include_top=include_top, + include_hm_decoder=include_hm_decoder, + head_conv=head_conv, + heads=heads, + use_c2=use_c2, + use_c3=use_c3, + use_c4=use_c4, + use_c51=use_c51, + efpn=efpn, + tfpn=tfpn, + se_layer=se_layer, + num_layers=num_layers, + INIT_WEIGHTS=INIT_WEIGHTS + ) + + return blocks_args, global_params + + +class BlockDecoder(object): + """Block Decoder for readability, + straight from the official TensorFlow repository. + """ + + @staticmethod + def _decode_block_string(block_string): + """Get a block through a string notation of arguments. + Args: + block_string (str): A string notation of arguments. + Examples: 'r1_k3_s11_e1_i32_o16_se0.25_noskip'. + Returns: + BlockArgs: The namedtuple defined at the top of this file. + """ + assert isinstance(block_string, str) + + ops = block_string.split('_') + options = {} + for op in ops: + splits = re.split(r'(\d.*)', op) + if len(splits) >= 2: + key, value = splits[:2] + options[key] = value + + # Check stride + assert (('s' in options and len(options['s']) == 1) or + (len(options['s']) == 2 and options['s'][0] == options['s'][1])) + + return BlockArgs( + num_repeat=int(options['r']), + kernel_size=int(options['k']), + stride=[int(options['s'][0])], + expand_ratio=int(options['e']), + input_filters=int(options['i']), + output_filters=int(options['o']), + se_ratio=float(options['se']) if 'se' in options else None, + id_skip=('noskip' not in block_string)) + + @staticmethod + def _encode_block_string(block): + """Encode a block to a string. + Args: + block (namedtuple): A BlockArgs type argument. + Returns: + block_string: A String form of BlockArgs. + """ + args = [ + 'r%d' % block.num_repeat, + 'k%d' % block.kernel_size, + 's%d%d' % (block.strides[0], block.strides[1]), + 'e%s' % block.expand_ratio, + 'i%d' % block.input_filters, + 'o%d' % block.output_filters + ] + if 0 < block.se_ratio <= 1: + args.append('se%s' % block.se_ratio) + if block.id_skip is False: + args.append('noskip') + return '_'.join(args) + + @staticmethod + def decode(string_list): + """Decode a list of string notations to specify blocks inside the network. + Args: + string_list (list[str]): A list of strings, each string is a notation of block. + Returns: + blocks_args: A list of BlockArgs namedtuples of block args. + """ + assert isinstance(string_list, list) + blocks_args = [] + for block_string in string_list: + blocks_args.append(BlockDecoder._decode_block_string(block_string)) + return blocks_args + + @staticmethod + def encode(blocks_args): + """Encode a list of BlockArgs to a list of strings. + Args: + blocks_args (list[namedtuples]): A list of BlockArgs namedtuples of block args. + Returns: + block_strings: A list of strings, each string is a notation of block. 
+ """ + block_strings = [] + for block in blocks_args: + block_strings.append(BlockDecoder._encode_block_string(block)) + return block_strings + + +class SwishImplementation(torch.autograd.Function): + @staticmethod + def forward(ctx, i): + result = i * torch.sigmoid(i) + ctx.save_for_backward(i) + return result + + @staticmethod + def backward(ctx, grad_output): + i = ctx.saved_tensors[0] + sigmoid_i = torch.sigmoid(i) + return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i))) + + +def get_width_and_height_from_size(x): + """Obtain height and width from x. + Args: + x (int, tuple or list): Data size. + Returns: + size: A tuple or list (H,W). + """ + if isinstance(x, int): + return x, x + if isinstance(x, list) or isinstance(x, tuple): + return x + else: + raise TypeError() + + +def calculate_output_image_size(input_image_size, stride): + """Calculates the output image size when using Conv2dSamePadding with a stride. + Necessary for static padding. Thanks to mannatsingh for pointing this out. + Args: + input_image_size (int, tuple or list): Size of input image. + stride (int, tuple or list): Conv2d operation's stride. + Returns: + output_image_size: A list [H,W]. + """ + if input_image_size is None: + return None + image_height, image_width = get_width_and_height_from_size(input_image_size) + stride = stride if isinstance(stride, int) else stride[0] + image_height = int(math.ceil(image_height / stride)) + image_width = int(math.ceil(image_width / stride)) + return [image_height, image_width] + + +class MemoryEfficientSwish(nn.Module): + def forward(self, x): + return SwishImplementation.apply(x) + + +url_map_advprop = { + 'efficientnet-b0': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b0-b64d5a18.pth', + 'efficientnet-b1': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b1-0f3ce85a.pth', + 'efficientnet-b2': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b2-6e9d97e5.pth', + 'efficientnet-b3': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b3-cdd7c0f4.pth', + 'efficientnet-b4': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b4-44fb3a87.pth', + 'efficientnet-b5': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b5-86493f6b.pth', + 'efficientnet-b6': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b6-ac80338e.pth', + 'efficientnet-b7': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b7-4652b6dd.pth', + 'efficientnet-b8': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/adv-efficientnet-b8-22a8fe65.pth', +} + + +url_map = { + 'efficientnet-b0': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth', + 'efficientnet-b1': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b1-f1951068.pth', + 'efficientnet-b2': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b2-8bb594d6.pth', + 'efficientnet-b3': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b3-5fb5a3c3.pth', + 'efficientnet-b4': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b4-6ed6700e.pth', + 'efficientnet-b5': 
'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b5-b6417697.pth', + 'efficientnet-b6': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b6-c76e70fd.pth', + 'efficientnet-b7': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b7-dcc49843.pth', +} + + +def load_pretrained_weights(model, model_name, weights_path=None, load_fc=True, advprop=False, verbose=True): + """Loads pretrained weights from a weights path or downloads them using a url. + Args: + model (Module): The whole model of efficientnet. + model_name (str): Model name of efficientnet. + weights_path (None or str): + str: path to pretrained weights file on the local disk. + None: use pretrained weights downloaded from the Internet. + load_fc (bool): Whether to load pretrained weights for fc layer at the end of the model. + advprop (bool): Whether to load pretrained weights + trained with advprop (valid when weights_path is None). + """ + if isinstance(weights_path, str): + state_dict = torch.load(weights_path) + else: + # AutoAugment or Advprop (different preprocessing) + url_map_ = url_map_advprop if advprop else url_map + state_dict = model_zoo.load_url(url_map_[model_name]) + + if load_fc: + ret = model.load_state_dict(state_dict, strict=False) + assert not ret.missing_keys, 'Missing keys when loading pretrained weights: {}'.format(ret.missing_keys) + else: + state_dict.pop('_fc.weight') + state_dict.pop('_fc.bias') + ret = model.load_state_dict(state_dict, strict=False) + + # if len(ret.missing_keys): + # assert set(ret.missing_keys) == set( + # ['_fc.weight', '_fc.bias']), 'Missing keys when loading pretrained weights: {}'.format(ret.missing_keys) + assert not ret.unexpected_keys, 'Unexpected keys when loading pretrained weights: {}'.format(ret.unexpected_keys) + + if verbose: + print('Loaded pretrained weights for {}'.format(model_name)) diff --git a/models/networks/mrsa_resnet.py b/models/networks/mrsa_resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..a4af5ef36b65a488385b466a3dce6f20f276377a --- /dev/null +++ b/models/networks/mrsa_resnet.py @@ -0,0 +1,464 @@ +#-*- coding: utf-8 -*- +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import math + +import torch +import torch.nn as nn +from torch.nn.modules.activation import ReLU +from torch.nn.modules.batchnorm import BatchNorm2d +from torch.nn.modules.pooling import MaxPool2d +import torch.utils.model_zoo as model_zoo + +from ..builder import MODELS, build_model +from .common import ( + BN_MOMENTUM, + conv_block, + point_wise_block, + InceptionBlock, +) + + +model_urls = { + 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', + 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', + 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', + 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', +} + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes,
momentum=BN_MOMENTUM) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + @staticmethod + def __repr__(): + return 'BasicBlock' + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) + self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, + bias=False) + self.bn3 = nn.BatchNorm2d(planes * self.expansion, + momentum=BN_MOMENTUM) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + @staticmethod + def __repr__(): + return 'Bottleneck' + + +@MODELS.register_module() +class PoseResNet(nn.Module): + def __init__(self, + block, + layers, + heads, + head_conv, + dropout_prob, + fpn=False, + cls_based_hm=True, + use_c2=False, + **kwargs): + self.inplanes = 64 + self.deconv_with_bias = False + self.heads = heads + self.fpn = fpn + self.cls_based_hm= cls_based_hm + self.use_c2 = use_c2 + + #Convert Cls name into Cls Object + if isinstance(block, str): + for bl in [BasicBlock, Bottleneck]: + if block == bl.__repr__(): + block = bl + + for k, v in kwargs.items(): + if v is None: + raise ValueError(f'The {k} argument receive a None value, Please check!') + self.__setattr__(k, v) + + super(PoseResNet, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + + # Custom dropout layer + self.dropout_layer = nn.Dropout(dropout_prob) + + if self.fpn: + # Adding sidmoid layer + self.sigmoid_layer = nn.Sigmoid() + + # Adding pointwise block + self.pw_block_1 = self._point_wise_block(2048, 1024) + + # used for deconv layers + deconv_filters = [256, 128, 256] if self.fpn else [256, 256, 256] + self.deconv_layers = self._make_deconv_layer( + 3, + deconv_filters, + [4, 4, 4], + ) + + # Adding inception block + if self.fpn: + for idx, deconv_layer in enumerate(self.deconv_layers): + self.__setattr__(f'deconv_layer_{idx}', nn.Sequential(deconv_layer)) + self.pw_block_2 = self._point_wise_block(512, 512) + if self.use_c2: + self.pw_block_3 = self._point_wise_block(512, 256) + 
self.pw_block_c3 = self._point_wise_block(1024, 256) + self.pw_block_c2 = self._point_wise_block(512, 128) + self.inception_block = InceptionBlock(256, 256, stride=1, pool_size=3) + + for head in sorted(self.heads): + num_output = self.heads[head] + if head_conv > 0: + if head != 'cls': + fc = nn.Sequential( + nn.Conv2d(256, head_conv, + kernel_size=3, padding=1, bias=True), + nn.BatchNorm2d(head_conv), + nn.ReLU(inplace=True), + nn.Conv2d(head_conv, num_output, + kernel_size=1, stride=1, padding=0) + ) + else: + if self.cls_based_hm: + fc = nn.Sequential( + nn.AdaptiveMaxPool2d(head_conv//4), + nn.Flatten(), + nn.Linear(num_output*((head_conv//4)**2), head_conv, bias=True), + nn.BatchNorm1d(head_conv, momentum=BN_MOMENTUM), + nn.ReLU(inplace=True), + nn.Linear(head_conv, 1, bias=True), + nn.Sigmoid() + ) + else: + fc = nn.Sequential( + nn.Conv2d(256, head_conv, kernel_size=3, + padding=1, bias=True), + nn.BatchNorm2d(head_conv, momentum=BN_MOMENTUM), + nn.ReLU(inplace=True), + # nn.Conv2d(head_conv, num_output, kernel_size=1, + # stride=1, padding=0, bias=True), + # nn.BatchNorm2d(num_output), + # nn.ReLU(inplace=True), + # nn.AdaptiveMaxPool2d(head_conv//4), + nn.AdaptiveAvgPool2d(1), + nn.Flatten(), + # nn.Linear((head_conv//4)**2, head_conv, bias=True), + # nn.BatchNorm1d(head_conv, momentum=BN_MOMENTUM), + # nn.ReLU(inplace=True), + nn.Linear(head_conv, 1, bias=True), + # nn.Sigmoid() + ) + else: + fc = nn.Conv2d( + in_channels=256, + out_channels=num_output, + kernel_size=1, + stride=1, + padding=0 + ) + self.__setattr__(head, fc) + + def _point_wise_block(self, inplanes, outplanes): + self.inplanes = outplanes + module = point_wise_block(inplanes, outplanes) + return module + + def _conv_block(self, inplanes, outplanes, kernel_size, stride=1): + self.inplanes = outplanes + module = conv_block(inplanes, outplanes, kernel_size=kernel_size, stride=stride) + return module + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def _get_deconv_cfg(self, deconv_kernel, index): + if deconv_kernel == 4: + padding = 1 + output_padding = 0 + elif deconv_kernel == 3: + padding = 1 + output_padding = 1 + elif deconv_kernel == 2: + padding = 0 + output_padding = 0 + + return deconv_kernel, padding, output_padding + + def _make_deconv_layer(self, num_layers, num_filters, num_kernels): + assert num_layers == len(num_filters), \ + 'ERROR: num_deconv_layers is different len(num_deconv_filters)' + assert num_layers == len(num_kernels), \ + 'ERROR: num_deconv_layers is different len(num_deconv_filters)' + + layers = [] + for i in range(num_layers): + kernel, padding, output_padding = \ + self._get_deconv_cfg(num_kernels[i], i) + + planes = num_filters[i] + layers.append(nn.Sequential( + nn.ConvTranspose2d( + in_channels=self.inplanes, + out_channels=planes, + kernel_size=kernel, + stride=2, + padding=padding, + output_padding=output_padding, + bias=self.deconv_with_bias), + nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) + ) + if (not self.fpn): + layers.append(nn.ReLU(inplace=True)) + + 
self.inplanes = planes if not self.fpn else planes * 2 + + if self.fpn: + return layers + else: + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x1 = self.layer1(x) #256 x 64 x 64 + x2 = self.layer2(x1) #512 x 32 x 32 + x3 = self.layer3(x2) #1024 x 16 x 16 + x4 = self.layer4(x3) #2048 x 8 x 8 + + # Custom dropout layer + x = self.dropout_layer(x4) #B x 8 x 8 x 2048 + x3 = self.dropout_layer(x3) + x2 = self.dropout_layer(x2) + x1 = self.dropout_layer(x1) + + # Custom FPN + if self.fpn: + assert isinstance(self.deconv_layers, list), "To custom FPN, decompose deconv layers as a list!" + x = self.pw_block_1(x) # B x 1024 x 8 x 8 + x = self.deconv_layer_0(x) # B x 256 x 16 x 16 + # x = self.relu(x) # B x 256 x 16 x 16 + + x_weighted = self.sigmoid_layer(x) # B x 256 x 16 x 16 + x_inverse = torch.sub(1, x_weighted, alpha=1) # B x 256 x 16 x 16 + x3 = self.pw_block_c3(x3) #B x 256 x 16 x 16 + x3_ = torch.multiply(x3, x_inverse) #B x 256 x 16 x 16 + x = torch.cat((x, x3_), dim=1) #B x 512 x 16 x 16 + + x = self.pw_block_2(x) #B x 512 x 16 x 16 + x = self.deconv_layer_1(x) #B x 128 x 32 x 32 + # x = self.relu(x) #B x 128 x 32 x 32 + + x_weighted = self.sigmoid_layer(x) #B x 128 x 32 x 32 + x_inverse = torch.sub(1, x_weighted, alpha=1) #B x 128 x 32 x 32 + x2 = self.pw_block_c2(x2) + x2_ = torch.multiply(x2, x_inverse) #B x 128 x 32 x 32 + x = torch.cat((x, x2_), dim=1) #B x 256 x 32 x 32 + + x = self.inception_block(x) #B x 256 x 64 x 64 + x = self.deconv_layer_2(x) #B x 256 x 64 x 64 + + if self.use_c2: + x_weighted = self.sigmoid_layer(x) + x_inverse = torch.sub(1, x_weighted, alpha=1) + x1_ = torch.multiply(x1, x_inverse) + x = torch.cat((x, x1_), dim=1) + x = self.pw_block_3(x) + else: + x = self.relu(x) #B x 256 x 64 x 64 + else: + assert isinstance(self.deconv_layers, nn.Module), "Deconv Layer must be nn Module to compute!" 
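+ # Plain decoder path (no FPN): three stride-2 ConvTranspose2d stages
+ # upsample the dropout-regularized 8x8 backbone features to 64x64
+ # before the per-head output layers below.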
+ x = self.deconv_layers(x) + + ret = {} + x1_hm = None + for head in self.heads: + if self.cls_based_hm and head == 'cls' and x1_hm is not None: + x = x1_hm + elif head == 'hm': + x1_hm = x + + ret[head] = self.__getattr__(head)(x) + + return [ret] + + def init_weights(self, pretrained=True, **kwargs): + num_layers = kwargs.get('num_layers') + if pretrained: + if self.fpn: + for bl in [self.pw_block_1, self.pw_block_2]: + for _, l in bl.named_parameters(): + if isinstance(l, nn.Conv2d): + nn.init.normal_(l.weight, std=0.001) + nn.init.constant_(l.bias, 0) + + for _, l in self.inception_block.named_parameters(): + if isinstance(l, nn.Conv2d): + nn.init.normal_(l.weight, std=0.001) + nn.init.constant_(l.bias, 0) + + # print('=> init resnet deconv weights from normal distribution') + if isinstance(self.deconv_layers, nn.Module): + for _, m in self.deconv_layers.named_modules(): + if isinstance(m, nn.ConvTranspose2d): + # print('=> init {}.weight as normal(0, 0.001)'.format(name)) + # print('=> init {}.bias as 0'.format(name)) + nn.init.normal_(m.weight, std=0.001) + if self.deconv_with_bias: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): + # print('=> init {}.weight as 1'.format(name)) + # print('=> init {}.bias as 0'.format(name)) + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + else: + for layer in [self.deconv_layer_0, self.deconv_layer_1, self.deconv_layer_2]: + for _, m in layer.named_modules(): + if isinstance(m, nn.ConvTranspose2d): + # print('=> init {}.weight as normal(0, 0.001)'.format(name)) + # print('=> init {}.bias as 0'.format(name)) + nn.init.normal_(m.weight, std=0.001) + if self.deconv_with_bias: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): + # print('=> init {}.weight as 1'.format(name)) + # print('=> init {}.bias as 0'.format(name)) + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + # print('=> init final conv weights from normal distribution') + for head in self.heads: + final_layer = self.__getattr__(head) + for i, m in enumerate(final_layer.modules()): + if isinstance(m, nn.Conv2d): + # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + # print('=> init {}.weight as normal(0, 0.001)'.format(name)) + # print('=> init {}.bias as 0'.format(name)) + if m.weight.shape[0] == self.heads[head]: + if 'hm' in head: + nn.init.constant_(m.bias, -2.19) + else: + nn.init.normal_(m.weight, std=0.001) + nn.init.constant_(m.bias, 0) + # if isinstance(m, nn.Linear): + # if m.weight.shape[0] == self.heads[head]: + # prior = 1/71 + # nn.init.constant_(m.bias, -math.log((1-prior)/prior)) + # else: + # nn.init.normal_(m.weight, std=0.001) + # nn.init.constant_(m.bias, 0) + + #pretrained_state_dict = torch.load(pretrained) + url = model_urls['resnet{}'.format(num_layers)] + pretrained_state_dict = model_zoo.load_url(url) + print('=> loading pretrained model {}'.format(url)) + self.load_state_dict(pretrained_state_dict, strict=False) + else: + print('=> imagenet pretrained model dose not exist') + print('=> please download it first') + raise ValueError('imagenet pretrained model does not exist') + + +resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]), + 34: (BasicBlock, [3, 4, 6, 3]), + 50: (Bottleneck, [3, 4, 6, 3]), + 101: (Bottleneck, [3, 4, 23, 3]), + 152: (Bottleneck, [3, 8, 36, 3])} diff --git a/models/networks/pose_efficientNet.py b/models/networks/pose_efficientNet.py new file mode 100644 index 0000000000000000000000000000000000000000..0d6c7c578e51d2d77da4128cf62c36f56b4f062a --- /dev/null 
+++ b/models/networks/pose_efficientNet.py @@ -0,0 +1,788 @@ +#-*- coding: utf-8 -*- +import math +import sys +import os +if not os.getcwd() in sys.path: + sys.path.append(os.getcwd()) + +import torch +from torch import nn +from torch.nn import functional as F +from torch.utils import model_zoo + +from ..builder import MODELS, build_model +from .efficientNet import ( + round_filters, + round_repeats, + drop_connect, + get_same_padding_conv2d, + get_model_params, + efficientnet_params, + load_pretrained_weights, + Swish, + MemoryEfficientSwish, + calculate_output_image_size, + url_map_advprop, + url_map +) +from .common import ( + InceptionBlock, + conv_block, + BN_MOMENTUM, + SELayer +) + + +VALID_MODELS = ( + 'efficientnet-b0', 'efficientnet-b1', 'efficientnet-b2', 'efficientnet-b3', + 'efficientnet-b4', 'efficientnet-b5', 'efficientnet-b6', 'efficientnet-b7', + 'efficientnet-b8', + + # Support the construction of 'efficientnet-l2' without pretrained weights + 'efficientnet-l2' +) + + +class MBConvBlock(nn.Module): + """Mobile Inverted Residual Bottleneck Block. + Args: + block_args (namedtuple): BlockArgs, defined in utils.py. + global_params (namedtuple): GlobalParam, defined in utils.py. + image_size (tuple or list): [image_height, image_width]. + References: + [1] https://arxiv.org/abs/1704.04861 (MobileNet v1) + [2] https://arxiv.org/abs/1801.04381 (MobileNet v2) + [3] https://arxiv.org/abs/1905.02244 (MobileNet v3) + """ + + def __init__(self, block_args, global_params, image_size=None): + super().__init__() + self._block_args = block_args + self._bn_mom = 1 - global_params.batch_norm_momentum # pytorch's difference from tensorflow + self._bn_eps = global_params.batch_norm_epsilon + self.has_se = (self._block_args.se_ratio is not None) and (0 < self._block_args.se_ratio <= 1) + self.id_skip = block_args.id_skip # whether to use skip connection and drop connect + + # Expansion phase (Inverted Bottleneck) + inp = self._block_args.input_filters # number of input channels + oup = self._block_args.input_filters * self._block_args.expand_ratio # number of output channels + if self._block_args.expand_ratio != 1: + Conv2d = get_same_padding_conv2d(image_size=image_size) + self._expand_conv = Conv2d(in_channels=inp, out_channels=oup, kernel_size=1, bias=False) + self._bn0 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps) + # image_size = calculate_output_image_size(image_size, 1) <-- this wouldn't modify image_size + + # Depthwise convolution phase + k = self._block_args.kernel_size + s = self._block_args.stride + Conv2d = get_same_padding_conv2d(image_size=image_size) + self._depthwise_conv = Conv2d( + in_channels=oup, out_channels=oup, groups=oup, # groups makes it depthwise + kernel_size=k, stride=s, bias=False) + self._bn1 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps) + image_size = calculate_output_image_size(image_size, s) + + # Squeeze and Excitation layer, if desired + if self.has_se: + Conv2d = get_same_padding_conv2d(image_size=(1, 1)) + num_squeezed_channels = max(1, int(self._block_args.input_filters * self._block_args.se_ratio)) + self._se_reduce = Conv2d(in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1) + self._se_expand = Conv2d(in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1) + + # Pointwise convolution phase + final_oup = self._block_args.output_filters + Conv2d = get_same_padding_conv2d(image_size=image_size) + self._project_conv = Conv2d(in_channels=oup, out_channels=final_oup, 
kernel_size=1, bias=False) + self._bn2 = nn.BatchNorm2d(num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps) + self._swish = MemoryEfficientSwish() + + def forward(self, inputs, drop_connect_rate=None): + """MBConvBlock's forward function. + Args: + inputs (tensor): Input tensor. + drop_connect_rate (bool): Drop connect rate (float, between 0 and 1). + Returns: + Output of this block after processing. + """ + + # Expansion and Depthwise Convolution + x = inputs + if self._block_args.expand_ratio != 1: + x = self._expand_conv(inputs) + x = self._bn0(x) + x = self._swish(x) + + x = self._depthwise_conv(x) + x = self._bn1(x) + x = self._swish(x) + + # Squeeze and Excitation + if self.has_se: + x_squeezed = F.adaptive_avg_pool2d(x, 1) + x_squeezed = self._se_reduce(x_squeezed) + x_squeezed = self._swish(x_squeezed) + x_squeezed = self._se_expand(x_squeezed) + x = torch.sigmoid(x_squeezed) * x + + # Pointwise Convolution + x = self._project_conv(x) + x = self._bn2(x) + + # Skip connection and drop connect + input_filters, output_filters = self._block_args.input_filters, self._block_args.output_filters + if self.id_skip and self._block_args.stride == 1 and input_filters == output_filters: + # The combination of skip connection and drop connect brings about stochastic depth. + if drop_connect_rate: + x = drop_connect(x, p=drop_connect_rate, training=self.training) + x = x + inputs # skip connection + return x + + def set_swish(self, memory_efficient=True): + """Sets swish function as memory efficient (for training) or standard (for export). + Args: + memory_efficient (bool): Whether to use memory-efficient version of swish. + """ + self._swish = MemoryEfficientSwish() if memory_efficient else Swish() + + +@MODELS.register_module() +class EfficientNet(nn.Module): + """EfficientNet model. + Most easily loaded with the .from_name or .from_pretrained methods. + Args: + blocks_args (list[namedtuple]): A list of BlockArgs to construct blocks. + global_params (namedtuple): A set of GlobalParams shared between blocks. + References: + [1] https://arxiv.org/abs/1905.11946 (EfficientNet) + Example: + >>> import torch + >>> from efficientnet.model import EfficientNet + >>> inputs = torch.rand(1, 3, 224, 224) + >>> model = EfficientNet.from_pretrained('efficientnet-b0') + >>> model.eval() + >>> outputs = model(inputs) + """ + + def __init__(self, blocks_args=None, global_params=None): + super().__init__() + assert isinstance(blocks_args, list), 'blocks_args should be a list' + assert len(blocks_args) > 0, 'block args must be greater than 0' + self._global_params = global_params + self._blocks_args = blocks_args + + # Batch norm parameters + bn_mom = 1 - self._global_params.batch_norm_momentum + bn_eps = self._global_params.batch_norm_epsilon + + # Get stem static or dynamic convolution depending on image size + image_size = global_params.image_size + Conv2d = get_same_padding_conv2d(image_size=image_size) + + # Stem + in_channels = 3 # rgb + out_channels = round_filters(32, self._global_params) # number of output channels + self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False) + self._bn0 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps) + image_size = calculate_output_image_size(image_size, 2) + + # Build blocks + self._blocks = nn.ModuleList([]) + for block_args in self._blocks_args: + + # Update block input and output filters based on depth multiplier. 
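+ # Illustrative numbers: with width_coefficient=1.4 (efficientnet-b4),
+ # an i32_o16 stage is rounded to i48_o24, i.e. to multiples of
+ # depth_divisor=8 via round_filters above.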
+ block_args = block_args._replace( + input_filters=round_filters(block_args.input_filters, self._global_params), + output_filters=round_filters(block_args.output_filters, self._global_params), + num_repeat=round_repeats(block_args.num_repeat, self._global_params) + ) + + # The first block needs to take care of stride and filter size increase. + self._blocks.append(MBConvBlock(block_args, self._global_params, image_size=image_size)) + image_size = calculate_output_image_size(image_size, block_args.stride) + if block_args.num_repeat > 1: # modify block_args to keep same output size + block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) + for _ in range(block_args.num_repeat - 1): + self._blocks.append(MBConvBlock(block_args, self._global_params, image_size=image_size)) + # image_size = calculate_output_image_size(image_size, block_args.stride) # stride = 1 + + # Head + in_channels = block_args.output_filters # output of final block + out_channels = round_filters(1280, self._global_params) + Conv2d = get_same_padding_conv2d(image_size=image_size) + self._conv_head = Conv2d(in_channels, out_channels, kernel_size=1, bias=False) + self._bn1 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps) + + # Final linear layer + self._avg_pooling = nn.AdaptiveAvgPool2d(1) + if self._global_params.include_top: + self._dropout = nn.Dropout(self._global_params.dropout_rate) + self._fc = nn.Linear(out_channels, self._global_params.num_classes) + + # Heatmap Decoder Construction + if self._global_params.include_hm_decoder: + print("Constructing the heatmap Decoder!") + self.efpn = self._global_params.efpn + self.tfpn = self._global_params.tfpn + + assert not (self.efpn and self.tfpn), "Only one of E-FPN or FPN is intergrated!" 
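+ # Decoder channel schedule is 1792 -> 448 -> 160 -> 56. In the E-FPN/FPN
+ # variants below, every stage but the first ends in a stride-2
+ # ConvTranspose2d and encoder features (C5/C4/C3/C2) are fused between
+ # stages; the plain decoder instead chains three stride-2 deconvs.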
+ + self.se_layer = self._global_params.se_layer + # self.hm_decoder_filters = [1792, 448, 160, 56] if self.fpn else [1792, 256, 256, 128] + self.hm_decoder_filters = [1792, 448, 160, 56] + num_kernels = [4, 4, 4, 4] if (self.efpn or self.tfpn) else [4, 4, 4] + self._dropout = nn.Dropout(self._global_params.dropout_rate) + self._sigmoid = nn.Sigmoid() + self._relu = nn.ReLU(inplace=True) + self._relu1 = nn.ReLU(inplace=False) + self.deconv_with_bias = False + if self._global_params.use_c3: + self.inception_block = InceptionBlock(112, 112, stride=1, pool_size=3) + else: + self.inception_block = InceptionBlock(56, 56, stride=1, pool_size=3) + self.heads = self._global_params.heads + n_deconv = len(self.hm_decoder_filters) + self.fpn_layers = [self._global_params.use_c51, self._global_params.use_c4, self._global_params.use_c3] + + if self.efpn or self.tfpn: + for idx in range(n_deconv): + in_decod_filters = self.hm_decoder_filters[idx] + + if idx == 0: + out_decod_filters = self.hm_decoder_filters[idx+1] + deconv = nn.Sequential( + conv_block(in_decod_filters, out_decod_filters, (3,3), stride=1, padding=1), + ) + else: + in_decod_filters = in_decod_filters*2 if self.fpn_layers[idx-1] else in_decod_filters + kernel, padding, output_padding = self._get_deconv_cfg(num_kernels[idx]) + + if idx+1 < n_deconv: + out_decod_filters = self.hm_decoder_filters[idx+1] + deconv = nn.Sequential( + conv_block(in_decod_filters, out_decod_filters, (3,3), stride=1, padding=1), + nn.ConvTranspose2d( + in_channels=out_decod_filters, + out_channels=out_decod_filters, + kernel_size=kernel, + stride=2, + padding=padding, + output_padding=output_padding, + bias=self.deconv_with_bias), + nn.BatchNorm2d(out_decod_filters, momentum=BN_MOMENTUM), + ) + else: + out_decod_filters = in_decod_filters + deconv = nn.Sequential( + self.inception_block, + nn.ConvTranspose2d( + in_channels=out_decod_filters, + out_channels=out_decod_filters, + kernel_size=kernel, + stride=2, + padding=padding, + output_padding=output_padding, + bias=self.deconv_with_bias), + nn.BatchNorm2d(out_decod_filters, momentum=BN_MOMENTUM), + ) + + # In case of using C2, this conv to apply to C2 features to get the same filters of the last deconv + if self._global_params.use_c2: + self.conv_c2 = conv_block(32, out_decod_filters, (3,3), stride=1, padding=1) + if self.se_layer: + se = SELayer(channel=out_decod_filters*2) + self.__setattr__(f'se_layer_{idx+1}', se) + + self.__setattr__(f'deconv_{idx+1}', deconv) + else: + self.deconv_layers = self._make_deconv_layer( + len(num_kernels), + self.hm_decoder_filters, + num_kernels, + ) + + for head, num_output in self.heads.items(): + head_conv = int(self._global_params.head_conv) + num_output = int(num_output) + if self._global_params.use_c2: + assert self._global_params.efpn or self._global_params.tfpn, "FPN Design must be set active!" 
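+ # With C2 fused, the heads consume 56 * 4 = 224 input channels; the
+ # elif/else below give 56 * 2 = 112 with C3 only and 56 otherwise.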
+ assert self._global_params.use_c3, "C3 must be utilized for FPN intergration of C2" + in_head_filters = self.hm_decoder_filters[-1]*4 + elif self._global_params.use_c3: + in_head_filters = self.hm_decoder_filters[-1]*2 + else: + in_head_filters = self.hm_decoder_filters[-1] + + if head_conv > 0: + if head != 'cls': + fc = nn.Sequential( + nn.Conv2d(in_head_filters, head_conv, + kernel_size=3, padding=1, bias=True), + nn.BatchNorm2d(head_conv), + nn.ReLU(inplace=True), + nn.Conv2d(head_conv, num_output, + kernel_size=1, stride=1, padding=0) + ) + else: + fc = nn.Sequential( + nn.Conv2d(in_head_filters, head_conv, kernel_size=3, + padding=1, bias=True), + nn.BatchNorm2d(head_conv, momentum=BN_MOMENTUM), + nn.ReLU(inplace=True), + # nn.Conv2d(head_conv, num_output, kernel_size=1, + # stride=1, padding=0, bias=True), + # nn.BatchNorm2d(num_output), + # nn.ReLU(inplace=True), + # nn.AdaptiveMaxPool2d(head_conv//4), + nn.AdaptiveAvgPool2d(1), + nn.Flatten(), + # nn.Linear((head_conv//4)**2, head_conv, bias=True), + # nn.BatchNorm1d(head_conv, momentum=BN_MOMENTUM), + # nn.ReLU(inplace=True), + nn.Linear(head_conv, num_output, bias=True), + # nn.Sigmoid(), + # nn.Softmax(dim=-1) + ) + else: + fc = nn.Conv2d( + in_channels=in_head_filters, + out_channels=num_output, + kernel_size=1, + stride=1, + padding=0 + ) + self.__setattr__(head, fc) + + # set activation to memory efficient swish by default + self._swish = MemoryEfficientSwish() + + def _get_deconv_cfg(self, deconv_kernel): + if deconv_kernel == 4: + padding = 1 + output_padding = 0 + elif deconv_kernel == 3: + padding = 1 + output_padding = 1 + elif deconv_kernel == 2: + padding = 0 + output_padding = 0 + + return deconv_kernel, padding, output_padding + + def _make_deconv_layer(self, num_layers, num_filters, num_kernels): + assert num_layers == (len(num_filters) - 1), \ + 'ERROR: num_deconv_layers is different len(num_deconv_filters)' + assert num_layers == len(num_kernels), \ + 'ERROR: num_deconv_layers is different len(num_deconv_filters)' + + layers = [] + for i in range(num_layers): + kernel, padding, output_padding = \ + self._get_deconv_cfg(num_kernels[i]) + + in_planes = num_filters[i] + out_planes = num_filters[i+1] + + layers.append(nn.Sequential( + nn.ConvTranspose2d( + in_channels=in_planes, + out_channels=out_planes, + kernel_size=kernel, + stride=2, + padding=padding, + output_padding=output_padding, + bias=self.deconv_with_bias), + nn.BatchNorm2d(out_planes, momentum=BN_MOMENTUM), + nn.ReLU(inplace=True)) + ) + + return nn.Sequential(*layers) + + def set_swish(self, memory_efficient=True): + """Sets swish function as memory efficient (for training) or standard (for export). + Args: + memory_efficient (bool): Whether to use memory-efficient version of swish. + """ + self._swish = MemoryEfficientSwish() if memory_efficient else Swish() + for block in self._blocks: + block.set_swish(memory_efficient) + + def extract_endpoints(self, inputs): + """Use convolution layer to extract features + from reduction levels i in [1, 2, 3, 4, 5]. + Args: + inputs (tensor): Input tensor. + Returns: + Dictionary of last intermediate features + with reduction levels i in [1, 2, 3, 4, 5]. 
+ Example: + >>> import torch + >>> from efficientnet.model import EfficientNet + >>> inputs = torch.rand(1, 3, 224, 224) + >>> model = EfficientNet.from_pretrained('efficientnet-b0') + >>> endpoints = model.extract_endpoints(inputs) + >>> print(endpoints['reduction_1'].shape) # torch.Size([1, 16, 112, 112]) + >>> print(endpoints['reduction_2'].shape) # torch.Size([1, 24, 56, 56]) + >>> print(endpoints['reduction_3'].shape) # torch.Size([1, 40, 28, 28]) + >>> print(endpoints['reduction_4'].shape) # torch.Size([1, 112, 14, 14]) + >>> print(endpoints['reduction_5'].shape) # torch.Size([1, 320, 7, 7]) + >>> print(endpoints['reduction_6'].shape) # torch.Size([1, 1280, 7, 7]) + """ + endpoints = dict() + + # Stem + x = self._swish(self._bn0(self._conv_stem(inputs))) + prev_x = x + + # Blocks + for idx, block in enumerate(self._blocks): + drop_connect_rate = self._global_params.drop_connect_rate + if drop_connect_rate: + drop_connect_rate *= float(idx) / len(self._blocks) # scale drop connect_rate + x = block(x, drop_connect_rate=drop_connect_rate) + # print('Prev', prev_x.size()) + # print('X', x.size()) + if prev_x.size(2) > x.size(2): + endpoints['reduction_{}'.format(len(endpoints) + 1)] = prev_x + elif idx == len(self._blocks) - 1: + endpoints['reduction_{}'.format(len(endpoints) + 1)] = x + prev_x = x + + # Head + x = self._swish(self._bn1(self._conv_head(x))) + endpoints['reduction_{}'.format(len(endpoints) + 1)] = x + + return endpoints + + def extract_features(self, inputs): + """use convolution layer to extract feature . + Args: + inputs (tensor): Input tensor. + Returns: + Output of the final convolution + layer in the efficientnet model. + """ + # Stem + x = self._swish(self._bn0(self._conv_stem(inputs))) + + # Blocks + for idx, block in enumerate(self._blocks): + drop_connect_rate = self._global_params.drop_connect_rate + if drop_connect_rate: + drop_connect_rate *= float(idx) / len(self._blocks) # scale drop connect_rate + x = block(x, drop_connect_rate=drop_connect_rate) + + # Head + x = self._swish(self._bn1(self._conv_head(x))) + + return x + + def forward(self, inputs): + """EfficientNet's forward function. + Calls extract_features to extract features, applies final linear layer, and returns logits. + Args: + inputs (tensor): Input tensor. + Returns: + Output of this model after processing. 
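+        Example (illustrative sketch, assuming ``include_top`` is set so the
+        classifier branch is used; with ``include_hm_decoder`` the model
+        instead returns a list holding a dict of per-head outputs):
+            >>> import torch
+            >>> from efficientnet.model import EfficientNet
+            >>> model = EfficientNet.from_pretrained('efficientnet-b0')
+            >>> logits = model(torch.rand(1, 3, 224, 224))
+            >>> print(logits.shape)  # torch.Size([1, 1000])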
+ """ + # Convolution layers + # x = self.extract_features(inputs) + endpoints = self.extract_endpoints(inputs) + x1 = endpoints['reduction_6'] + x2 = endpoints['reduction_5'] + x3 = endpoints['reduction_4'] + x4 = endpoints['reduction_3'] + x5 = endpoints['reduction_2'] + x = x1 + + if self._global_params.include_top: + # Pooling and final linear layer + x = self._avg_pooling(x) + + x = x.flatten(start_dim=1) + x = self._dropout(x) + x = self._fc(x) + return x + + if self._global_params.include_hm_decoder: + x1 = self._dropout(x1) + x2 = self._dropout(x2) + x3 = self._dropout(x3) + x4 = self._dropout(x4) + + if self.efpn: + assert self._global_params.use_c51, "C51 must be utilized for FPN intergration" + + x = self.__getattr__('deconv_1')(x1) + + if self._global_params.use_c51: + x_weighted = self._sigmoid(x) + x_inv = torch.sub(1, x_weighted, alpha=1) + x2_ = torch.multiply(x_inv, x2) + x = torch.cat([x, x2_], dim=1) + + if self.se_layer: + x = self.__getattr__('se_layer_1')(x) + else: + x = self._relu(x) + + x = self.__getattr__('deconv_2')(x) + + if self._global_params.use_c4: + x_weighted = self._sigmoid(x) + x_inv = torch.sub(1, x_weighted, alpha=1) + x3_ = torch.multiply(x_inv, x3) + x = torch.cat([x, x3_], dim=1) + + if self.se_layer: + x = self.__getattr__('se_layer_2')(x) + else: + x = self._relu(x) + + x = self.__getattr__('deconv_3')(x) + + if self._global_params.use_c3: + assert self._global_params.use_c4, "C4 must be utilized for FPN intergration of C3" + + x_weighted = self._sigmoid(x) + x_inv = torch.sub(1, x_weighted, alpha=1) + x4_ = torch.multiply(x_inv, x4) + x = torch.cat([x, x4_], dim=1) + + if self.se_layer: + x = self.__getattr__('se_layer_3')(x) + else: + x = self._relu(x) + + x = self.__getattr__('deconv_4')(x) + + if not self._global_params.use_c2: + x = self._relu(x) + else: + assert self._global_params.use_c3, "C3 must be utilized for FPN intergration of C2" + + x5 = self._dropout(x5) + x5_ = self.conv_c2(x5) + x_weighted = self._sigmoid(x) + x_inv = torch.sub(1, x_weighted, alpha=1) + x5_ = torch.multiply(x_inv, x5_) + x = torch.cat([x, x5_], dim=1) + + if self.se_layer: + x = self.__getattr__('se_layer_4')(x) + elif self.tfpn: + assert self._global_params.use_c51, "C51 must be utilized for FPN intergration" + x = self.__getattr__('deconv_1')(x1) + x = self._relu1(x) + x = torch.cat([x, x2], dim=1) + + x = self.__getattr__('deconv_2')(x) + if not self._global_params.use_c4: + x = self._relu1(x) + else: + x = torch.cat([x, x3], dim=1) + + x = self.__getattr__('deconv_3')(x) + if not self._global_params.use_c3: + x = self._relu1(x) + else: + assert self._global_params.use_c4, "C4 must be utilized for FPN intergration of C3" + x = torch.cat([x, x4], dim=1) + + x = self.__getattr__('deconv_4')(x) + if not self._global_params.use_c2: + x = self._relu(x) + else: + assert self._global_params.use_c3, "C3 must be utilized for FPN intergration of C2" + x5 = self._dropout(x5) + x5 = self.conv_c2(x5) + x = self._relu1(x) + x = torch.cat([x, x5], dim=1) + else: + x = self.deconv_layers(x1) + + ret = {} + for head in self.heads: + ret[head] = self.__getattr__(head)(x) + + return [ret] + + @classmethod + def from_name(cls, model_name, in_channels=3, **override_params): + """Create an efficientnet model according to name. + Args: + model_name (str): Name for efficientnet. + in_channels (int): Input data's channel number. + override_params (other key word params): + Params to override model's global_params. 
+ Optional key: + 'width_coefficient', 'depth_coefficient', + 'image_size', 'dropout_rate', + 'num_classes', 'batch_norm_momentum', + 'batch_norm_epsilon', 'drop_connect_rate', + 'depth_divisor', 'min_depth' + Returns: + An efficientnet model. + """ + cls._check_model_name_is_valid(model_name) + blocks_args, global_params = get_model_params(model_name, override_params) + model = cls(blocks_args, global_params) + model._change_in_channels(in_channels) + return model + + @classmethod + def from_pretrained(cls, model_name, weights_path=None, advprop=False, + in_channels=3, num_classes=1000, **override_params): + """Create an efficientnet model according to name. + Args: + model_name (str): Name for efficientnet. + weights_path (None or str): + str: path to pretrained weights file on the local disk. + None: use pretrained weights downloaded from the Internet. + advprop (bool): + Whether to load pretrained weights + trained with advprop (valid when weights_path is None). + in_channels (int): Input data's channel number. + num_classes (int): + Number of categories for classification. + It controls the output size for final linear layer. + override_params (other key word params): + Params to override model's global_params. + Optional key: + 'width_coefficient', 'depth_coefficient', + 'image_size', 'dropout_rate', + 'batch_norm_momentum', + 'batch_norm_epsilon', 'drop_connect_rate', + 'depth_divisor', 'min_depth' + Returns: + A pretrained efficientnet model. + """ + model = cls.from_name(model_name, num_classes=num_classes, **override_params) + load_pretrained_weights(model, model_name, weights_path=weights_path, + load_fc=((num_classes == 1000) and (model._global_params.include_top)), advprop=advprop) + model._change_in_channels(in_channels) + return model + + @classmethod + def get_image_size(cls, model_name): + """Get the input image size for a given efficientnet model. + Args: + model_name (str): Name for efficientnet. + Returns: + Input image size (resolution). + """ + cls._check_model_name_is_valid(model_name) + _, _, res, _ = efficientnet_params(model_name) + return res + + @classmethod + def _check_model_name_is_valid(cls, model_name): + """Validates model name. + Args: + model_name (str): Name for efficientnet. + Returns: + bool: Is a valid name or not. + """ + if model_name not in VALID_MODELS: + raise ValueError('model_name should be one of: ' + ', '.join(VALID_MODELS)) + + def _change_in_channels(self, in_channels): + """Adjust model's first convolution layer to in_channels, if in_channels not equals 3. + Args: + in_channels (int): Input data's channel number. 
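+        Example (illustrative sketch):
+            >>> model = EfficientNet.from_name('efficientnet-b0')
+            >>> model._change_in_channels(1)  # rebuilds the stem conv for 1-channel input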
+ """ + if in_channels != 3: + Conv2d = get_same_padding_conv2d(image_size=self._global_params.image_size) + out_channels = round_filters(32, self._global_params) + self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False) + + +@MODELS.register_module() +class PoseEfficientNet(EfficientNet): + def __init__(self, model_name, in_channels=3, **override_params): + self.model_name = model_name + self.in_channels = in_channels + + # Initialize Parent Class + super()._check_model_name_is_valid(model_name) + blocks_args, global_params = get_model_params(model_name, override_params) + super().__init__(blocks_args, global_params) + + @classmethod + def from_name(cls, model_name, in_channels, **override_params): + return NotImplemented + + @classmethod + def from_pretrained(cls, model_name, weights_path, advprop, in_channels, num_classes, **override_params): + return NotImplemented + + def _change_in_channels(self, in_channels): + return NotImplemented + + def init_weights(self, pretrained=False, advprop=False, verbose=True): + if pretrained: + url_map_ = url_map_advprop if advprop else url_map + state_dict = model_zoo.load_url(url_map_[self.model_name]) + self.load_state_dict(state_dict, strict=False) + + # Initialize weights for Deconvolution Layer + if self._global_params.include_hm_decoder: + if self.efpn or self.tfpn: + deconv_layers = [self.deconv_1, self.deconv_2, self.deconv_3, self.deconv_4] + else: + deconv_layers = self.deconv_layers + + for layer in deconv_layers: + for _, m in layer.named_modules(): + if isinstance(m, nn.ConvTranspose2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. / n)) + if self.deconv_with_bias: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + # Init head parameters + for head in self.heads: + final_layer = self.__getattr__(head) + for i, m in enumerate(final_layer.modules()): + if isinstance(m, nn.Conv2d): + if m.weight.shape[0] == self.heads[head]: + if 'hm' in head: + nn.init.constant_(m.bias, -2.19) + else: + # nn.init.normal_(m.weight, std=0.001) + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. 
/ n)) + nn.init.constant_(m.bias, 0) + + self._change_in_channels(in_channels=self.in_channels) + if verbose: + print('Loaded pretrained weights for {}'.format(self.model_name)) + + +if __name__ == '__main__': + cfg = dict(type='PoseEfficientNet', + model_name='efficientnet-b4', + include_top=False, + include_hm_decoder=True, + head_conv=64, + heads={'hm':1, 'cls':1, 'cstency':256}, + use_c2=True) + model = build_model(cfg, MODELS) + model.init_weights(pretrained=True) + model.eval() + inputs = torch.rand((1, 3, 384, 384)) + + for i, (n, p) in enumerate(model.named_parameters()): + print(i, n) + + # To show the whole pose EFN model outputs shape + x = model(inputs)[0] + for head in x.keys(): + print(f'{head} shape is --- {x[head].shape}') + + # To show the endpoints features shape + # endpoints = model.extract_endpoints(inputs) + # for k in endpoints.keys(): + # print(endpoints[k].shape) diff --git a/models/networks/pose_hrnet.py b/models/networks/pose_hrnet.py new file mode 100644 index 0000000000000000000000000000000000000000..1a687e2e3b54a4e46c75dc11bec47a26a0461250 --- /dev/null +++ b/models/networks/pose_hrnet.py @@ -0,0 +1,515 @@ +#-*- coding: utf-8 -*- +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import logging +import re + +import torch +import torch.nn as nn +from ..builder import MODELS + +from .common import conv3x3, BN_MOMENTUM + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) + self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, + bias=False) + self.bn3 = nn.BatchNorm2d(planes * self.expansion, + momentum=BN_MOMENTUM) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class HighResolutionModule(nn.Module): + def __init__(self, num_branches, blocks, num_blocks, num_inchannels, + num_channels, fuse_method, multi_scale_output=True): + super(HighResolutionModule, self).__init__() + self._check_branches( + num_branches, blocks, num_blocks, num_inchannels, num_channels) + + self.num_inchannels 
= num_inchannels + self.fuse_method = fuse_method + self.num_branches = num_branches + + self.multi_scale_output = multi_scale_output + + self.branches = self._make_branches( + num_branches, blocks, num_blocks, num_channels) + self.fuse_layers = self._make_fuse_layers() + self.relu = nn.ReLU(True) + + def _check_branches(self, num_branches, blocks, num_blocks, + num_inchannels, num_channels): + if num_branches != len(num_blocks): + error_msg = 'NUM_BRANCHES({}) <> NUM_BLOCKS({})'.format( + num_branches, len(num_blocks)) + # logger.error(error_msg) + raise ValueError(error_msg) + + if num_branches != len(num_channels): + error_msg = 'NUM_BRANCHES({}) <> NUM_CHANNELS({})'.format( + num_branches, len(num_channels)) + # logger.error(error_msg) + raise ValueError(error_msg) + + if num_branches != len(num_inchannels): + error_msg = 'NUM_BRANCHES({}) <> NUM_INCHANNELS({})'.format( + num_branches, len(num_inchannels)) + # logger.error(error_msg) + raise ValueError(error_msg) + + def _make_one_branch(self, branch_index, block, num_blocks, num_channels, + stride=1): + downsample = None + if stride != 1 or \ + self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + self.num_inchannels[branch_index], + num_channels[branch_index] * block.expansion, + kernel_size=1, stride=stride, bias=False + ), + nn.BatchNorm2d( + num_channels[branch_index] * block.expansion, + momentum=BN_MOMENTUM + ), + ) + + layers = [] + layers.append( + block( + self.num_inchannels[branch_index], + num_channels[branch_index], + stride, + downsample + ) + ) + self.num_inchannels[branch_index] = \ + num_channels[branch_index] * block.expansion + for i in range(1, num_blocks[branch_index]): + layers.append( + block( + self.num_inchannels[branch_index], + num_channels[branch_index] + ) + ) + + return nn.Sequential(*layers) + + def _make_branches(self, num_branches, block, num_blocks, num_channels): + branches = [] + + for i in range(num_branches): + branches.append( + self._make_one_branch(i, block, num_blocks, num_channels) + ) + + return nn.ModuleList(branches) + + def _make_fuse_layers(self): + if self.num_branches == 1: + return None + + num_branches = self.num_branches + num_inchannels = self.num_inchannels + fuse_layers = [] + for i in range(num_branches if self.multi_scale_output else 1): + fuse_layer = [] + for j in range(num_branches): + if j > i: + fuse_layer.append( + nn.Sequential( + nn.Conv2d( + num_inchannels[j], + num_inchannels[i], + 1, 1, 0, bias=False + ), + nn.BatchNorm2d(num_inchannels[i]), + nn.Upsample(scale_factor=2**(j-i), mode='nearest') + ) + ) + elif j == i: + fuse_layer.append(None) + else: + conv3x3s = [] + for k in range(i-j): + if k == i - j - 1: + num_outchannels_conv3x3 = num_inchannels[i] + conv3x3s.append( + nn.Sequential( + nn.Conv2d( + num_inchannels[j], + num_outchannels_conv3x3, + 3, 2, 1, bias=False + ), + nn.BatchNorm2d(num_outchannels_conv3x3) + ) + ) + else: + num_outchannels_conv3x3 = num_inchannels[j] + conv3x3s.append( + nn.Sequential( + nn.Conv2d( + num_inchannels[j], + num_outchannels_conv3x3, + 3, 2, 1, bias=False + ), + nn.BatchNorm2d(num_outchannels_conv3x3), + nn.ReLU(True) + ) + ) + fuse_layer.append(nn.Sequential(*conv3x3s)) + fuse_layers.append(nn.ModuleList(fuse_layer)) + + return nn.ModuleList(fuse_layers) + + def get_num_inchannels(self): + return self.num_inchannels + + def forward(self, x): + if self.num_branches == 1: + return [self.branches[0](x[0])] + + for i in range(self.num_branches): + x[i] = 
self.branches[i](x[i]) + + x_fuse = [] + + for i in range(len(self.fuse_layers)): + y = x[0] if i == 0 else self.fuse_layers[i][0](x[0]) + for j in range(1, self.num_branches): + if i == j: + y = y + x[j] + else: + y = y + self.fuse_layers[i][j](x[j]) + x_fuse.append(self.relu(y)) + + return x_fuse + + +blocks_dict = { + 'BASIC': BasicBlock, + 'BOTTLENECK': Bottleneck +} + + +@MODELS.register_module() +class PoseHighResolutionNet(nn.Module): + def __init__(self, + cfg, + **kwargs): + self.inplanes = 64 + extra = cfg.MODEL.EXTRA + self.cls_based_hm = cfg.MODEL.cls_based_hm + self.heads = cfg.MODEL.heads + super(PoseHighResolutionNet, self).__init__() + + # stem net + self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, + bias=False) + self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) + self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, + bias=False) + self.bn2 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) + self.relu = nn.ReLU(inplace=True) + self.layer1 = self._make_layer(Bottleneck, 64, 4) + + self.stage2_cfg = cfg['MODEL']['EXTRA']['STAGE2'] + num_channels = self.stage2_cfg['NUM_CHANNELS'] + block = blocks_dict[self.stage2_cfg['BLOCK']] + num_channels = [ + num_channels[i] * block.expansion for i in range(len(num_channels)) + ] + self.transition1 = self._make_transition_layer([256], num_channels) + self.stage2, pre_stage_channels = self._make_stage( + self.stage2_cfg, num_channels) + + self.stage3_cfg = cfg['MODEL']['EXTRA']['STAGE3'] + num_channels = self.stage3_cfg['NUM_CHANNELS'] + block = blocks_dict[self.stage3_cfg['BLOCK']] + num_channels = [ + num_channels[i] * block.expansion for i in range(len(num_channels)) + ] + self.transition2 = self._make_transition_layer( + pre_stage_channels, num_channels) + self.stage3, pre_stage_channels = self._make_stage( + self.stage3_cfg, num_channels) + + self.stage4_cfg = cfg['MODEL']['EXTRA']['STAGE4'] + num_channels = self.stage4_cfg['NUM_CHANNELS'] + block = blocks_dict[self.stage4_cfg['BLOCK']] + num_channels = [ + num_channels[i] * block.expansion for i in range(len(num_channels)) + ] + self.transition3 = self._make_transition_layer( + pre_stage_channels, num_channels) + self.stage4, pre_stage_channels = self._make_stage( + self.stage4_cfg, num_channels, multi_scale_output=False) + + self.final_layer = nn.Conv2d( + in_channels=pre_stage_channels[0], + out_channels=cfg.MODEL.NUM_JOINTS, + kernel_size=extra.FINAL_CONV_KERNEL, + stride=1, + padding=1 if extra.FINAL_CONV_KERNEL == 3 else 0 + ) + + self.final_layer_cls = nn.Sequential( + nn.BatchNorm2d(cfg.MODEL.NUM_JOINTS, momentum=BN_MOMENTUM), + nn.AdaptiveMaxPool2d(cfg.MODEL.HEATMAP_SIZE[0]//4), + nn.Flatten(), + nn.Linear((cfg.MODEL.HEATMAP_SIZE[0]//4)**2, cfg.MODEL.NUM_JOINTS, bias=True), + nn.Sigmoid() + ) + + self.pretrained_layers = cfg['MODEL']['EXTRA']['PRETRAINED_LAYERS'] + + def _make_transition_layer( + self, num_channels_pre_layer, num_channels_cur_layer): + num_branches_cur = len(num_channels_cur_layer) + num_branches_pre = len(num_channels_pre_layer) + + transition_layers = [] + for i in range(num_branches_cur): + if i < num_branches_pre: + if num_channels_cur_layer[i] != num_channels_pre_layer[i]: + transition_layers.append( + nn.Sequential( + nn.Conv2d( + num_channels_pre_layer[i], + num_channels_cur_layer[i], + 3, 1, 1, bias=False + ), + nn.BatchNorm2d(num_channels_cur_layer[i]), + nn.ReLU(inplace=True) + ) + ) + else: + transition_layers.append(None) + else: + conv3x3s = [] + for j in range(i+1-num_branches_pre): + inchannels = 
num_channels_pre_layer[-1] + outchannels = num_channels_cur_layer[i] \ + if j == i-num_branches_pre else inchannels + conv3x3s.append( + nn.Sequential( + nn.Conv2d( + inchannels, outchannels, 3, 2, 1, bias=False + ), + nn.BatchNorm2d(outchannels), + nn.ReLU(inplace=True) + ) + ) + transition_layers.append(nn.Sequential(*conv3x3s)) + + return nn.ModuleList(transition_layers) + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False + ), + nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def _make_stage(self, layer_config, num_inchannels, + multi_scale_output=True): + num_modules = layer_config['NUM_MODULES'] + num_branches = layer_config['NUM_BRANCHES'] + num_blocks = layer_config['NUM_BLOCKS'] + num_channels = layer_config['NUM_CHANNELS'] + block = blocks_dict[layer_config['BLOCK']] + fuse_method = layer_config['FUSE_METHOD'] + + modules = [] + for i in range(num_modules): + # multi_scale_output is only used last module + if not multi_scale_output and i == num_modules - 1: + reset_multi_scale_output = False + else: + reset_multi_scale_output = True + + modules.append( + HighResolutionModule( + num_branches, + block, + num_blocks, + num_inchannels, + num_channels, + fuse_method, + reset_multi_scale_output + ) + ) + num_inchannels = modules[-1].get_num_inchannels() + + return nn.Sequential(*modules), num_inchannels + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.bn2(x) + x = self.relu(x) + x = self.layer1(x) + + x_list = [] + for i in range(self.stage2_cfg['NUM_BRANCHES']): + if self.transition1[i] is not None: + x_list.append(self.transition1[i](x)) + else: + x_list.append(x) + y_list = self.stage2(x_list) + + x_list = [] + for i in range(self.stage3_cfg['NUM_BRANCHES']): + if self.transition2[i] is not None: + x_list.append(self.transition2[i](y_list[-1])) + else: + x_list.append(y_list[i]) + y_list = self.stage3(x_list) + + x_list = [] + for i in range(self.stage4_cfg['NUM_BRANCHES']): + if self.transition3[i] is not None: + x_list.append(self.transition3[i](y_list[-1])) + else: + x_list.append(y_list[i]) + y_list = self.stage4(x_list) + + x = self.final_layer(y_list[0]) + + ret = {} + for head in self.heads.keys(): + if head == 'hm': + ret[head] = x + else: + x1 = self.final_layer_cls(x) + ret[head] = x1 + return [ret] + + def init_weights(self, pretrained='', **kwargs): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + nn.init.normal_(m.weight, std=0.001) + for name, _ in m.named_parameters(): + if name in ['bias']: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.ConvTranspose2d): + nn.init.normal_(m.weight, std=0.001) + for name, _ in m.named_parameters(): + if name in ['bias']: + nn.init.constant_(m.bias, 0) + + if os.path.isfile(pretrained): + pretrained_state_dict = torch.load(pretrained) + + need_init_state_dict = {} + for name, m in pretrained_state_dict.items(): 
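+                # keep only weights whose top-level module name is listed in
+                # pretrained_layers ('*' keeps every parameter)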
+                if name.split('.')[0] in self.pretrained_layers \
+                   or self.pretrained_layers[0] == '*':
+                    need_init_state_dict[name] = m
+            self.load_state_dict(need_init_state_dict, strict=False)
+        elif pretrained:
+            raise ValueError('{} does not exist!'.format(pretrained))
+
+
+def get_pose_net(cfg, is_train, **kwargs):
+    model = PoseHighResolutionNet(cfg, **kwargs)
+
+    if is_train and cfg.MODEL.INIT_WEIGHTS:
+        model.init_weights(cfg.MODEL.PRETRAINED)
+
+    return model
+
+
+if __name__ == "__main__":
+    from configs.get_config import load_config
+    from builder import build_model
+    cfg = load_config("configs/hrnet_sbi.yaml")
+
+    hrnet = build_model(cfg.MODEL, MODELS, default_args=dict(cfg=cfg))
+    print(hrnet)
diff --git a/models/networks/xception.py b/models/networks/xception.py
new file mode 100644
index 0000000000000000000000000000000000000000..901bb278f92bbef47183fb5ef3f2efc98b38073e
--- /dev/null
+++ b/models/networks/xception.py
@@ -0,0 +1,338 @@
+"""
+Creates an Xception model as defined in:
+
+Francois Chollet
+Xception: Deep Learning with Depthwise Separable Convolutions
+https://arxiv.org/pdf/1610.02357.pdf
+
+These weights were ported from the Keras implementation and achieve the following performance on the validation set:
+
+Loss: 0.9173  Prec@1: 78.892  Prec@5: 94.292
+
+REMEMBER to set your image size to 3x299x299 for both test and validation:
+
+normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5],
+                                 std=[0.5, 0.5, 0.5])
+
+The resize parameter of the validation transform should be 333, and make sure to center crop at 299x299.
+"""
+import math
+
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.utils.model_zoo as model_zoo
+from torch.nn import init
+import torch
+
+from ..builder import MODELS
+from .common import conv_block, BN_MOMENTUM
+
+
+model_urls = {
+    'xception': 'https://www.dropbox.com/s/1hplpzet9d7dv29/xception-c0a72b38.pth.tar?dl=1'
+}
+
+
+class SeparableConv2d(nn.Module):
+    def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=False):
+        super(SeparableConv2d, self).__init__()
+
+        self.conv1 = nn.Conv2d(in_channels, in_channels, kernel_size, stride, padding, dilation, groups=in_channels, bias=bias)
+        self.pointwise = nn.Conv2d(in_channels, out_channels, 1, 1, 0, 1, 1, bias=bias)
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.pointwise(x)
+        return x
+
+
+class Block(nn.Module):
+    def __init__(self, in_filters, out_filters, reps, strides=1, start_with_relu=True, grow_first=True):
+        super(Block, self).__init__()
+
+        if out_filters != in_filters or strides != 1:
+            self.skip = nn.Conv2d(in_filters, out_filters, 1, stride=strides, bias=False)
+            self.skipbn = nn.BatchNorm2d(out_filters)
+        else:
+            self.skip = None
+
+        self.relu = nn.ReLU(inplace=True)
+        rep = []
+
+        filters = in_filters
+        if grow_first:
+            rep.append(self.relu)
+            rep.append(SeparableConv2d(in_filters, out_filters, 3, stride=1, padding=1, bias=False))
+            rep.append(nn.BatchNorm2d(out_filters))
+            filters = out_filters
+
+        for i in range(reps - 1):
+            rep.append(self.relu)
+            rep.append(SeparableConv2d(filters, filters, 3, stride=1, padding=1, bias=False))
+            rep.append(nn.BatchNorm2d(filters))
+
+        if not grow_first:
+            rep.append(self.relu)
+            rep.append(SeparableConv2d(in_filters, out_filters, 3, stride=1, padding=1, bias=False))
+            rep.append(nn.BatchNorm2d(out_filters))
+
+        if not start_with_relu:
+            rep = rep[1:]
+        else:
+            rep[0] = nn.ReLU(inplace=False)
+
+        if strides != 1:
+            rep.append(nn.MaxPool2d(3, strides, 1))
+        self.rep = nn.Sequential(*rep)
+
+    def forward(self, inp):
+        x = self.rep(inp)
+
+        if self.skip is not None:
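+            # project the residual with the strided 1x1 conv + BN so its shape
+            # matches the block output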
+ skip = self.skip(inp) + skip = self.skipbn(skip) + else: + skip = inp + + x+=skip + return x + + +@MODELS.register_module() +class Xception(nn.Module): + """ + Xception optimized for the ImageNet dataset, as specified in + https://arxiv.org/pdf/1610.02357.pdf + """ + def __init__(self, + heads, + head_conv=64, + cls_based_hm=True, + dropout_prob=0.5, + **kwargs): + """ Constructor + Args: + num_classes: number of classes + """ + self.heads = heads + self.head_conv = head_conv + self.cls_based_hm = cls_based_hm + self.dropout_prob = dropout_prob + super(Xception, self).__init__() + + self.conv1 = nn.Conv2d(3, 32, 3,2, 0, bias=False) + self.bn1 = nn.BatchNorm2d(32) + self.relu = nn.ReLU(inplace=True) + + self.conv2 = nn.Conv2d(32,64,3,bias=False) + self.bn2 = nn.BatchNorm2d(64) + #do relu here + + self.block1=Block(64,128,2,2,start_with_relu=False,grow_first=True) + self.block2=Block(128,256,2,2,start_with_relu=True,grow_first=True) + self.block3=Block(256,728,2,2,start_with_relu=True,grow_first=True) + + self.block4=Block(728,728,3,1,start_with_relu=True,grow_first=True) + self.block5=Block(728,728,3,1,start_with_relu=True,grow_first=True) + self.block6=Block(728,728,3,1,start_with_relu=True,grow_first=True) + self.block7=Block(728,728,3,1,start_with_relu=True,grow_first=True) + + self.block8=Block(728,728,3,1,start_with_relu=True,grow_first=True) + self.block9=Block(728,728,3,1,start_with_relu=True,grow_first=True) + self.block10=Block(728,728,3,1,start_with_relu=True,grow_first=True) + self.block11=Block(728,728,3,1,start_with_relu=True,grow_first=True) + + self.block12=Block(728,1024,2,2,start_with_relu=True,grow_first=False) + + self.conv3 = SeparableConv2d(1024,1536,3,1,1) + self.bn3 = nn.BatchNorm2d(1536) + + #do relu here + self.conv4 = SeparableConv2d(1536,2048,3,1,1) + self.bn4 = nn.BatchNorm2d(2048) + + self.dropout = nn.Dropout2d(p=self.dropout_prob) + + self.conv_block_1 = conv_block(2048, 256, (3,3), padding=1) + self.deconv_1 = nn.Sequential( + nn.ConvTranspose2d( + in_channels=256, + out_channels=256, + kernel_size=(4,4), + stride=2, + padding=1, + output_padding=0, + bias=False), + nn.BatchNorm2d(256, momentum=BN_MOMENTUM), + nn.ReLU(inplace=True) + ) + + self.conv_block_2 = conv_block(256, 256, (3,3), padding=1) + self.deconv_2 = nn.Sequential( + nn.ConvTranspose2d( + in_channels=256, + out_channels=128, + kernel_size=(4,4), + stride=2, + padding=1, + output_padding=0, + bias=False), + nn.BatchNorm2d(128, momentum=BN_MOMENTUM), + nn.ReLU(inplace=True) + ) + + self.conv_block_3 = conv_block(128, 128, (3,3), padding=1) + self.deconv_3 = nn.Sequential( + nn.ConvTranspose2d( + in_channels=128, + out_channels=64, + kernel_size=(4,4), + stride=2, + padding=1, + output_padding=0, + bias=False), + nn.BatchNorm2d(64, momentum=BN_MOMENTUM), + nn.ReLU(inplace=True) + ) + + for head in sorted(self.heads): + num_output = self.heads[head] + if self.head_conv > 0: + if head != 'cls': + fc = nn.Sequential( + nn.Conv2d(64, self.head_conv, + kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(self.head_conv), + nn.ReLU(inplace=True), + nn.Conv2d(self.head_conv, num_output, + kernel_size=1, stride=1, padding=0) + ) + else: + if self.cls_based_hm: + fc = nn.Sequential( + nn.AdaptiveAvgPool2d(head_conv//4), + nn.Flatten(), + nn.Linear((head_conv//4)**2, head_conv, bias=False), + nn.BatchNorm1d(head_conv, momentum=BN_MOMENTUM), + nn.ReLU(inplace=True), + nn.Linear(head_conv, num_output, bias=True), + nn.Sigmoid() + ) + else: + fc = nn.Sequential( + nn.Conv2d(64, head_conv, kernel_size=3, + 
padding=1, bias=False), + nn.BatchNorm2d(head_conv, momentum=BN_MOMENTUM), + nn.ReLU(inplace=True), + nn.Conv2d(head_conv, num_output, kernel_size=1, + stride=1, padding=0, bias=False), + nn.BatchNorm2d(num_output), + # nn.ReLU(inplace=True), + nn.AdaptiveAvgPool2d(head_conv//4), + nn.Flatten(), + nn.Linear((head_conv//4)**2, head_conv, bias=False), + nn.BatchNorm1d(head_conv, momentum=BN_MOMENTUM), + nn.ReLU(inplace=True), + nn.Linear(head_conv, num_output, bias=True), + nn.Sigmoid() + ) + else: + fc = nn.Conv2d( + in_channels=64, + out_channels=num_output, + kernel_size=1, + stride=1, + padding=0 + ) + self.__setattr__(head, fc) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + + x = self.conv2(x) + x = self.bn2(x) + x = self.relu(x) + + x = self.block1(x) + x = self.block2(x) + x = self.block3(x) + x = self.block4(x) + x = self.block5(x) + x = self.block6(x) + x = self.block7(x) + x = self.block8(x) + x = self.block9(x) + x = self.block10(x) + x = self.block11(x) + x = self.block12(x) + + x = self.conv3(x) + x = self.bn3(x) + x = self.relu(x) + + x = self.conv4(x) + x = self.bn4(x) + x = self.relu(x) + + x = self.dropout(x) + + x = self.conv_block_1(x) + x = self.deconv_1(x) + + x = self.conv_block_2(x) + x = self.deconv_2(x) + + x = self.conv_block_3(x) + x = self.deconv_3(x) + + ret = {} + x1_hm = None + for head in self.heads: + if not self.cls_based_hm or head != 'cls': + ret[head] = self.__getattr__(head)(x) + if head == 'hm': + x1_hm = ret[head] + else: + assert 'hm' in ret.keys(), "Other heads need features from heatmap, please check it!" + ret[head] = self.__getattr__(head)(x1_hm) + return [ret] + + def init_weights(self, pretrained=False): + if not pretrained: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. / n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + elif isinstance(m, nn.ConvTranspose2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. / n)) + if self.deconv_with_bias: + nn.init.constant_(m.bias, 0) + else: + self.load_state_dict(model_zoo.load_url(model_urls['xception']), strict=False) + + # Init head parameters + for head in self.heads: + final_layer = self.__getattr__(head) + for i, m in enumerate(final_layer.modules()): + prior = 1/71 + # if isinstance(m, nn.Conv2d): + # if m.weight.shape[0] == self.heads[head]: + # if 'hm' in head: + # # nn.init.constant_(m.bias, -2.19) + # nn.init.constant_(m.bias, -math.log((1-prior)/prior)) + # else: + # nn.init.normal_(m.weight, std=0.001) + # # nn.init.constant_(m.bias, 0) + if isinstance(m, nn.Linear): + if m.weight.shape[0] == self.heads[head]: + nn.init.constant_(m.bias, -math.log((1-prior)/prior)) + # else: + # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + # m.weight.data.normal_(0, math.sqrt(2. 
/ n))
+                    # # nn.init.constant_(m.bias, 0)
diff --git a/models/utils.py b/models/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..6c810d016d24b7c175cdd77069bd2adfc4e91d3a
--- /dev/null
+++ b/models/utils.py
@@ -0,0 +1,138 @@
+#-*- coding: utf-8 -*-
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+import torch
+import torch.nn as nn
+
+
+layers_position = {
+    'PoseResNet_50': 158,
+    'PoseResNet_101': 311,
+    'PoseEfficientNet_B4': 415,
+}
+
+
+def preset_model(cfg, model, optimizer=None):
+    # Load the model described by the config; make sure the pretrained path matches the model name
+    start_epoch = 0
+    if 'pretrained' in cfg.TRAIN and os.path.isfile(cfg.TRAIN.pretrained):
+        model, optimizer, start_epoch = load_model(model,
+                                                   cfg.TRAIN.pretrained,
+                                                   optimizer=optimizer,
+                                                   resume=cfg.TRAIN.resume,
+                                                   lr=cfg.TRAIN.lr,
+                                                   lr_step=cfg.TRAIN.lr_scheduler.milestones,
+                                                   gamma=cfg.TRAIN.lr_scheduler.gamma)
+    else:
+        model.init_weights(**cfg.MODEL.INIT_WEIGHTS)
+    print('Loaded model successfully -- {}'.format(cfg.MODEL.type))
+
+    # Freeze the backbone while the starting epoch is still inside the warm-up period
+    if cfg.TRAIN.freeze_backbone and start_epoch < cfg.TRAIN.warm_up:
+        freeze_backbone(cfg.MODEL, model)
+
+    print('Number of parameters', sum(p.numel() for p in model.parameters()))
+    print('Number of trainable parameters', sum(p.numel() for p in model.parameters() if p.requires_grad))
+    return model, optimizer, start_epoch
+
+
+def load_pretrained(model, weight_path):
+    '''
+    Load only the model's state dict from a checkpoint.
+    For optimizer state and resumed training, see :func:`load_model`.
+    '''
+    state_dict = torch.load(weight_path)['state_dict']
+    model.load_state_dict(state_dict, strict=True)
+    return model
+
+
+def freeze_backbone(cfg, model):
+    '''
+    Freeze specific backbone layers to warm up the model.
+    '''
+    if hasattr(model, 'backbone'):
+        backbone = model.backbone
+        for param in backbone.parameters():
+            param.requires_grad = False
+    else:
+        for i, (n, p) in enumerate(model.named_parameters()):
+            if (i <= layers_position[f'{cfg.type}_{cfg.num_layers}']):
+                p.requires_grad = False
+
+
+def unfreeze_backbone(model):
+    '''
+    Unfreeze all model layers.
+    '''
+    for param in model.parameters():
+        if not param.requires_grad:
+            param.requires_grad = True
+
+
+def load_model(model, model_path, optimizer=None, resume=False,
+               lr=None, lr_step=None, gamma=None):
+    start_epoch = 0
+    checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
+    print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch']))
+    state_dict_ = checkpoint['state_dict']
+    state_dict = {}
+
+    # strip the DataParallel 'module.' prefix from checkpoint keys
+    for k in state_dict_:
+        if k.startswith('module') and not k.startswith('module_list'):
+            state_dict[k[7:]] = state_dict_[k]
+        else:
+            state_dict[k] = state_dict_[k]
+    model_state_dict = model.state_dict()
+
+    # check loaded parameters against the created model's parameters
+    msg = 'If you see this, your model did not fully load the ' + \
+          'pre-trained weights. Please make sure ' + \
+          'you have correctly specified --arch xxx ' + \
+          'or set the correct --num_classes for your own dataset.'
+    for k in state_dict:
+        if k in model_state_dict:
+            if state_dict[k].shape != model_state_dict[k].shape:
+                print('Skip loading parameter {}, required shape {}, '\
+                      'loaded shape {}. 
{}'.format( + k, model_state_dict[k].shape, state_dict[k].shape, msg)) + state_dict[k] = model_state_dict[k] + else: + print('Drop parameter {}.'.format(k) + msg) + for k in model_state_dict: + if not (k in state_dict): + print('No param {}.'.format(k) + msg) + state_dict[k] = model_state_dict[k] + model.load_state_dict(state_dict, strict=False) + + # resume optimizer parameters + if optimizer is not None and resume: + if 'optimizer' in checkpoint: + optimizer.load_state_dict(checkpoint['optimizer']) + start_epoch = checkpoint['epoch'] + 1 + start_lr = lr + for step in lr_step: + if start_epoch >= step: + start_lr *= gamma + for param_group in optimizer.param_groups: + param_group['lr'] = start_lr + print('Resumed optimizer with start lr', start_lr) + else: + print('No optimizer parameters in checkpoint.') + return model, optimizer, start_epoch + + +def save_model(path, epoch, model, optimizer=None): + if isinstance(model, torch.nn.DataParallel): + state_dict = model.module.state_dict() + else: + state_dict = model.state_dict() + data = {'epoch': epoch, + 'state_dict': state_dict} + if not (optimizer is None): + data['optimizer'] = optimizer.state_dict() + torch.save(data, path) diff --git a/output/.DS_Store b/output/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..875f0a83dd4eb81a66581dc2d19be8e066e14a93 Binary files /dev/null and b/output/.DS_Store differ diff --git a/register/.ipynb_checkpoints/register-checkpoint.py b/register/.ipynb_checkpoints/register-checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..edc1cd2d240adc65839f271672fe2a82a3f71e2e --- /dev/null +++ b/register/.ipynb_checkpoints/register-checkpoint.py @@ -0,0 +1,318 @@ +#-*- coding: utf-8 -*- +import inspect +import warnings +from functools import partial +from typing import Any, Dict, Optional + +from .misc import deprecated_api_warning, is_seq_of + + +def build_from_cfg(cfg: Dict, + registry: 'Registry', + default_args: Optional[Dict] = None) -> Any: + """Build a module from config dict when it is a class configuration, or + call a function from config dict when it is a function configuration. + Example: + >>> MODELS = Registry('models') + >>> @MODELS.register_module() + >>> class ResNet: + >>> pass + >>> resnet = build_from_cfg(dict(type='Resnet'), MODELS) + >>> # Returns an instantiated object + >>> @MODELS.register_module() + >>> def resnet50(): + >>> pass + >>> resnet = build_from_cfg(dict(type='resnet50'), MODELS) + >>> # Return a result of the calling function + Args: + cfg (dict): Config dict. It should at least contain the key "type". + registry (:obj:`Registry`): The registry to search the type from. + default_args (dict, optional): Default initialization arguments. + Returns: + object: The constructed object. 
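+        Example with ``default_args`` (illustrative; ``depth`` stands in for
+        any constructor argument of the registered class):
+        >>> resnet = build_from_cfg(dict(type='ResNet'), MODELS,
+        ...                         default_args=dict(depth=50))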
+ """ + if not isinstance(cfg, dict): + raise TypeError(f'cfg must be a dict, but got {type(cfg)}') + if 'type' not in cfg: + if default_args is None or 'type' not in default_args: + raise KeyError( + '`cfg` or `default_args` must contain the key "type", ' + f'but got {cfg}\n{default_args}') + if not isinstance(registry, Registry): + raise TypeError('registry must be an mmcv.Registry object, ' + f'but got {type(registry)}') + if not (isinstance(default_args, dict) or default_args is None): + raise TypeError('default_args must be a dict or None, ' + f'but got {type(default_args)}') + + args = cfg.copy() + + if default_args is not None: + for name, value in default_args.items(): + args.setdefault(name, value) + + obj_type = args.pop('type') + if isinstance(obj_type, str): + obj_cls = registry.get(obj_type) + if obj_cls is None: + raise KeyError( + f'{obj_type} is not in the {registry.name} registry') + elif inspect.isclass(obj_type) or inspect.isfunction(obj_type): + obj_cls = obj_type + else: + raise TypeError( + f'type must be a str or valid type, but got {type(obj_type)}') + try: + return obj_cls(**args) + except Exception as e: + # Normal TypeError does not print class name. + raise type(e)(f'{obj_cls.__name__}: {e}') + + +class Registry: + """A registry to map strings to classes or functions. + Registered object could be built from registry. Meanwhile, registered + functions could be called from registry. + Example: + >>> MODELS = Registry('models') + >>> @MODELS.register_module() + >>> class ResNet: + >>> pass + >>> resnet = MODELS.build(dict(type='ResNet')) + >>> @MODELS.register_module() + >>> def resnet50(): + >>> pass + >>> resnet = MODELS.build(dict(type='resnet50')) + Please refer to + https://mmcv.readthedocs.io/en/latest/understand_mmcv/registry.html for + advanced usage. + Args: + name (str): Registry name. + build_func(func, optional): Build function to construct instance from + Registry, func:`build_from_cfg` is used if neither ``parent`` or + ``build_func`` is specified. If ``parent`` is specified and + ``build_func`` is not given, ``build_func`` will be inherited + from ``parent``. Default: None. + parent (Registry, optional): Parent registry. The class registered in + children registry could be built from parent. Default: None. + scope (str, optional): The scope of registry. It is the key to search + for children registry. If not specified, scope will be the name of + the package where class is defined, e.g. mmdet, mmcls, mmseg. + Default: None. + """ + + def __init__(self, name, build_func=None, parent=None, scope=None): + self._name = name + self._module_dict = dict() + self._children = dict() + self._scope = self.infer_scope() if scope is None else scope + + # self.build_func will be set with the following priority: + # 1. build_func + # 2. parent.build_func + # 3. build_from_cfg + if build_func is None: + if parent is not None: + self.build_func = parent.build_func + else: + self.build_func = build_from_cfg + else: + self.build_func = build_func + if parent is not None: + assert isinstance(parent, Registry) + parent._add_children(self) + self.parent = parent + else: + self.parent = None + + def __len__(self): + return len(self._module_dict) + + def __contains__(self, key): + return self.get(key) is not None + + def __repr__(self): + format_str = self.__class__.__name__ + \ + f'(name={self._name}, ' \ + f'items={self._module_dict})' + return format_str + + @staticmethod + def infer_scope(): + """Infer the scope of registry. 
+ The name of the package where registry is defined will be returned. + Example: + >>> # in mmdet/models/backbone/resnet.py + >>> MODELS = Registry('models') + >>> @MODELS.register_module() + >>> class ResNet: + >>> pass + The scope of ``ResNet`` will be ``mmdet``. + Returns: + str: The inferred scope name. + """ + # We access the caller using inspect.currentframe() instead of + # inspect.stack() for performance reasons. See details in PR #1844 + frame = inspect.currentframe() + # get the frame where `infer_scope()` is called + infer_scope_caller = frame.f_back.f_back + filename = inspect.getmodule(infer_scope_caller).__name__ + split_filename = filename.split('.') + return split_filename[0] + + @staticmethod + def split_scope_key(key): + """Split scope and key. + The first scope will be split from key. + Examples: + >>> Registry.split_scope_key('mmdet.ResNet') + 'mmdet', 'ResNet' + >>> Registry.split_scope_key('ResNet') + None, 'ResNet' + Return: + tuple[str | None, str]: The former element is the first scope of + the key, which can be ``None``. The latter is the remaining key. + """ + split_index = key.find('.') + if split_index != -1: + return key[:split_index], key[split_index + 1:] + else: + return None, key + + @property + def name(self): + return self._name + + @property + def scope(self): + return self._scope + + @property + def module_dict(self): + return self._module_dict + + @property + def children(self): + return self._children + + def get(self, key): + """Get the registry record. + Args: + key (str): The class name in string format. + Returns: + class: The corresponding class. + """ + scope, real_key = self.split_scope_key(key) + if scope is None or scope == self._scope: + # get from self + if real_key in self._module_dict: + return self._module_dict[real_key] + else: + # get from self._children + if scope in self._children: + return self._children[scope].get(real_key) + else: + # goto root + parent = self.parent + while parent.parent is not None: + parent = parent.parent + return parent.get(key) + + def build(self, *args, **kwargs): + return self.build_func(*args, **kwargs, registry=self) + + def _add_children(self, registry): + """Add children for a registry. + The ``registry`` will be added as children based on its scope. + The parent registry could build objects from children registry. 
+ Example: + >>> models = Registry('models') + >>> mmdet_models = Registry('models', parent=models) + >>> @mmdet_models.register_module() + >>> class ResNet: + >>> pass + >>> resnet = models.build(dict(type='mmdet.ResNet')) + """ + + assert isinstance(registry, Registry) + assert registry.scope is not None + assert registry.scope not in self.children, \ + f'scope {registry.scope} exists in {self.name} registry' + self.children[registry.scope] = registry + + @deprecated_api_warning(name_dict=dict(module_class='module')) + def _register_module(self, module, module_name=None, force=False): + if not inspect.isclass(module) and not inspect.isfunction(module): + raise TypeError('module must be a class or a function, ' + f'but got {type(module)}') + + if module_name is None: + module_name = module.__name__ + if isinstance(module_name, str): + module_name = [module_name] + for name in module_name: + if not force and name in self._module_dict: + raise KeyError(f'{name} is already registered ' + f'in {self.name}') + self._module_dict[name] = module + + def deprecated_register_module(self, cls=None, force=False): + warnings.warn( + 'The old API of register_module(module, force=False) ' + 'is deprecated and will be removed, please use the new API ' + 'register_module(name=None, force=False, module=None) instead.', + DeprecationWarning) + if cls is None: + return partial(self.deprecated_register_module, force=force) + self._register_module(cls, force=force) + return cls + + def register_module(self, name=None, force=False, module=None): + """Register a module. + A record will be added to `self._module_dict`, whose key is the class + name or the specified name, and value is the class itself. + It can be used as a decorator or a normal function. + Example: + >>> backbones = Registry('backbone') + >>> @backbones.register_module() + >>> class ResNet: + >>> pass + >>> backbones = Registry('backbone') + >>> @backbones.register_module(name='mnet') + >>> class MobileNet: + >>> pass + >>> backbones = Registry('backbone') + >>> class ResNet: + >>> pass + >>> backbones.register_module(ResNet) + Args: + name (str | None): The module name to be registered. If not + specified, the class name will be used. + force (bool, optional): Whether to override an existing class with + the same name. Default: False. + module (type): Module class or function to be registered. + """ + if not isinstance(force, bool): + raise TypeError(f'force must be a boolean, but got {type(force)}') + # NOTE: This is a walkaround to be compatible with the old api, + # while it may introduce unexpected bugs. 
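+        # (the old usage ``x.register_module(SomeClass)`` passed the class
+        # positionally through ``name``, handled by the deprecated branch below)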
+ if isinstance(name, type): + return self.deprecated_register_module(name, force=force) + + # raise the error ahead of time + if not (name is None or isinstance(name, str) or is_seq_of(name, str)): + raise TypeError( + 'name must be either of None, an instance of str or a sequence' + f' of str, but got {type(name)}') + + # use it as a normal method: x.register_module(module=SomeClass) + if module is not None: + self._register_module(module=module, module_name=name, force=force) + return module + + # use it as a decorator: @x.register_module() + def _register(module): + self._register_module(module=module, module_name=name, force=force) + return module + + return _register diff --git a/register/__pycache__/misc.cpython-310.pyc b/register/__pycache__/misc.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6f5baca42282acb728754828f5be8f34b08e762d Binary files /dev/null and b/register/__pycache__/misc.cpython-310.pyc differ diff --git a/register/__pycache__/misc.cpython-311.pyc b/register/__pycache__/misc.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..78dc475d587f1c1faba91313bf6e99c41661e729 Binary files /dev/null and b/register/__pycache__/misc.cpython-311.pyc differ diff --git a/register/__pycache__/misc.cpython-312.pyc b/register/__pycache__/misc.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..41ae39ac567093d181ba9677afba09ccf4fa7516 Binary files /dev/null and b/register/__pycache__/misc.cpython-312.pyc differ diff --git a/register/__pycache__/register.cpython-310.pyc b/register/__pycache__/register.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6746599c41a5e4ae88d7ea286a5858068b66929d Binary files /dev/null and b/register/__pycache__/register.cpython-310.pyc differ diff --git a/register/__pycache__/register.cpython-311.pyc b/register/__pycache__/register.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9b78477f126e8c9b91f7854d182d02b79d35b44c Binary files /dev/null and b/register/__pycache__/register.cpython-311.pyc differ diff --git a/register/__pycache__/register.cpython-312.pyc b/register/__pycache__/register.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fba585886bc7624208d9154002178866d6d2f939 Binary files /dev/null and b/register/__pycache__/register.cpython-312.pyc differ diff --git a/register/misc.py b/register/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..ccc3bb3707083e8a0454c9da3715ced0ec498cb3 --- /dev/null +++ b/register/misc.py @@ -0,0 +1,352 @@ +#-*- coding: utf-8 -*- +import collections.abc +import functools +import itertools +import subprocess +import warnings +from collections import abc +from importlib import import_module +from inspect import getfullargspec +from itertools import repeat + + +# From PyTorch internals +def _ntuple(n): + + def parse(x): + if isinstance(x, collections.abc.Iterable): + return x + return tuple(repeat(x, n)) + + return parse + + +to_1tuple = _ntuple(1) +to_2tuple = _ntuple(2) +to_3tuple = _ntuple(3) +to_4tuple = _ntuple(4) +to_ntuple = _ntuple + + +def is_str(x): + """Whether the input is an string instance. + Note: This method is deprecated since python 2 is no longer supported. + """ + return isinstance(x, str) + + +def import_modules_from_strings(imports, allow_failed_imports=False): + """Import modules from the given list of strings. 
+ Args: + imports (list | str | None): The given module names to be imported. + allow_failed_imports (bool): If True, the failed imports will return + None. Otherwise, an ImportError is raise. Default: False. + Returns: + list[module] | module | None: The imported modules. + Examples: + >>> osp, sys = import_modules_from_strings( + ... ['os.path', 'sys']) + >>> import os.path as osp_ + >>> import sys as sys_ + >>> assert osp == osp_ + >>> assert sys == sys_ + """ + if not imports: + return + single_import = False + if isinstance(imports, str): + single_import = True + imports = [imports] + if not isinstance(imports, list): + raise TypeError( + f'custom_imports must be a list but got type {type(imports)}') + imported = [] + for imp in imports: + if not isinstance(imp, str): + raise TypeError( + f'{imp} is of type {type(imp)} and cannot be imported.') + try: + imported_tmp = import_module(imp) + except ImportError: + if allow_failed_imports: + warnings.warn(f'{imp} failed to import and is ignored.', + UserWarning) + imported_tmp = None + else: + raise ImportError + imported.append(imported_tmp) + if single_import: + imported = imported[0] + return imported + + +def iter_cast(inputs, dst_type, return_type=None): + """Cast elements of an iterable object into some type. + Args: + inputs (Iterable): The input object. + dst_type (type): Destination type. + return_type (type, optional): If specified, the output object will be + converted to this type, otherwise an iterator. + Returns: + iterator or specified type: The converted object. + """ + if not isinstance(inputs, abc.Iterable): + raise TypeError('inputs must be an iterable object') + if not isinstance(dst_type, type): + raise TypeError('"dst_type" must be a valid type') + + out_iterable = map(dst_type, inputs) + + if return_type is None: + return out_iterable + else: + return return_type(out_iterable) + + +def list_cast(inputs, dst_type): + """Cast elements of an iterable object into a list of some type. + A partial method of :func:`iter_cast`. + """ + return iter_cast(inputs, dst_type, return_type=list) + + +def tuple_cast(inputs, dst_type): + """Cast elements of an iterable object into a tuple of some type. + A partial method of :func:`iter_cast`. + """ + return iter_cast(inputs, dst_type, return_type=tuple) + + +def is_seq_of(seq, expected_type, seq_type=None): + """Check whether it is a sequence of some type. + Args: + seq (Sequence): The sequence to be checked. + expected_type (type): Expected type of sequence items. + seq_type (type, optional): Expected sequence type. + Returns: + bool: Whether the sequence is valid. + """ + if seq_type is None: + exp_seq_type = abc.Sequence + else: + assert isinstance(seq_type, type) + exp_seq_type = seq_type + if not isinstance(seq, exp_seq_type): + return False + for item in seq: + if not isinstance(item, expected_type): + return False + return True + + +def is_list_of(seq, expected_type): + """Check whether it is a list of some type. + A partial method of :func:`is_seq_of`. + """ + return is_seq_of(seq, expected_type, seq_type=list) + + +def is_tuple_of(seq, expected_type): + """Check whether it is a tuple of some type. + A partial method of :func:`is_seq_of`. + """ + return is_seq_of(seq, expected_type, seq_type=tuple) + + +def slice_list(in_list, lens): + """Slice a list into several sub lists by a list of given length. + Args: + in_list (list): The list to be sliced. + lens(int or list): The expected length of each out list. + Returns: + list: A list of sliced list. 
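+    Example:
+        >>> slice_list([1, 2, 3, 4, 5, 6], [2, 4])
+        [[1, 2], [3, 4, 5, 6]]
+        >>> slice_list([1, 2, 3, 4], 2)
+        [[1, 2], [3, 4]]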
+ """ + if isinstance(lens, int): + assert len(in_list) % lens == 0 + lens = [lens] * int(len(in_list) / lens) + if not isinstance(lens, list): + raise TypeError('"indices" must be an integer or a list of integers') + elif sum(lens) != len(in_list): + raise ValueError('sum of lens and list length does not ' + f'match: {sum(lens)} != {len(in_list)}') + out_list = [] + idx = 0 + for i in range(len(lens)): + out_list.append(in_list[idx:idx + lens[i]]) + idx += lens[i] + return out_list + + +def concat_list(in_list): + """Concatenate a list of list into a single list. + Args: + in_list (list): The list of list to be merged. + Returns: + list: The concatenated flat list. + """ + return list(itertools.chain(*in_list)) + + +def check_prerequisites( + prerequisites, + checker, + msg_tmpl='Prerequisites "{}" are required in method "{}" but not ' + 'found, please install them first.'): # yapf: disable + """A decorator factory to check if prerequisites are satisfied. + Args: + prerequisites (str of list[str]): Prerequisites to be checked. + checker (callable): The checker method that returns True if a + prerequisite is meet, False otherwise. + msg_tmpl (str): The message template with two variables. + Returns: + decorator: A specific decorator. + """ + + def wrap(func): + + @functools.wraps(func) + def wrapped_func(*args, **kwargs): + requirements = [prerequisites] if isinstance( + prerequisites, str) else prerequisites + missing = [] + for item in requirements: + if not checker(item): + missing.append(item) + if missing: + print(msg_tmpl.format(', '.join(missing), func.__name__)) + raise RuntimeError('Prerequisites not meet.') + else: + return func(*args, **kwargs) + + return wrapped_func + + return wrap + + +def _check_py_package(package): + try: + import_module(package) + except ImportError: + return False + else: + return True + + +def _check_executable(cmd): + if subprocess.call(f'which {cmd}', shell=True) != 0: + return False + else: + return True + + +def requires_package(prerequisites): + """A decorator to check if some python packages are installed. + Example: + >>> @requires_package('numpy') + >>> func(arg1, args): + >>> return numpy.zeros(1) + array([0.]) + >>> @requires_package(['numpy', 'non_package']) + >>> func(arg1, args): + >>> return numpy.zeros(1) + ImportError + """ + return check_prerequisites(prerequisites, checker=_check_py_package) + + +def requires_executable(prerequisites): + """A decorator to check if some executable files are installed. + Example: + >>> @requires_executable('ffmpeg') + >>> func(arg1, args): + >>> print(1) + 1 + """ + return check_prerequisites(prerequisites, checker=_check_executable) + + +def deprecated_api_warning(name_dict, cls_name=None): + """A decorator to check if some arguments are deprecate and try to replace + deprecate src_arg_name to dst_arg_name. + Args: + name_dict(dict): + key (str): Deprecate argument names. + val (str): Expected argument names. + Returns: + func: New function. 
+ """ + + def api_warning_wrapper(old_func): + + @functools.wraps(old_func) + def new_func(*args, **kwargs): + # get the arg spec of the decorated method + args_info = getfullargspec(old_func) + # get name of the function + func_name = old_func.__name__ + if cls_name is not None: + func_name = f'{cls_name}.{func_name}' + if args: + arg_names = args_info.args[:len(args)] + for src_arg_name, dst_arg_name in name_dict.items(): + if src_arg_name in arg_names: + warnings.warn( + f'"{src_arg_name}" is deprecated in ' + f'`{func_name}`, please use "{dst_arg_name}" ' + 'instead', DeprecationWarning) + arg_names[arg_names.index(src_arg_name)] = dst_arg_name + if kwargs: + for src_arg_name, dst_arg_name in name_dict.items(): + if src_arg_name in kwargs: + + assert dst_arg_name not in kwargs, ( + f'The expected behavior is to replace ' + f'the deprecated key `{src_arg_name}` to ' + f'new key `{dst_arg_name}`, but got them ' + f'in the arguments at the same time, which ' + f'is confusing. `{src_arg_name} will be ' + f'deprecated in the future, please ' + f'use `{dst_arg_name}` instead.') + + warnings.warn( + f'"{src_arg_name}" is deprecated in ' + f'`{func_name}`, please use "{dst_arg_name}" ' + 'instead', DeprecationWarning) + kwargs[dst_arg_name] = kwargs.pop(src_arg_name) + + # apply converted arguments to the decorated method + output = old_func(*args, **kwargs) + return output + + return new_func + + return api_warning_wrapper + + +def is_method_overridden(method, base_class, derived_class): + """Check if a method of base class is overridden in derived class. + Args: + method (str): the method name to check. + base_class (type): the class of the base class. + derived_class (type | Any): the class or instance of the derived class. + """ + assert isinstance(base_class, type), \ + "base_class doesn't accept instance, Please pass class instead." + + if not isinstance(derived_class, type): + derived_class = derived_class.__class__ + + base_method = getattr(base_class, method) + derived_method = getattr(derived_class, method) + return derived_method != base_method + + +def has_method(obj: object, method: str) -> bool: + """Check whether the object has a method. + Args: + method (str): The method name to check. + obj (object): The object to check. + Returns: + bool: True if the object has the method else False. + """ + return hasattr(obj, method) and callable(getattr(obj, method)) diff --git a/register/register.py b/register/register.py new file mode 100644 index 0000000000000000000000000000000000000000..edc1cd2d240adc65839f271672fe2a82a3f71e2e --- /dev/null +++ b/register/register.py @@ -0,0 +1,318 @@ +#-*- coding: utf-8 -*- +import inspect +import warnings +from functools import partial +from typing import Any, Dict, Optional + +from .misc import deprecated_api_warning, is_seq_of + + +def build_from_cfg(cfg: Dict, + registry: 'Registry', + default_args: Optional[Dict] = None) -> Any: + """Build a module from config dict when it is a class configuration, or + call a function from config dict when it is a function configuration. + Example: + >>> MODELS = Registry('models') + >>> @MODELS.register_module() + >>> class ResNet: + >>> pass + >>> resnet = build_from_cfg(dict(type='Resnet'), MODELS) + >>> # Returns an instantiated object + >>> @MODELS.register_module() + >>> def resnet50(): + >>> pass + >>> resnet = build_from_cfg(dict(type='resnet50'), MODELS) + >>> # Return a result of the calling function + Args: + cfg (dict): Config dict. It should at least contain the key "type". 
diff --git a/register/register.py b/register/register.py
new file mode 100644
index 0000000000000000000000000000000000000000..edc1cd2d240adc65839f271672fe2a82a3f71e2e
--- /dev/null
+++ b/register/register.py
@@ -0,0 +1,318 @@
+# -*- coding: utf-8 -*-
+import inspect
+import warnings
+from functools import partial
+from typing import Any, Dict, Optional
+
+from .misc import deprecated_api_warning, is_seq_of
+
+
+def build_from_cfg(cfg: Dict,
+                   registry: 'Registry',
+                   default_args: Optional[Dict] = None) -> Any:
+    """Build a module from a config dict when it is a class configuration, or
+    call a function from a config dict when it is a function configuration.
+    Example:
+        >>> MODELS = Registry('models')
+        >>> @MODELS.register_module()
+        >>> class ResNet:
+        >>>     pass
+        >>> resnet = build_from_cfg(dict(type='ResNet'), MODELS)
+        >>> # Returns an instantiated object
+        >>> @MODELS.register_module()
+        >>> def resnet50():
+        >>>     pass
+        >>> resnet = build_from_cfg(dict(type='resnet50'), MODELS)
+        >>> # Returns the result of calling the function
+    Args:
+        cfg (dict): Config dict. It should at least contain the key "type".
+        registry (:obj:`Registry`): The registry to search the type from.
+        default_args (dict, optional): Default initialization arguments.
+    Returns:
+        object: The constructed object.
+    """
+    if not isinstance(cfg, dict):
+        raise TypeError(f'cfg must be a dict, but got {type(cfg)}')
+    if 'type' not in cfg:
+        if default_args is None or 'type' not in default_args:
+            raise KeyError(
+                '`cfg` or `default_args` must contain the key "type", '
+                f'but got {cfg}\n{default_args}')
+    if not isinstance(registry, Registry):
+        raise TypeError('registry must be a Registry object, '
+                        f'but got {type(registry)}')
+    if not (isinstance(default_args, dict) or default_args is None):
+        raise TypeError('default_args must be a dict or None, '
+                        f'but got {type(default_args)}')
+
+    args = cfg.copy()
+
+    if default_args is not None:
+        for name, value in default_args.items():
+            args.setdefault(name, value)
+
+    obj_type = args.pop('type')
+    if isinstance(obj_type, str):
+        obj_cls = registry.get(obj_type)
+        if obj_cls is None:
+            raise KeyError(
+                f'{obj_type} is not in the {registry.name} registry')
+    elif inspect.isclass(obj_type) or inspect.isfunction(obj_type):
+        obj_cls = obj_type
+    else:
+        raise TypeError(
+            f'type must be a str or valid type, but got {type(obj_type)}')
+    try:
+        return obj_cls(**args)
+    except Exception as e:
+        # A plain exception would not print the class name, so re-raise it
+        # with the class name prepended for easier debugging.
+        raise type(e)(f'{obj_cls.__name__}: {e}')
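This is the mechanism that lets a config dict, like the detector YAMLs under configs/, be turned into an object once a registry is populated. A minimal sketch with hypothetical names (annotation only, not taken from this repo):

    from register.register import Registry, build_from_cfg

    DETECTORS = Registry('detectors')

    @DETECTORS.register_module()
    class DummyDetector:
        def __init__(self, num_classes=2):
            self.num_classes = num_classes

    # 'type' selects the registered name; remaining keys become kwargs
    det = build_from_cfg(dict(type='DummyDetector', num_classes=2), DETECTORS)
    assert det.num_classes == 2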
+class Registry:
+    """A registry that maps strings to classes or functions.
+    Registered objects can be built from the registry, and registered
+    functions can be called from the registry.
+    Example:
+        >>> MODELS = Registry('models')
+        >>> @MODELS.register_module()
+        >>> class ResNet:
+        >>>     pass
+        >>> resnet = MODELS.build(dict(type='ResNet'))
+        >>> @MODELS.register_module()
+        >>> def resnet50():
+        >>>     pass
+        >>> resnet = MODELS.build(dict(type='resnet50'))
+    Please refer to
+    https://mmcv.readthedocs.io/en/latest/understand_mmcv/registry.html for
+    advanced usage.
+    Args:
+        name (str): Registry name.
+        build_func (func, optional): Build function to construct an instance
+            from the Registry. :func:`build_from_cfg` is used if neither
+            ``parent`` nor ``build_func`` is specified. If ``parent`` is
+            specified and ``build_func`` is not given, ``build_func`` will be
+            inherited from ``parent``. Default: None.
+        parent (Registry, optional): Parent registry. Classes registered in
+            a children registry can be built from the parent. Default: None.
+        scope (str, optional): The scope of the registry. It is the key to
+            search for children registries. If not specified, scope will be
+            the name of the package where the class is defined, e.g. mmdet,
+            mmcls, mmseg. Default: None.
+    """
+
+    def __init__(self, name, build_func=None, parent=None, scope=None):
+        self._name = name
+        self._module_dict = dict()
+        self._children = dict()
+        self._scope = self.infer_scope() if scope is None else scope
+
+        # self.build_func will be set with the following priority:
+        # 1. build_func
+        # 2. parent.build_func
+        # 3. build_from_cfg
+        if build_func is None:
+            if parent is not None:
+                self.build_func = parent.build_func
+            else:
+                self.build_func = build_from_cfg
+        else:
+            self.build_func = build_func
+        if parent is not None:
+            assert isinstance(parent, Registry)
+            parent._add_children(self)
+            self.parent = parent
+        else:
+            self.parent = None
+
+    def __len__(self):
+        return len(self._module_dict)
+
+    def __contains__(self, key):
+        return self.get(key) is not None
+
+    def __repr__(self):
+        format_str = self.__class__.__name__ + \
+                     f'(name={self._name}, ' \
+                     f'items={self._module_dict})'
+        return format_str
+
+    @staticmethod
+    def infer_scope():
+        """Infer the scope of the registry.
+        The name of the package where the registry is defined will be
+        returned.
+        Example:
+            >>> # in mmdet/models/backbone/resnet.py
+            >>> MODELS = Registry('models')
+            >>> @MODELS.register_module()
+            >>> class ResNet:
+            >>>     pass
+        The scope of ``ResNet`` will be ``mmdet``.
+        Returns:
+            str: The inferred scope name.
+        """
+        # We access the caller using inspect.currentframe() instead of
+        # inspect.stack() for performance reasons. See details in PR #1844
+        frame = inspect.currentframe()
+        # get the frame where `infer_scope()` is called
+        infer_scope_caller = frame.f_back.f_back
+        filename = inspect.getmodule(infer_scope_caller).__name__
+        split_filename = filename.split('.')
+        return split_filename[0]
+
+    @staticmethod
+    def split_scope_key(key):
+        """Split the scope and the key.
+        The first scope will be split from the key.
+        Examples:
+            >>> Registry.split_scope_key('mmdet.ResNet')
+            'mmdet', 'ResNet'
+            >>> Registry.split_scope_key('ResNet')
+            None, 'ResNet'
+        Returns:
+            tuple[str | None, str]: The former element is the first scope of
+            the key, which can be ``None``. The latter is the remaining key.
+        """
+        split_index = key.find('.')
+        if split_index != -1:
+            return key[:split_index], key[split_index + 1:]
+        else:
+            return None, key
+
+    @property
+    def name(self):
+        return self._name
+
+    @property
+    def scope(self):
+        return self._scope
+
+    @property
+    def module_dict(self):
+        return self._module_dict
+
+    @property
+    def children(self):
+        return self._children
+
+    def get(self, key):
+        """Get the registry record.
+        Args:
+            key (str): The class name in string format.
+        Returns:
+            class: The corresponding class.
+        """
+        scope, real_key = self.split_scope_key(key)
+        if scope is None or scope == self._scope:
+            # get from self
+            if real_key in self._module_dict:
+                return self._module_dict[real_key]
+        else:
+            # get from self._children
+            if scope in self._children:
+                return self._children[scope].get(real_key)
+            else:
+                # go to the root registry
+                parent = self.parent
+                while parent.parent is not None:
+                    parent = parent.parent
+                return parent.get(key)
+
+    def build(self, *args, **kwargs):
+        return self.build_func(*args, **kwargs, registry=self)
+    def _add_children(self, registry):
+        """Add a child registry.
+        The ``registry`` will be added as a child based on its scope.
+        The parent registry can build objects from a children registry.
+        Example:
+            >>> models = Registry('models')
+            >>> mmdet_models = Registry('models', parent=models)
+            >>> @mmdet_models.register_module()
+            >>> class ResNet:
+            >>>     pass
+            >>> resnet = models.build(dict(type='mmdet.ResNet'))
+        """
+
+        assert isinstance(registry, Registry)
+        assert registry.scope is not None
+        assert registry.scope not in self.children, \
+            f'scope {registry.scope} exists in {self.name} registry'
+        self.children[registry.scope] = registry
+
+    @deprecated_api_warning(name_dict=dict(module_class='module'))
+    def _register_module(self, module, module_name=None, force=False):
+        if not inspect.isclass(module) and not inspect.isfunction(module):
+            raise TypeError('module must be a class or a function, '
+                            f'but got {type(module)}')
+
+        if module_name is None:
+            module_name = module.__name__
+        if isinstance(module_name, str):
+            module_name = [module_name]
+        for name in module_name:
+            if not force and name in self._module_dict:
+                raise KeyError(f'{name} is already registered '
+                               f'in {self.name}')
+            self._module_dict[name] = module
+
+    def deprecated_register_module(self, cls=None, force=False):
+        warnings.warn(
+            'The old API of register_module(module, force=False) '
+            'is deprecated and will be removed, please use the new API '
+            'register_module(name=None, force=False, module=None) instead.',
+            DeprecationWarning)
+        if cls is None:
+            return partial(self.deprecated_register_module, force=force)
+        self._register_module(cls, force=force)
+        return cls
+
+    def register_module(self, name=None, force=False, module=None):
+        """Register a module.
+        A record will be added to ``self._module_dict``, whose key is the
+        class name or the specified name, and whose value is the class
+        itself. It can be used as a decorator or a normal function.
+        Example:
+            >>> backbones = Registry('backbone')
+            >>> @backbones.register_module()
+            >>> class ResNet:
+            >>>     pass
+            >>> backbones = Registry('backbone')
+            >>> @backbones.register_module(name='mnet')
+            >>> class MobileNet:
+            >>>     pass
+            >>> backbones = Registry('backbone')
+            >>> class ResNet:
+            >>>     pass
+            >>> backbones.register_module(module=ResNet)
+        Args:
+            name (str | None): The module name to be registered. If not
+                specified, the class name will be used.
+            force (bool, optional): Whether to override an existing class
+                with the same name. Default: False.
+            module (type): Module class or function to be registered.
+        """
+        if not isinstance(force, bool):
+            raise TypeError(f'force must be a boolean, but got {type(force)}')
+        # NOTE: This is a workaround to stay compatible with the old API,
+        # though it may introduce unexpected bugs.
+        if isinstance(name, type):
+            return self.deprecated_register_module(name, force=force)
+
+        # raise the error ahead of time
+        if not (name is None or isinstance(name, str) or is_seq_of(name, str)):
+            raise TypeError(
+                'name must be None, an instance of str, or a sequence of '
+                f'str, but got {type(name)}')
+
+        # use it as a normal method: x.register_module(module=SomeClass)
+        if module is not None:
+            self._register_module(module=module, module_name=name, force=force)
+            return module
+
+        # use it as a decorator: @x.register_module()
+        def _register(module):
+            self._register_module(module=module, module_name=name, force=force)
+            return module
+
+        return _register
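To tie the pieces together, a minimal end-to-end sketch of the parent/child scope mechanism defined above (all names hypothetical, annotation only; run as a script so the inferred root scope does not collide with 'child'):

    from register.register import Registry

    MODELS = Registry('models')                        # root registry
    CHILD = Registry('models', parent=MODELS, scope='child')

    @CHILD.register_module(name='tiny')
    def build_tiny(width=16):
        return {'width': width}

    # the parent resolves the scoped key by delegating to its child
    model = MODELS.build(dict(type='child.tiny', width=32))
    assert model == {'width': 32}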
diff --git a/test3.png b/test3.png
new file mode 100644
index 0000000000000000000000000000000000000000..e7065b62e0e3ec4039e0370e75260c701fbee4d4
--- /dev/null
+++ b/test3.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59a90f43b43052e8a06fd45af9de469782c8970ff888761340f4f32af3208887
+size 731203
diff --git a/test5.png b/test5.png
new file mode 100644
index 0000000000000000000000000000000000000000..1043cfec240a247a4e9ebf3ab339f0333af211dc
--- /dev/null
+++ b/test5.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94f7462ec42da02d1c7278a6bd99fcdb433c561b20f5cfc7b1073097abaebd2c
+size 2944986