deep
- .gitattributes +2 -0
- .gitignore +11 -0
- Dockerfile +55 -0
- LICENSE +201 -0
- README.md +207 -10
- config.yaml +4 -0
- convert_tf_to_pt.sh +6 -0
- copy_weights.py +36 -0
- datasets.py +162 -0
- detect_faces_on_videos.py +82 -0
- dsfacedetector/__init__.py +0 -0
- dsfacedetector/data/__init__.py +0 -0
- dsfacedetector/data/config.py +57 -0
- dsfacedetector/face_ssd_infer.py +156 -0
- dsfacedetector/layers/__init__.py +3 -0
- dsfacedetector/layers/detection.py +157 -0
- dsfacedetector/layers/modules.py +98 -0
- dsfacedetector/layers/prior_box.py +133 -0
- dsfacedetector/utils.py +101 -0
- external_data/convert_tf_to_pt.py +174 -0
- external_data/original_tf/__init__.py +0 -0
- external_data/original_tf/efficientnet_builder.py +329 -0
- external_data/original_tf/efficientnet_model.py +713 -0
- external_data/original_tf/eval_ckpt_main.py +221 -0
- external_data/original_tf/preprocessing.py +241 -0
- external_data/original_tf/utils.py +405 -0
- extract_tracks_from_videos.py +105 -0
- generate_aligned_tracks.py +99 -0
- generate_track_pairs.py +70 -0
- generate_tracks.py +70 -0
- images/augmented_mixup.jpg +3 -0
- images/clip_example.jpg +0 -0
- images/first_and_second_model_inputs.jpg +0 -0
- images/mixup_example.jpg +3 -0
- images/pred_transform.jpg +0 -0
- images/third_model_input.jpg +0 -0
- models/.gitkeep +0 -0
- predict.py +399 -0
- tracker/__init__.py +0 -0
- tracker/iou_tracker.py +58 -0
- tracker/utils.py +35 -0
- train_b7_ns_aa_original_large_crop_100k.py +257 -0
- train_b7_ns_aa_original_re_100k.py +266 -0
- train_b7_ns_seq_aa_original_100k.py +281 -0
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+images/augmented_mixup.jpg filter=lfs diff=lfs merge=lfs -text
+images/mixup_example.jpg filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,11 @@
# PyCharm
.idea

# Jupyter Notebook
.ipynb_checkpoints

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
Dockerfile
ADDED
@@ -0,0 +1,55 @@
FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu16.04

SHELL ["/bin/bash", "-c"]

RUN rm /etc/apt/sources.list.d/cuda.list \
       /etc/apt/sources.list.d/nvidia-ml.list && \
    apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        software-properties-common \
        wget \
        git && \
    add-apt-repository -y ppa:deadsnakes/ppa && \
    apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        python3.6 \
        python3.6-dev && \
    wget -O ~/get-pip.py \
        https://bootstrap.pypa.io/get-pip.py && \
    python3.6 ~/get-pip.py && \
    pip3 --no-cache-dir install \
        numpy==1.17.4 \
        PyYAML==5.1.2 \
        mkl==2019.0 \
        mkl-include==2019.0 \
        cmake==3.15.3 \
        cffi==1.13.2 \
        typing==3.7.4.1 \
        six==1.13.0 \
        Pillow==6.2.1 \
        scipy==1.4.1 && \
    cd /tmp && \
    git clone https://github.com/pytorch/pytorch.git && \
    cd pytorch && \
    git checkout v1.3.0 && \
    git submodule update --init --recursive && \
    python3.6 setup.py install && \
    cd /tmp && \
    git clone https://github.com/pytorch/vision.git && \
    cd vision && \
    git checkout v0.4.1 && \
    python3.6 setup.py install && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ffmpeg && \
    pip3 --no-cache-dir install \
        opencv-python==4.1.2.30 \
        albumentations==0.4.3 \
        tqdm==4.39.0 \
        timm==0.1.18 \
        efficientnet-pytorch==0.6.3 \
        ffmpeg-python==0.2.0 \
        tensorflow==1.15.2 && \
    cd / && \
    apt-get clean && \
    apt-get autoremove && \
    rm -rf /var/lib/apt/lists/* /tmp/*
LICENSE
ADDED
|
@@ -0,0 +1,201 @@
| 1 |
+
Apache License
|
| 2 |
+
Version 2.0, January 2004
|
| 3 |
+
http://www.apache.org/licenses/
|
| 4 |
+
|
| 5 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
| 6 |
+
|
| 7 |
+
1. Definitions.
|
| 8 |
+
|
| 9 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
| 10 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
| 11 |
+
|
| 12 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
| 13 |
+
the copyright owner that is granting the License.
|
| 14 |
+
|
| 15 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
| 16 |
+
other entities that control, are controlled by, or are under common
|
| 17 |
+
control with that entity. For the purposes of this definition,
|
| 18 |
+
"control" means (i) the power, direct or indirect, to cause the
|
| 19 |
+
direction or management of such entity, whether by contract or
|
| 20 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
| 21 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
| 22 |
+
|
| 23 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
| 24 |
+
exercising permissions granted by this License.
|
| 25 |
+
|
| 26 |
+
"Source" form shall mean the preferred form for making modifications,
|
| 27 |
+
including but not limited to software source code, documentation
|
| 28 |
+
source, and configuration files.
|
| 29 |
+
|
| 30 |
+
"Object" form shall mean any form resulting from mechanical
|
| 31 |
+
transformation or translation of a Source form, including but
|
| 32 |
+
not limited to compiled object code, generated documentation,
|
| 33 |
+
and conversions to other media types.
|
| 34 |
+
|
| 35 |
+
"Work" shall mean the work of authorship, whether in Source or
|
| 36 |
+
Object form, made available under the License, as indicated by a
|
| 37 |
+
copyright notice that is included in or attached to the work
|
| 38 |
+
(an example is provided in the Appendix below).
|
| 39 |
+
|
| 40 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
| 41 |
+
form, that is based on (or derived from) the Work and for which the
|
| 42 |
+
editorial revisions, annotations, elaborations, or other modifications
|
| 43 |
+
represent, as a whole, an original work of authorship. For the purposes
|
| 44 |
+
of this License, Derivative Works shall not include works that remain
|
| 45 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
| 46 |
+
the Work and Derivative Works thereof.
|
| 47 |
+
|
| 48 |
+
"Contribution" shall mean any work of authorship, including
|
| 49 |
+
the original version of the Work and any modifications or additions
|
| 50 |
+
to that Work or Derivative Works thereof, that is intentionally
|
| 51 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
| 52 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
| 53 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
| 54 |
+
means any form of electronic, verbal, or written communication sent
|
| 55 |
+
to the Licensor or its representatives, including but not limited to
|
| 56 |
+
communication on electronic mailing lists, source code control systems,
|
| 57 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
| 58 |
+
Licensor for the purpose of discussing and improving the Work, but
|
| 59 |
+
excluding communication that is conspicuously marked or otherwise
|
| 60 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
| 61 |
+
|
| 62 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
| 63 |
+
on behalf of whom a Contribution has been received by Licensor and
|
| 64 |
+
subsequently incorporated within the Work.
|
| 65 |
+
|
| 66 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
| 67 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 68 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 69 |
+
copyright license to reproduce, prepare Derivative Works of,
|
| 70 |
+
publicly display, publicly perform, sublicense, and distribute the
|
| 71 |
+
Work and such Derivative Works in Source or Object form.
|
| 72 |
+
|
| 73 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
| 74 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 75 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 76 |
+
(except as stated in this section) patent license to make, have made,
|
| 77 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
| 78 |
+
where such license applies only to those patent claims licensable
|
| 79 |
+
by such Contributor that are necessarily infringed by their
|
| 80 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
| 81 |
+
with the Work to which such Contribution(s) was submitted. If You
|
| 82 |
+
institute patent litigation against any entity (including a
|
| 83 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
| 84 |
+
or a Contribution incorporated within the Work constitutes direct
|
| 85 |
+
or contributory patent infringement, then any patent licenses
|
| 86 |
+
granted to You under this License for that Work shall terminate
|
| 87 |
+
as of the date such litigation is filed.
|
| 88 |
+
|
| 89 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
| 90 |
+
Work or Derivative Works thereof in any medium, with or without
|
| 91 |
+
modifications, and in Source or Object form, provided that You
|
| 92 |
+
meet the following conditions:
|
| 93 |
+
|
| 94 |
+
(a) You must give any other recipients of the Work or
|
| 95 |
+
Derivative Works a copy of this License; and
|
| 96 |
+
|
| 97 |
+
(b) You must cause any modified files to carry prominent notices
|
| 98 |
+
stating that You changed the files; and
|
| 99 |
+
|
| 100 |
+
(c) You must retain, in the Source form of any Derivative Works
|
| 101 |
+
that You distribute, all copyright, patent, trademark, and
|
| 102 |
+
attribution notices from the Source form of the Work,
|
| 103 |
+
excluding those notices that do not pertain to any part of
|
| 104 |
+
the Derivative Works; and
|
| 105 |
+
|
| 106 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
| 107 |
+
distribution, then any Derivative Works that You distribute must
|
| 108 |
+
include a readable copy of the attribution notices contained
|
| 109 |
+
within such NOTICE file, excluding those notices that do not
|
| 110 |
+
pertain to any part of the Derivative Works, in at least one
|
| 111 |
+
of the following places: within a NOTICE text file distributed
|
| 112 |
+
as part of the Derivative Works; within the Source form or
|
| 113 |
+
documentation, if provided along with the Derivative Works; or,
|
| 114 |
+
within a display generated by the Derivative Works, if and
|
| 115 |
+
wherever such third-party notices normally appear. The contents
|
| 116 |
+
of the NOTICE file are for informational purposes only and
|
| 117 |
+
do not modify the License. You may add Your own attribution
|
| 118 |
+
notices within Derivative Works that You distribute, alongside
|
| 119 |
+
or as an addendum to the NOTICE text from the Work, provided
|
| 120 |
+
that such additional attribution notices cannot be construed
|
| 121 |
+
as modifying the License.
|
| 122 |
+
|
| 123 |
+
You may add Your own copyright statement to Your modifications and
|
| 124 |
+
may provide additional or different license terms and conditions
|
| 125 |
+
for use, reproduction, or distribution of Your modifications, or
|
| 126 |
+
for any such Derivative Works as a whole, provided Your use,
|
| 127 |
+
reproduction, and distribution of the Work otherwise complies with
|
| 128 |
+
the conditions stated in this License.
|
| 129 |
+
|
| 130 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
| 131 |
+
any Contribution intentionally submitted for inclusion in the Work
|
| 132 |
+
by You to the Licensor shall be under the terms and conditions of
|
| 133 |
+
this License, without any additional terms or conditions.
|
| 134 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
| 135 |
+
the terms of any separate license agreement you may have executed
|
| 136 |
+
with Licensor regarding such Contributions.
|
| 137 |
+
|
| 138 |
+
6. Trademarks. This License does not grant permission to use the trade
|
| 139 |
+
names, trademarks, service marks, or product names of the Licensor,
|
| 140 |
+
except as required for reasonable and customary use in describing the
|
| 141 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
| 142 |
+
|
| 143 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
| 144 |
+
agreed to in writing, Licensor provides the Work (and each
|
| 145 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
| 146 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
| 147 |
+
implied, including, without limitation, any warranties or conditions
|
| 148 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
| 149 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
| 150 |
+
appropriateness of using or redistributing the Work and assume any
|
| 151 |
+
risks associated with Your exercise of permissions under this License.
|
| 152 |
+
|
| 153 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
| 154 |
+
whether in tort (including negligence), contract, or otherwise,
|
| 155 |
+
unless required by applicable law (such as deliberate and grossly
|
| 156 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
| 157 |
+
liable to You for damages, including any direct, indirect, special,
|
| 158 |
+
incidental, or consequential damages of any character arising as a
|
| 159 |
+
result of this License or out of the use or inability to use the
|
| 160 |
+
Work (including but not limited to damages for loss of goodwill,
|
| 161 |
+
work stoppage, computer failure or malfunction, or any and all
|
| 162 |
+
other commercial damages or losses), even if such Contributor
|
| 163 |
+
has been advised of the possibility of such damages.
|
| 164 |
+
|
| 165 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
| 166 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
| 167 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
| 168 |
+
or other liability obligations and/or rights consistent with this
|
| 169 |
+
License. However, in accepting such obligations, You may act only
|
| 170 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
| 171 |
+
of any other Contributor, and only if You agree to indemnify,
|
| 172 |
+
defend, and hold each Contributor harmless for any liability
|
| 173 |
+
incurred by, or claims asserted against, such Contributor by reason
|
| 174 |
+
of your accepting any such warranty or additional liability.
|
| 175 |
+
|
| 176 |
+
END OF TERMS AND CONDITIONS
|
| 177 |
+
|
| 178 |
+
APPENDIX: How to apply the Apache License to your work.
|
| 179 |
+
|
| 180 |
+
To apply the Apache License to your work, attach the following
|
| 181 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
| 182 |
+
replaced with your own identifying information. (Don't include
|
| 183 |
+
the brackets!) The text should be enclosed in the appropriate
|
| 184 |
+
comment syntax for the file format. We also recommend that a
|
| 185 |
+
file or class name and description of purpose be included on the
|
| 186 |
+
same "printed page" as the copyright notice for easier
|
| 187 |
+
identification within third-party archives.
|
| 188 |
+
|
| 189 |
+
Copyright 2020 N-TECH.LAB LTD
|
| 190 |
+
|
| 191 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
| 192 |
+
you may not use this file except in compliance with the License.
|
| 193 |
+
You may obtain a copy of the License at
|
| 194 |
+
|
| 195 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
| 196 |
+
|
| 197 |
+
Unless required by applicable law or agreed to in writing, software
|
| 198 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
| 199 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 200 |
+
See the License for the specific language governing permissions and
|
| 201 |
+
limitations under the License.
|
README.md
CHANGED
@@ -1,10 +1,207 @@
# Deepfake Detection Challenge
Solution for the [Deepfake Detection Challenge](https://www.kaggle.com/c/deepfake-detection-challenge).
Private LB score: **0.43452**
## Solution description
### Summary
Our solution consists of three EfficientNet-B7 models (we used the Noisy Student pre-trained weights). We did not use
external data, except for pre-trained weights. One model runs on frame sequences (a 3D convolution has been added to
each EfficientNet-B7 block). The other two models work frame-by-frame and differ in the size of the face crop and
augmentations during training. To tackle the overfitting problem, we used the mixup technique on aligned real-fake
pairs. In addition, we used the following augmentations: AutoAugment, Random Erasing, Random Crops, Random Flips, and
various video compression parameters. Video compression augmentation was done on-the-fly. To do this, short cropped
tracks (50 frames each) were saved in PNG format, and at each training iteration they were loaded and re-encoded with
random parameters using ffmpeg. Due to the mixup, model predictions were “uncertain”, so at the inference stage, model
confidence was strengthened by a simple transformation. The final prediction was obtained by averaging the predictions
of models with weights proportional to confidence. The total training and preprocessing time is approximately 5 days on
a DGX-1.
### Key ingredients
#### Mixup on aligned real-fake pairs
One of the main difficulties of this competition is severe overfitting. Initially, all models overfitted in 2-3 epochs
(the validation loss started to increase). The idea, which helped a lot with the overfitting, is to train the model on
a mix of real and fake faces: for each fake face, we take the corresponding real face from the original video (with the
same box coordinates and the same frame number) and do a linear combination of them. In terms of tensors, it is:
```python
input_tensor = (1.0 - target) * real_input_tensor + target * fake_input_tensor
```
where `target` is drawn from a Beta distribution with parameters alpha=beta=0.5. With these parameters, there is a very
high probability of picking values close to 0 or 1 (pure real or pure fake face). You can see examples below:

Because the real and fake samples are aligned, the background remains almost unchanged on interpolated samples,
which reduces overfitting and makes the model pay more attention to the face.
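This can be expressed as a small batch-level helper. Below is a minimal PyTorch sketch, assuming `real_faces` and `fake_faces` are aligned batches produced by the pair dataset; the helper name and interface are illustrative, not the exact training-script API:
```python
import torch
from torch.distributions import Beta

def mixup_aligned_pair(real_faces, fake_faces, alpha=0.5):
    """Blend aligned real/fake crops; the blend factor doubles as the soft label."""
    # One blend factor per sample, drawn from Beta(0.5, 0.5): mass concentrated near 0 and 1.
    target = Beta(alpha, alpha).sample((real_faces.size(0),)).to(real_faces.device)
    t = target.view(-1, 1, 1, 1)
    mixed = (1.0 - t) * real_faces + t * fake_faces
    return mixed, target  # target is used directly as the "fake" probability label
```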
#### Video compression augmentation
In the paper \[1\] it was pointed out that augmentations close to degradations seen in real-life video distributions
were applied to the test data. Specifically, these augmentations were (1) reduce the FPS of the video to 15; (2) reduce
the resolution of the video to 1/4 of its original size; and (3) reduce the overall encoding quality. In order to make
the model resistant to various video compression parameters, we added augmentations with random video encoding
parameters to training. It would be infeasible to apply such augmentations to the original videos on-the-fly during
training, so instead of the original videos, cropped (1.5x areas around the face) short (50 frames) clips were used.
Each clip was saved as separate frames in PNG format. An example of a clip is given below:

For on-the-fly augmentation, ffmpeg-python was used. At each iteration, the following parameters were randomly sampled
(see \[2\] and the sketch below):
- FPS (15 to 30)
- scale (0.25 to 1.0)
- CRF (17 to 40)
- random tuning option
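A rough ffmpeg-python sketch of one such re-encoding step, assuming the cropped clip has already been assembled into a temporary `.mp4`; the helper name, the tune-option list, and the even-size rounding are illustrative assumptions rather than the exact code in the training scripts:
```python
import random
import ffmpeg  # ffmpeg-python

def reencode_clip(src_path, dst_path):
    """Re-encode a short face clip with random H.264 compression parameters."""
    fps = random.randint(15, 30)
    scale = random.uniform(0.25, 1.0)
    crf = random.randint(17, 40)
    tune = random.choice(['film', 'animation', 'grain', 'fastdecode', 'zerolatency'])

    stream = ffmpeg.input(src_path)
    stream = stream.filter('fps', fps=fps)
    # trunc(iw*scale/2)*2 keeps the scaled width/height even, as libx264 requires
    stream = stream.filter('scale',
                           'trunc(iw*{}/2)*2'.format(scale),
                           'trunc(ih*{}/2)*2'.format(scale))
    stream = stream.output(dst_path, vcodec='libx264', crf=crf, tune=tune)
    stream.overwrite_output().run(quiet=True)
```
Decoding the re-encoded clip back into frames (e.g. with OpenCV) is omitted here.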
#### Model architecture
As a result of the experiments, we found that the EfficientNet models work better than others (we checked ResNet,
ResNeXt, SE-ResNeXt). The best model was EfficientNet-B7 with Noisy Student pre-trained weights \[3\]. The size of the
input image is 224x192 (most of the faces in the training dataset are smaller). The final ensemble consists of three
models, two of which are frame-by-frame, and the third works on sequences.
##### Frame-by-frame models
Frame-by-frame models work quite well. They differ in the size of the area around the face and the augmentations used
during training. Below are examples of input images for each of the models:

##### Sequence-based model
Temporal dependencies can probably be useful for detecting fakes. Therefore, we added a 3D convolution to each block of
the EfficientNet model. This model worked slightly better than a similar frame-by-frame model. The length of the input
sequence is 7 frames. The step between frames is 1/15 of a second. An example of an input sequence is given below:

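A minimal sketch of one way to wrap an existing 2D block with an extra temporal convolution; this illustrates the idea only, and the wrapper interface, kernel shape, and channel handling are assumptions rather than the exact module used in `train_b7_ns_seq_aa_original_100k.py`:
```python
import torch.nn as nn

class TemporalBlock(nn.Module):
    """Wraps a 2D EfficientNet block and mixes information across the time axis."""
    def __init__(self, block2d, channels, seq_len=7):
        super().__init__()
        self.block2d = block2d
        self.seq_len = seq_len
        # Depthwise 3D conv over (T, H, W); padding keeps the sequence length unchanged.
        self.conv3d = nn.Conv3d(channels, channels, kernel_size=(3, 1, 1),
                                padding=(1, 0, 0), groups=channels)

    def forward(self, x):
        # x: (batch * seq_len, C, H, W) - frames of each clip stacked along the batch dim
        x = self.block2d(x)
        bt, c, h, w = x.shape
        b = bt // self.seq_len
        x = x.view(b, self.seq_len, c, h, w).permute(0, 2, 1, 3, 4)  # (B, C, T, H, W)
        x = self.conv3d(x)
        x = x.permute(0, 2, 1, 3, 4).reshape(bt, c, h, w)
        return x
```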
#### Image augmentations
To improve model generalization, we used the following augmentations: AutoAugment \[4\], Random Erasing, Random Crops,
Random Horizontal Flips. Since we used mixup, it was important to augment real-fake pairs in the same way (see the
example below). For the sequence-based model, it was also important to augment all frames belonging to the same clip in
the same way.

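In `datasets.py`, `TrackPairDataset` achieves this by saving and restoring Python's global random state before transforming each member of a pair, so the augmentation parameters sampled by albumentations coincide. A stripped-down sketch of that trick:
```python
import random

def augment_pair(real_img, fake_img, transform):
    """Apply the same randomly sampled augmentation to both members of a real-fake pair."""
    state = random.getstate()
    real_aug = transform(image=real_img)['image']
    random.setstate(state)              # replay the same random decisions
    fake_aug = transform(image=fake_img)['image']
    return real_aug, fake_aug
```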
#### Inference post-processing
Due to mixup, the predictions of the models were uncertain, which was not optimal for the logloss. To increase
confidence, we applied the following transformation:

Due to computational limitations, predictions are made on a subsample of frames. Half of the frames were horizontally
flipped. The prediction for the video is obtained by averaging all the predictions with weights proportional to the
confidence (the closer a prediction is to 0.5, the lower its weight). Such averaging works like attention, because the
model gives predictions close to 0.5 on poor-quality frames (profile faces, blur, etc.).
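A small NumPy sketch of this confidence-weighted averaging; the weighting here uses the distance from 0.5 as the confidence, which matches the description above but may differ in detail from `predict.py`:
```python
import numpy as np

def aggregate_video_prediction(frame_preds, eps=1e-6):
    """Average per-frame fake probabilities, down-weighting uncertain (near 0.5) frames."""
    frame_preds = np.asarray(frame_preds, dtype=np.float32)
    weights = np.abs(frame_preds - 0.5) + eps   # confidence of each frame prediction
    return float(np.sum(weights * frame_preds) / np.sum(weights))

# Example: confident frames dominate the video-level score
print(aggregate_video_prediction([0.51, 0.93, 0.97, 0.49]))
```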
#### References
\[1\] Brian Dolhansky, Russ Howes, Ben Pflaum, Nicole Baram, Cristian Canton Ferrer, “The Deepfake Detection Challenge
(DFDC) Preview Dataset”
\[2\] [https://trac.ffmpeg.org/wiki/Encode/H.264](https://trac.ffmpeg.org/wiki/Encode/H.264)
\[3\] Qizhe Xie, Minh-Thang Luong, Eduard Hovy, Quoc V. Le, “Self-training with Noisy Student improves ImageNet classification”
\[4\] Ekin D. Cubuk, Barret Zoph, Dandelion Mane, Vijay Vasudevan, Quoc V. Le, “AutoAugment: Learning Augmentation Policies from Data”
## The hardware we used
- CPU: Intel(R) Xeon(R) CPU E5-2698 v4 @ 2.20GHz
- GPU: 8x NVIDIA Tesla V100 SXM2 32 GB
- RAM: 512 GB
- SSD: 6 TB
## Prerequisites
### Environment
Use Docker to get an environment close to the one used for training. Run the following command to build the Docker image:
```bash
cd path/to/solution
sudo docker build -t dfdc .
```
### Data
Download the [deepfake-detection-challenge-data](https://www.kaggle.com/c/deepfake-detection-challenge/data) and extract all files to `/path/to/dfdc-data`. This directory must have the following structure:
```
dfdc-data
├── dfdc_train_part_0
├── dfdc_train_part_1
├── dfdc_train_part_10
├── dfdc_train_part_11
├── dfdc_train_part_12
├── dfdc_train_part_13
├── dfdc_train_part_14
├── dfdc_train_part_15
├── dfdc_train_part_16
├── dfdc_train_part_17
├── dfdc_train_part_18
├── dfdc_train_part_19
├── dfdc_train_part_2
├── dfdc_train_part_20
├── dfdc_train_part_21
├── dfdc_train_part_22
├── dfdc_train_part_23
├── dfdc_train_part_24
├── dfdc_train_part_25
├── dfdc_train_part_26
├── dfdc_train_part_27
├── dfdc_train_part_28
├── dfdc_train_part_29
├── dfdc_train_part_3
├── dfdc_train_part_30
├── dfdc_train_part_31
├── dfdc_train_part_32
├── dfdc_train_part_33
├── dfdc_train_part_34
├── dfdc_train_part_35
├── dfdc_train_part_36
├── dfdc_train_part_37
├── dfdc_train_part_38
├── dfdc_train_part_39
├── dfdc_train_part_4
├── dfdc_train_part_40
├── dfdc_train_part_41
├── dfdc_train_part_42
├── dfdc_train_part_43
├── dfdc_train_part_44
├── dfdc_train_part_45
├── dfdc_train_part_46
├── dfdc_train_part_47
├── dfdc_train_part_48
├── dfdc_train_part_49
├── dfdc_train_part_5
├── dfdc_train_part_6
├── dfdc_train_part_7
├── dfdc_train_part_8
├── dfdc_train_part_9
└── test_videos
```

### External data
According to the rules of the competition, external data is allowed. The solution does not use any other external data except pre-trained models. Below is a table with information about these models.

| File Name | Source | Direct Link | Forum Post |
| --------- | ------ | ----------- | ---------- |
| WIDERFace_DSFD_RES152.pth | [github](https://github.com/Tencent/FaceDetection-DSFD/tree/31aa8bdeaf01a0c408adaf2709754a16b17aec79) | [google drive](https://drive.google.com/file/d/1WeXlNYsM6dMP3xQQELI-4gxhwKUQxc3-/view) | [link](https://www.kaggle.com/c/deepfake-detection-challenge/discussion/121203#761391) |
| noisy_student_efficientnet-b7.tar.gz | [github](https://github.com/tensorflow/tpu/tree/4719695c9128622fb26dedb19ea19bd9d1ee3177/models/official/efficientnet) | [link](https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/noisystudent/noisy_student_efficientnet-b7.tar.gz) | [link](https://www.kaggle.com/c/deepfake-detection-challenge/discussion/121203#748358) |

Download these files and copy them to the `external_data` folder.

## How to train the model
Run the Docker container with the paths correctly mounted:
```bash
sudo docker run --runtime=nvidia -i -t -d --rm --ipc=host -v /path/to/dfdc-data:/kaggle/input/deepfake-detection-challenge:ro -v /path/to/solution:/kaggle/solution --name dfdc dfdc
sudo docker exec -it dfdc /bin/bash
cd /kaggle/solution
```
Convert the pre-trained model from TensorFlow to PyTorch:
```bash
bash convert_tf_to_pt.sh
```
Detect faces on videos:
```bash
python3.6 detect_faces_on_videos.py
```
_Note: You can parallelize this operation using the `--part` and `--num_parts` arguments._
Generate tracks:
```bash
python3.6 generate_tracks.py
```
Generate aligned tracks:
```bash
python3.6 generate_aligned_tracks.py
```
Extract tracks from videos:
```bash
python3.6 extract_tracks_from_videos.py
```
_Note: You can parallelize this operation using the `--part` and `--num_parts` arguments._
Generate track pairs:
```bash
python3.6 generate_track_pairs.py
```
Train the models:
```bash
python3.6 train_b7_ns_aa_original_large_crop_100k.py
python3.6 train_b7_ns_aa_original_re_100k.py
python3.6 train_b7_ns_seq_aa_original_100k.py
```
Copy the final weights and convert them to FP16:
```bash
python3.6 copy_weights.py
```
## Serialized copy of the trained model
You can download the final weights that were used in the competition (the result of the `copy_weights.py` script): [GoogleDrive](https://drive.google.com/file/d/1S-HeppZcbXDF0F-BO96zhqZyrRWOaan6/view?usp=sharing)
## How to generate submission
Run the following command:
```bash
python3.6 predict.py
```
config.yaml
ADDED
@@ -0,0 +1,4 @@
DFDC_DATA_PATH: "/kaggle/input/deepfake-detection-challenge"
ARTIFACTS_PATH: "/kaggle/solution/artifacts"
MODELS_PATH: "/kaggle/solution/models"
SUBMISSION_PATH: "/kaggle/solution/output/submission.csv"
convert_tf_to_pt.sh
ADDED
@@ -0,0 +1,6 @@
#!/bin/bash
cd external_data && \
tar -xzf noisy_student_efficientnet-b7.tar.gz && \
python3.6 convert_tf_to_pt.py --model_name efficientnet-b7 --tf_checkpoint noisy-student-efficientnet-b7 --output_file noisy_student_efficientnet-b7.pth && \
rm -rf noisy-student-efficientnet-b7 tmp && \
cd ..
copy_weights.py
ADDED
@@ -0,0 +1,36 @@
import os
import yaml

import torch

WEIGHTS_MAPPING = {
    'snapshots/efficientnet-b7_ns_aa-original-mstd0.5_large_crop_100k/snapshot_100000.pth': 'efficientnet-b7_ns_aa-original-mstd0.5_large_crop_100k_v4_cad79a/snapshot_100000.fp16.pth',
    'snapshots/efficientnet-b7_ns_aa-original-mstd0.5_re_100k/snapshot_100000.pth': 'efficientnet-b7_ns_aa-original-mstd0.5_re_100k_v4_cad79a/snapshot_100000.fp16.pth',
    'snapshots/efficientnet-b7_ns_seq_aa-original-mstd0.5_100k/snapshot_100000.pth': 'efficientnet-b7_ns_seq_aa-original-mstd0.5_100k_v4_cad79a/snapshot_100000.fp16.pth'
}

SRC_DETECTOR_WEIGHTS = 'external_data/WIDERFace_DSFD_RES152.pth'
DST_DETECTOR_WEIGHTS = 'WIDERFace_DSFD_RES152.fp16.pth'


def copy_weights(src_path, dst_path):
    state = torch.load(src_path, map_location=lambda storage, loc: storage)
    state = {key: value.half() for key, value in state.items()}
    os.makedirs(os.path.dirname(dst_path), exist_ok=True)
    torch.save(state, dst_path)


def main():
    with open('config.yaml', 'r') as f:
        config = yaml.load(f)

    for src_rel_path, dst_rel_path in WEIGHTS_MAPPING.items():
        src_path = os.path.join(config['ARTIFACTS_PATH'], src_rel_path)
        dst_path = os.path.join(config['MODELS_PATH'], dst_rel_path)
        copy_weights(src_path, dst_path)

    copy_weights(SRC_DETECTOR_WEIGHTS, os.path.join(config['MODELS_PATH'], DST_DETECTOR_WEIGHTS))


if __name__ == '__main__':
    main()
datasets.py
ADDED
@@ -0,0 +1,162 @@
import os
import random
import glob

import cv2
import numpy as np

from torch.utils.data import Dataset


class UnlabeledVideoDataset(Dataset):
    def __init__(self, root_dir, content=None, transform=None):
        self.root_dir = os.path.normpath(root_dir)
        self.transform = transform

        if content is not None:
            self.content = content
        else:
            self.content = []
            for path in glob.iglob(os.path.join(self.root_dir, '**', '*.mp4'), recursive=True):
                rel_path = path[len(self.root_dir) + 1:]
                self.content.append(rel_path)
            self.content = sorted(self.content)

    def __len__(self):
        return len(self.content)

    def __getitem__(self, idx):
        rel_path = self.content[idx]
        path = os.path.join(self.root_dir, rel_path)

        capture = cv2.VideoCapture(path)

        frames = []
        if capture.isOpened():
            while True:
                ret, frame = capture.read()
                if not ret:
                    break

                if self.transform is not None:
                    frame = self.transform(frame)

                frames.append(frame)

        sample = {
            'frames': frames,
            'index': idx
        }

        return sample


class FaceDataset(Dataset):
    def __init__(self, root_dir, content, labels=None, transform=None):
        self.root_dir = os.path.normpath(root_dir)
        self.content = content
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.content)

    def __getitem__(self, idx):
        rel_path = self.content[idx]
        path = os.path.join(self.root_dir, rel_path)

        face = cv2.imread(path, cv2.IMREAD_COLOR)
        face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            face = self.transform(image=face)['image']

        sample = {
            'face': face,
            'index': idx
        }

        if self.labels is not None:
            sample['label'] = self.labels[idx]

        return sample


class TrackPairDataset(Dataset):
    FPS = 30

    def __init__(self, tracks_root, pairs_path, indices, track_length, track_transform=None, image_transform=None,
                 sequence_mode=True):
        self.tracks_root = os.path.normpath(tracks_root)
        self.track_transform = track_transform
        self.image_transform = image_transform
        self.indices = np.asarray(indices, dtype=np.int32)
        self.track_length = track_length
        self.sequence_mode = sequence_mode

        self.pairs = []
        with open(pairs_path, 'r') as f:
            for line in f:
                real_track, fake_track = line.strip().split(',')
                self.pairs.append((real_track, fake_track))

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        real_track_path, fake_track_path = self.pairs[idx]

        real_track_path = os.path.join(self.tracks_root, real_track_path)
        fake_track_path = os.path.join(self.tracks_root, fake_track_path)

        if self.track_transform is not None:
            img = self.load_img(real_track_path, 0)
            src_height, src_width = img.shape[:2]
            track_transform_params = self.track_transform.get_params(self.FPS, src_height, src_width)
        else:
            track_transform_params = None

        real_track = self.load_track(real_track_path, self.indices, track_transform_params)
        fake_track = self.load_track(fake_track_path, self.indices, track_transform_params)

        if self.image_transform is not None:
            prev_state = random.getstate()
            transformed_real_track = []
            for img in real_track:
                if self.sequence_mode:
                    random.setstate(prev_state)
                transformed_real_track.append(self.image_transform(image=img)['image'])

            real_track = transformed_real_track

            random.setstate(prev_state)
            transformed_fake_track = []
            for img in fake_track:
                if self.sequence_mode:
                    random.setstate(prev_state)
                transformed_fake_track.append(self.image_transform(image=img)['image'])
            fake_track = transformed_fake_track

        sample = {
            'real': real_track,
            'fake': fake_track
        }

        return sample

    def load_img(self, track_path, idx):
        img = cv2.imread(os.path.join(track_path, '{}.png'.format(idx)))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        return img

    def load_track(self, track_path, indices, transform_params):
        if transform_params is None:
            track = np.stack([self.load_img(track_path, idx) for idx in indices])
        else:
            track = self.track_transform(track_path, self.FPS, *transform_params)
            indices = (indices.astype(np.float32) / self.track_length) * len(track)
            indices = np.round(indices).astype(np.int32).clip(0, len(track) - 1)
            track = track[indices]

        return track
detect_faces_on_videos.py
ADDED
@@ -0,0 +1,82 @@
import argparse
import os
import glob
import yaml
import pickle
import tqdm

import torch
from torch.utils.data import DataLoader

from dsfacedetector.face_ssd_infer import SSD
from datasets import UnlabeledVideoDataset

DETECTOR_WEIGHTS_PATH = 'external_data/WIDERFace_DSFD_RES152.pth'
DETECTOR_THRESHOLD = 0.3
DETECTOR_STEP = 6
DETECTOR_TARGET_SIZE = (512, 512)

BATCH_SIZE = 1
NUM_WORKERS = 0

DETECTIONS_ROOT = 'detections'
DETECTIONS_FILE_NAME = 'detections.pkl'


def main():
    parser = argparse.ArgumentParser(description='Detects faces on videos')
    parser.add_argument('--num_parts', type=int, default=1, help='Number of parts')
    parser.add_argument('--part', type=int, default=0, help='Part index')

    args = parser.parse_args()

    with open('config.yaml', 'r') as f:
        config = yaml.load(f)

    content = []
    for path in glob.iglob(os.path.join(config['DFDC_DATA_PATH'], 'dfdc_train_part_*', '*.mp4')):
        parts = path.split('/')
        content.append('/'.join(parts[-2:]))
    content = sorted(content)

    print('Total number of videos: {}'.format(len(content)))

    part_size = len(content) // args.num_parts + 1
    assert part_size * args.num_parts >= len(content)
    part_start = part_size * args.part
    part_end = min(part_start + part_size, len(content))
    print('Part {} ({}, {})'.format(args.part, part_start, part_end))

    dataset = UnlabeledVideoDataset(config['DFDC_DATA_PATH'], content[part_start:part_end])

    detector = SSD('test')
    state = torch.load(DETECTOR_WEIGHTS_PATH, map_location=lambda storage, loc: storage)
    detector.load_state_dict(state)
    device = torch.device('cuda')
    detector = detector.eval().to(device)

    loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, collate_fn=lambda X: X,
                        drop_last=False)

    dst_root = os.path.join(config['ARTIFACTS_PATH'], DETECTIONS_ROOT)
    os.makedirs(dst_root, exist_ok=True)

    for video_sample in tqdm.tqdm(loader):
        frames = video_sample[0]['frames']
        video_idx = video_sample[0]['index']
        video_rel_path = dataset.content[video_idx]

        detections = []
        for frame in frames[::DETECTOR_STEP]:
            with torch.no_grad():
                detections_per_frame = detector.detect_on_image(frame, DETECTOR_TARGET_SIZE, device, is_pad=False,
                                                                keep_thresh=DETECTOR_THRESHOLD)
            detections.append({'boxes': detections_per_frame[:, :4], 'scores': detections_per_frame[:, 4]})

        os.makedirs(os.path.join(dst_root, video_rel_path), exist_ok=True)
        with open(os.path.join(dst_root, video_rel_path, DETECTIONS_FILE_NAME), 'wb') as f:
            pickle.dump(detections, f)


if __name__ == '__main__':
    main()
dsfacedetector/__init__.py
ADDED
File without changes
dsfacedetector/data/__init__.py
ADDED
File without changes
dsfacedetector/data/config.py
ADDED
@@ -0,0 +1,57 @@
import numpy as np


def test_base_transform(image, mean):
    x = image.astype(np.float32)
    x -= mean
    x = x.astype(np.float32)
    return x


class TestBaseTransform:
    def __init__(self, mean):
        self.mean = np.array(mean, dtype=np.float32)

    def __call__(self, image):
        return test_base_transform(image, self.mean)


widerface_640 = {
    'num_classes': 2,

    'feature_maps': [160, 80, 40, 20, 10, 5],
    'min_dim': 640,

    'steps': [4, 8, 16, 32, 64, 128],  # stride

    'variance': [0.1, 0.2],
    'clip': True,  # make default box in [0,1]
    'name': 'WIDERFace',
    'l2norm_scale': [10, 8, 5],
    'base': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M', 512, 512, 512],
    'extras': [256, 'S', 512, 128, 'S', 256],

    'mbox': [1, 1, 1, 1, 1, 1],
    'min_sizes': [16, 32, 64, 128, 256, 512],
    'max_sizes': [],
    'aspect_ratios': [[1.5], [1.5], [1.5], [1.5], [1.5], [1.5]],  # [1,2] default 1

    'backbone': 'resnet152',
    'feature_pyramid_network': True,
    'bottom_up_path': False,
    'feature_enhance_module': True,
    'max_in_out': True,
    'focal_loss': False,
    'progressive_anchor': True,
    'refinedet': False,
    'max_out': False,
    'anchor_compensation': False,
    'data_anchor_sampling': False,

    'overlap_thresh': [0.4],
    'negpos_ratio': 3,
    # test
    'nms_thresh': 0.3,
    'conf_thresh': 0.01,
    'num_thresh': 5000,
}
dsfacedetector/face_ssd_infer.py
ADDED
|
@@ -0,0 +1,156 @@
|
| 1 |
+
# Source: https://github.com/vlad3996/FaceDetection-DSFD
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
import torchvision
|
| 5 |
+
import torch.nn as nn
|
| 6 |
+
|
| 7 |
+
from .data.config import TestBaseTransform, widerface_640 as cfg
|
| 8 |
+
from .layers import Detect, get_prior_boxes, FEM, pa_multibox, mio_module, upsample_product
|
| 9 |
+
from .utils import resize_image
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class SSD(nn.Module):
|
| 13 |
+
|
| 14 |
+
def __init__(self, phase, nms_thresh=0.3, nms_conf_thresh=0.01):
|
| 15 |
+
super(SSD, self).__init__()
|
| 16 |
+
self.phase = phase
|
| 17 |
+
self.num_classes = 2
|
| 18 |
+
self.cfg = cfg
|
| 19 |
+
|
| 20 |
+
resnet = torchvision.models.resnet152(pretrained=False)
|
| 21 |
+
|
| 22 |
+
self.layer1 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool, resnet.layer1)
|
| 23 |
+
self.layer2 = nn.Sequential(resnet.layer2)
|
| 24 |
+
self.layer3 = nn.Sequential(resnet.layer3)
|
| 25 |
+
self.layer4 = nn.Sequential(resnet.layer4)
|
| 26 |
+
self.layer5 = nn.Sequential(
|
| 27 |
+
*[nn.Conv2d(2048, 512, kernel_size=1),
|
| 28 |
+
nn.BatchNorm2d(512),
|
| 29 |
+
nn.ReLU(inplace=True),
|
| 30 |
+
nn.Conv2d(512, 512, kernel_size=3, padding=1, stride=2),
|
| 31 |
+
nn.BatchNorm2d(512),
|
| 32 |
+
nn.ReLU(inplace=True)]
|
| 33 |
+
)
|
| 34 |
+
self.layer6 = nn.Sequential(
|
| 35 |
+
*[nn.Conv2d(512, 128, kernel_size=1, ),
|
| 36 |
+
nn.BatchNorm2d(128),
|
| 37 |
+
nn.ReLU(inplace=True),
|
| 38 |
+
nn.Conv2d(128, 256, kernel_size=3, padding=1, stride=2),
|
| 39 |
+
nn.BatchNorm2d(256),
|
| 40 |
+
nn.ReLU(inplace=True)]
|
| 41 |
+
)
|
| 42 |
+
|
| 43 |
+
output_channels = [256, 512, 1024, 2048, 512, 256]
|
| 44 |
+
|
| 45 |
+
# FPN
|
| 46 |
+
fpn_in = output_channels
|
| 47 |
+
|
| 48 |
+
self.latlayer3 = nn.Conv2d(fpn_in[3], fpn_in[2], kernel_size=1, stride=1, padding=0)
|
| 49 |
+
self.latlayer2 = nn.Conv2d(fpn_in[2], fpn_in[1], kernel_size=1, stride=1, padding=0)
|
| 50 |
+
self.latlayer1 = nn.Conv2d(fpn_in[1], fpn_in[0], kernel_size=1, stride=1, padding=0)
|
| 51 |
+
|
| 52 |
+
self.smooth3 = nn.Conv2d(fpn_in[2], fpn_in[2], kernel_size=1, stride=1, padding=0)
|
| 53 |
+
self.smooth2 = nn.Conv2d(fpn_in[1], fpn_in[1], kernel_size=1, stride=1, padding=0)
|
| 54 |
+
self.smooth1 = nn.Conv2d(fpn_in[0], fpn_in[0], kernel_size=1, stride=1, padding=0)
|
| 55 |
+
|
| 56 |
+
# FEM
|
| 57 |
+
cpm_in = output_channels
|
| 58 |
+
|
| 59 |
+
self.cpm3_3 = FEM(cpm_in[0])
|
| 60 |
+
self.cpm4_3 = FEM(cpm_in[1])
|
| 61 |
+
self.cpm5_3 = FEM(cpm_in[2])
|
| 62 |
+
self.cpm7 = FEM(cpm_in[3])
|
| 63 |
+
self.cpm6_2 = FEM(cpm_in[4])
|
| 64 |
+
self.cpm7_2 = FEM(cpm_in[5])
|
| 65 |
+
|
| 66 |
+
# head
|
| 67 |
+
head = pa_multibox(output_channels)
|
| 68 |
+
self.loc = nn.ModuleList(head[0])
|
| 69 |
+
self.conf = nn.ModuleList(head[1])
|
| 70 |
+
|
| 71 |
+
self.softmax = nn.Softmax(dim=-1)
|
| 72 |
+
|
| 73 |
+
if self.phase != 'onnx_export':
|
| 74 |
+
self.detect = Detect(self.num_classes, 0, cfg['num_thresh'], nms_conf_thresh, nms_thresh,
|
| 75 |
+
cfg['variance'])
|
| 76 |
+
self.last_image_size = None
|
| 77 |
+
self.last_feature_maps = None
|
| 78 |
+
|
| 79 |
+
if self.phase == 'test':
|
| 80 |
+
self.test_transform = TestBaseTransform((104, 117, 123))
|
| 81 |
+
|
| 82 |
+
def forward(self, x):
|
| 83 |
+
|
| 84 |
+
image_size = [x.shape[2], x.shape[3]]
|
| 85 |
+
loc = list()
|
| 86 |
+
conf = list()
|
| 87 |
+
|
| 88 |
+
conv3_3_x = self.layer1(x)
|
| 89 |
+
conv4_3_x = self.layer2(conv3_3_x)
|
| 90 |
+
conv5_3_x = self.layer3(conv4_3_x)
|
| 91 |
+
fc7_x = self.layer4(conv5_3_x)
|
| 92 |
+
conv6_2_x = self.layer5(fc7_x)
|
| 93 |
+
conv7_2_x = self.layer6(conv6_2_x)
|
| 94 |
+
|
| 95 |
+
lfpn3 = upsample_product(self.latlayer3(fc7_x), self.smooth3(conv5_3_x))
|
| 96 |
+
lfpn2 = upsample_product(self.latlayer2(lfpn3), self.smooth2(conv4_3_x))
|
| 97 |
+
lfpn1 = upsample_product(self.latlayer1(lfpn2), self.smooth1(conv3_3_x))
|
| 98 |
+
|
| 99 |
+
conv5_3_x = lfpn3
|
| 100 |
+
conv4_3_x = lfpn2
|
| 101 |
+
conv3_3_x = lfpn1
|
| 102 |
+
|
| 103 |
+
sources = [conv3_3_x, conv4_3_x, conv5_3_x, fc7_x, conv6_2_x, conv7_2_x]
|
| 104 |
+
|
| 105 |
+
sources[0] = self.cpm3_3(sources[0])
|
| 106 |
+
sources[1] = self.cpm4_3(sources[1])
|
| 107 |
+
sources[2] = self.cpm5_3(sources[2])
|
| 108 |
+
sources[3] = self.cpm7(sources[3])
|
| 109 |
+
sources[4] = self.cpm6_2(sources[4])
|
| 110 |
+
sources[5] = self.cpm7_2(sources[5])
|
| 111 |
+
|
| 112 |
+
# apply multibox head to source layers
|
| 113 |
+
featuremap_size = []
|
| 114 |
+
for (x, l, c) in zip(sources, self.loc, self.conf):
|
| 115 |
+
featuremap_size.append([x.shape[2], x.shape[3]])
|
| 116 |
+
loc.append(l(x).permute(0, 2, 3, 1).contiguous())
|
| 117 |
+
len_conf = len(conf)
|
| 118 |
+
cls = mio_module(c(x), len_conf)
|
| 119 |
+
conf.append(cls.permute(0, 2, 3, 1).contiguous())
|
| 120 |
+
|
| 121 |
+
face_loc = torch.cat([o[:, :, :, :4].contiguous().view(o.size(0), -1) for o in loc], 1)
|
| 122 |
+
face_loc = face_loc.view(face_loc.size(0), -1, 4)
|
| 123 |
+
face_conf = torch.cat([o[:, :, :, :2].contiguous().view(o.size(0), -1) for o in conf], 1)
|
| 124 |
+
face_conf = self.softmax(face_conf.view(face_conf.size(0), -1, self.num_classes))
|
| 125 |
+
|
| 126 |
+
if self.phase != 'onnx_export':
|
| 127 |
+
|
| 128 |
+
if self.last_image_size is None or self.last_image_size != image_size or self.last_feature_maps != featuremap_size:
|
| 129 |
+
self.priors = get_prior_boxes(self.cfg, featuremap_size, image_size).to(face_loc.device)
|
| 130 |
+
self.last_image_size = image_size
|
| 131 |
+
self.last_feature_maps = featuremap_size
|
| 132 |
+
with torch.no_grad():
|
| 133 |
+
output = self.detect(face_loc, face_conf, self.priors)
|
| 134 |
+
else:
|
| 135 |
+
output = torch.cat((face_loc, face_conf), 2)
|
| 136 |
+
return output
|
| 137 |
+
|
| 138 |
+
def detect_on_image(self, source_image, target_size, device, is_pad=False, keep_thresh=0.3):
|
| 139 |
+
|
| 140 |
+
image, shift_h_scaled, shift_w_scaled, scale = resize_image(source_image, target_size, is_pad=is_pad)
|
| 141 |
+
|
| 142 |
+
x = torch.from_numpy(self.test_transform(image)).permute(2, 0, 1).to(device)
|
| 143 |
+
x.unsqueeze_(0)
|
| 144 |
+
|
| 145 |
+
detections = self.forward(x).cpu().numpy()
|
| 146 |
+
|
| 147 |
+
scores = detections[0, 1, :, 0]
|
| 148 |
+
keep_idxs = scores > keep_thresh # find keeping indexes
|
| 149 |
+
detections = detections[0, 1, keep_idxs, :] # select detections over threshold
|
| 150 |
+
detections = detections[:, [1, 2, 3, 4, 0]] # reorder
|
| 151 |
+
|
| 152 |
+
detections[:, [0, 2]] -= shift_w_scaled # 0 or pad percent from left corner
|
| 153 |
+
detections[:, [1, 3]] -= shift_h_scaled # 0 or pad percent from top
|
| 154 |
+
detections[:, :4] *= scale
|
| 155 |
+
|
| 156 |
+
return detections
|
dsfacedetector/layers/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .detection import Detect
|
| 2 |
+
from .prior_box import PriorBox, get_prior_boxes
|
| 3 |
+
from .modules import FEM, pa_multibox, mio_module, upsample_product
|
dsfacedetector/layers/detection.py
ADDED
@@ -0,0 +1,157 @@
| 1 |
+
from __future__ import division
|
| 2 |
+
import torch
|
| 3 |
+
import torch.nn as nn
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class Detect(nn.Module):
|
| 7 |
+
"""At test time, Detect is the final layer of SSD. Decode location preds,
|
| 8 |
+
apply non-maximum suppression to location predictions based on conf
|
| 9 |
+
scores and threshold to a top_k number of output predictions for both
|
| 10 |
+
confidence score and locations.
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
def __init__(self, num_classes, bkg_label, top_k, conf_thresh, nms_thresh, variance=(0.1, 0.2)):
|
| 14 |
+
super(Detect, self).__init__()
|
| 15 |
+
self.num_classes = num_classes
|
| 16 |
+
self.background_label = bkg_label
|
| 17 |
+
self.top_k = top_k
|
| 18 |
+
# Parameters used in nms.
|
| 19 |
+
self.nms_thresh = nms_thresh
|
| 20 |
+
if nms_thresh <= 0:
|
| 21 |
+
raise ValueError('nms_threshold must be positive.')
|
| 22 |
+
self.conf_thresh = conf_thresh
|
| 23 |
+
self.variance = variance
|
| 24 |
+
|
| 25 |
+
def forward(self, loc_data, conf_data, prior_data):
|
| 26 |
+
"""
|
| 27 |
+
Args:
|
| 28 |
+
loc_data: (tensor) Loc preds from loc layers
|
| 29 |
+
Shape: [batch,num_priors*4]
|
| 30 |
+
conf_data: (tensor) Shape: Conf preds from conf layers
|
| 31 |
+
Shape: [batch*num_priors,num_classes]
|
| 32 |
+
prior_data: (tensor) Prior boxes and variances from priorbox layers
|
| 33 |
+
Shape: [1,num_priors,4]
|
| 34 |
+
"""
|
| 35 |
+
num = loc_data.size(0) # batch size
|
| 36 |
+
num_priors = prior_data.size(0)
|
| 37 |
+
|
| 38 |
+
output = torch.zeros(num, self.num_classes, self.top_k, 5)
|
| 39 |
+
conf_preds = conf_data.view(num, num_priors, self.num_classes).transpose(2, 1)
|
| 40 |
+
|
| 41 |
+
# Decode predictions into bboxes.
|
| 42 |
+
for i in range(num):
|
| 43 |
+
default = prior_data
|
| 44 |
+
decoded_boxes = decode(loc_data[i], default, self.variance)
|
| 45 |
+
# For each class, perform nms
|
| 46 |
+
conf_scores = conf_preds[i].clone()
|
| 47 |
+
|
| 48 |
+
for cl in range(1, self.num_classes):
|
| 49 |
+
c_mask = conf_scores[cl].gt(self.conf_thresh)
|
| 50 |
+
scores = conf_scores[cl][c_mask]
|
| 51 |
+
if scores.dim() == 0 or scores.size(0) == 0:
|
| 52 |
+
continue
|
| 53 |
+
l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
|
| 54 |
+
boxes = decoded_boxes[l_mask].view(-1, 4)
|
| 55 |
+
# idx of highest scoring and non-overlapping boxes per class
|
| 56 |
+
ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
|
| 57 |
+
output[i, cl, :count] = \
|
| 58 |
+
torch.cat((scores[ids[:count]].unsqueeze(1),
|
| 59 |
+
boxes[ids[:count]]), 1)
|
| 60 |
+
flt = output.contiguous().view(num, -1, 5)
|
| 61 |
+
_, idx = flt[:, :, 0].sort(1, descending=True)
|
| 62 |
+
_, rank = idx.sort(1)
|
| 63 |
+
flt[(rank < self.top_k).unsqueeze(-1).expand_as(flt)].fill_(0)
|
| 64 |
+
return output
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
# Adapted from https://github.com/Hakuyume/chainer-ssd
|
| 68 |
+
def decode(loc, priors, variances):
|
| 69 |
+
"""Decode locations from predictions using priors to undo
|
| 70 |
+
the encoding we did for offset regression at train time.
|
| 71 |
+
Args:
|
| 72 |
+
loc (tensor): location predictions for loc layers,
|
| 73 |
+
Shape: [num_priors,4]
|
| 74 |
+
priors (tensor): Prior boxes in center-offset form.
|
| 75 |
+
Shape: [num_priors,4].
|
| 76 |
+
variances: (list[float]) Variances of priorboxes
|
| 77 |
+
Return:
|
| 78 |
+
decoded bounding box predictions
|
| 79 |
+
"""
|
| 80 |
+
|
| 81 |
+
boxes = torch.cat((
|
| 82 |
+
priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
|
| 83 |
+
priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
|
| 84 |
+
boxes[:, :2] -= boxes[:, 2:] / 2
|
| 85 |
+
boxes[:, 2:] += boxes[:, :2]
|
| 86 |
+
# (cx,cy,w,h)->(x0,y0,x1,y1)
|
| 87 |
+
return boxes
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
# Original author: Francisco Massa:
|
| 91 |
+
# https://github.com/fmassa/object-detection.torch
|
| 92 |
+
# Ported to PyTorch by Max deGroot (02/01/2017)
|
| 93 |
+
def nms(boxes, scores, overlap=0.5, top_k=200):
|
| 94 |
+
"""Apply non-maximum suppression at test time to avoid detecting too many
|
| 95 |
+
overlapping bounding boxes for a given object.
|
| 96 |
+
Args:
|
| 97 |
+
boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
|
| 98 |
+
scores: (tensor) The class predscores for the img, Shape:[num_priors].
|
| 99 |
+
overlap: (float) The overlap thresh for suppressing unnecessary boxes.
|
| 100 |
+
top_k: (int) The Maximum number of box preds to consider.
|
| 101 |
+
Return:
|
| 102 |
+
The indices of the kept boxes with respect to num_priors.
|
| 103 |
+
"""
|
| 104 |
+
|
| 105 |
+
keep = scores.new(scores.size(0)).zero_().long()
|
| 106 |
+
if boxes.numel() == 0:
|
| 107 |
+
return keep
|
| 108 |
+
x1 = boxes[:, 0]
|
| 109 |
+
y1 = boxes[:, 1]
|
| 110 |
+
x2 = boxes[:, 2]
|
| 111 |
+
y2 = boxes[:, 3]
|
| 112 |
+
area = torch.mul(x2 - x1, y2 - y1)
|
| 113 |
+
v, idx = scores.sort(0) # sort in ascending order
|
| 114 |
+
# I = I[v >= 0.01]
|
| 115 |
+
idx = idx[-top_k:] # indices of the top-k largest vals
|
| 116 |
+
xx1 = boxes.new()
|
| 117 |
+
yy1 = boxes.new()
|
| 118 |
+
xx2 = boxes.new()
|
| 119 |
+
yy2 = boxes.new()
|
| 120 |
+
w = boxes.new()
|
| 121 |
+
h = boxes.new()
|
| 122 |
+
|
| 123 |
+
# keep = torch.Tensor()
|
| 124 |
+
count = 0
|
| 125 |
+
while idx.numel() > 0:
|
| 126 |
+
i = idx[-1] # index of current largest val
|
| 127 |
+
# keep.append(i)
|
| 128 |
+
keep[count] = i
|
| 129 |
+
count += 1
|
| 130 |
+
if idx.size(0) == 1:
|
| 131 |
+
break
|
| 132 |
+
idx = idx[:-1] # remove kept element from view
|
| 133 |
+
# load bboxes of next highest vals
|
| 134 |
+
torch.index_select(x1, 0, idx, out=xx1)
|
| 135 |
+
torch.index_select(y1, 0, idx, out=yy1)
|
| 136 |
+
torch.index_select(x2, 0, idx, out=xx2)
|
| 137 |
+
torch.index_select(y2, 0, idx, out=yy2)
|
| 138 |
+
# store element-wise max with next highest score
|
| 139 |
+
xx1 = torch.clamp(xx1, min=x1[i])
|
| 140 |
+
yy1 = torch.clamp(yy1, min=y1[i])
|
| 141 |
+
xx2 = torch.clamp(xx2, max=x2[i])
|
| 142 |
+
yy2 = torch.clamp(yy2, max=y2[i])
|
| 143 |
+
w.resize_as_(xx2)
|
| 144 |
+
h.resize_as_(yy2)
|
| 145 |
+
w = xx2 - xx1
|
| 146 |
+
h = yy2 - yy1
|
| 147 |
+
# check sizes of xx1 and xx2.. after each iteration
|
| 148 |
+
w = torch.clamp(w, min=0.0)
|
| 149 |
+
h = torch.clamp(h, min=0.0)
|
| 150 |
+
inter = w * h
|
| 151 |
+
# IoU = i / (area(a) + area(b) - i)
|
| 152 |
+
rem_areas = torch.index_select(area, 0, idx) # load remaining areas)
|
| 153 |
+
union = (rem_areas - inter) + area[i]
|
| 154 |
+
IoU = inter / union # store result in iou
|
| 155 |
+
# keep only elements with an IoU <= overlap
|
| 156 |
+
idx = idx[IoU.le(overlap)]
|
| 157 |
+
return keep, count
|
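The decode()/nms() pair above is easiest to sanity-check on toy inputs. A small sketch, assuming only that the package is importable as dsfacedetector (matching the file paths in this commit): two heavily overlapping boxes and one separate box should collapse to two kept detections, and decoding zero offsets just converts priors from center form to corner form.

import torch
from dsfacedetector.layers.detection import decode, nms

# three corner-form boxes: the first two overlap almost completely
boxes = torch.tensor([[0.10, 0.10, 0.30, 0.30],
                      [0.11, 0.11, 0.31, 0.31],
                      [0.60, 0.60, 0.80, 0.80]])
scores = torch.tensor([0.90, 0.80, 0.70])

keep, count = nms(boxes, scores, overlap=0.5, top_k=200)
print(keep[:count].tolist(), count)   # expected: [0, 2] and count == 2

# decode() with zero offsets converts priors from (cx, cy, w, h) to (x0, y0, x1, y1)
priors = torch.tensor([[0.5, 0.5, 0.2, 0.2]])
print(decode(torch.zeros(1, 4), priors, (0.1, 0.2)))  # -> [[0.4, 0.4, 0.6, 0.6]]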
dsfacedetector/layers/modules.py
ADDED
@@ -0,0 +1,98 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class DeepHeadModule(nn.Module):
    def __init__(self, input_channels, output_channels):
        super(DeepHeadModule, self).__init__()
        self._input_channels = input_channels
        self._output_channels = output_channels
        self._mid_channels = min(self._input_channels, 256)

        self.conv1 = nn.Conv2d(self._input_channels, self._mid_channels, kernel_size=3, dilation=1, stride=1, padding=1)
        self.conv2 = nn.Conv2d(self._mid_channels, self._mid_channels, kernel_size=3, dilation=1, stride=1, padding=1)
        self.conv3 = nn.Conv2d(self._mid_channels, self._mid_channels, kernel_size=3, dilation=1, stride=1, padding=1)
        self.conv4 = nn.Conv2d(self._mid_channels, self._output_channels, kernel_size=1, dilation=1, stride=1,
                               padding=0)

    def forward(self, x):
        return self.conv4(
            F.relu(self.conv3(F.relu(self.conv2(F.relu(self.conv1(x), inplace=True)), inplace=True)), inplace=True))


class FEM(nn.Module):
    def __init__(self, channel_size):
        super(FEM, self).__init__()
        self.cs = channel_size
        self.cpm1 = nn.Conv2d(self.cs, 256, kernel_size=3, dilation=1, stride=1, padding=1)
        self.cpm2 = nn.Conv2d(self.cs, 256, kernel_size=3, dilation=2, stride=1, padding=2)
        self.cpm3 = nn.Conv2d(256, 128, kernel_size=3, dilation=1, stride=1, padding=1)
        self.cpm4 = nn.Conv2d(256, 128, kernel_size=3, dilation=2, stride=1, padding=2)
        self.cpm5 = nn.Conv2d(128, 128, kernel_size=3, dilation=1, stride=1, padding=1)

    def forward(self, x):
        x1_1 = F.relu(self.cpm1(x), inplace=True)
        x1_2 = F.relu(self.cpm2(x), inplace=True)
        x2_1 = F.relu(self.cpm3(x1_2), inplace=True)
        x2_2 = F.relu(self.cpm4(x1_2), inplace=True)
        x3_1 = F.relu(self.cpm5(x2_2), inplace=True)
        return torch.cat((x1_1, x2_1, x3_1), 1)


def upsample_product(x, y):
    '''Upsample and add two feature maps.
    Args:
        x: (Variable) top feature map to be upsampled.
        y: (Variable) lateral feature map.
    Returns:
        (Variable) added feature map.
    Note in PyTorch, when input size is odd, the upsampled feature map
    with `F.upsample(..., scale_factor=2, mode='nearest')`
    maybe not equal to the lateral feature map size.
    e.g.
    original input size: [N,_,15,15] ->
    conv2d feature map size: [N,_,8,8] ->
    upsampled feature map size: [N,_,16,16]
    So we choose bilinear upsample which supports arbitrary output sizes.
    '''
    _, _, H, W = y.size()

    # FOR ONNX CONVERSION
    # return F.interpolate(x, scale_factor=2, mode='nearest') * y
    return F.interpolate(x, size=(int(H), int(W)), mode='bilinear', align_corners=False) * y


def pa_multibox(output_channels):
    loc_layers = []
    conf_layers = []
    for k, v in enumerate(output_channels):
        if k == 0:
            loc_output = 4
            conf_output = 2
        elif k == 1:
            loc_output = 8
            conf_output = 4
        else:
            loc_output = 12
            conf_output = 6
        loc_layers += [DeepHeadModule(512, loc_output)]
        conf_layers += [DeepHeadModule(512, (2 + conf_output))]
    return (loc_layers, conf_layers)


def mio_module(each_mmbox, len_conf, your_mind_state='peasant'):
    # chunk = torch.split(each_mmbox, 1, 1) - !!!!! failed to export on PyTorch v1.0.1 (ONNX version 1.3)
    chunk = torch.chunk(each_mmbox, int(each_mmbox.shape[1]), 1)

    # some hacks for ONNX and Inference Engine export
    if your_mind_state == 'peasant':
        bmax = torch.max(torch.max(chunk[0], chunk[1]), chunk[2])
    elif your_mind_state == 'advanced':
        bmax = torch.max(each_mmbox[:, :3], 1)[0].unsqueeze(0)
    else:  # supermind
        bmax = torch.nn.functional.max_pool3d(each_mmbox[:, :3], kernel_size=(3, 1, 1))

    cls = (torch.cat((bmax, chunk[3]), dim=1) if len_conf == 0 else torch.cat((chunk[3], bmax), dim=1))
    cls = torch.cat((cls, *list(chunk[4:])), dim=1)
    return cls
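A quick shape check for the two less obvious helpers above, upsample_product() and mio_module() (the max-in-out trick on the first three channels of the confidence head). The sketch only assumes the package layout shown in this commit; the tensor sizes are illustrative.

import torch
from dsfacedetector.layers.modules import upsample_product, mio_module

# upsample_product: x is bilinearly resized to y's spatial size, then multiplied
x = torch.randn(1, 8, 4, 4)
y = torch.randn(1, 8, 7, 7)
print(upsample_product(x, y).shape)            # torch.Size([1, 8, 7, 7])

# mio_module: for the first source map the conf head emits 2 + 2 = 4 channels
# (see pa_multibox above); the max over the first three channels plus the
# remaining channel leaves num_classes = 2 scores per location
head_out = torch.randn(1, 4, 8, 8)
print(mio_module(head_out, len_conf=0).shape)  # torch.Size([1, 2, 8, 8])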
dsfacedetector/layers/prior_box.py
ADDED
@@ -0,0 +1,133 @@
| 1 |
+
from __future__ import division
|
| 2 |
+
from math import sqrt as sqrt
|
| 3 |
+
import torch
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class PriorBox(object):
|
| 7 |
+
"""Compute priorbox coordinates in center-offset form for each source
|
| 8 |
+
feature map.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
def __init__(self, cfg, min_size, max_size):
|
| 12 |
+
super(PriorBox, self).__init__()
|
| 13 |
+
self.image_size = cfg['min_dim']
|
| 14 |
+
self.feature_maps = cfg['feature_maps']
|
| 15 |
+
|
| 16 |
+
self.variance = cfg['variance'] or [0.1]
|
| 17 |
+
self.min_sizes = min_size
|
| 18 |
+
self.max_sizes = max_size
|
| 19 |
+
self.steps = cfg['steps']
|
| 20 |
+
self.aspect_ratios = cfg['aspect_ratios']
|
| 21 |
+
self.clip = cfg['clip']
|
| 22 |
+
|
| 23 |
+
for v in self.variance:
|
| 24 |
+
if v <= 0:
|
| 25 |
+
raise ValueError('Variances must be greater than 0')
|
| 26 |
+
|
| 27 |
+
def forward(self):
|
| 28 |
+
|
| 29 |
+
mean = []
|
| 30 |
+
|
| 31 |
+
if len(self.min_sizes) == 5:
|
| 32 |
+
self.feature_maps = self.feature_maps[1:]
|
| 33 |
+
self.steps = self.steps[1:]
|
| 34 |
+
if len(self.min_sizes) == 4:
|
| 35 |
+
self.feature_maps = self.feature_maps[2:]
|
| 36 |
+
self.steps = self.steps[2:]
|
| 37 |
+
|
| 38 |
+
for k, f in enumerate(self.feature_maps):
|
| 39 |
+
# for i, j in product(range(f), repeat=2):
|
| 40 |
+
for i in range(f[0]):
|
| 41 |
+
for j in range(f[1]):
|
| 42 |
+
# f_k = self.image_size / self.steps[k]
|
| 43 |
+
f_k_i = self.image_size[0] / self.steps[k]
|
| 44 |
+
f_k_j = self.image_size[1] / self.steps[k]
|
| 45 |
+
# unit center x,y
|
| 46 |
+
cx = (j + 0.5) / f_k_j
|
| 47 |
+
cy = (i + 0.5) / f_k_i
|
| 48 |
+
# aspect_ratio: 1
|
| 49 |
+
# rel size: min_size
|
| 50 |
+
s_k_i = self.min_sizes[k] / self.image_size[1]
|
| 51 |
+
s_k_j = self.min_sizes[k] / self.image_size[0]
|
| 52 |
+
# swordli@tencent
|
| 53 |
+
if len(self.aspect_ratios[0]) == 0:
|
| 54 |
+
mean += [cx, cy, s_k_i, s_k_j]
|
| 55 |
+
|
| 56 |
+
# aspect_ratio: 1
|
| 57 |
+
# rel size: sqrt(s_k * s_(k+1))
|
| 58 |
+
# s_k_prime = sqrt(s_k * (self.max_sizes[k]/self.image_size))
|
| 59 |
+
if len(self.max_sizes) == len(self.min_sizes):
|
| 60 |
+
s_k_prime_i = sqrt(s_k_i * (self.max_sizes[k] / self.image_size[1]))
|
| 61 |
+
s_k_prime_j = sqrt(s_k_j * (self.max_sizes[k] / self.image_size[0]))
|
| 62 |
+
mean += [cx, cy, s_k_prime_i, s_k_prime_j]
|
| 63 |
+
# rest of aspect ratios
|
| 64 |
+
for ar in self.aspect_ratios[k]:
|
| 65 |
+
if len(self.max_sizes) == len(self.min_sizes):
|
| 66 |
+
mean += [cx, cy, s_k_prime_i / sqrt(ar), s_k_prime_j * sqrt(ar)]
|
| 67 |
+
mean += [cx, cy, s_k_i / sqrt(ar), s_k_j * sqrt(ar)]
|
| 68 |
+
|
| 69 |
+
# back to torch land
|
| 70 |
+
output = torch.Tensor(mean).view(-1, 4)
|
| 71 |
+
if self.clip:
|
| 72 |
+
output.clamp_(max=1, min=0)
|
| 73 |
+
return output
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def get_prior_boxes(cfg, feature_maps, image_size):
|
| 77 |
+
|
| 78 |
+
# number of priors for feature map location (either 4 or 6)
|
| 79 |
+
variance = cfg['variance'] or [0.1]
|
| 80 |
+
min_sizes = cfg['min_sizes']
|
| 81 |
+
max_sizes = cfg['max_sizes']
|
| 82 |
+
steps = cfg['steps']
|
| 83 |
+
aspect_ratios = cfg['aspect_ratios']
|
| 84 |
+
clip = cfg['clip']
|
| 85 |
+
for v in variance:
|
| 86 |
+
if v <= 0:
|
| 87 |
+
raise ValueError('Variances must be greater than 0')
|
| 88 |
+
|
| 89 |
+
mean = []
|
| 90 |
+
|
| 91 |
+
if len(min_sizes) == 5:
|
| 92 |
+
feature_maps = feature_maps[1:]
|
| 93 |
+
steps = steps[1:]
|
| 94 |
+
if len(min_sizes) == 4:
|
| 95 |
+
feature_maps = feature_maps[2:]
|
| 96 |
+
steps = steps[2:]
|
| 97 |
+
|
| 98 |
+
for k, f in enumerate(feature_maps):
|
| 99 |
+
# for i, j in product(range(f), repeat=2):
|
| 100 |
+
for i in range(f[0]):
|
| 101 |
+
for j in range(f[1]):
|
| 102 |
+
# f_k = image_size / steps[k]
|
| 103 |
+
f_k_i = image_size[0] / steps[k]
|
| 104 |
+
f_k_j = image_size[1] / steps[k]
|
| 105 |
+
# unit center x,y
|
| 106 |
+
cx = (j + 0.5) / f_k_j
|
| 107 |
+
cy = (i + 0.5) / f_k_i
|
| 108 |
+
# aspect_ratio: 1
|
| 109 |
+
# rel size: min_size
|
| 110 |
+
s_k_i = min_sizes[k] / image_size[1]
|
| 111 |
+
s_k_j = min_sizes[k] / image_size[0]
|
| 112 |
+
# swordli@tencent
|
| 113 |
+
if len(aspect_ratios[0]) == 0:
|
| 114 |
+
mean += [cx, cy, s_k_i, s_k_j]
|
| 115 |
+
|
| 116 |
+
# aspect_ratio: 1
|
| 117 |
+
# rel size: sqrt(s_k * s_(k+1))
|
| 118 |
+
# s_k_prime = sqrt(s_k * (max_sizes[k]/image_size))
|
| 119 |
+
if len(max_sizes) == len(min_sizes):
|
| 120 |
+
s_k_prime_i = sqrt(s_k_i * (max_sizes[k] / image_size[1]))
|
| 121 |
+
s_k_prime_j = sqrt(s_k_j * (max_sizes[k] / image_size[0]))
|
| 122 |
+
mean += [cx, cy, s_k_prime_i, s_k_prime_j]
|
| 123 |
+
# rest of aspect ratios
|
| 124 |
+
for ar in aspect_ratios[k]:
|
| 125 |
+
if len(max_sizes) == len(min_sizes):
|
| 126 |
+
mean += [cx, cy, s_k_prime_i / sqrt(ar), s_k_prime_j * sqrt(ar)]
|
| 127 |
+
mean += [cx, cy, s_k_i / sqrt(ar), s_k_j * sqrt(ar)]
|
| 128 |
+
|
| 129 |
+
# back to torch land
|
| 130 |
+
output = torch.Tensor(mean).view(-1, 4)
|
| 131 |
+
if clip:
|
| 132 |
+
output.clamp_(max=1, min=0)
|
| 133 |
+
return output
|
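To see what get_prior_boxes() above produces, here is a small sketch with a toy configuration; the values below are illustrative and are not the repository's data/config.py. With empty aspect-ratio lists and no max_sizes, each feature-map cell contributes exactly one prior in (cx, cy, w, h) form, normalized to the input size.

import torch
from dsfacedetector.layers.prior_box import get_prior_boxes

# toy config: two source layers, no extra aspect ratios, no max sizes
cfg = {
    'variance': [0.1, 0.2],
    'min_sizes': [16, 32],
    'max_sizes': [],
    'steps': [16, 32],
    'aspect_ratios': [[], []],
    'clip': True,
}
feature_maps = [[4, 4], [2, 2]]   # (h, w) of each source layer
image_size = (64, 64)             # (h, w) of the network input

priors = get_prior_boxes(cfg, feature_maps, image_size)
print(priors.shape)               # torch.Size([20, 4]) -> 4*4 + 2*2 cells, one prior each
print(priors[0])                  # tensor([0.1250, 0.1250, 0.2500, 0.2500])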
dsfacedetector/utils.py
ADDED
@@ -0,0 +1,101 @@
| 1 |
+
import cv2
|
| 2 |
+
import numpy as np
|
| 3 |
+
import matplotlib.pyplot as plt
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def vis_detections(im, dets, thresh=0.5, show_text=True):
|
| 7 |
+
"""Draw detected bounding boxes."""
|
| 8 |
+
class_name = 'face'
|
| 9 |
+
inds = np.where(dets[:, -1] >= thresh)[0] if dets is not None else []
|
| 10 |
+
if len(inds) == 0:
|
| 11 |
+
return
|
| 12 |
+
im = im[:, :, (2, 1, 0)]
|
| 13 |
+
fig, ax = plt.subplots(figsize=(12, 12))
|
| 14 |
+
ax.imshow(im, aspect='equal')
|
| 15 |
+
for i in inds:
|
| 16 |
+
bbox = dets[i, :4]
|
| 17 |
+
score = dets[i, -1]
|
| 18 |
+
ax.add_patch(
|
| 19 |
+
plt.Rectangle((bbox[0], bbox[1]),
|
| 20 |
+
bbox[2] - bbox[0],
|
| 21 |
+
bbox[3] - bbox[1], fill=False,
|
| 22 |
+
edgecolor='red', linewidth=2.5)
|
| 23 |
+
)
|
| 24 |
+
if show_text:
|
| 25 |
+
ax.text(bbox[0], bbox[1] - 5,
|
| 26 |
+
'{:s} {:.3f}'.format(class_name, score),
|
| 27 |
+
bbox=dict(facecolor='blue', alpha=0.5),
|
| 28 |
+
fontsize=10, color='white')
|
| 29 |
+
ax.set_title(('{} detections with '
|
| 30 |
+
'p({} | box) >= {:.1f}').format(class_name, class_name,
|
| 31 |
+
thresh),
|
| 32 |
+
fontsize=10)
|
| 33 |
+
plt.axis('off')
|
| 34 |
+
plt.tight_layout()
|
| 35 |
+
plt.savefig('out.png')
|
| 36 |
+
plt.show()
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def bbox_vote(det):
|
| 40 |
+
order = det[:, 4].ravel().argsort()[::-1]
|
| 41 |
+
det = det[order, :]
|
| 42 |
+
dets = None
|
| 43 |
+
while det.shape[0] > 0:
|
| 44 |
+
# IOU
|
| 45 |
+
area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)
|
| 46 |
+
xx1 = np.maximum(det[0, 0], det[:, 0])
|
| 47 |
+
yy1 = np.maximum(det[0, 1], det[:, 1])
|
| 48 |
+
xx2 = np.minimum(det[0, 2], det[:, 2])
|
| 49 |
+
yy2 = np.minimum(det[0, 3], det[:, 3])
|
| 50 |
+
w = np.maximum(0.0, xx2 - xx1 + 1)
|
| 51 |
+
h = np.maximum(0.0, yy2 - yy1 + 1)
|
| 52 |
+
inter = w * h
|
| 53 |
+
o = inter / (area[0] + area[:] - inter)
|
| 54 |
+
# get needed merge det and delete these det
|
| 55 |
+
merge_index = np.where(o >= 0.3)[0]
|
| 56 |
+
det_accu = det[merge_index, :]
|
| 57 |
+
det = np.delete(det, merge_index, 0)
|
| 58 |
+
if merge_index.shape[0] <= 1:
|
| 59 |
+
continue
|
| 60 |
+
det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4))
|
| 61 |
+
max_score = np.max(det_accu[:, 4])
|
| 62 |
+
det_accu_sum = np.zeros((1, 5))
|
| 63 |
+
det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:])
|
| 64 |
+
det_accu_sum[:, 4] = max_score
|
| 65 |
+
try:
|
| 66 |
+
dets = np.row_stack((dets, det_accu_sum))
|
| 67 |
+
except:
|
| 68 |
+
dets = det_accu_sum
|
| 69 |
+
if dets is not None:
|
| 70 |
+
dets = dets[0:750, :]
|
| 71 |
+
return dets
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def add_borders(curr_img, target_shape=(224, 224), fill_type=0):
|
| 75 |
+
curr_h, curr_w = curr_img.shape[0:2]
|
| 76 |
+
shift_h = max(target_shape[0] - curr_h, 0)
|
| 77 |
+
shift_w = max(target_shape[1] - curr_w, 0)
|
| 78 |
+
|
| 79 |
+
image = cv2.copyMakeBorder(curr_img, shift_h // 2, (shift_h + 1) // 2, shift_w // 2, (shift_w + 1) // 2, fill_type)
|
| 80 |
+
return image, shift_h, shift_w
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def resize_image(image, target_size, resize_factor=None, is_pad=True, interpolation=3):
|
| 84 |
+
curr_image_size = image.shape[0:2]
|
| 85 |
+
|
| 86 |
+
if resize_factor is None and is_pad:
|
| 87 |
+
resize_factor = min(target_size[0] / curr_image_size[0], target_size[1] / curr_image_size[1])
|
| 88 |
+
elif resize_factor is None and not is_pad:
|
| 89 |
+
resize_factor = np.sqrt((target_size[0] * target_size[1]) / (curr_image_size[0] * curr_image_size[1]))
|
| 90 |
+
|
| 91 |
+
image = cv2.resize(image, None, None, fx=resize_factor, fy=resize_factor, interpolation=interpolation)
|
| 92 |
+
|
| 93 |
+
if is_pad:
|
| 94 |
+
image, shift_h, shift_w = add_borders(image, target_size)
|
| 95 |
+
else:
|
| 96 |
+
shift_h = shift_w = 0
|
| 97 |
+
|
| 98 |
+
scale = np.array([image.shape[1]/resize_factor, image.shape[0]/resize_factor,
|
| 99 |
+
image.shape[1]/resize_factor, image.shape[0]/resize_factor])
|
| 100 |
+
|
| 101 |
+
return image, shift_h/image.shape[0]/2, shift_w/image.shape[1]/2, scale
|
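The padding arithmetic in resize_image()/add_borders() above is easiest to follow on concrete numbers. A small sketch using a dummy frame (only numpy and OpenCV, which utils.py already imports): a 100x200 image resized into a padded 64x64 input is first scaled by 0.32, then padded 16 rows top and bottom, and the returned shifts and scale undo exactly that transformation.

import numpy as np
from dsfacedetector.utils import resize_image

frame = np.zeros((100, 200, 3), dtype=np.uint8)   # (h, w, c) dummy frame
image, shift_h_scaled, shift_w_scaled, scale = resize_image(frame, (64, 64), is_pad=True)

print(image.shape)      # (64, 64, 3): resized to 32x64, then padded 16 px top and bottom
print(shift_h_scaled)   # 0.25 -> 32 pad rows / 64 rows / 2, the fraction to subtract per side
print(shift_w_scaled)   # 0.0
print(scale)            # [200. 200. 200. 200.]: padded 64x64 divided by the 0.32 resize factor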
external_data/convert_tf_to_pt.py
ADDED
@@ -0,0 +1,174 @@
| 1 |
+
# Source: https://github.com/lukemelas/EfficientNet-PyTorch
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import tensorflow as tf
|
| 5 |
+
import torch
|
| 6 |
+
|
| 7 |
+
def load_param(checkpoint_file, conversion_table, model_name):
|
| 8 |
+
"""
|
| 9 |
+
Load parameters according to conversion_table.
|
| 10 |
+
|
| 11 |
+
Args:
|
| 12 |
+
checkpoint_file (string): pretrained checkpoint model file in tensorflow
|
| 13 |
+
conversion_table (dict): { pytorch tensor in a model : checkpoint variable name }
|
| 14 |
+
"""
|
| 15 |
+
for pyt_param, tf_param_name in conversion_table.items():
|
| 16 |
+
tf_param_name = str(model_name) + '/' + tf_param_name
|
| 17 |
+
tf_param = tf.train.load_variable(checkpoint_file, tf_param_name)
|
| 18 |
+
if 'conv' in tf_param_name and 'kernel' in tf_param_name:
|
| 19 |
+
tf_param = np.transpose(tf_param, (3, 2, 0, 1))
|
| 20 |
+
if 'depthwise' in tf_param_name:
|
| 21 |
+
tf_param = np.transpose(tf_param, (1, 0, 2, 3))
|
| 22 |
+
elif tf_param_name.endswith('kernel'): # for weight(kernel), we should do transpose
|
| 23 |
+
tf_param = np.transpose(tf_param)
|
| 24 |
+
assert pyt_param.size() == tf_param.shape, \
|
| 25 |
+
'Dim Mismatch: %s vs %s ; %s' % (tuple(pyt_param.size()), tf_param.shape, tf_param_name)
|
| 26 |
+
pyt_param.data = torch.from_numpy(tf_param)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def load_efficientnet(model, checkpoint_file, model_name):
|
| 30 |
+
"""
|
| 31 |
+
Load PyTorch EfficientNet from TensorFlow checkpoint file
|
| 32 |
+
"""
|
| 33 |
+
|
| 34 |
+
# This will store the entire conversion table
|
| 35 |
+
conversion_table = {}
|
| 36 |
+
merge = lambda dict1, dict2: {**dict1, **dict2}
|
| 37 |
+
|
| 38 |
+
# All the weights not in the conv blocks
|
| 39 |
+
conversion_table_for_weights_outside_blocks = {
|
| 40 |
+
model._conv_stem.weight: 'stem/conv2d/kernel', # [3, 3, 3, 32]),
|
| 41 |
+
model._bn0.bias: 'stem/tpu_batch_normalization/beta', # [32]),
|
| 42 |
+
model._bn0.weight: 'stem/tpu_batch_normalization/gamma', # [32]),
|
| 43 |
+
model._bn0.running_mean: 'stem/tpu_batch_normalization/moving_mean', # [32]),
|
| 44 |
+
model._bn0.running_var: 'stem/tpu_batch_normalization/moving_variance', # [32]),
|
| 45 |
+
model._conv_head.weight: 'head/conv2d/kernel', # [1, 1, 320, 1280]),
|
| 46 |
+
model._bn1.bias: 'head/tpu_batch_normalization/beta', # [1280]),
|
| 47 |
+
model._bn1.weight: 'head/tpu_batch_normalization/gamma', # [1280]),
|
| 48 |
+
model._bn1.running_mean: 'head/tpu_batch_normalization/moving_mean', # [32]),
|
| 49 |
+
model._bn1.running_var: 'head/tpu_batch_normalization/moving_variance', # [32]),
|
| 50 |
+
model._fc.bias: 'head/dense/bias', # [1000]),
|
| 51 |
+
model._fc.weight: 'head/dense/kernel', # [1280, 1000]),
|
| 52 |
+
}
|
| 53 |
+
conversion_table = merge(conversion_table, conversion_table_for_weights_outside_blocks)
|
| 54 |
+
|
| 55 |
+
# The first conv block is special because it does not have _expand_conv
|
| 56 |
+
conversion_table_for_first_block = {
|
| 57 |
+
model._blocks[0]._project_conv.weight: 'blocks_0/conv2d/kernel', # 1, 1, 32, 16]),
|
| 58 |
+
model._blocks[0]._depthwise_conv.weight: 'blocks_0/depthwise_conv2d/depthwise_kernel', # [3, 3, 32, 1]),
|
| 59 |
+
model._blocks[0]._se_reduce.bias: 'blocks_0/se/conv2d/bias', # , [8]),
|
| 60 |
+
model._blocks[0]._se_reduce.weight: 'blocks_0/se/conv2d/kernel', # , [1, 1, 32, 8]),
|
| 61 |
+
model._blocks[0]._se_expand.bias: 'blocks_0/se/conv2d_1/bias', # , [32]),
|
| 62 |
+
model._blocks[0]._se_expand.weight: 'blocks_0/se/conv2d_1/kernel', # , [1, 1, 8, 32]),
|
| 63 |
+
model._blocks[0]._bn1.bias: 'blocks_0/tpu_batch_normalization/beta', # [32]),
|
| 64 |
+
model._blocks[0]._bn1.weight: 'blocks_0/tpu_batch_normalization/gamma', # [32]),
|
| 65 |
+
model._blocks[0]._bn1.running_mean: 'blocks_0/tpu_batch_normalization/moving_mean',
|
| 66 |
+
model._blocks[0]._bn1.running_var: 'blocks_0/tpu_batch_normalization/moving_variance',
|
| 67 |
+
model._blocks[0]._bn2.bias: 'blocks_0/tpu_batch_normalization_1/beta', # [16]),
|
| 68 |
+
model._blocks[0]._bn2.weight: 'blocks_0/tpu_batch_normalization_1/gamma', # [16]),
|
| 69 |
+
model._blocks[0]._bn2.running_mean: 'blocks_0/tpu_batch_normalization_1/moving_mean',
|
| 70 |
+
model._blocks[0]._bn2.running_var: 'blocks_0/tpu_batch_normalization_1/moving_variance',
|
| 71 |
+
}
|
| 72 |
+
conversion_table = merge(conversion_table, conversion_table_for_first_block)
|
| 73 |
+
|
| 74 |
+
# Conv blocks
|
| 75 |
+
for i in range(len(model._blocks)):
|
| 76 |
+
|
| 77 |
+
is_first_block = '_expand_conv.weight' not in [n for n, p in model._blocks[i].named_parameters()]
|
| 78 |
+
|
| 79 |
+
if is_first_block:
|
| 80 |
+
conversion_table_block = {
|
| 81 |
+
model._blocks[i]._project_conv.weight: 'blocks_' + str(i) + '/conv2d/kernel', # 1, 1, 32, 16]),
|
| 82 |
+
model._blocks[i]._depthwise_conv.weight: 'blocks_' + str(i) + '/depthwise_conv2d/depthwise_kernel',
|
| 83 |
+
# [3, 3, 32, 1]),
|
| 84 |
+
model._blocks[i]._se_reduce.bias: 'blocks_' + str(i) + '/se/conv2d/bias', # , [8]),
|
| 85 |
+
model._blocks[i]._se_reduce.weight: 'blocks_' + str(i) + '/se/conv2d/kernel', # , [1, 1, 32, 8]),
|
| 86 |
+
model._blocks[i]._se_expand.bias: 'blocks_' + str(i) + '/se/conv2d_1/bias', # , [32]),
|
| 87 |
+
model._blocks[i]._se_expand.weight: 'blocks_' + str(i) + '/se/conv2d_1/kernel', # , [1, 1, 8, 32]),
|
| 88 |
+
model._blocks[i]._bn1.bias: 'blocks_' + str(i) + '/tpu_batch_normalization/beta', # [32]),
|
| 89 |
+
model._blocks[i]._bn1.weight: 'blocks_' + str(i) + '/tpu_batch_normalization/gamma', # [32]),
|
| 90 |
+
model._blocks[i]._bn1.running_mean: 'blocks_' + str(i) + '/tpu_batch_normalization/moving_mean',
|
| 91 |
+
model._blocks[i]._bn1.running_var: 'blocks_' + str(i) + '/tpu_batch_normalization/moving_variance',
|
| 92 |
+
model._blocks[i]._bn2.bias: 'blocks_' + str(i) + '/tpu_batch_normalization_1/beta', # [16]),
|
| 93 |
+
model._blocks[i]._bn2.weight: 'blocks_' + str(i) + '/tpu_batch_normalization_1/gamma', # [16]),
|
| 94 |
+
model._blocks[i]._bn2.running_mean: 'blocks_' + str(i) + '/tpu_batch_normalization_1/moving_mean',
|
| 95 |
+
model._blocks[i]._bn2.running_var: 'blocks_' + str(i) + '/tpu_batch_normalization_1/moving_variance',
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
else:
|
| 99 |
+
conversion_table_block = {
|
| 100 |
+
model._blocks[i]._expand_conv.weight: 'blocks_' + str(i) + '/conv2d/kernel',
|
| 101 |
+
model._blocks[i]._project_conv.weight: 'blocks_' + str(i) + '/conv2d_1/kernel',
|
| 102 |
+
model._blocks[i]._depthwise_conv.weight: 'blocks_' + str(i) + '/depthwise_conv2d/depthwise_kernel',
|
| 103 |
+
model._blocks[i]._se_reduce.bias: 'blocks_' + str(i) + '/se/conv2d/bias',
|
| 104 |
+
model._blocks[i]._se_reduce.weight: 'blocks_' + str(i) + '/se/conv2d/kernel',
|
| 105 |
+
model._blocks[i]._se_expand.bias: 'blocks_' + str(i) + '/se/conv2d_1/bias',
|
| 106 |
+
model._blocks[i]._se_expand.weight: 'blocks_' + str(i) + '/se/conv2d_1/kernel',
|
| 107 |
+
model._blocks[i]._bn0.bias: 'blocks_' + str(i) + '/tpu_batch_normalization/beta',
|
| 108 |
+
model._blocks[i]._bn0.weight: 'blocks_' + str(i) + '/tpu_batch_normalization/gamma',
|
| 109 |
+
model._blocks[i]._bn0.running_mean: 'blocks_' + str(i) + '/tpu_batch_normalization/moving_mean',
|
| 110 |
+
model._blocks[i]._bn0.running_var: 'blocks_' + str(i) + '/tpu_batch_normalization/moving_variance',
|
| 111 |
+
model._blocks[i]._bn1.bias: 'blocks_' + str(i) + '/tpu_batch_normalization_1/beta',
|
| 112 |
+
model._blocks[i]._bn1.weight: 'blocks_' + str(i) + '/tpu_batch_normalization_1/gamma',
|
| 113 |
+
model._blocks[i]._bn1.running_mean: 'blocks_' + str(i) + '/tpu_batch_normalization_1/moving_mean',
|
| 114 |
+
model._blocks[i]._bn1.running_var: 'blocks_' + str(i) + '/tpu_batch_normalization_1/moving_variance',
|
| 115 |
+
model._blocks[i]._bn2.bias: 'blocks_' + str(i) + '/tpu_batch_normalization_2/beta',
|
| 116 |
+
model._blocks[i]._bn2.weight: 'blocks_' + str(i) + '/tpu_batch_normalization_2/gamma',
|
| 117 |
+
model._blocks[i]._bn2.running_mean: 'blocks_' + str(i) + '/tpu_batch_normalization_2/moving_mean',
|
| 118 |
+
model._blocks[i]._bn2.running_var: 'blocks_' + str(i) + '/tpu_batch_normalization_2/moving_variance',
|
| 119 |
+
}
|
| 120 |
+
|
| 121 |
+
conversion_table = merge(conversion_table, conversion_table_block)
|
| 122 |
+
|
| 123 |
+
# Load TensorFlow parameters into PyTorch model
|
| 124 |
+
load_param(checkpoint_file, conversion_table, model_name)
|
| 125 |
+
return conversion_table
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def load_and_save_temporary_tensorflow_model(model_name, model_ckpt, example_img= '../../example/img.jpg'):
|
| 129 |
+
""" Loads and saves a TensorFlow model. """
|
| 130 |
+
image_files = [example_img]
|
| 131 |
+
eval_ckpt_driver = eval_ckpt_main.EvalCkptDriver(model_name)
|
| 132 |
+
with tf.Graph().as_default(), tf.Session() as sess:
|
| 133 |
+
images, labels = eval_ckpt_driver.build_dataset(image_files, [0] * len(image_files), False)
|
| 134 |
+
probs = eval_ckpt_driver.build_model(images, is_training=False)
|
| 135 |
+
sess.run(tf.global_variables_initializer())
|
| 136 |
+
print(model_ckpt)
|
| 137 |
+
eval_ckpt_driver.restore_model(sess, model_ckpt)
|
| 138 |
+
tf.train.Saver().save(sess, 'tmp/model.ckpt')
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
if __name__ == '__main__':
|
| 142 |
+
|
| 143 |
+
import sys
|
| 144 |
+
import argparse
|
| 145 |
+
|
| 146 |
+
sys.path.append('original_tf')
|
| 147 |
+
import eval_ckpt_main
|
| 148 |
+
|
| 149 |
+
from efficientnet_pytorch import EfficientNet
|
| 150 |
+
|
| 151 |
+
parser = argparse.ArgumentParser(
|
| 152 |
+
description='Convert TF model to PyTorch model and save for easier future loading')
|
| 153 |
+
parser.add_argument('--model_name', type=str, default='efficientnet-b0',
|
| 154 |
+
help='efficientnet-b{N}, where N is an integer 0 <= N <= 8')
|
| 155 |
+
parser.add_argument('--tf_checkpoint', type=str, default='pretrained_tensorflow/efficientnet-b0/',
|
| 156 |
+
help='checkpoint file path')
|
| 157 |
+
parser.add_argument('--output_file', type=str, default='pretrained_pytorch/efficientnet-b0.pth',
|
| 158 |
+
help='output PyTorch model file name')
|
| 159 |
+
args = parser.parse_args()
|
| 160 |
+
|
| 161 |
+
# Build model
|
| 162 |
+
model = EfficientNet.from_name(args.model_name)
|
| 163 |
+
|
| 164 |
+
# Load and save temporary TensorFlow file due to TF nuances
|
| 165 |
+
print(args.tf_checkpoint)
|
| 166 |
+
load_and_save_temporary_tensorflow_model(args.model_name, args.tf_checkpoint)
|
| 167 |
+
|
| 168 |
+
# Load weights
|
| 169 |
+
load_efficientnet(model, 'tmp/model.ckpt', model_name=args.model_name)
|
| 170 |
+
print('Loaded TF checkpoint weights')
|
| 171 |
+
|
| 172 |
+
# Save PyTorch file
|
| 173 |
+
torch.save(model.state_dict(), args.output_file)
|
| 174 |
+
print('Saved model to', args.output_file)
|
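The only non-trivial part of load_param() above is the layout change for convolution kernels: TensorFlow stores them as HWIO (height, width, in, out) while PyTorch's Conv2d.weight is OIHW, and depthwise kernels get one extra swap. A tiny numpy sketch of the same transposes, with illustrative shapes:

import numpy as np

tf_kernel = np.zeros((3, 3, 32, 64))          # TF conv kernel: H, W, in, out
pt_kernel = np.transpose(tf_kernel, (3, 2, 0, 1))
print(pt_kernel.shape)                        # (64, 32, 3, 3): out, in, H, W as PyTorch expects

tf_dw = np.zeros((3, 3, 32, 1))               # TF depthwise kernel: H, W, channels, multiplier
pt_dw = np.transpose(tf_dw, (3, 2, 0, 1))     # first transpose -> (1, 32, 3, 3)
pt_dw = np.transpose(pt_dw, (1, 0, 2, 3))     # extra depthwise swap in load_param -> (32, 1, 3, 3)
print(pt_dw.shape)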
external_data/original_tf/__init__.py
ADDED
File without changes
external_data/original_tf/efficientnet_builder.py
ADDED
@@ -0,0 +1,329 @@
| 1 |
+
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
# ==============================================================================
|
| 15 |
+
"""Model Builder for EfficientNet."""
|
| 16 |
+
|
| 17 |
+
from __future__ import absolute_import
|
| 18 |
+
from __future__ import division
|
| 19 |
+
from __future__ import print_function
|
| 20 |
+
|
| 21 |
+
import functools
|
| 22 |
+
import os
|
| 23 |
+
import re
|
| 24 |
+
from absl import logging
|
| 25 |
+
import numpy as np
|
| 26 |
+
import six
|
| 27 |
+
import tensorflow.compat.v1 as tf
|
| 28 |
+
|
| 29 |
+
import efficientnet_model
|
| 30 |
+
import utils
|
| 31 |
+
MEAN_RGB = [0.485 * 255, 0.456 * 255, 0.406 * 255]
|
| 32 |
+
STDDEV_RGB = [0.229 * 255, 0.224 * 255, 0.225 * 255]
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def efficientnet_params(model_name):
|
| 36 |
+
"""Get efficientnet params based on model name."""
|
| 37 |
+
params_dict = {
|
| 38 |
+
# (width_coefficient, depth_coefficient, resolution, dropout_rate)
|
| 39 |
+
'efficientnet-b0': (1.0, 1.0, 224, 0.2),
|
| 40 |
+
'efficientnet-b1': (1.0, 1.1, 240, 0.2),
|
| 41 |
+
'efficientnet-b2': (1.1, 1.2, 260, 0.3),
|
| 42 |
+
'efficientnet-b3': (1.2, 1.4, 300, 0.3),
|
| 43 |
+
'efficientnet-b4': (1.4, 1.8, 380, 0.4),
|
| 44 |
+
'efficientnet-b5': (1.6, 2.2, 456, 0.4),
|
| 45 |
+
'efficientnet-b6': (1.8, 2.6, 528, 0.5),
|
| 46 |
+
'efficientnet-b7': (2.0, 3.1, 600, 0.5),
|
| 47 |
+
'efficientnet-b8': (2.2, 3.6, 672, 0.5),
|
| 48 |
+
'efficientnet-l2': (4.3, 5.3, 800, 0.5),
|
| 49 |
+
}
|
| 50 |
+
return params_dict[model_name]
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
class BlockDecoder(object):
|
| 54 |
+
"""Block Decoder for readability."""
|
| 55 |
+
|
| 56 |
+
def _decode_block_string(self, block_string):
|
| 57 |
+
"""Gets a block through a string notation of arguments."""
|
| 58 |
+
if six.PY2:
|
| 59 |
+
assert isinstance(block_string, (str, unicode))
|
| 60 |
+
else:
|
| 61 |
+
assert isinstance(block_string, str)
|
| 62 |
+
ops = block_string.split('_')
|
| 63 |
+
options = {}
|
| 64 |
+
for op in ops:
|
| 65 |
+
splits = re.split(r'(\d.*)', op)
|
| 66 |
+
if len(splits) >= 2:
|
| 67 |
+
key, value = splits[:2]
|
| 68 |
+
options[key] = value
|
| 69 |
+
|
| 70 |
+
if 's' not in options or len(options['s']) != 2:
|
| 71 |
+
raise ValueError('Strides options should be a pair of integers.')
|
| 72 |
+
|
| 73 |
+
return efficientnet_model.BlockArgs(
|
| 74 |
+
kernel_size=int(options['k']),
|
| 75 |
+
num_repeat=int(options['r']),
|
| 76 |
+
input_filters=int(options['i']),
|
| 77 |
+
output_filters=int(options['o']),
|
| 78 |
+
expand_ratio=int(options['e']),
|
| 79 |
+
id_skip=('noskip' not in block_string),
|
| 80 |
+
se_ratio=float(options['se']) if 'se' in options else None,
|
| 81 |
+
strides=[int(options['s'][0]),
|
| 82 |
+
int(options['s'][1])],
|
| 83 |
+
conv_type=int(options['c']) if 'c' in options else 0,
|
| 84 |
+
fused_conv=int(options['f']) if 'f' in options else 0,
|
| 85 |
+
super_pixel=int(options['p']) if 'p' in options else 0,
|
| 86 |
+
condconv=('cc' in block_string))
|
| 87 |
+
|
| 88 |
+
def _encode_block_string(self, block):
|
| 89 |
+
"""Encodes a block to a string."""
|
| 90 |
+
args = [
|
| 91 |
+
'r%d' % block.num_repeat,
|
| 92 |
+
'k%d' % block.kernel_size,
|
| 93 |
+
's%d%d' % (block.strides[0], block.strides[1]),
|
| 94 |
+
'e%s' % block.expand_ratio,
|
| 95 |
+
'i%d' % block.input_filters,
|
| 96 |
+
'o%d' % block.output_filters,
|
| 97 |
+
'c%d' % block.conv_type,
|
| 98 |
+
'f%d' % block.fused_conv,
|
| 99 |
+
'p%d' % block.super_pixel,
|
| 100 |
+
]
|
| 101 |
+
if block.se_ratio > 0 and block.se_ratio <= 1:
|
| 102 |
+
args.append('se%s' % block.se_ratio)
|
| 103 |
+
if block.id_skip is False: # pylint: disable=g-bool-id-comparison
|
| 104 |
+
args.append('noskip')
|
| 105 |
+
if block.condconv:
|
| 106 |
+
args.append('cc')
|
| 107 |
+
return '_'.join(args)
|
| 108 |
+
|
| 109 |
+
def decode(self, string_list):
|
| 110 |
+
"""Decodes a list of string notations to specify blocks inside the network.
|
| 111 |
+
|
| 112 |
+
Args:
|
| 113 |
+
string_list: a list of strings, each string is a notation of block.
|
| 114 |
+
|
| 115 |
+
Returns:
|
| 116 |
+
A list of namedtuples to represent blocks arguments.
|
| 117 |
+
"""
|
| 118 |
+
assert isinstance(string_list, list)
|
| 119 |
+
blocks_args = []
|
| 120 |
+
for block_string in string_list:
|
| 121 |
+
blocks_args.append(self._decode_block_string(block_string))
|
| 122 |
+
return blocks_args
|
| 123 |
+
|
| 124 |
+
def encode(self, blocks_args):
|
| 125 |
+
"""Encodes a list of Blocks to a list of strings.
|
| 126 |
+
|
| 127 |
+
Args:
|
| 128 |
+
blocks_args: A list of namedtuples to represent blocks arguments.
|
| 129 |
+
Returns:
|
| 130 |
+
a list of strings, each string is a notation of block.
|
| 131 |
+
"""
|
| 132 |
+
block_strings = []
|
| 133 |
+
for block in blocks_args:
|
| 134 |
+
block_strings.append(self._encode_block_string(block))
|
| 135 |
+
return block_strings
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def swish(features, use_native=True, use_hard=False):
|
| 139 |
+
"""Computes the Swish activation function.
|
| 140 |
+
|
| 141 |
+
We provide three alternatives:
|
| 142 |
+
- Native tf.nn.swish, use less memory during training than composable swish.
|
| 143 |
+
- Quantization friendly hard swish.
|
| 144 |
+
- A composable swish, equivalent to tf.nn.swish, but more general for
|
| 145 |
+
finetuning and TF-Hub.
|
| 146 |
+
|
| 147 |
+
Args:
|
| 148 |
+
features: A `Tensor` representing preactivation values.
|
| 149 |
+
use_native: Whether to use the native swish from tf.nn that uses a custom
|
| 150 |
+
gradient to reduce memory usage, or to use customized swish that uses
|
| 151 |
+
default TensorFlow gradient computation.
|
| 152 |
+
use_hard: Whether to use quantization-friendly hard swish.
|
| 153 |
+
|
| 154 |
+
Returns:
|
| 155 |
+
The activation value.
|
| 156 |
+
"""
|
| 157 |
+
if use_native and use_hard:
|
| 158 |
+
raise ValueError('Cannot specify both use_native and use_hard.')
|
| 159 |
+
|
| 160 |
+
if use_native:
|
| 161 |
+
return tf.nn.swish(features)
|
| 162 |
+
|
| 163 |
+
if use_hard:
|
| 164 |
+
return features * tf.nn.relu6(features + np.float32(3)) * (1. / 6.)
|
| 165 |
+
|
| 166 |
+
features = tf.convert_to_tensor(features, name='features')
|
| 167 |
+
return features * tf.nn.sigmoid(features)
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
_DEFAULT_BLOCKS_ARGS = [
|
| 171 |
+
'r1_k3_s11_e1_i32_o16_se0.25', 'r2_k3_s22_e6_i16_o24_se0.25',
|
| 172 |
+
'r2_k5_s22_e6_i24_o40_se0.25', 'r3_k3_s22_e6_i40_o80_se0.25',
|
| 173 |
+
'r3_k5_s11_e6_i80_o112_se0.25', 'r4_k5_s22_e6_i112_o192_se0.25',
|
| 174 |
+
'r1_k3_s11_e6_i192_o320_se0.25',
|
| 175 |
+
]
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
def efficientnet(width_coefficient=None,
|
| 179 |
+
depth_coefficient=None,
|
| 180 |
+
dropout_rate=0.2,
|
| 181 |
+
survival_prob=0.8):
|
| 182 |
+
"""Creates a efficientnet model."""
|
| 183 |
+
global_params = efficientnet_model.GlobalParams(
|
| 184 |
+
blocks_args=_DEFAULT_BLOCKS_ARGS,
|
| 185 |
+
batch_norm_momentum=0.99,
|
| 186 |
+
batch_norm_epsilon=1e-3,
|
| 187 |
+
dropout_rate=dropout_rate,
|
| 188 |
+
survival_prob=survival_prob,
|
| 189 |
+
data_format='channels_last',
|
| 190 |
+
num_classes=1000,
|
| 191 |
+
width_coefficient=width_coefficient,
|
| 192 |
+
depth_coefficient=depth_coefficient,
|
| 193 |
+
depth_divisor=8,
|
| 194 |
+
min_depth=None,
|
| 195 |
+
relu_fn=tf.nn.swish,
|
| 196 |
+
# The default is TPU-specific batch norm.
|
| 197 |
+
# The alternative is tf.layers.BatchNormalization.
|
| 198 |
+
batch_norm=utils.TpuBatchNormalization, # TPU-specific requirement.
|
| 199 |
+
use_se=True,
|
| 200 |
+
clip_projection_output=False)
|
| 201 |
+
return global_params
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
def get_model_params(model_name, override_params):
|
| 205 |
+
"""Get the block args and global params for a given model."""
|
| 206 |
+
if model_name.startswith('efficientnet'):
|
| 207 |
+
width_coefficient, depth_coefficient, _, dropout_rate = (
|
| 208 |
+
efficientnet_params(model_name))
|
| 209 |
+
global_params = efficientnet(
|
| 210 |
+
width_coefficient, depth_coefficient, dropout_rate)
|
| 211 |
+
else:
|
| 212 |
+
raise NotImplementedError('model name is not pre-defined: %s' % model_name)
|
| 213 |
+
|
| 214 |
+
if override_params:
|
| 215 |
+
# ValueError will be raised here if override_params has fields not included
|
| 216 |
+
# in global_params.
|
| 217 |
+
global_params = global_params._replace(**override_params)
|
| 218 |
+
|
| 219 |
+
decoder = BlockDecoder()
|
| 220 |
+
blocks_args = decoder.decode(global_params.blocks_args)
|
| 221 |
+
|
| 222 |
+
logging.info('global_params= %s', global_params)
|
| 223 |
+
return blocks_args, global_params
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
def build_model(images,
|
| 227 |
+
model_name,
|
| 228 |
+
training,
|
| 229 |
+
override_params=None,
|
| 230 |
+
model_dir=None,
|
| 231 |
+
fine_tuning=False,
|
| 232 |
+
features_only=False,
|
| 233 |
+
pooled_features_only=False):
|
| 234 |
+
"""A helper functiion to creates a model and returns predicted logits.
|
| 235 |
+
|
| 236 |
+
Args:
|
| 237 |
+
images: input images tensor.
|
| 238 |
+
model_name: string, the predefined model name.
|
| 239 |
+
training: boolean, whether the model is constructed for training.
|
| 240 |
+
override_params: A dictionary of params for overriding. Fields must exist in
|
| 241 |
+
efficientnet_model.GlobalParams.
|
| 242 |
+
model_dir: string, optional model dir for saving configs.
|
| 243 |
+
fine_tuning: boolean, whether the model is used for finetuning.
|
| 244 |
+
features_only: build the base feature network only (excluding final
|
| 245 |
+
1x1 conv layer, global pooling, dropout and fc head).
|
| 246 |
+
pooled_features_only: build the base network for features extraction (after
|
| 247 |
+
1x1 conv layer and global pooling, but before dropout and fc head).
|
| 248 |
+
|
| 249 |
+
Returns:
|
| 250 |
+
logits: the logits tensor of classes.
|
| 251 |
+
endpoints: the endpoints for each layer.
|
| 252 |
+
|
| 253 |
+
Raises:
|
| 254 |
+
When model_name specified an undefined model, raises NotImplementedError.
|
| 255 |
+
When override_params has invalid fields, raises ValueError.
|
| 256 |
+
"""
|
| 257 |
+
assert isinstance(images, tf.Tensor)
|
| 258 |
+
assert not (features_only and pooled_features_only)
|
| 259 |
+
|
| 260 |
+
# For backward compatibility.
|
| 261 |
+
if override_params and override_params.get('drop_connect_rate', None):
|
| 262 |
+
override_params['survival_prob'] = 1 - override_params['drop_connect_rate']
|
| 263 |
+
|
| 264 |
+
if not training or fine_tuning:
|
| 265 |
+
if not override_params:
|
| 266 |
+
override_params = {}
|
| 267 |
+
override_params['batch_norm'] = utils.BatchNormalization
|
| 268 |
+
if fine_tuning:
|
| 269 |
+
override_params['relu_fn'] = functools.partial(swish, use_native=False)
|
| 270 |
+
blocks_args, global_params = get_model_params(model_name, override_params)
|
| 271 |
+
|
| 272 |
+
if model_dir:
|
| 273 |
+
param_file = os.path.join(model_dir, 'model_params.txt')
|
| 274 |
+
if not tf.gfile.Exists(param_file):
|
| 275 |
+
if not tf.gfile.Exists(model_dir):
|
| 276 |
+
tf.gfile.MakeDirs(model_dir)
|
| 277 |
+
with tf.gfile.GFile(param_file, 'w') as f:
|
| 278 |
+
logging.info('writing to %s', param_file)
|
| 279 |
+
f.write('model_name= %s\n\n' % model_name)
|
| 280 |
+
f.write('global_params= %s\n\n' % str(global_params))
|
| 281 |
+
f.write('blocks_args= %s\n\n' % str(blocks_args))
|
| 282 |
+
|
| 283 |
+
with tf.variable_scope(model_name):
|
| 284 |
+
model = efficientnet_model.Model(blocks_args, global_params)
|
| 285 |
+
outputs = model(
|
| 286 |
+
images,
|
| 287 |
+
training=training,
|
| 288 |
+
features_only=features_only,
|
| 289 |
+
pooled_features_only=pooled_features_only)
|
| 290 |
+
if features_only:
|
| 291 |
+
outputs = tf.identity(outputs, 'features')
|
| 292 |
+
elif pooled_features_only:
|
| 293 |
+
outputs = tf.identity(outputs, 'pooled_features')
|
| 294 |
+
else:
|
| 295 |
+
outputs = tf.identity(outputs, 'logits')
|
| 296 |
+
return outputs, model.endpoints
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
def build_model_base(images, model_name, training, override_params=None):
|
| 300 |
+
"""A helper functiion to create a base model and return global_pool.
|
| 301 |
+
|
| 302 |
+
Args:
|
| 303 |
+
images: input images tensor.
|
| 304 |
+
model_name: string, the predefined model name.
|
| 305 |
+
training: boolean, whether the model is constructed for training.
|
| 306 |
+
override_params: A dictionary of params for overriding. Fields must exist in
|
| 307 |
+
efficientnet_model.GlobalParams.
|
| 308 |
+
|
| 309 |
+
Returns:
|
| 310 |
+
features: global pool features.
|
| 311 |
+
endpoints: the endpoints for each layer.
|
| 312 |
+
|
| 313 |
+
Raises:
|
| 314 |
+
When model_name specified an undefined model, raises NotImplementedError.
|
| 315 |
+
When override_params has invalid fields, raises ValueError.
|
| 316 |
+
"""
|
| 317 |
+
assert isinstance(images, tf.Tensor)
|
| 318 |
+
# For backward compatibility.
|
| 319 |
+
if override_params and override_params.get('drop_connect_rate', None):
|
| 320 |
+
override_params['survival_prob'] = 1 - override_params['drop_connect_rate']
|
| 321 |
+
|
| 322 |
+
blocks_args, global_params = get_model_params(model_name, override_params)
|
| 323 |
+
|
| 324 |
+
with tf.variable_scope(model_name):
|
| 325 |
+
model = efficientnet_model.Model(blocks_args, global_params)
|
| 326 |
+
features = model(images, training=training, features_only=True)
|
| 327 |
+
|
| 328 |
+
features = tf.identity(features, 'features')
|
| 329 |
+
return features, model.endpoints
|
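The block strings consumed by BlockDecoder above encode one stage per string (repeat count, kernel size, strides, expand ratio, input/output filters, SE ratio). A standalone sketch of the same key/value split used in _decode_block_string, applied to the first default block string; it re-uses only the regex shown above, not the repository module itself.

import re

block_string = 'r1_k3_s11_e1_i32_o16_se0.25'
options = {}
for op in block_string.split('_'):
    splits = re.split(r'(\d.*)', op)
    if len(splits) >= 2:
        key, value = splits[:2]
        options[key] = value

print(options)
# {'r': '1', 'k': '3', 's': '11', 'e': '1', 'i': '32', 'o': '16', 'se': '0.25'}
# i.e. repeat once, 3x3 kernel, stride (1, 1), expand ratio 1, 32 -> 16 filters, SE ratio 0.25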
external_data/original_tf/efficientnet_model.py
ADDED
@@ -0,0 +1,713 @@
| 1 |
+
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
# ==============================================================================
|
| 15 |
+
"""Contains definitions for EfficientNet model.
|
| 16 |
+
|
| 17 |
+
[1] Mingxing Tan, Quoc V. Le
|
| 18 |
+
EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks.
|
| 19 |
+
ICML'19, https://arxiv.org/abs/1905.11946
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
from __future__ import absolute_import
|
| 23 |
+
from __future__ import division
|
| 24 |
+
from __future__ import print_function
|
| 25 |
+
|
| 26 |
+
import collections
|
| 27 |
+
import functools
|
| 28 |
+
import math
|
| 29 |
+
|
| 30 |
+
from absl import logging
|
| 31 |
+
import numpy as np
|
| 32 |
+
import six
|
| 33 |
+
from six.moves import xrange
|
| 34 |
+
import tensorflow.compat.v1 as tf
|
| 35 |
+
|
| 36 |
+
import utils
|
| 37 |
+
# from condconv import condconv_layers
|
| 38 |
+
|
| 39 |
+
GlobalParams = collections.namedtuple('GlobalParams', [
|
| 40 |
+
'batch_norm_momentum', 'batch_norm_epsilon', 'dropout_rate', 'data_format',
|
| 41 |
+
'num_classes', 'width_coefficient', 'depth_coefficient', 'depth_divisor',
|
| 42 |
+
'min_depth', 'survival_prob', 'relu_fn', 'batch_norm', 'use_se',
|
| 43 |
+
'local_pooling', 'condconv_num_experts', 'clip_projection_output',
|
| 44 |
+
'blocks_args'
|
| 45 |
+
])
|
| 46 |
+
GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields)
|
| 47 |
+
|
| 48 |
+
BlockArgs = collections.namedtuple('BlockArgs', [
|
| 49 |
+
'kernel_size', 'num_repeat', 'input_filters', 'output_filters',
|
| 50 |
+
'expand_ratio', 'id_skip', 'strides', 'se_ratio', 'conv_type', 'fused_conv',
|
| 51 |
+
'super_pixel', 'condconv'
|
| 52 |
+
])
|
| 53 |
+
# defaults will be a public argument for namedtuple in Python 3.7
|
| 54 |
+
# https://docs.python.org/3/library/collections.html#collections.namedtuple
|
| 55 |
+
BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def conv_kernel_initializer(shape, dtype=None, partition_info=None):
|
| 59 |
+
"""Initialization for convolutional kernels.
|
| 60 |
+
|
| 61 |
+
The main difference with tf.variance_scaling_initializer is that
|
| 62 |
+
tf.variance_scaling_initializer uses a truncated normal with an uncorrected
|
| 63 |
+
standard deviation, whereas here we use a normal distribution. Similarly,
|
| 64 |
+
tf.initializers.variance_scaling uses a truncated normal with
|
| 65 |
+
a corrected standard deviation.
|
| 66 |
+
|
| 67 |
+
Args:
|
| 68 |
+
shape: shape of variable
|
| 69 |
+
dtype: dtype of variable
|
| 70 |
+
partition_info: unused
|
| 71 |
+
|
| 72 |
+
Returns:
|
| 73 |
+
an initialization for the variable
|
| 74 |
+
"""
|
| 75 |
+
del partition_info
|
| 76 |
+
kernel_height, kernel_width, _, out_filters = shape
|
| 77 |
+
fan_out = int(kernel_height * kernel_width * out_filters)
|
| 78 |
+
return tf.random_normal(
|
| 79 |
+
shape, mean=0.0, stddev=np.sqrt(2.0 / fan_out), dtype=dtype)
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def dense_kernel_initializer(shape, dtype=None, partition_info=None):
|
| 83 |
+
"""Initialization for dense kernels.
|
| 84 |
+
|
| 85 |
+
This initialization is equal to
|
| 86 |
+
tf.variance_scaling_initializer(scale=1.0/3.0, mode='fan_out',
|
| 87 |
+
distribution='uniform').
|
| 88 |
+
It is written out explicitly here for clarity.
|
| 89 |
+
|
| 90 |
+
Args:
|
| 91 |
+
shape: shape of variable
|
| 92 |
+
dtype: dtype of variable
|
| 93 |
+
partition_info: unused
|
| 94 |
+
|
| 95 |
+
Returns:
|
| 96 |
+
an initialization for the variable
|
| 97 |
+
"""
|
| 98 |
+
del partition_info
|
| 99 |
+
init_range = 1.0 / np.sqrt(shape[1])
|
| 100 |
+
return tf.random_uniform(shape, -init_range, init_range, dtype=dtype)
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def superpixel_kernel_initializer(shape, dtype='float32', partition_info=None):
|
| 104 |
+
"""Initializes superpixel kernels.
|
| 105 |
+
|
| 106 |
+
This is inspired by space-to-depth transformation that is mathematically
|
| 107 |
+
equivalent before and after the transformation. But we do the space-to-depth
|
| 108 |
+
via a convolution. Moreover, we make the layer trainable instead of direct
|
| 109 |
+
transform, we can initialization it this way so that the model can learn not
|
| 110 |
+
to do anything but keep it mathematically equivalent, when improving
|
| 111 |
+
performance.
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
Args:
|
| 115 |
+
shape: shape of variable
|
| 116 |
+
dtype: dtype of variable
|
| 117 |
+
partition_info: unused
|
| 118 |
+
|
| 119 |
+
Returns:
|
| 120 |
+
an initialization for the variable
|
| 121 |
+
"""
|
| 122 |
+
del partition_info
|
| 123 |
+
# use input depth to make superpixel kernel.
|
| 124 |
+
depth = shape[-2]
|
| 125 |
+
filters = np.zeros([2, 2, depth, 4 * depth], dtype=dtype)
|
| 126 |
+
i = np.arange(2)
|
| 127 |
+
j = np.arange(2)
|
| 128 |
+
k = np.arange(depth)
|
| 129 |
+
mesh = np.array(np.meshgrid(i, j, k)).T.reshape(-1, 3).T
|
| 130 |
+
filters[
|
| 131 |
+
mesh[0],
|
| 132 |
+
mesh[1],
|
| 133 |
+
mesh[2],
|
| 134 |
+
4 * mesh[2] + 2 * mesh[0] + mesh[1]] = 1
|
| 135 |
+
return filters
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def round_filters(filters, global_params):
|
| 139 |
+
"""Round number of filters based on depth multiplier."""
|
| 140 |
+
orig_f = filters
|
| 141 |
+
multiplier = global_params.width_coefficient
|
| 142 |
+
divisor = global_params.depth_divisor
|
| 143 |
+
min_depth = global_params.min_depth
|
| 144 |
+
if not multiplier:
|
| 145 |
+
return filters
|
| 146 |
+
|
| 147 |
+
filters *= multiplier
|
| 148 |
+
min_depth = min_depth or divisor
|
| 149 |
+
new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor)
|
| 150 |
+
# Make sure that round down does not go down by more than 10%.
|
| 151 |
+
if new_filters < 0.9 * filters:
|
| 152 |
+
new_filters += divisor
|
| 153 |
+
logging.info('round_filter input=%s output=%s', orig_f, new_filters)
|
| 154 |
+
return int(new_filters)
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
def round_repeats(repeats, global_params):
|
| 158 |
+
"""Round number of filters based on depth multiplier."""
|
| 159 |
+
multiplier = global_params.depth_coefficient
|
| 160 |
+
if not multiplier:
|
| 161 |
+
return repeats
|
| 162 |
+
return int(math.ceil(multiplier * repeats))
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
class MBConvBlock(tf.keras.layers.Layer):
|
| 166 |
+
"""A class of MBConv: Mobile Inverted Residual Bottleneck.
|
| 167 |
+
|
| 168 |
+
Attributes:
|
| 169 |
+
endpoints: dict. A list of internal tensors.
|
| 170 |
+
"""
|
| 171 |
+
|
| 172 |
+
def __init__(self, block_args, global_params):
|
| 173 |
+
"""Initializes a MBConv block.
|
| 174 |
+
|
| 175 |
+
Args:
|
| 176 |
+
block_args: BlockArgs, arguments to create a Block.
|
| 177 |
+
global_params: GlobalParams, a set of global parameters.
|
| 178 |
+
"""
|
| 179 |
+
super(MBConvBlock, self).__init__()
|
| 180 |
+
self._block_args = block_args
|
| 181 |
+
self._batch_norm_momentum = global_params.batch_norm_momentum
|
| 182 |
+
self._batch_norm_epsilon = global_params.batch_norm_epsilon
|
| 183 |
+
self._batch_norm = global_params.batch_norm
|
| 184 |
+
self._condconv_num_experts = global_params.condconv_num_experts
|
| 185 |
+
self._data_format = global_params.data_format
|
| 186 |
+
if self._data_format == 'channels_first':
|
| 187 |
+
self._channel_axis = 1
|
| 188 |
+
self._spatial_dims = [2, 3]
|
| 189 |
+
else:
|
| 190 |
+
self._channel_axis = -1
|
| 191 |
+
self._spatial_dims = [1, 2]
|
| 192 |
+
|
| 193 |
+
self._relu_fn = global_params.relu_fn or tf.nn.swish
|
| 194 |
+
self._has_se = (
|
| 195 |
+
global_params.use_se and self._block_args.se_ratio is not None and
|
| 196 |
+
0 < self._block_args.se_ratio <= 1)
|
| 197 |
+
|
| 198 |
+
self._clip_projection_output = global_params.clip_projection_output
|
| 199 |
+
|
| 200 |
+
self.endpoints = None
|
| 201 |
+
|
| 202 |
+
self.conv_cls = tf.layers.Conv2D
|
| 203 |
+
self.depthwise_conv_cls = utils.DepthwiseConv2D
|
| 204 |
+
if self._block_args.condconv:
|
| 205 |
+
self.conv_cls = functools.partial(
|
| 206 |
+
condconv_layers.CondConv2D, num_experts=self._condconv_num_experts)
|
| 207 |
+
self.depthwise_conv_cls = functools.partial(
|
| 208 |
+
condconv_layers.DepthwiseCondConv2D,
|
| 209 |
+
num_experts=self._condconv_num_experts)
|
| 210 |
+
|
| 211 |
+
# Builds the block accordings to arguments.
|
| 212 |
+
self._build()
|
| 213 |
+
|
| 214 |
+
def block_args(self):
|
| 215 |
+
return self._block_args
|
| 216 |
+
|
| 217 |
+
def _build(self):
|
| 218 |
+
"""Builds block according to the arguments."""
|
| 219 |
+
if self._block_args.super_pixel == 1:
|
| 220 |
+
self._superpixel = tf.layers.Conv2D(
|
| 221 |
+
self._block_args.input_filters,
|
| 222 |
+
kernel_size=[2, 2],
|
| 223 |
+
strides=[2, 2],
|
| 224 |
+
kernel_initializer=conv_kernel_initializer,
|
| 225 |
+
padding='same',
|
| 226 |
+
data_format=self._data_format,
|
| 227 |
+
use_bias=False)
|
| 228 |
+
self._bnsp = self._batch_norm(
|
| 229 |
+
axis=self._channel_axis,
|
| 230 |
+
momentum=self._batch_norm_momentum,
|
| 231 |
+
epsilon=self._batch_norm_epsilon)
|
| 232 |
+
|
| 233 |
+
if self._block_args.condconv:
|
| 234 |
+
# Add the example-dependent routing function
|
| 235 |
+
self._avg_pooling = tf.keras.layers.GlobalAveragePooling2D(
|
| 236 |
+
data_format=self._data_format)
|
| 237 |
+
self._routing_fn = tf.layers.Dense(
|
| 238 |
+
self._condconv_num_experts, activation=tf.nn.sigmoid)
|
| 239 |
+
|
| 240 |
+
filters = self._block_args.input_filters * self._block_args.expand_ratio
|
| 241 |
+
kernel_size = self._block_args.kernel_size
|
| 242 |
+
|
| 243 |
+
# Fused expansion phase. Called if using fused convolutions.
|
| 244 |
+
self._fused_conv = self.conv_cls(
|
| 245 |
+
filters=filters,
|
| 246 |
+
kernel_size=[kernel_size, kernel_size],
|
| 247 |
+
strides=self._block_args.strides,
|
| 248 |
+
kernel_initializer=conv_kernel_initializer,
|
| 249 |
+
padding='same',
|
| 250 |
+
data_format=self._data_format,
|
| 251 |
+
use_bias=False)
|
| 252 |
+
|
| 253 |
+
# Expansion phase. Called if not using fused convolutions and expansion
|
| 254 |
+
# phase is necessary.
|
| 255 |
+
self._expand_conv = self.conv_cls(
|
| 256 |
+
filters=filters,
|
| 257 |
+
kernel_size=[1, 1],
|
| 258 |
+
strides=[1, 1],
|
| 259 |
+
kernel_initializer=conv_kernel_initializer,
|
| 260 |
+
padding='same',
|
| 261 |
+
data_format=self._data_format,
|
| 262 |
+
use_bias=False)
|
| 263 |
+
self._bn0 = self._batch_norm(
|
| 264 |
+
axis=self._channel_axis,
|
| 265 |
+
momentum=self._batch_norm_momentum,
|
| 266 |
+
epsilon=self._batch_norm_epsilon)
|
| 267 |
+
|
| 268 |
+
# Depth-wise convolution phase. Called if not using fused convolutions.
|
| 269 |
+
self._depthwise_conv = self.depthwise_conv_cls(
|
| 270 |
+
kernel_size=[kernel_size, kernel_size],
|
| 271 |
+
strides=self._block_args.strides,
|
| 272 |
+
depthwise_initializer=conv_kernel_initializer,
|
| 273 |
+
padding='same',
|
| 274 |
+
data_format=self._data_format,
|
| 275 |
+
use_bias=False)
|
| 276 |
+
|
| 277 |
+
self._bn1 = self._batch_norm(
|
| 278 |
+
axis=self._channel_axis,
|
| 279 |
+
momentum=self._batch_norm_momentum,
|
| 280 |
+
epsilon=self._batch_norm_epsilon)
|
| 281 |
+
|
| 282 |
+
if self._has_se:
|
| 283 |
+
num_reduced_filters = max(
|
| 284 |
+
1, int(self._block_args.input_filters * self._block_args.se_ratio))
|
| 285 |
+
# Squeeze and Excitation layer.
|
| 286 |
+
self._se_reduce = tf.layers.Conv2D(
|
| 287 |
+
num_reduced_filters,
|
| 288 |
+
kernel_size=[1, 1],
|
| 289 |
+
strides=[1, 1],
|
| 290 |
+
kernel_initializer=conv_kernel_initializer,
|
| 291 |
+
padding='same',
|
| 292 |
+
data_format=self._data_format,
|
| 293 |
+
use_bias=True)
|
| 294 |
+
self._se_expand = tf.layers.Conv2D(
|
| 295 |
+
filters,
|
| 296 |
+
kernel_size=[1, 1],
|
| 297 |
+
strides=[1, 1],
|
| 298 |
+
kernel_initializer=conv_kernel_initializer,
|
| 299 |
+
padding='same',
|
| 300 |
+
data_format=self._data_format,
|
| 301 |
+
use_bias=True)
|
| 302 |
+
|
| 303 |
+
# Output phase.
|
| 304 |
+
filters = self._block_args.output_filters
|
| 305 |
+
self._project_conv = self.conv_cls(
|
| 306 |
+
filters=filters,
|
| 307 |
+
kernel_size=[1, 1],
|
| 308 |
+
strides=[1, 1],
|
| 309 |
+
kernel_initializer=conv_kernel_initializer,
|
| 310 |
+
padding='same',
|
| 311 |
+
data_format=self._data_format,
|
| 312 |
+
use_bias=False)
|
| 313 |
+
self._bn2 = self._batch_norm(
|
| 314 |
+
axis=self._channel_axis,
|
| 315 |
+
momentum=self._batch_norm_momentum,
|
| 316 |
+
epsilon=self._batch_norm_epsilon)
|
| 317 |
+
|
| 318 |
+
def _call_se(self, input_tensor):
|
| 319 |
+
"""Call Squeeze and Excitation layer.
|
| 320 |
+
|
| 321 |
+
Args:
|
| 322 |
+
input_tensor: Tensor, a single input tensor for Squeeze/Excitation layer.
|
| 323 |
+
|
| 324 |
+
Returns:
|
| 325 |
+
A output tensor, which should have the same shape as input.
|
| 326 |
+
"""
|
| 327 |
+
se_tensor = tf.reduce_mean(input_tensor, self._spatial_dims, keepdims=True)
|
| 328 |
+
se_tensor = self._se_expand(self._relu_fn(self._se_reduce(se_tensor)))
|
| 329 |
+
logging.info('Built Squeeze and Excitation with tensor shape: %s',
|
| 330 |
+
(se_tensor.shape))
|
| 331 |
+
return tf.sigmoid(se_tensor) * input_tensor
|
| 332 |
+
|
| 333 |
+
def call(self, inputs, training=True, survival_prob=None):
|
| 334 |
+
"""Implementation of call().
|
| 335 |
+
|
| 336 |
+
Args:
|
| 337 |
+
inputs: the inputs tensor.
|
| 338 |
+
training: boolean, whether the model is constructed for training.
|
| 339 |
+
survival_prob: float, between 0 to 1, drop connect rate.
|
| 340 |
+
|
| 341 |
+
Returns:
|
| 342 |
+
A output tensor.
|
| 343 |
+
"""
|
| 344 |
+
logging.info('Block input: %s shape: %s', inputs.name, inputs.shape)
|
| 345 |
+
logging.info('Block input depth: %s output depth: %s',
|
| 346 |
+
self._block_args.input_filters,
|
| 347 |
+
self._block_args.output_filters)
|
| 348 |
+
|
| 349 |
+
x = inputs
|
| 350 |
+
|
| 351 |
+
fused_conv_fn = self._fused_conv
|
| 352 |
+
expand_conv_fn = self._expand_conv
|
| 353 |
+
depthwise_conv_fn = self._depthwise_conv
|
| 354 |
+
project_conv_fn = self._project_conv
|
| 355 |
+
|
| 356 |
+
if self._block_args.condconv:
|
| 357 |
+
pooled_inputs = self._avg_pooling(inputs)
|
| 358 |
+
routing_weights = self._routing_fn(pooled_inputs)
|
| 359 |
+
# Capture routing weights as additional input to CondConv layers
|
| 360 |
+
fused_conv_fn = functools.partial(
|
| 361 |
+
self._fused_conv, routing_weights=routing_weights)
|
| 362 |
+
expand_conv_fn = functools.partial(
|
| 363 |
+
self._expand_conv, routing_weights=routing_weights)
|
| 364 |
+
depthwise_conv_fn = functools.partial(
|
| 365 |
+
self._depthwise_conv, routing_weights=routing_weights)
|
| 366 |
+
project_conv_fn = functools.partial(
|
| 367 |
+
self._project_conv, routing_weights=routing_weights)
|
| 368 |
+
|
| 369 |
+
# creates conv 2x2 kernel
|
| 370 |
+
if self._block_args.super_pixel == 1:
|
| 371 |
+
with tf.variable_scope('super_pixel'):
|
| 372 |
+
x = self._relu_fn(
|
| 373 |
+
self._bnsp(self._superpixel(x), training=training))
|
| 374 |
+
logging.info(
|
| 375 |
+
'Block start with SuperPixel: %s shape: %s', x.name, x.shape)
|
| 376 |
+
|
| 377 |
+
if self._block_args.fused_conv:
|
| 378 |
+
# If use fused mbconv, skip expansion and use regular conv.
|
| 379 |
+
x = self._relu_fn(self._bn1(fused_conv_fn(x), training=training))
|
| 380 |
+
logging.info('Conv2D: %s shape: %s', x.name, x.shape)
|
| 381 |
+
else:
|
| 382 |
+
# Otherwise, first apply expansion and then apply depthwise conv.
|
| 383 |
+
if self._block_args.expand_ratio != 1:
|
| 384 |
+
x = self._relu_fn(self._bn0(expand_conv_fn(x), training=training))
|
| 385 |
+
logging.info('Expand: %s shape: %s', x.name, x.shape)
|
| 386 |
+
|
| 387 |
+
x = self._relu_fn(self._bn1(depthwise_conv_fn(x), training=training))
|
| 388 |
+
logging.info('DWConv: %s shape: %s', x.name, x.shape)
|
| 389 |
+
|
| 390 |
+
if self._has_se:
|
| 391 |
+
with tf.variable_scope('se'):
|
| 392 |
+
x = self._call_se(x)
|
| 393 |
+
|
| 394 |
+
self.endpoints = {'expansion_output': x}
|
| 395 |
+
|
| 396 |
+
x = self._bn2(project_conv_fn(x), training=training)
|
| 397 |
+
# Add identity so that quantization-aware training can insert quantization
|
| 398 |
+
# ops correctly.
|
| 399 |
+
x = tf.identity(x)
|
| 400 |
+
if self._clip_projection_output:
|
| 401 |
+
x = tf.clip_by_value(x, -6, 6)
|
| 402 |
+
if self._block_args.id_skip:
|
| 403 |
+
if all(
|
| 404 |
+
s == 1 for s in self._block_args.strides
|
| 405 |
+
) and self._block_args.input_filters == self._block_args.output_filters:
|
| 406 |
+
# Apply only if skip connection presents.
|
| 407 |
+
if survival_prob:
|
| 408 |
+
x = utils.drop_connect(x, training, survival_prob)
|
| 409 |
+
x = tf.add(x, inputs)
|
| 410 |
+
logging.info('Project: %s shape: %s', x.name, x.shape)
|
| 411 |
+
return x
|
| 412 |
+
|
| 413 |
+
|
| 414 |
+
class MBConvBlockWithoutDepthwise(MBConvBlock):
|
| 415 |
+
"""MBConv-like block without depthwise convolution and squeeze-and-excite."""
|
| 416 |
+
|
| 417 |
+
def _build(self):
|
| 418 |
+
"""Builds block according to the arguments."""
|
| 419 |
+
filters = self._block_args.input_filters * self._block_args.expand_ratio
|
| 420 |
+
if self._block_args.expand_ratio != 1:
|
| 421 |
+
# Expansion phase:
|
| 422 |
+
self._expand_conv = tf.layers.Conv2D(
|
| 423 |
+
filters,
|
| 424 |
+
kernel_size=[3, 3],
|
| 425 |
+
strides=[1, 1],
|
| 426 |
+
kernel_initializer=conv_kernel_initializer,
|
| 427 |
+
padding='same',
|
| 428 |
+
use_bias=False)
|
| 429 |
+
self._bn0 = self._batch_norm(
|
| 430 |
+
axis=self._channel_axis,
|
| 431 |
+
momentum=self._batch_norm_momentum,
|
| 432 |
+
epsilon=self._batch_norm_epsilon)
|
| 433 |
+
|
| 434 |
+
# Output phase:
|
| 435 |
+
filters = self._block_args.output_filters
|
| 436 |
+
self._project_conv = tf.layers.Conv2D(
|
| 437 |
+
filters,
|
| 438 |
+
kernel_size=[1, 1],
|
| 439 |
+
strides=self._block_args.strides,
|
| 440 |
+
kernel_initializer=conv_kernel_initializer,
|
| 441 |
+
padding='same',
|
| 442 |
+
use_bias=False)
|
| 443 |
+
self._bn1 = self._batch_norm(
|
| 444 |
+
axis=self._channel_axis,
|
| 445 |
+
momentum=self._batch_norm_momentum,
|
| 446 |
+
epsilon=self._batch_norm_epsilon)
|
| 447 |
+
|
| 448 |
+
def call(self, inputs, training=True, survival_prob=None):
|
| 449 |
+
"""Implementation of call().
|
| 450 |
+
|
| 451 |
+
Args:
|
| 452 |
+
inputs: the inputs tensor.
|
| 453 |
+
training: boolean, whether the model is constructed for training.
|
| 454 |
+
survival_prob: float, between 0 to 1, drop connect rate.
|
| 455 |
+
|
| 456 |
+
Returns:
|
| 457 |
+
A output tensor.
|
| 458 |
+
"""
|
| 459 |
+
logging.info('Block input: %s shape: %s', inputs.name, inputs.shape)
|
| 460 |
+
if self._block_args.expand_ratio != 1:
|
| 461 |
+
x = self._relu_fn(self._bn0(self._expand_conv(inputs), training=training))
|
| 462 |
+
else:
|
| 463 |
+
x = inputs
|
| 464 |
+
logging.info('Expand: %s shape: %s', x.name, x.shape)
|
| 465 |
+
|
| 466 |
+
self.endpoints = {'expansion_output': x}
|
| 467 |
+
|
| 468 |
+
x = self._bn1(self._project_conv(x), training=training)
|
| 469 |
+
# Add identity so that quantization-aware training can insert quantization
|
| 470 |
+
# ops correctly.
|
| 471 |
+
x = tf.identity(x)
|
| 472 |
+
if self._clip_projection_output:
|
| 473 |
+
x = tf.clip_by_value(x, -6, 6)
|
| 474 |
+
|
| 475 |
+
if self._block_args.id_skip:
|
| 476 |
+
if all(
|
| 477 |
+
s == 1 for s in self._block_args.strides
|
| 478 |
+
) and self._block_args.input_filters == self._block_args.output_filters:
|
| 479 |
+
# Apply only if skip connection presents.
|
| 480 |
+
if survival_prob:
|
| 481 |
+
x = utils.drop_connect(x, training, survival_prob)
|
| 482 |
+
x = tf.add(x, inputs)
|
| 483 |
+
logging.info('Project: %s shape: %s', x.name, x.shape)
|
| 484 |
+
return x
|
| 485 |
+
|
| 486 |
+
|
| 487 |
+
class Model(tf.keras.Model):
|
| 488 |
+
"""A class implements tf.keras.Model for MNAS-like model.
|
| 489 |
+
|
| 490 |
+
Reference: https://arxiv.org/abs/1807.11626
|
| 491 |
+
"""
|
| 492 |
+
|
| 493 |
+
def __init__(self, blocks_args=None, global_params=None):
|
| 494 |
+
"""Initializes an `Model` instance.
|
| 495 |
+
|
| 496 |
+
Args:
|
| 497 |
+
blocks_args: A list of BlockArgs to construct block modules.
|
| 498 |
+
global_params: GlobalParams, a set of global parameters.
|
| 499 |
+
|
| 500 |
+
Raises:
|
| 501 |
+
ValueError: when blocks_args is not specified as a list.
|
| 502 |
+
"""
|
| 503 |
+
super(Model, self).__init__()
|
| 504 |
+
if not isinstance(blocks_args, list):
|
| 505 |
+
raise ValueError('blocks_args should be a list.')
|
| 506 |
+
self._global_params = global_params
|
| 507 |
+
self._blocks_args = blocks_args
|
| 508 |
+
self._relu_fn = global_params.relu_fn or tf.nn.swish
|
| 509 |
+
self._batch_norm = global_params.batch_norm
|
| 510 |
+
|
| 511 |
+
self.endpoints = None
|
| 512 |
+
|
| 513 |
+
self._build()
|
| 514 |
+
|
| 515 |
+
def _get_conv_block(self, conv_type):
|
| 516 |
+
conv_block_map = {0: MBConvBlock, 1: MBConvBlockWithoutDepthwise}
|
| 517 |
+
return conv_block_map[conv_type]
|
| 518 |
+
|
| 519 |
+
def _build(self):
|
| 520 |
+
"""Builds a model."""
|
| 521 |
+
self._blocks = []
|
| 522 |
+
batch_norm_momentum = self._global_params.batch_norm_momentum
|
| 523 |
+
batch_norm_epsilon = self._global_params.batch_norm_epsilon
|
| 524 |
+
if self._global_params.data_format == 'channels_first':
|
| 525 |
+
channel_axis = 1
|
| 526 |
+
self._spatial_dims = [2, 3]
|
| 527 |
+
else:
|
| 528 |
+
channel_axis = -1
|
| 529 |
+
self._spatial_dims = [1, 2]
|
| 530 |
+
|
| 531 |
+
# Stem part.
|
| 532 |
+
self._conv_stem = tf.layers.Conv2D(
|
| 533 |
+
filters=round_filters(32, self._global_params),
|
| 534 |
+
kernel_size=[3, 3],
|
| 535 |
+
strides=[2, 2],
|
| 536 |
+
kernel_initializer=conv_kernel_initializer,
|
| 537 |
+
padding='same',
|
| 538 |
+
data_format=self._global_params.data_format,
|
| 539 |
+
use_bias=False)
|
| 540 |
+
self._bn0 = self._batch_norm(
|
| 541 |
+
axis=channel_axis,
|
| 542 |
+
momentum=batch_norm_momentum,
|
| 543 |
+
epsilon=batch_norm_epsilon)
|
| 544 |
+
|
| 545 |
+
# Builds blocks.
|
| 546 |
+
for block_args in self._blocks_args:
|
| 547 |
+
assert block_args.num_repeat > 0
|
| 548 |
+
assert block_args.super_pixel in [0, 1, 2]
|
| 549 |
+
# Update block input and output filters based on depth multiplier.
|
| 550 |
+
input_filters = round_filters(block_args.input_filters,
|
| 551 |
+
self._global_params)
|
| 552 |
+
output_filters = round_filters(block_args.output_filters,
|
| 553 |
+
self._global_params)
|
| 554 |
+
kernel_size = block_args.kernel_size
|
| 555 |
+
block_args = block_args._replace(
|
| 556 |
+
input_filters=input_filters,
|
| 557 |
+
output_filters=output_filters,
|
| 558 |
+
num_repeat=round_repeats(block_args.num_repeat, self._global_params))
|
| 559 |
+
|
| 560 |
+
# The first block needs to take care of stride and filter size increase.
|
| 561 |
+
conv_block = self._get_conv_block(block_args.conv_type)
|
| 562 |
+
if not block_args.super_pixel: # no super_pixel at all
|
| 563 |
+
self._blocks.append(conv_block(block_args, self._global_params))
|
| 564 |
+
else:
|
| 565 |
+
# if superpixel, adjust filters, kernels, and strides.
|
| 566 |
+
depth_factor = int(4 / block_args.strides[0] / block_args.strides[1])
|
| 567 |
+
block_args = block_args._replace(
|
| 568 |
+
input_filters=block_args.input_filters * depth_factor,
|
| 569 |
+
output_filters=block_args.output_filters * depth_factor,
|
| 570 |
+
kernel_size=((block_args.kernel_size + 1) // 2 if depth_factor > 1
|
| 571 |
+
else block_args.kernel_size))
|
| 572 |
+
# if the first block has stride-2 and super_pixel trandformation
|
| 573 |
+
if (block_args.strides[0] == 2 and block_args.strides[1] == 2):
|
| 574 |
+
block_args = block_args._replace(strides=[1, 1])
|
| 575 |
+
self._blocks.append(conv_block(block_args, self._global_params))
|
| 576 |
+
block_args = block_args._replace( # sp stops at stride-2
|
| 577 |
+
super_pixel=0,
|
| 578 |
+
input_filters=input_filters,
|
| 579 |
+
output_filters=output_filters,
|
| 580 |
+
kernel_size=kernel_size)
|
| 581 |
+
elif block_args.super_pixel == 1:
|
| 582 |
+
self._blocks.append(conv_block(block_args, self._global_params))
|
| 583 |
+
block_args = block_args._replace(super_pixel=2)
|
| 584 |
+
else:
|
| 585 |
+
self._blocks.append(conv_block(block_args, self._global_params))
|
| 586 |
+
if block_args.num_repeat > 1: # rest of blocks with the same block_arg
|
| 587 |
+
# pylint: disable=protected-access
|
| 588 |
+
block_args = block_args._replace(
|
| 589 |
+
input_filters=block_args.output_filters, strides=[1, 1])
|
| 590 |
+
# pylint: enable=protected-access
|
| 591 |
+
for _ in xrange(block_args.num_repeat - 1):
|
| 592 |
+
self._blocks.append(conv_block(block_args, self._global_params))
|
| 593 |
+
|
| 594 |
+
# Head part.
|
| 595 |
+
self._conv_head = tf.layers.Conv2D(
|
| 596 |
+
filters=round_filters(1280, self._global_params),
|
| 597 |
+
kernel_size=[1, 1],
|
| 598 |
+
strides=[1, 1],
|
| 599 |
+
kernel_initializer=conv_kernel_initializer,
|
| 600 |
+
padding='same',
|
| 601 |
+
use_bias=False)
|
| 602 |
+
self._bn1 = self._batch_norm(
|
| 603 |
+
axis=channel_axis,
|
| 604 |
+
momentum=batch_norm_momentum,
|
| 605 |
+
epsilon=batch_norm_epsilon)
|
| 606 |
+
|
| 607 |
+
self._avg_pooling = tf.keras.layers.GlobalAveragePooling2D(
|
| 608 |
+
data_format=self._global_params.data_format)
|
| 609 |
+
if self._global_params.num_classes:
|
| 610 |
+
self._fc = tf.layers.Dense(
|
| 611 |
+
self._global_params.num_classes,
|
| 612 |
+
kernel_initializer=dense_kernel_initializer)
|
| 613 |
+
else:
|
| 614 |
+
self._fc = None
|
| 615 |
+
|
| 616 |
+
if self._global_params.dropout_rate > 0:
|
| 617 |
+
self._dropout = tf.keras.layers.Dropout(self._global_params.dropout_rate)
|
| 618 |
+
else:
|
| 619 |
+
self._dropout = None
|
| 620 |
+
|
| 621 |
+
def call(self,
|
| 622 |
+
inputs,
|
| 623 |
+
training=True,
|
| 624 |
+
features_only=None,
|
| 625 |
+
pooled_features_only=False):
|
| 626 |
+
"""Implementation of call().
|
| 627 |
+
|
| 628 |
+
Args:
|
| 629 |
+
inputs: input tensors.
|
| 630 |
+
training: boolean, whether the model is constructed for training.
|
| 631 |
+
features_only: build the base feature network only.
|
| 632 |
+
pooled_features_only: build the base network for features extraction
|
| 633 |
+
(after 1x1 conv layer and global pooling, but before dropout and fc
|
| 634 |
+
head).
|
| 635 |
+
|
| 636 |
+
Returns:
|
| 637 |
+
output tensors.
|
| 638 |
+
"""
|
| 639 |
+
outputs = None
|
| 640 |
+
self.endpoints = {}
|
| 641 |
+
reduction_idx = 0
|
| 642 |
+
# Calls Stem layers
|
| 643 |
+
with tf.variable_scope('stem'):
|
| 644 |
+
outputs = self._relu_fn(
|
| 645 |
+
self._bn0(self._conv_stem(inputs), training=training))
|
| 646 |
+
logging.info('Built stem layers with output shape: %s', outputs.shape)
|
| 647 |
+
self.endpoints['stem'] = outputs
|
| 648 |
+
|
| 649 |
+
# Calls blocks.
|
| 650 |
+
for idx, block in enumerate(self._blocks):
|
| 651 |
+
is_reduction = False # reduction flag for blocks after the stem layer
|
| 652 |
+
# If the first block has super-pixel (space-to-depth) layer, then stem is
|
| 653 |
+
# the first reduction point.
|
| 654 |
+
if (block.block_args().super_pixel == 1 and idx == 0):
|
| 655 |
+
reduction_idx += 1
|
| 656 |
+
self.endpoints['reduction_%s' % reduction_idx] = outputs
|
| 657 |
+
|
| 658 |
+
elif ((idx == len(self._blocks) - 1) or
|
| 659 |
+
self._blocks[idx + 1].block_args().strides[0] > 1):
|
| 660 |
+
is_reduction = True
|
| 661 |
+
reduction_idx += 1
|
| 662 |
+
|
| 663 |
+
with tf.variable_scope('blocks_%s' % idx):
|
| 664 |
+
survival_prob = self._global_params.survival_prob
|
| 665 |
+
if survival_prob:
|
| 666 |
+
drop_rate = 1.0 - survival_prob
|
| 667 |
+
survival_prob = 1.0 - drop_rate * float(idx) / len(self._blocks)
|
| 668 |
+
logging.info('block_%s survival_prob: %s', idx, survival_prob)
|
| 669 |
+
outputs = block.call(
|
| 670 |
+
outputs, training=training, survival_prob=survival_prob)
|
| 671 |
+
self.endpoints['block_%s' % idx] = outputs
|
| 672 |
+
if is_reduction:
|
| 673 |
+
self.endpoints['reduction_%s' % reduction_idx] = outputs
|
| 674 |
+
if block.endpoints:
|
| 675 |
+
for k, v in six.iteritems(block.endpoints):
|
| 676 |
+
self.endpoints['block_%s/%s' % (idx, k)] = v
|
| 677 |
+
if is_reduction:
|
| 678 |
+
self.endpoints['reduction_%s/%s' % (reduction_idx, k)] = v
|
| 679 |
+
self.endpoints['features'] = outputs
|
| 680 |
+
|
| 681 |
+
if not features_only:
|
| 682 |
+
# Calls final layers and returns logits.
|
| 683 |
+
with tf.variable_scope('head'):
|
| 684 |
+
outputs = self._relu_fn(
|
| 685 |
+
self._bn1(self._conv_head(outputs), training=training))
|
| 686 |
+
self.endpoints['head_1x1'] = outputs
|
| 687 |
+
|
| 688 |
+
if self._global_params.local_pooling:
|
| 689 |
+
shape = outputs.get_shape().as_list()
|
| 690 |
+
kernel_size = [
|
| 691 |
+
1, shape[self._spatial_dims[0]], shape[self._spatial_dims[1]], 1]
|
| 692 |
+
outputs = tf.nn.avg_pool(
|
| 693 |
+
outputs, ksize=kernel_size, strides=[1, 1, 1, 1], padding='VALID')
|
| 694 |
+
self.endpoints['pooled_features'] = outputs
|
| 695 |
+
if not pooled_features_only:
|
| 696 |
+
if self._dropout:
|
| 697 |
+
outputs = self._dropout(outputs, training=training)
|
| 698 |
+
self.endpoints['global_pool'] = outputs
|
| 699 |
+
if self._fc:
|
| 700 |
+
outputs = tf.squeeze(outputs, self._spatial_dims)
|
| 701 |
+
outputs = self._fc(outputs)
|
| 702 |
+
self.endpoints['head'] = outputs
|
| 703 |
+
else:
|
| 704 |
+
outputs = self._avg_pooling(outputs)
|
| 705 |
+
self.endpoints['pooled_features'] = outputs
|
| 706 |
+
if not pooled_features_only:
|
| 707 |
+
if self._dropout:
|
| 708 |
+
outputs = self._dropout(outputs, training=training)
|
| 709 |
+
self.endpoints['global_pool'] = outputs
|
| 710 |
+
if self._fc:
|
| 711 |
+
outputs = self._fc(outputs)
|
| 712 |
+
self.endpoints['head'] = outputs
|
| 713 |
+
return outputs
|
external_data/original_tf/eval_ckpt_main.py
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
# ==============================================================================
|
| 15 |
+
"""Eval checkpoint driver.
|
| 16 |
+
|
| 17 |
+
This is an example evaluation script for users to understand the EfficientNet
|
| 18 |
+
model checkpoints on CPU. To serve EfficientNet, please consider to export a
|
| 19 |
+
`SavedModel` from checkpoints and use tf-serving to serve.
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
from __future__ import absolute_import
|
| 23 |
+
from __future__ import division
|
| 24 |
+
from __future__ import print_function
|
| 25 |
+
|
| 26 |
+
import json
|
| 27 |
+
import sys
|
| 28 |
+
from absl import app
|
| 29 |
+
from absl import flags
|
| 30 |
+
import numpy as np
|
| 31 |
+
import tensorflow as tf
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
import efficientnet_builder
|
| 35 |
+
import preprocessing
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
flags.DEFINE_string('model_name', 'efficientnet-b0', 'Model name to eval.')
|
| 39 |
+
flags.DEFINE_string('runmode', 'examples', 'Running mode: examples or imagenet')
|
| 40 |
+
flags.DEFINE_string('imagenet_eval_glob', None,
|
| 41 |
+
'Imagenet eval image glob, '
|
| 42 |
+
'such as /imagenet/ILSVRC2012*.JPEG')
|
| 43 |
+
flags.DEFINE_string('imagenet_eval_label', None,
|
| 44 |
+
'Imagenet eval label file path, '
|
| 45 |
+
'such as /imagenet/ILSVRC2012_validation_ground_truth.txt')
|
| 46 |
+
flags.DEFINE_string('ckpt_dir', '/tmp/ckpt/', 'Checkpoint folders')
|
| 47 |
+
flags.DEFINE_string('example_img', '/tmp/panda.jpg',
|
| 48 |
+
'Filepath for a single example image.')
|
| 49 |
+
flags.DEFINE_string('labels_map_file', '/tmp/labels_map.txt',
|
| 50 |
+
'Labels map from label id to its meaning.')
|
| 51 |
+
flags.DEFINE_integer('num_images', 5000,
|
| 52 |
+
'Number of images to eval. Use -1 to eval all images.')
|
| 53 |
+
FLAGS = flags.FLAGS
|
| 54 |
+
|
| 55 |
+
MEAN_RGB = [0.485 * 255, 0.456 * 255, 0.406 * 255]
|
| 56 |
+
STDDEV_RGB = [0.229 * 255, 0.224 * 255, 0.225 * 255]
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
class EvalCkptDriver(object):
|
| 60 |
+
"""A driver for running eval inference.
|
| 61 |
+
|
| 62 |
+
Attributes:
|
| 63 |
+
model_name: str. Model name to eval.
|
| 64 |
+
batch_size: int. Eval batch size.
|
| 65 |
+
num_classes: int. Number of classes, default to 1000 for ImageNet.
|
| 66 |
+
image_size: int. Input image size, determined by model name.
|
| 67 |
+
"""
|
| 68 |
+
|
| 69 |
+
def __init__(self, model_name='efficientnet-b0', batch_size=1):
|
| 70 |
+
"""Initialize internal variables."""
|
| 71 |
+
self.model_name = model_name
|
| 72 |
+
self.batch_size = batch_size
|
| 73 |
+
self.num_classes = 1000
|
| 74 |
+
# Model Scaling parameters
|
| 75 |
+
_, _, self.image_size, _ = efficientnet_builder.efficientnet_params(
|
| 76 |
+
model_name)
|
| 77 |
+
|
| 78 |
+
def restore_model(self, sess, ckpt_dir):
|
| 79 |
+
"""Restore variables from checkpoint dir."""
|
| 80 |
+
checkpoint = tf.train.latest_checkpoint(ckpt_dir)
|
| 81 |
+
ema = tf.train.ExponentialMovingAverage(decay=0.9999)
|
| 82 |
+
ema_vars = tf.trainable_variables() + tf.get_collection('moving_vars')
|
| 83 |
+
for v in tf.global_variables():
|
| 84 |
+
if 'moving_mean' in v.name or 'moving_variance' in v.name:
|
| 85 |
+
ema_vars.append(v)
|
| 86 |
+
ema_vars = list(set(ema_vars))
|
| 87 |
+
var_dict = ema.variables_to_restore(ema_vars)
|
| 88 |
+
saver = tf.train.Saver(var_dict, max_to_keep=1)
|
| 89 |
+
saver.restore(sess, checkpoint)
|
| 90 |
+
|
| 91 |
+
def build_model(self, features, is_training):
|
| 92 |
+
"""Build model with input features."""
|
| 93 |
+
features -= tf.constant(MEAN_RGB, shape=[1, 1, 3], dtype=features.dtype)
|
| 94 |
+
features /= tf.constant(STDDEV_RGB, shape=[1, 1, 3], dtype=features.dtype)
|
| 95 |
+
logits, _ = efficientnet_builder.build_model(
|
| 96 |
+
features, self.model_name, is_training)
|
| 97 |
+
probs = tf.nn.softmax(logits)
|
| 98 |
+
probs = tf.squeeze(probs)
|
| 99 |
+
return probs
|
| 100 |
+
|
| 101 |
+
def build_dataset(self, filenames, labels, is_training):
|
| 102 |
+
"""Build input dataset."""
|
| 103 |
+
filenames = tf.constant(filenames)
|
| 104 |
+
labels = tf.constant(labels)
|
| 105 |
+
dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
|
| 106 |
+
|
| 107 |
+
def _parse_function(filename, label):
|
| 108 |
+
image_string = tf.read_file(filename)
|
| 109 |
+
image_decoded = preprocessing.preprocess_image(
|
| 110 |
+
image_string, is_training, self.image_size)
|
| 111 |
+
image = tf.cast(image_decoded, tf.float32)
|
| 112 |
+
return image, label
|
| 113 |
+
|
| 114 |
+
dataset = dataset.map(_parse_function)
|
| 115 |
+
dataset = dataset.batch(self.batch_size)
|
| 116 |
+
|
| 117 |
+
iterator = dataset.make_one_shot_iterator()
|
| 118 |
+
images, labels = iterator.get_next()
|
| 119 |
+
return images, labels
|
| 120 |
+
|
| 121 |
+
def run_inference(self, ckpt_dir, image_files, labels):
|
| 122 |
+
"""Build and run inference on the target images and labels."""
|
| 123 |
+
with tf.Graph().as_default(), tf.Session() as sess:
|
| 124 |
+
images, labels = self.build_dataset(image_files, labels, False)
|
| 125 |
+
probs = self.build_model(images, is_training=False)
|
| 126 |
+
|
| 127 |
+
sess.run(tf.global_variables_initializer())
|
| 128 |
+
self.restore_model(sess, ckpt_dir)
|
| 129 |
+
|
| 130 |
+
prediction_idx = []
|
| 131 |
+
prediction_prob = []
|
| 132 |
+
for _ in range(len(image_files) // self.batch_size):
|
| 133 |
+
out_probs = sess.run(probs)
|
| 134 |
+
idx = np.argsort(out_probs)[::-1]
|
| 135 |
+
prediction_idx.append(idx[:5])
|
| 136 |
+
prediction_prob.append([out_probs[pid] for pid in idx[:5]])
|
| 137 |
+
|
| 138 |
+
# Return the top 5 predictions (idx and prob) for each image.
|
| 139 |
+
return prediction_idx, prediction_prob
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def eval_example_images(model_name, ckpt_dir, image_files, labels_map_file):
|
| 143 |
+
"""Eval a list of example images.
|
| 144 |
+
|
| 145 |
+
Args:
|
| 146 |
+
model_name: str. The name of model to eval.
|
| 147 |
+
ckpt_dir: str. Checkpoint directory path.
|
| 148 |
+
image_files: List[str]. A list of image file paths.
|
| 149 |
+
labels_map_file: str. The labels map file path.
|
| 150 |
+
|
| 151 |
+
Returns:
|
| 152 |
+
A tuple (pred_idx, and pred_prob), where pred_idx is the top 5 prediction
|
| 153 |
+
index and pred_prob is the top 5 prediction probability.
|
| 154 |
+
"""
|
| 155 |
+
eval_ckpt_driver = EvalCkptDriver(model_name)
|
| 156 |
+
classes = json.loads(tf.gfile.Open(labels_map_file).read())
|
| 157 |
+
pred_idx, pred_prob = eval_ckpt_driver.run_inference(
|
| 158 |
+
ckpt_dir, image_files, [0] * len(image_files))
|
| 159 |
+
for i in range(len(image_files)):
|
| 160 |
+
print('predicted class for image {}: '.format(image_files[i]))
|
| 161 |
+
for j, idx in enumerate(pred_idx[i]):
|
| 162 |
+
print(' -> top_{} ({:4.2f}%): {} '.format(
|
| 163 |
+
j, pred_prob[i][j] * 100, classes[str(idx)]))
|
| 164 |
+
return pred_idx, pred_prob
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
def eval_imagenet(model_name,
|
| 168 |
+
ckpt_dir,
|
| 169 |
+
imagenet_eval_glob,
|
| 170 |
+
imagenet_eval_label,
|
| 171 |
+
num_images):
|
| 172 |
+
"""Eval ImageNet images and report top1/top5 accuracy.
|
| 173 |
+
|
| 174 |
+
Args:
|
| 175 |
+
model_name: str. The name of model to eval.
|
| 176 |
+
ckpt_dir: str. Checkpoint directory path.
|
| 177 |
+
imagenet_eval_glob: str. File path glob for all eval images.
|
| 178 |
+
imagenet_eval_label: str. File path for eval label.
|
| 179 |
+
num_images: int. Number of images to eval: -1 means eval the whole dataset.
|
| 180 |
+
|
| 181 |
+
Returns:
|
| 182 |
+
A tuple (top1, top5) for top1 and top5 accuracy.
|
| 183 |
+
"""
|
| 184 |
+
eval_ckpt_driver = EvalCkptDriver(model_name)
|
| 185 |
+
imagenet_val_labels = [int(i) for i in tf.gfile.GFile(imagenet_eval_label)]
|
| 186 |
+
imagenet_filenames = sorted(tf.gfile.Glob(imagenet_eval_glob))
|
| 187 |
+
if num_images < 0:
|
| 188 |
+
num_images = len(imagenet_filenames)
|
| 189 |
+
image_files = imagenet_filenames[:num_images]
|
| 190 |
+
labels = imagenet_val_labels[:num_images]
|
| 191 |
+
|
| 192 |
+
pred_idx, _ = eval_ckpt_driver.run_inference(ckpt_dir, image_files, labels)
|
| 193 |
+
top1_cnt, top5_cnt = 0.0, 0.0
|
| 194 |
+
for i, label in enumerate(labels):
|
| 195 |
+
top1_cnt += label in pred_idx[i][:1]
|
| 196 |
+
top5_cnt += label in pred_idx[i][:5]
|
| 197 |
+
if i % 100 == 0:
|
| 198 |
+
print('Step {}: top1_acc = {:4.2f}% top5_acc = {:4.2f}%'.format(
|
| 199 |
+
i, 100 * top1_cnt / (i + 1), 100 * top5_cnt / (i + 1)))
|
| 200 |
+
sys.stdout.flush()
|
| 201 |
+
top1, top5 = 100 * top1_cnt / num_images, 100 * top5_cnt / num_images
|
| 202 |
+
print('Final: top1_acc = {:4.2f}% top5_acc = {:4.2f}%'.format(top1, top5))
|
| 203 |
+
return top1, top5
|
| 204 |
+
|
| 205 |
+
|
| 206 |
+
def main(unused_argv):
|
| 207 |
+
tf.logging.set_verbosity(tf.logging.ERROR)
|
| 208 |
+
if FLAGS.runmode == 'examples':
|
| 209 |
+
# Run inference for an example image.
|
| 210 |
+
eval_example_images(FLAGS.model_name, FLAGS.ckpt_dir, [FLAGS.example_img],
|
| 211 |
+
FLAGS.labels_map_file)
|
| 212 |
+
elif FLAGS.runmode == 'imagenet':
|
| 213 |
+
# Run inference for imagenet.
|
| 214 |
+
eval_imagenet(FLAGS.model_name, FLAGS.ckpt_dir, FLAGS.imagenet_eval_glob,
|
| 215 |
+
FLAGS.imagenet_eval_label, FLAGS.num_images)
|
| 216 |
+
else:
|
| 217 |
+
print('must specify runmode: examples or imagenet')
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
if __name__ == '__main__':
|
| 221 |
+
app.run(main)
|
external_data/original_tf/preprocessing.py
ADDED
|
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
# ==============================================================================
|
| 15 |
+
"""ImageNet preprocessing."""
|
| 16 |
+
from __future__ import absolute_import
|
| 17 |
+
from __future__ import division
|
| 18 |
+
from __future__ import print_function
|
| 19 |
+
|
| 20 |
+
from absl import logging
|
| 21 |
+
|
| 22 |
+
import tensorflow.compat.v1 as tf
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
IMAGE_SIZE = 224
|
| 26 |
+
CROP_PADDING = 32
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def distorted_bounding_box_crop(image_bytes,
|
| 30 |
+
bbox,
|
| 31 |
+
min_object_covered=0.1,
|
| 32 |
+
aspect_ratio_range=(0.75, 1.33),
|
| 33 |
+
area_range=(0.05, 1.0),
|
| 34 |
+
max_attempts=100,
|
| 35 |
+
scope=None):
|
| 36 |
+
"""Generates cropped_image using one of the bboxes randomly distorted.
|
| 37 |
+
|
| 38 |
+
See `tf.image.sample_distorted_bounding_box` for more documentation.
|
| 39 |
+
|
| 40 |
+
Args:
|
| 41 |
+
image_bytes: `Tensor` of binary image data.
|
| 42 |
+
bbox: `Tensor` of bounding boxes arranged `[1, num_boxes, coords]`
|
| 43 |
+
where each coordinate is [0, 1) and the coordinates are arranged
|
| 44 |
+
as `[ymin, xmin, ymax, xmax]`. If num_boxes is 0 then use the whole
|
| 45 |
+
image.
|
| 46 |
+
min_object_covered: An optional `float`. Defaults to `0.1`. The cropped
|
| 47 |
+
area of the image must contain at least this fraction of any bounding
|
| 48 |
+
box supplied.
|
| 49 |
+
aspect_ratio_range: An optional list of `float`s. The cropped area of the
|
| 50 |
+
image must have an aspect ratio = width / height within this range.
|
| 51 |
+
area_range: An optional list of `float`s. The cropped area of the image
|
| 52 |
+
must contain a fraction of the supplied image within in this range.
|
| 53 |
+
max_attempts: An optional `int`. Number of attempts at generating a cropped
|
| 54 |
+
region of the image of the specified constraints. After `max_attempts`
|
| 55 |
+
failures, return the entire image.
|
| 56 |
+
scope: Optional `str` for name scope.
|
| 57 |
+
Returns:
|
| 58 |
+
cropped image `Tensor`
|
| 59 |
+
"""
|
| 60 |
+
with tf.name_scope(scope, 'distorted_bounding_box_crop', [image_bytes, bbox]):
|
| 61 |
+
shape = tf.image.extract_jpeg_shape(image_bytes)
|
| 62 |
+
sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
|
| 63 |
+
shape,
|
| 64 |
+
bounding_boxes=bbox,
|
| 65 |
+
min_object_covered=min_object_covered,
|
| 66 |
+
aspect_ratio_range=aspect_ratio_range,
|
| 67 |
+
area_range=area_range,
|
| 68 |
+
max_attempts=max_attempts,
|
| 69 |
+
use_image_if_no_bounding_boxes=True)
|
| 70 |
+
bbox_begin, bbox_size, _ = sample_distorted_bounding_box
|
| 71 |
+
|
| 72 |
+
# Crop the image to the specified bounding box.
|
| 73 |
+
offset_y, offset_x, _ = tf.unstack(bbox_begin)
|
| 74 |
+
target_height, target_width, _ = tf.unstack(bbox_size)
|
| 75 |
+
crop_window = tf.stack([offset_y, offset_x, target_height, target_width])
|
| 76 |
+
image = tf.image.decode_and_crop_jpeg(image_bytes, crop_window, channels=3)
|
| 77 |
+
|
| 78 |
+
return image
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def _at_least_x_are_equal(a, b, x):
|
| 82 |
+
"""At least `x` of `a` and `b` `Tensors` are equal."""
|
| 83 |
+
match = tf.equal(a, b)
|
| 84 |
+
match = tf.cast(match, tf.int32)
|
| 85 |
+
return tf.greater_equal(tf.reduce_sum(match), x)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def _decode_and_random_crop(image_bytes, image_size):
|
| 89 |
+
"""Make a random crop of image_size."""
|
| 90 |
+
bbox = tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4])
|
| 91 |
+
image = distorted_bounding_box_crop(
|
| 92 |
+
image_bytes,
|
| 93 |
+
bbox,
|
| 94 |
+
min_object_covered=0.1,
|
| 95 |
+
aspect_ratio_range=(3. / 4, 4. / 3.),
|
| 96 |
+
area_range=(0.08, 1.0),
|
| 97 |
+
max_attempts=10,
|
| 98 |
+
scope=None)
|
| 99 |
+
original_shape = tf.image.extract_jpeg_shape(image_bytes)
|
| 100 |
+
bad = _at_least_x_are_equal(original_shape, tf.shape(image), 3)
|
| 101 |
+
|
| 102 |
+
image = tf.cond(
|
| 103 |
+
bad,
|
| 104 |
+
lambda: _decode_and_center_crop(image_bytes, image_size),
|
| 105 |
+
lambda: tf.image.resize_bicubic([image], # pylint: disable=g-long-lambda
|
| 106 |
+
[image_size, image_size])[0])
|
| 107 |
+
|
| 108 |
+
return image
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def _decode_and_center_crop(image_bytes, image_size):
|
| 112 |
+
"""Crops to center of image with padding then scales image_size."""
|
| 113 |
+
shape = tf.image.extract_jpeg_shape(image_bytes)
|
| 114 |
+
image_height = shape[0]
|
| 115 |
+
image_width = shape[1]
|
| 116 |
+
|
| 117 |
+
padded_center_crop_size = tf.cast(
|
| 118 |
+
((image_size / (image_size + CROP_PADDING)) *
|
| 119 |
+
tf.cast(tf.minimum(image_height, image_width), tf.float32)),
|
| 120 |
+
tf.int32)
|
| 121 |
+
|
| 122 |
+
offset_height = ((image_height - padded_center_crop_size) + 1) // 2
|
| 123 |
+
offset_width = ((image_width - padded_center_crop_size) + 1) // 2
|
| 124 |
+
crop_window = tf.stack([offset_height, offset_width,
|
| 125 |
+
padded_center_crop_size, padded_center_crop_size])
|
| 126 |
+
image = tf.image.decode_and_crop_jpeg(image_bytes, crop_window, channels=3)
|
| 127 |
+
image = tf.image.resize_bicubic([image], [image_size, image_size])[0]
|
| 128 |
+
return image
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
def _flip(image):
|
| 132 |
+
"""Random horizontal image flip."""
|
| 133 |
+
image = tf.image.random_flip_left_right(image)
|
| 134 |
+
return image
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
def preprocess_for_train(image_bytes, use_bfloat16, image_size=IMAGE_SIZE,
|
| 138 |
+
augment_name=None,
|
| 139 |
+
randaug_num_layers=None, randaug_magnitude=None):
|
| 140 |
+
"""Preprocesses the given image for evaluation.
|
| 141 |
+
|
| 142 |
+
Args:
|
| 143 |
+
image_bytes: `Tensor` representing an image binary of arbitrary size.
|
| 144 |
+
use_bfloat16: `bool` for whether to use bfloat16.
|
| 145 |
+
image_size: image size.
|
| 146 |
+
augment_name: `string` that is the name of the augmentation method
|
| 147 |
+
to apply to the image. `autoaugment` if AutoAugment is to be used or
|
| 148 |
+
`randaugment` if RandAugment is to be used. If the value is `None` no
|
| 149 |
+
augmentation method will be applied applied. See autoaugment.py for more
|
| 150 |
+
details.
|
| 151 |
+
randaug_num_layers: 'int', if RandAug is used, what should the number of
|
| 152 |
+
layers be. See autoaugment.py for detailed description.
|
| 153 |
+
randaug_magnitude: 'int', if RandAug is used, what should the magnitude
|
| 154 |
+
be. See autoaugment.py for detailed description.
|
| 155 |
+
|
| 156 |
+
Returns:
|
| 157 |
+
A preprocessed image `Tensor`.
|
| 158 |
+
"""
|
| 159 |
+
image = _decode_and_random_crop(image_bytes, image_size)
|
| 160 |
+
image = _flip(image)
|
| 161 |
+
image = tf.reshape(image, [image_size, image_size, 3])
|
| 162 |
+
|
| 163 |
+
image = tf.image.convert_image_dtype(
|
| 164 |
+
image, dtype=tf.bfloat16 if use_bfloat16 else tf.float32)
|
| 165 |
+
|
| 166 |
+
if augment_name:
|
| 167 |
+
try:
|
| 168 |
+
import autoaugment # pylint: disable=g-import-not-at-top
|
| 169 |
+
except ImportError as e:
|
| 170 |
+
logging.exception('Autoaugment is not supported in TF 2.x.')
|
| 171 |
+
raise e
|
| 172 |
+
|
| 173 |
+
logging.info('Apply AutoAugment policy %s', augment_name)
|
| 174 |
+
input_image_type = image.dtype
|
| 175 |
+
image = tf.clip_by_value(image, 0.0, 255.0)
|
| 176 |
+
image = tf.cast(image, dtype=tf.uint8)
|
| 177 |
+
|
| 178 |
+
if augment_name == 'autoaugment':
|
| 179 |
+
logging.info('Apply AutoAugment policy %s', augment_name)
|
| 180 |
+
image = autoaugment.distort_image_with_autoaugment(image, 'v0')
|
| 181 |
+
elif augment_name == 'randaugment':
|
| 182 |
+
image = autoaugment.distort_image_with_randaugment(
|
| 183 |
+
image, randaug_num_layers, randaug_magnitude)
|
| 184 |
+
else:
|
| 185 |
+
raise ValueError('Invalid value for augment_name: %s' % (augment_name))
|
| 186 |
+
|
| 187 |
+
image = tf.cast(image, dtype=input_image_type)
|
| 188 |
+
return image
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
def preprocess_for_eval(image_bytes, use_bfloat16, image_size=IMAGE_SIZE):
|
| 192 |
+
"""Preprocesses the given image for evaluation.
|
| 193 |
+
|
| 194 |
+
Args:
|
| 195 |
+
image_bytes: `Tensor` representing an image binary of arbitrary size.
|
| 196 |
+
use_bfloat16: `bool` for whether to use bfloat16.
|
| 197 |
+
image_size: image size.
|
| 198 |
+
|
| 199 |
+
Returns:
|
| 200 |
+
A preprocessed image `Tensor`.
|
| 201 |
+
"""
|
| 202 |
+
image = _decode_and_center_crop(image_bytes, image_size)
|
| 203 |
+
image = tf.reshape(image, [image_size, image_size, 3])
|
| 204 |
+
image = tf.image.convert_image_dtype(
|
| 205 |
+
image, dtype=tf.bfloat16 if use_bfloat16 else tf.float32)
|
| 206 |
+
return image
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
def preprocess_image(image_bytes,
|
| 210 |
+
is_training=False,
|
| 211 |
+
use_bfloat16=False,
|
| 212 |
+
image_size=IMAGE_SIZE,
|
| 213 |
+
augment_name=None,
|
| 214 |
+
randaug_num_layers=None,
|
| 215 |
+
randaug_magnitude=None):
|
| 216 |
+
"""Preprocesses the given image.
|
| 217 |
+
|
| 218 |
+
Args:
|
| 219 |
+
image_bytes: `Tensor` representing an image binary of arbitrary size.
|
| 220 |
+
is_training: `bool` for whether the preprocessing is for training.
|
| 221 |
+
use_bfloat16: `bool` for whether to use bfloat16.
|
| 222 |
+
image_size: image size.
|
| 223 |
+
augment_name: `string` that is the name of the augmentation method
|
| 224 |
+
to apply to the image. `autoaugment` if AutoAugment is to be used or
|
| 225 |
+
`randaugment` if RandAugment is to be used. If the value is `None` no
|
| 226 |
+
augmentation method will be applied applied. See autoaugment.py for more
|
| 227 |
+
details.
|
| 228 |
+
randaug_num_layers: 'int', if RandAug is used, what should the number of
|
| 229 |
+
layers be. See autoaugment.py for detailed description.
|
| 230 |
+
randaug_magnitude: 'int', if RandAug is used, what should the magnitude
|
| 231 |
+
be. See autoaugment.py for detailed description.
|
| 232 |
+
|
| 233 |
+
Returns:
|
| 234 |
+
A preprocessed image `Tensor` with value range of [0, 255].
|
| 235 |
+
"""
|
| 236 |
+
if is_training:
|
| 237 |
+
return preprocess_for_train(
|
| 238 |
+
image_bytes, use_bfloat16, image_size, augment_name,
|
| 239 |
+
randaug_num_layers, randaug_magnitude)
|
| 240 |
+
else:
|
| 241 |
+
return preprocess_for_eval(image_bytes, use_bfloat16, image_size)
|
external_data/original_tf/utils.py
ADDED
|
@@ -0,0 +1,405 @@
|
| 1 |
+
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
# ==============================================================================
|
| 15 |
+
"""Model utilities."""
|
| 16 |
+
|
| 17 |
+
from __future__ import absolute_import
|
| 18 |
+
from __future__ import division
|
| 19 |
+
from __future__ import print_function
|
| 20 |
+
|
| 21 |
+
import json
|
| 22 |
+
import os
|
| 23 |
+
import sys
|
| 24 |
+
|
| 25 |
+
from absl import logging
|
| 26 |
+
import numpy as np
|
| 27 |
+
import tensorflow.compat.v1 as tf
|
| 28 |
+
|
| 29 |
+
from tensorflow.python.tpu import tpu_function # pylint:disable=g-direct-tensorflow-import
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def build_learning_rate(initial_lr,
|
| 33 |
+
global_step,
|
| 34 |
+
steps_per_epoch=None,
|
| 35 |
+
lr_decay_type='exponential',
|
| 36 |
+
decay_factor=0.97,
|
| 37 |
+
decay_epochs=2.4,
|
| 38 |
+
total_steps=None,
|
| 39 |
+
warmup_epochs=5):
|
| 40 |
+
"""Build learning rate."""
|
| 41 |
+
if lr_decay_type == 'exponential':
|
| 42 |
+
assert steps_per_epoch is not None
|
| 43 |
+
decay_steps = steps_per_epoch * decay_epochs
|
| 44 |
+
lr = tf.train.exponential_decay(
|
| 45 |
+
initial_lr, global_step, decay_steps, decay_factor, staircase=True)
|
| 46 |
+
elif lr_decay_type == 'cosine':
|
| 47 |
+
assert total_steps is not None
|
| 48 |
+
lr = 0.5 * initial_lr * (
|
| 49 |
+
1 + tf.cos(np.pi * tf.cast(global_step, tf.float32) / total_steps))
|
| 50 |
+
elif lr_decay_type == 'constant':
|
| 51 |
+
lr = initial_lr
|
| 52 |
+
else:
|
| 53 |
+
assert False, 'Unknown lr_decay_type : %s' % lr_decay_type
|
| 54 |
+
|
| 55 |
+
if warmup_epochs:
|
| 56 |
+
logging.info('Learning rate warmup_epochs: %d', warmup_epochs)
|
| 57 |
+
warmup_steps = int(warmup_epochs * steps_per_epoch)
|
| 58 |
+
warmup_lr = (
|
| 59 |
+
initial_lr * tf.cast(global_step, tf.float32) / tf.cast(
|
| 60 |
+
warmup_steps, tf.float32))
|
| 61 |
+
lr = tf.cond(global_step < warmup_steps, lambda: warmup_lr, lambda: lr)
|
| 62 |
+
|
| 63 |
+
return lr
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def build_optimizer(learning_rate,
|
| 67 |
+
optimizer_name='rmsprop',
|
| 68 |
+
decay=0.9,
|
| 69 |
+
epsilon=0.001,
|
| 70 |
+
momentum=0.9):
|
| 71 |
+
"""Build optimizer."""
|
| 72 |
+
if optimizer_name == 'sgd':
|
| 73 |
+
logging.info('Using SGD optimizer')
|
| 74 |
+
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
|
| 75 |
+
elif optimizer_name == 'momentum':
|
| 76 |
+
logging.info('Using Momentum optimizer')
|
| 77 |
+
optimizer = tf.train.MomentumOptimizer(
|
| 78 |
+
learning_rate=learning_rate, momentum=momentum)
|
| 79 |
+
elif optimizer_name == 'rmsprop':
|
| 80 |
+
logging.info('Using RMSProp optimizer')
|
| 81 |
+
optimizer = tf.train.RMSPropOptimizer(learning_rate, decay, momentum,
|
| 82 |
+
epsilon)
|
| 83 |
+
else:
|
| 84 |
+
logging.fatal('Unknown optimizer: %s', optimizer_name)
|
| 85 |
+
|
| 86 |
+
return optimizer
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
class TpuBatchNormalization(tf.layers.BatchNormalization):
|
| 90 |
+
# class TpuBatchNormalization(tf.layers.BatchNormalization):
|
| 91 |
+
"""Cross replica batch normalization."""
|
| 92 |
+
|
| 93 |
+
def __init__(self, fused=False, **kwargs):
|
| 94 |
+
if fused in (True, None):
|
| 95 |
+
raise ValueError('TpuBatchNormalization does not support fused=True.')
|
| 96 |
+
super(TpuBatchNormalization, self).__init__(fused=fused, **kwargs)
|
| 97 |
+
|
| 98 |
+
def _cross_replica_average(self, t, num_shards_per_group):
|
| 99 |
+
"""Calculates the average value of input tensor across TPU replicas."""
|
| 100 |
+
num_shards = tpu_function.get_tpu_context().number_of_shards
|
| 101 |
+
group_assignment = None
|
| 102 |
+
if num_shards_per_group > 1:
|
| 103 |
+
if num_shards % num_shards_per_group != 0:
|
| 104 |
+
raise ValueError('num_shards: %d mod shards_per_group: %d, should be 0'
|
| 105 |
+
% (num_shards, num_shards_per_group))
|
| 106 |
+
num_groups = num_shards // num_shards_per_group
|
| 107 |
+
group_assignment = [[
|
| 108 |
+
x for x in range(num_shards) if x // num_shards_per_group == y
|
| 109 |
+
] for y in range(num_groups)]
|
| 110 |
+
return tf.tpu.cross_replica_sum(t, group_assignment) / tf.cast(
|
| 111 |
+
num_shards_per_group, t.dtype)
|
| 112 |
+
|
| 113 |
+
def _moments(self, inputs, reduction_axes, keep_dims):
|
| 114 |
+
"""Compute the mean and variance: it overrides the original _moments."""
|
| 115 |
+
shard_mean, shard_variance = super(TpuBatchNormalization, self)._moments(
|
| 116 |
+
inputs, reduction_axes, keep_dims=keep_dims)
|
| 117 |
+
|
| 118 |
+
num_shards = tpu_function.get_tpu_context().number_of_shards or 1
|
| 119 |
+
if num_shards <= 8: # Skip cross_replica for 2x2 or smaller slices.
|
| 120 |
+
num_shards_per_group = 1
|
| 121 |
+
else:
|
| 122 |
+
num_shards_per_group = max(8, num_shards // 8)
|
| 123 |
+
logging.info('TpuBatchNormalization with num_shards_per_group %s',
|
| 124 |
+
num_shards_per_group)
|
| 125 |
+
if num_shards_per_group > 1:
|
| 126 |
+
# Compute variance using: Var[X]= E[X^2] - E[X]^2.
|
| 127 |
+
shard_square_of_mean = tf.math.square(shard_mean)
|
| 128 |
+
shard_mean_of_square = shard_variance + shard_square_of_mean
|
| 129 |
+
group_mean = self._cross_replica_average(
|
| 130 |
+
shard_mean, num_shards_per_group)
|
| 131 |
+
group_mean_of_square = self._cross_replica_average(
|
| 132 |
+
shard_mean_of_square, num_shards_per_group)
|
| 133 |
+
group_variance = group_mean_of_square - tf.math.square(group_mean)
|
| 134 |
+
return (group_mean, group_variance)
|
| 135 |
+
else:
|
| 136 |
+
return (shard_mean, shard_variance)
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
class BatchNormalization(tf.layers.BatchNormalization):
|
| 140 |
+
"""Fixed default name of BatchNormalization to match TpuBatchNormalization."""
|
| 141 |
+
|
| 142 |
+
def __init__(self, name='tpu_batch_normalization', **kwargs):
|
| 143 |
+
super(BatchNormalization, self).__init__(name=name, **kwargs)
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def drop_connect(inputs, is_training, survival_prob):
|
| 147 |
+
"""Drop the entire conv with given survival probability."""
|
| 148 |
+
# "Deep Networks with Stochastic Depth", https://arxiv.org/pdf/1603.09382.pdf
|
| 149 |
+
if not is_training:
|
| 150 |
+
return inputs
|
| 151 |
+
|
| 152 |
+
# Compute tensor.
|
| 153 |
+
batch_size = tf.shape(inputs)[0]
|
| 154 |
+
random_tensor = survival_prob
|
| 155 |
+
random_tensor += tf.random_uniform([batch_size, 1, 1, 1], dtype=inputs.dtype)
|
| 156 |
+
binary_tensor = tf.floor(random_tensor)
|
| 157 |
+
# Unlike conventional way that multiply survival_prob at test time, here we
|
| 158 |
+
# divide survival_prob at training time, such that no additional compute is
|
| 159 |
+
# needed at test time.
|
| 160 |
+
output = tf.div(inputs, survival_prob) * binary_tensor
|
| 161 |
+
return output
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def archive_ckpt(ckpt_eval, ckpt_objective, ckpt_path):
|
| 165 |
+
"""Archive a checkpoint if the metric is better."""
|
| 166 |
+
ckpt_dir, ckpt_name = os.path.split(ckpt_path)
|
| 167 |
+
|
| 168 |
+
saved_objective_path = os.path.join(ckpt_dir, 'best_objective.txt')
|
| 169 |
+
saved_objective = float('-inf')
|
| 170 |
+
if tf.gfile.Exists(saved_objective_path):
|
| 171 |
+
with tf.gfile.GFile(saved_objective_path, 'r') as f:
|
| 172 |
+
saved_objective = float(f.read())
|
| 173 |
+
if saved_objective > ckpt_objective:
|
| 174 |
+
logging.info('Ckpt %s is worse than %s', ckpt_objective, saved_objective)
|
| 175 |
+
return False
|
| 176 |
+
|
| 177 |
+
filenames = tf.gfile.Glob(ckpt_path + '.*')
|
| 178 |
+
if filenames is None:
|
| 179 |
+
logging.info('No files to copy for checkpoint %s', ckpt_path)
|
| 180 |
+
return False
|
| 181 |
+
|
| 182 |
+
# Clear the old folder.
|
| 183 |
+
dst_dir = os.path.join(ckpt_dir, 'archive')
|
| 184 |
+
if tf.gfile.Exists(dst_dir):
|
| 185 |
+
tf.gfile.DeleteRecursively(dst_dir)
|
| 186 |
+
tf.gfile.MakeDirs(dst_dir)
|
| 187 |
+
|
| 188 |
+
# Write checkpoints.
|
| 189 |
+
for f in filenames:
|
| 190 |
+
dest = os.path.join(dst_dir, os.path.basename(f))
|
| 191 |
+
tf.gfile.Copy(f, dest, overwrite=True)
|
| 192 |
+
ckpt_state = tf.train.generate_checkpoint_state_proto(
|
| 193 |
+
dst_dir,
|
| 194 |
+
model_checkpoint_path=ckpt_name,
|
| 195 |
+
all_model_checkpoint_paths=[ckpt_name])
|
| 196 |
+
with tf.gfile.GFile(os.path.join(dst_dir, 'checkpoint'), 'w') as f:
|
| 197 |
+
f.write(str(ckpt_state))
|
| 198 |
+
with tf.gfile.GFile(os.path.join(dst_dir, 'best_eval.txt'), 'w') as f:
|
| 199 |
+
f.write('%s' % ckpt_eval)
|
| 200 |
+
|
| 201 |
+
# Update the best objective.
|
| 202 |
+
with tf.gfile.GFile(saved_objective_path, 'w') as f:
|
| 203 |
+
f.write('%f' % ckpt_objective)
|
| 204 |
+
|
| 205 |
+
logging.info('Copying checkpoint %s to %s', ckpt_path, dst_dir)
|
| 206 |
+
return True
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
def get_ema_vars():
|
| 210 |
+
"""Get all exponential moving average (ema) variables."""
|
| 211 |
+
ema_vars = tf.trainable_variables() + tf.get_collection('moving_vars')
|
| 212 |
+
for v in tf.global_variables():
|
| 213 |
+
# We maintain mva for batch norm moving mean and variance as well.
|
| 214 |
+
if 'moving_mean' in v.name or 'moving_variance' in v.name:
|
| 215 |
+
ema_vars.append(v)
|
| 216 |
+
return list(set(ema_vars))
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
class DepthwiseConv2D(tf.keras.layers.DepthwiseConv2D, tf.layers.Layer):
|
| 220 |
+
"""Wrap keras DepthwiseConv2D to tf.layers."""
|
| 221 |
+
|
| 222 |
+
pass
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
class EvalCkptDriver(object):
|
| 226 |
+
"""A driver for running eval inference.
|
| 227 |
+
|
| 228 |
+
Attributes:
|
| 229 |
+
model_name: str. Model name to eval.
|
| 230 |
+
batch_size: int. Eval batch size.
|
| 231 |
+
image_size: int. Input image size, determined by model name.
|
| 232 |
+
num_classes: int. Number of classes, default to 1000 for ImageNet.
|
| 233 |
+
include_background_label: whether to include extra background label.
|
| 234 |
+
"""
|
| 235 |
+
|
| 236 |
+
def __init__(self,
|
| 237 |
+
model_name,
|
| 238 |
+
batch_size=1,
|
| 239 |
+
image_size=224,
|
| 240 |
+
num_classes=1000,
|
| 241 |
+
include_background_label=False):
|
| 242 |
+
"""Initialize internal variables."""
|
| 243 |
+
self.model_name = model_name
|
| 244 |
+
self.batch_size = batch_size
|
| 245 |
+
self.num_classes = num_classes
|
| 246 |
+
self.include_background_label = include_background_label
|
| 247 |
+
self.image_size = image_size
|
| 248 |
+
|
| 249 |
+
def restore_model(self, sess, ckpt_dir, enable_ema=True, export_ckpt=None):
|
| 250 |
+
"""Restore variables from checkpoint dir."""
|
| 251 |
+
sess.run(tf.global_variables_initializer())
|
| 252 |
+
checkpoint = tf.train.latest_checkpoint(ckpt_dir)
|
| 253 |
+
if enable_ema:
|
| 254 |
+
ema = tf.train.ExponentialMovingAverage(decay=0.0)
|
| 255 |
+
ema_vars = get_ema_vars()
|
| 256 |
+
var_dict = ema.variables_to_restore(ema_vars)
|
| 257 |
+
ema_assign_op = ema.apply(ema_vars)
|
| 258 |
+
else:
|
| 259 |
+
var_dict = get_ema_vars()
|
| 260 |
+
ema_assign_op = None
|
| 261 |
+
|
| 262 |
+
tf.train.get_or_create_global_step()
|
| 263 |
+
sess.run(tf.global_variables_initializer())
|
| 264 |
+
saver = tf.train.Saver(var_dict, max_to_keep=1)
|
| 265 |
+
saver.restore(sess, checkpoint)
|
| 266 |
+
|
| 267 |
+
if export_ckpt:
|
| 268 |
+
if ema_assign_op is not None:
|
| 269 |
+
sess.run(ema_assign_op)
|
| 270 |
+
saver = tf.train.Saver(max_to_keep=1, save_relative_paths=True)
|
| 271 |
+
saver.save(sess, export_ckpt)
|
| 272 |
+
|
| 273 |
+
def build_model(self, features, is_training):
|
| 274 |
+
"""Build model with input features."""
|
| 275 |
+
del features, is_training
|
| 276 |
+
raise ValueError('Must be implemented by subclasses.')
|
| 277 |
+
|
| 278 |
+
def get_preprocess_fn(self):
|
| 279 |
+
raise ValueError('Must be implemented by subclasses.')
|
| 280 |
+
|
| 281 |
+
def build_dataset(self, filenames, labels, is_training):
|
| 282 |
+
"""Build input dataset."""
|
| 283 |
+
batch_drop_remainder = False
|
| 284 |
+
if 'condconv' in self.model_name and not is_training:
|
| 285 |
+
# CondConv layers can only be called with known batch dimension. Thus, we
|
| 286 |
+
# must drop all remaining examples that do not make up one full batch.
|
| 287 |
+
# To ensure all examples are evaluated, use a batch size that evenly
|
| 288 |
+
# divides the number of files.
|
| 289 |
+
batch_drop_remainder = True
|
| 290 |
+
num_files = len(filenames)
|
| 291 |
+
if num_files % self.batch_size != 0:
|
| 292 |
+
tf.logging.warn('Remaining examples in last batch are not being '
|
| 293 |
+
'evaluated.')
|
| 294 |
+
filenames = tf.constant(filenames)
|
| 295 |
+
labels = tf.constant(labels)
|
| 296 |
+
dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
|
| 297 |
+
|
| 298 |
+
def _parse_function(filename, label):
|
| 299 |
+
image_string = tf.read_file(filename)
|
| 300 |
+
preprocess_fn = self.get_preprocess_fn()
|
| 301 |
+
image_decoded = preprocess_fn(
|
| 302 |
+
image_string, is_training, image_size=self.image_size)
|
| 303 |
+
image = tf.cast(image_decoded, tf.float32)
|
| 304 |
+
return image, label
|
| 305 |
+
|
| 306 |
+
dataset = dataset.map(_parse_function)
|
| 307 |
+
dataset = dataset.batch(self.batch_size,
|
| 308 |
+
drop_remainder=batch_drop_remainder)
|
| 309 |
+
|
| 310 |
+
iterator = dataset.make_one_shot_iterator()
|
| 311 |
+
images, labels = iterator.get_next()
|
| 312 |
+
return images, labels
|
| 313 |
+
|
| 314 |
+
def run_inference(self,
|
| 315 |
+
ckpt_dir,
|
| 316 |
+
image_files,
|
| 317 |
+
labels,
|
| 318 |
+
enable_ema=True,
|
| 319 |
+
export_ckpt=None):
|
| 320 |
+
"""Build and run inference on the target images and labels."""
|
| 321 |
+
label_offset = 1 if self.include_background_label else 0
|
| 322 |
+
with tf.Graph().as_default(), tf.Session() as sess:
|
| 323 |
+
images, labels = self.build_dataset(image_files, labels, False)
|
| 324 |
+
probs = self.build_model(images, is_training=False)
|
| 325 |
+
if isinstance(probs, tuple):
|
| 326 |
+
probs = probs[0]
|
| 327 |
+
|
| 328 |
+
self.restore_model(sess, ckpt_dir, enable_ema, export_ckpt)
|
| 329 |
+
|
| 330 |
+
prediction_idx = []
|
| 331 |
+
prediction_prob = []
|
| 332 |
+
for _ in range(len(image_files) // self.batch_size):
|
| 333 |
+
out_probs = sess.run(probs)
|
| 334 |
+
idx = np.argsort(out_probs)[::-1]
|
| 335 |
+
prediction_idx.append(idx[:5] - label_offset)
|
| 336 |
+
prediction_prob.append([out_probs[pid] for pid in idx[:5]])
|
| 337 |
+
|
| 338 |
+
# Return the top 5 predictions (idx and prob) for each image.
|
| 339 |
+
return prediction_idx, prediction_prob
|
| 340 |
+
|
| 341 |
+
def eval_example_images(self,
|
| 342 |
+
ckpt_dir,
|
| 343 |
+
image_files,
|
| 344 |
+
labels_map_file,
|
| 345 |
+
enable_ema=True,
|
| 346 |
+
export_ckpt=None):
|
| 347 |
+
"""Eval a list of example images.
|
| 348 |
+
|
| 349 |
+
Args:
|
| 350 |
+
ckpt_dir: str. Checkpoint directory path.
|
| 351 |
+
image_files: List[str]. A list of image file paths.
|
| 352 |
+
labels_map_file: str. The labels map file path.
|
| 353 |
+
enable_ema: enable exponential moving average.
|
| 354 |
+
export_ckpt: export ckpt folder.
|
| 355 |
+
|
| 356 |
+
Returns:
|
| 357 |
+
A tuple (pred_idx, pred_prob), where pred_idx is the top 5 prediction
|
| 358 |
+
index and pred_prob is the top 5 prediction probability.
|
| 359 |
+
"""
|
| 360 |
+
classes = json.loads(tf.gfile.Open(labels_map_file).read())
|
| 361 |
+
pred_idx, pred_prob = self.run_inference(
|
| 362 |
+
ckpt_dir, image_files, [0] * len(image_files), enable_ema, export_ckpt)
|
| 363 |
+
for i in range(len(image_files)):
|
| 364 |
+
print('predicted class for image {}: '.format(image_files[i]))
|
| 365 |
+
for j, idx in enumerate(pred_idx[i]):
|
| 366 |
+
print(' -> top_{} ({:4.2f}%): {} '.format(j, pred_prob[i][j] * 100,
|
| 367 |
+
classes[str(idx)]))
|
| 368 |
+
return pred_idx, pred_prob
|
| 369 |
+
|
| 370 |
+
def eval_imagenet(self, ckpt_dir, imagenet_eval_glob,
|
| 371 |
+
imagenet_eval_label, num_images, enable_ema, export_ckpt):
|
| 372 |
+
"""Eval ImageNet images and report top1/top5 accuracy.
|
| 373 |
+
|
| 374 |
+
Args:
|
| 375 |
+
ckpt_dir: str. Checkpoint directory path.
|
| 376 |
+
imagenet_eval_glob: str. File path glob for all eval images.
|
| 377 |
+
imagenet_eval_label: str. File path for eval label.
|
| 378 |
+
num_images: int. Number of images to eval: -1 means eval the whole
|
| 379 |
+
dataset.
|
| 380 |
+
enable_ema: enable exponential moving average.
|
| 381 |
+
export_ckpt: export checkpoint folder.
|
| 382 |
+
|
| 383 |
+
Returns:
|
| 384 |
+
A tuple (top1, top5) for top1 and top5 accuracy.
|
| 385 |
+
"""
|
| 386 |
+
imagenet_val_labels = [int(i) for i in tf.gfile.GFile(imagenet_eval_label)]
|
| 387 |
+
imagenet_filenames = sorted(tf.gfile.Glob(imagenet_eval_glob))
|
| 388 |
+
if num_images < 0:
|
| 389 |
+
num_images = len(imagenet_filenames)
|
| 390 |
+
image_files = imagenet_filenames[:num_images]
|
| 391 |
+
labels = imagenet_val_labels[:num_images]
|
| 392 |
+
|
| 393 |
+
pred_idx, _ = self.run_inference(
|
| 394 |
+
ckpt_dir, image_files, labels, enable_ema, export_ckpt)
|
| 395 |
+
top1_cnt, top5_cnt = 0.0, 0.0
|
| 396 |
+
for i, label in enumerate(labels):
|
| 397 |
+
top1_cnt += label in pred_idx[i][:1]
|
| 398 |
+
top5_cnt += label in pred_idx[i][:5]
|
| 399 |
+
if i % 100 == 0:
|
| 400 |
+
print('Step {}: top1_acc = {:4.2f}% top5_acc = {:4.2f}%'.format(
|
| 401 |
+
i, 100 * top1_cnt / (i + 1), 100 * top5_cnt / (i + 1)))
|
| 402 |
+
sys.stdout.flush()
|
| 403 |
+
top1, top5 = 100 * top1_cnt / num_images, 100 * top5_cnt / num_images
|
| 404 |
+
print('Final: top1_acc = {:4.2f}% top5_acc = {:4.2f}%'.format(top1, top5))
|
| 405 |
+
return top1, top5
|
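The warmup-plus-decay schedule that `build_learning_rate` constructs is easy to sanity-check outside a TF graph. A self-contained numpy sketch of the `exponential` branch with warmup; the constants are illustrative, not values taken from this repository:

```python
# Standalone numpy sketch of the schedule build_learning_rate() produces for
# lr_decay_type='exponential' with warmup (staircase decay). Constants below
# are examples only.
import numpy as np

initial_lr, decay_factor, decay_epochs = 0.016, 0.97, 2.4
steps_per_epoch, warmup_epochs = 1000, 5
warmup_steps = int(warmup_epochs * steps_per_epoch)
decay_steps = steps_per_epoch * decay_epochs

def lr_at(step):
    decayed = initial_lr * decay_factor ** np.floor(step / decay_steps)  # staircase=True
    warmup = initial_lr * step / warmup_steps
    return warmup if step < warmup_steps else decayed

for step in (0, 2500, warmup_steps, 20000, 100000):
    print(step, round(float(lr_at(step)), 6))
```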
extract_tracks_from_videos.py
ADDED
|
@@ -0,0 +1,105 @@
|
| 1 |
+
import argparse
|
| 2 |
+
import os
|
| 3 |
+
import yaml
|
| 4 |
+
import random
|
| 5 |
+
import pickle
|
| 6 |
+
import tqdm
|
| 7 |
+
|
| 8 |
+
import cv2
|
| 9 |
+
import numpy as np
|
| 10 |
+
|
| 11 |
+
from generate_aligned_tracks import ALIGNED_TRACKS_FILE_NAME
|
| 12 |
+
|
| 13 |
+
SEED = 0xDEADFACE
|
| 14 |
+
TRACK_LENGTH = 50
|
| 15 |
+
DETECTOR_STEP = 6
|
| 16 |
+
BOX_MULT = 1.5
|
| 17 |
+
|
| 18 |
+
TRACKS_ROOT = 'tracks'
|
| 19 |
+
BOXES_FILE_NAME = 'boxes.float32'
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def main():
|
| 23 |
+
parser = argparse.ArgumentParser(description='Extracts tracks from videos')
|
| 24 |
+
parser.add_argument('--num_parts', type=int, default=1, help='Number of parts')
|
| 25 |
+
parser.add_argument('--part', type=int, default=0, help='Part')
|
| 26 |
+
|
| 27 |
+
args = parser.parse_args()
|
| 28 |
+
|
| 29 |
+
with open('config.yaml', 'r') as f:
|
| 30 |
+
config = yaml.load(f)
|
| 31 |
+
|
| 32 |
+
with open(os.path.join(config['ARTIFACTS_PATH'], ALIGNED_TRACKS_FILE_NAME), 'rb') as f:
|
| 33 |
+
aligned_tracks = pickle.load(f)
|
| 34 |
+
|
| 35 |
+
part_size = len(aligned_tracks) // args.num_parts + 1
|
| 36 |
+
assert part_size * args.num_parts >= len(aligned_tracks)
|
| 37 |
+
part_start = part_size * args.part
|
| 38 |
+
part_end = min(part_start + part_size, len(aligned_tracks))
|
| 39 |
+
print('Part {} ({}, {})'.format(args.part, part_start, part_end))
|
| 40 |
+
|
| 41 |
+
random.seed(SEED)
|
| 42 |
+
for real_video, fake_video, aligned_track in tqdm.tqdm(aligned_tracks[part_start:part_end]):
|
| 43 |
+
if len(aligned_track) < TRACK_LENGTH // DETECTOR_STEP:
|
| 44 |
+
continue
|
| 45 |
+
real_boxes = [item[1] for item in aligned_track]
|
| 46 |
+
fake_boxes = [item[2] for item in aligned_track]
|
| 47 |
+
start_idx = random.randint(0, len(aligned_track) - TRACK_LENGTH // DETECTOR_STEP)
|
| 48 |
+
start_frame = aligned_track[start_idx][0] * DETECTOR_STEP
|
| 49 |
+
middle_idx = start_idx + TRACK_LENGTH // DETECTOR_STEP // 2
|
| 50 |
+
|
| 51 |
+
if random.choice([False, True]):
|
| 52 |
+
xmin, ymin, xmax, ymax = real_boxes[middle_idx]
|
| 53 |
+
else:
|
| 54 |
+
xmin, ymin, xmax, ymax = fake_boxes[middle_idx]
|
| 55 |
+
|
| 56 |
+
width = xmax - xmin
|
| 57 |
+
height = ymax - ymin
|
| 58 |
+
xcenter = xmin + width / 2
|
| 59 |
+
ycenter = ymin + height / 2
|
| 60 |
+
width = width * BOX_MULT
|
| 61 |
+
height = height * BOX_MULT
|
| 62 |
+
xmin = xcenter - width / 2
|
| 63 |
+
ymin = ycenter - height / 2
|
| 64 |
+
xmax = xmin + width
|
| 65 |
+
ymax = ymin + height
|
| 66 |
+
|
| 67 |
+
for video, boxes in [(real_video, real_boxes), (fake_video, fake_boxes)]:
|
| 68 |
+
capture = cv2.VideoCapture(os.path.join(config['DFDC_DATA_PATH'], video))
|
| 69 |
+
frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
|
| 70 |
+
if frame_count == 0:
|
| 71 |
+
continue
|
| 72 |
+
frame_height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
| 73 |
+
frame_width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
|
| 74 |
+
|
| 75 |
+
xmin = max(int(xmin), 0)
|
| 76 |
+
xmax = min(int(xmax), frame_width)
|
| 77 |
+
ymin = max(int(ymin), 0)
|
| 78 |
+
ymax = min(int(ymax), frame_height)
|
| 79 |
+
|
| 80 |
+
dst_root = os.path.join(config['ARTIFACTS_PATH'], TRACKS_ROOT,
|
| 81 |
+
video + '_{}_{}_{}'.format(start_frame, xmin, ymin))
|
| 82 |
+
if os.path.exists(dst_root):
|
| 83 |
+
continue
|
| 84 |
+
os.makedirs(dst_root)
|
| 85 |
+
for i in range(start_frame + TRACK_LENGTH):
|
| 86 |
+
capture.grab()
|
| 87 |
+
if i < start_frame:
|
| 88 |
+
continue
|
| 89 |
+
ret, frame = capture.retrieve()
|
| 90 |
+
if not ret:
|
| 91 |
+
continue
|
| 92 |
+
face = frame[ymin:ymax, xmin:xmax]
|
| 93 |
+
dst_path = os.path.join(dst_root, '{}.png'.format(i - start_frame))
|
| 94 |
+
cv2.imwrite(dst_path, face)
|
| 95 |
+
|
| 96 |
+
boxes = np.array(boxes, dtype=np.float32)
|
| 97 |
+
boxes[:, 0] -= xmin
|
| 98 |
+
boxes[:, 1] -= ymin
|
| 99 |
+
boxes[:, 2] -= xmin
|
| 100 |
+
boxes[:, 3] -= ymin
|
| 101 |
+
boxes.tofile(os.path.join(dst_root, BOXES_FILE_NAME))
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
if __name__ == '__main__':
|
| 105 |
+
main()
|
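The crop geometry used above (and again in `predict.py`) is a centre-preserving expansion of the detector box by `BOX_MULT`, clipped to the frame. A standalone sketch with made-up numbers:

```python
# Minimal sketch of the crop geometry above: the chosen detector box is
# expanded around its centre by BOX_MULT and clipped to the frame bounds.
# Pure Python; the numbers in the call are invented.
BOX_MULT = 1.5

def expand_and_clip(box, frame_width, frame_height, mult=BOX_MULT):
    xmin, ymin, xmax, ymax = box
    width, height = (xmax - xmin) * mult, (ymax - ymin) * mult
    xcenter, ycenter = (box[0] + box[2]) / 2, (box[1] + box[3]) / 2
    xmin, ymin = xcenter - width / 2, ycenter - height / 2
    xmax, ymax = xmin + width, ymin + height
    return (max(int(xmin), 0), max(int(ymin), 0),
            min(int(xmax), frame_width), min(int(ymax), frame_height))

print(expand_and_clip((100, 50, 200, 170), frame_width=1920, frame_height=1080))
# -> (75, 20, 225, 200)
```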
generate_aligned_tracks.py
ADDED
|
@@ -0,0 +1,99 @@
|
| 1 |
+
import glob
|
| 2 |
+
import os
|
| 3 |
+
import yaml
|
| 4 |
+
import json
|
| 5 |
+
from collections import defaultdict
|
| 6 |
+
import tqdm
|
| 7 |
+
import pickle
|
| 8 |
+
|
| 9 |
+
from tracker.utils import iou
|
| 10 |
+
|
| 11 |
+
from generate_tracks import TRACKS_FILE_NAME
|
| 12 |
+
|
| 13 |
+
MIN_TRACK_LENGTH = 5
|
| 14 |
+
IOU_THRESHOLD = 0.5
|
| 15 |
+
METADATA_FILE_NAME = 'metadata.json'
|
| 16 |
+
|
| 17 |
+
ALIGNED_TRACKS_FILE_NAME = 'aligned_tracks.pkl'
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def get_track(tracks, min_track_length):
|
| 21 |
+
good_tracks = [track for track in tracks if len(track) >= min_track_length]
|
| 22 |
+
if len(good_tracks) == 1:
|
| 23 |
+
return good_tracks[0]
|
| 24 |
+
else:
|
| 25 |
+
return None
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def main():
|
| 29 |
+
with open('config.yaml', 'r') as f:
|
| 30 |
+
config = yaml.load(f)
|
| 31 |
+
|
| 32 |
+
video_to_meta = {}
|
| 33 |
+
|
| 34 |
+
for path in glob.iglob(os.path.join(config['DFDC_DATA_PATH'], '**', METADATA_FILE_NAME), recursive=True):
|
| 35 |
+
root = os.path.basename(os.path.dirname(path))
|
| 36 |
+
with open(path, 'r') as f:
|
| 37 |
+
for video, meta in json.load(f).items():
|
| 38 |
+
video_to_meta[os.path.join(root, video)] = meta
|
| 39 |
+
|
| 40 |
+
real_video_to_fake_videos = defaultdict(list)
|
| 41 |
+
for video in video_to_meta:
|
| 42 |
+
root = os.path.dirname(video)
|
| 43 |
+
meta = video_to_meta[video]
|
| 44 |
+
if meta['label'] == 'FAKE':
|
| 45 |
+
original_video = os.path.join(root, meta['original'])
|
| 46 |
+
real_video_to_fake_videos[original_video].append(video)
|
| 47 |
+
|
| 48 |
+
print('Total number of real videos: {}'.format(len(real_video_to_fake_videos)))
|
| 49 |
+
print('Total number of fake videos: {}'.format(sum([len(fake_videos) for fake_videos in real_video_to_fake_videos.values()])))
|
| 50 |
+
|
| 51 |
+
with open(os.path.join(config['ARTIFACTS_PATH'], TRACKS_FILE_NAME), 'rb') as f:
|
| 52 |
+
video_to_tracks = pickle.load(f)
|
| 53 |
+
|
| 54 |
+
real_fake_aligned_tracks = []
|
| 55 |
+
real_videos = sorted(real_video_to_fake_videos)
|
| 56 |
+
for real_video in tqdm.tqdm(real_videos):
|
| 57 |
+
if real_video not in video_to_tracks:
|
| 58 |
+
continue
|
| 59 |
+
real_tracks = [track for track in video_to_tracks[real_video] if len(track) >= MIN_TRACK_LENGTH]
|
| 60 |
+
|
| 61 |
+
for fake_video in real_video_to_fake_videos[real_video]:
|
| 62 |
+
if fake_video not in video_to_tracks:
|
| 63 |
+
continue
|
| 64 |
+
fake_tracks = [track for track in video_to_tracks[fake_video] if len(track) >= MIN_TRACK_LENGTH]
|
| 65 |
+
|
| 66 |
+
for real_track in real_tracks:
|
| 67 |
+
real_frame_idx_to_bbox = {}
|
| 68 |
+
for real_frame_idx, real_bbox in real_track:
|
| 69 |
+
real_frame_idx_to_bbox[real_frame_idx] = real_bbox
|
| 70 |
+
|
| 71 |
+
for fake_track in fake_tracks:
|
| 72 |
+
fake_frame_idx_to_bbox = {}
|
| 73 |
+
ious = []
|
| 74 |
+
for fake_frame_idx, fake_bbox in fake_track:
|
| 75 |
+
fake_frame_idx_to_bbox[fake_frame_idx] = fake_bbox
|
| 76 |
+
if fake_frame_idx in real_frame_idx_to_bbox:
|
| 77 |
+
real_bbox = real_frame_idx_to_bbox[fake_frame_idx]
|
| 78 |
+
ious.append(iou(real_bbox, fake_bbox))
|
| 79 |
+
if len(ious) > 0 and min(ious) > IOU_THRESHOLD:
|
| 80 |
+
start_frame_idx = max(min(real_frame_idx_to_bbox), min(fake_frame_idx_to_bbox))
|
| 81 |
+
end_frame_idx = min(max(real_frame_idx_to_bbox), max(fake_frame_idx_to_bbox)) + 1
|
| 82 |
+
assert start_frame_idx < end_frame_idx
|
| 83 |
+
real_fake_aligned_track = []
|
| 84 |
+
for frame_idx in range(start_frame_idx, end_frame_idx):
|
| 85 |
+
real_bbox = real_frame_idx_to_bbox[frame_idx]
|
| 86 |
+
fake_bbox = fake_frame_idx_to_bbox[frame_idx]
|
| 87 |
+
assert iou(real_bbox, fake_bbox) > IOU_THRESHOLD
|
| 88 |
+
real_fake_aligned_track.append((frame_idx, real_bbox, fake_bbox))
|
| 89 |
+
real_fake_aligned_tracks.append((real_video, fake_video, real_fake_aligned_track))
|
| 90 |
+
break
|
| 91 |
+
|
| 92 |
+
print('Total number of tracks: {}'.format(len(real_fake_aligned_tracks)))
|
| 93 |
+
|
| 94 |
+
with open(os.path.join(config['ARTIFACTS_PATH'], ALIGNED_TRACKS_FILE_NAME), 'wb') as f:
|
| 95 |
+
pickle.dump(real_fake_aligned_tracks, f)
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
if __name__ == '__main__':
|
| 99 |
+
main()
|
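A toy, self-contained illustration of the alignment rule implemented above: a real track and a fake track are paired only when every frame they share overlaps with IoU above `IOU_THRESHOLD`, and the aligned track covers exactly the shared frame range (boxes below are invented):

```python
# Toy illustration of the track alignment rule; all coordinates are invented.
IOU_THRESHOLD = 0.5

def iou(b1, b2):
    x0, y0 = max(b1[0], b2[0]), max(b1[1], b2[1])
    x1, y1 = min(b1[2], b2[2]), min(b1[3], b2[3])
    if x1 <= x0 or y1 <= y0:
        return 0.0
    inter = (x1 - x0) * (y1 - y0)
    area1 = (b1[2] - b1[0]) * (b1[3] - b1[1])
    area2 = (b2[2] - b2[0]) * (b2[3] - b2[1])
    return inter / (area1 + area2 - inter)

real_track = [(0, (10, 10, 50, 50)), (1, (12, 10, 52, 50)), (2, (14, 10, 54, 50))]
fake_track = [(1, (13, 11, 52, 51)), (2, (15, 11, 55, 51)), (3, (17, 11, 57, 51))]

real_by_frame = dict(real_track)
fake_by_frame = dict(fake_track)
shared = sorted(set(real_by_frame) & set(fake_by_frame))
if shared and min(iou(real_by_frame[f], fake_by_frame[f]) for f in shared) > IOU_THRESHOLD:
    aligned = [(f, real_by_frame[f], fake_by_frame[f]) for f in shared]
    print(aligned)  # frames 1 and 2 are shared and overlap well enough
```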
generate_track_pairs.py
ADDED
|
@@ -0,0 +1,70 @@
|
| 1 |
+
import yaml
|
| 2 |
+
import os
|
| 3 |
+
import json
|
| 4 |
+
from collections import defaultdict
|
| 5 |
+
import glob
|
| 6 |
+
|
| 7 |
+
from generate_aligned_tracks import METADATA_FILE_NAME
|
| 8 |
+
from extract_tracks_from_videos import TRACKS_ROOT
|
| 9 |
+
|
| 10 |
+
TRACK_PAIRS_FILE_NAME = 'track_pairs.txt'
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def main():
|
| 14 |
+
with open('config.yaml', 'r') as f:
|
| 15 |
+
config = yaml.load(f)
|
| 16 |
+
|
| 17 |
+
video_to_tracks = defaultdict(list)
|
| 18 |
+
|
| 19 |
+
for path in glob.iglob(os.path.join(config['ARTIFACTS_PATH'], TRACKS_ROOT, 'dfdc_train_part_*', '*.mp4_*')):
|
| 20 |
+
parts = path.split('/')
|
| 21 |
+
rel_path = '/'.join(parts[-2:])
|
| 22 |
+
video = '_'.join(rel_path.split('_')[:-3])
|
| 23 |
+
video_to_tracks[video].append(rel_path)
|
| 24 |
+
|
| 25 |
+
video_to_meta = {}
|
| 26 |
+
|
| 27 |
+
for path in glob.iglob(os.path.join(config['DFDC_DATA_PATH'], '**', METADATA_FILE_NAME), recursive=True):
|
| 28 |
+
root = os.path.basename(os.path.dirname(path))
|
| 29 |
+
with open(path, 'r') as f:
|
| 30 |
+
for video, meta in json.load(f).items():
|
| 31 |
+
video_to_meta[os.path.join(root, video)] = meta
|
| 32 |
+
|
| 33 |
+
fake_video_to_real_video = {}
|
| 34 |
+
for video in video_to_meta:
|
| 35 |
+
root = os.path.dirname(video)
|
| 36 |
+
meta = video_to_meta[video]
|
| 37 |
+
if meta['label'] == 'FAKE':
|
| 38 |
+
original_video = os.path.join(root, meta['original'])
|
| 39 |
+
fake_video_to_real_video[video] = original_video
|
| 40 |
+
|
| 41 |
+
print('Total number of fake videos: {}'.format(len(fake_video_to_real_video)))
|
| 42 |
+
|
| 43 |
+
track_pairs = []
|
| 44 |
+
|
| 45 |
+
fake_videos = sorted(fake_video_to_real_video)
|
| 46 |
+
for fake_video in fake_videos:
|
| 47 |
+
real_video = fake_video_to_real_video[fake_video]
|
| 48 |
+
fake_tracks = video_to_tracks[fake_video]
|
| 49 |
+
real_tracks = video_to_tracks[real_video]
|
| 50 |
+
|
| 51 |
+
for fake_track in fake_tracks:
|
| 52 |
+
if not os.path.exists(os.path.join(config['ARTIFACTS_PATH'], TRACKS_ROOT, fake_track, '0.png')):
|
| 53 |
+
continue
|
| 54 |
+
suffix = fake_track[len(fake_video):]
|
| 55 |
+
for real_track in real_tracks:
|
| 56 |
+
if not os.path.exists(os.path.join(config['ARTIFACTS_PATH'], TRACKS_ROOT, real_track, '0.png')):
|
| 57 |
+
continue
|
| 58 |
+
if real_track.endswith(suffix):
|
| 59 |
+
track_pairs.append((real_track, fake_track))
|
| 60 |
+
break
|
| 61 |
+
|
| 62 |
+
print('Total number of track pairs: {}'.format(len(track_pairs)))
|
| 63 |
+
|
| 64 |
+
with open(os.path.join(config['ARTIFACTS_PATH'], TRACK_PAIRS_FILE_NAME), 'w') as f:
|
| 65 |
+
for real_track, fake_track in track_pairs:
|
| 66 |
+
f.write('{},{}\n'.format(real_track, fake_track))
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
if __name__ == '__main__':
|
| 70 |
+
main()
|
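The pairing logic above reduces to suffix matching on the track directory names, which encode `<video>_<start_frame>_<xmin>_<ymin>` (see `extract_tracks_from_videos.py`). A small sketch with invented paths:

```python
# Small sketch of the pairing rule: a fake track and a real track match when
# they share the '_<start_frame>_<xmin>_<ymin>' suffix. Paths are invented.
fake_video = 'dfdc_train_part_0/aaaa.mp4'
fake_track = 'dfdc_train_part_0/aaaa.mp4_120_75_20'
real_tracks = ['dfdc_train_part_0/bbbb.mp4_0_10_10',
               'dfdc_train_part_0/bbbb.mp4_120_75_20']

suffix = fake_track[len(fake_video):]                               # '_120_75_20'
match = next((t for t in real_tracks if t.endswith(suffix)), None)
print(match)                                                        # .../bbbb.mp4_120_75_20
```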
generate_tracks.py
ADDED
|
@@ -0,0 +1,70 @@
|
| 1 |
+
import os
|
| 2 |
+
import yaml
|
| 3 |
+
import tqdm
|
| 4 |
+
import glob
|
| 5 |
+
import pickle
|
| 6 |
+
|
| 7 |
+
from tracker.iou_tracker import track_iou
|
| 8 |
+
from detect_faces_on_videos import DETECTIONS_FILE_NAME, DETECTIONS_ROOT
|
| 9 |
+
|
| 10 |
+
SIGMA_L = 0.3
|
| 11 |
+
SIGMA_H = 0.9
|
| 12 |
+
SIGMA_IOU = 0.3
|
| 13 |
+
T_MIN = 1
|
| 14 |
+
|
| 15 |
+
TRACKS_FILE_NAME = 'tracks.pkl'
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def get_tracks(detections):
|
| 19 |
+
if len(detections) == 0:
|
| 20 |
+
return []
|
| 21 |
+
|
| 22 |
+
converted_detections = []
|
| 23 |
+
for i, detections_per_frame in enumerate(detections):
|
| 24 |
+
converted_detections_per_frame = []
|
| 25 |
+
for j, (bbox, score) in enumerate(zip(detections_per_frame['boxes'], detections_per_frame['scores'])):
|
| 26 |
+
bbox = tuple(bbox.tolist())
|
| 27 |
+
converted_detections_per_frame.append({'bbox': bbox, 'score': score})
|
| 28 |
+
converted_detections.append(converted_detections_per_frame)
|
| 29 |
+
|
| 30 |
+
tracks = track_iou(converted_detections, SIGMA_L, SIGMA_H, SIGMA_IOU, T_MIN)
|
| 31 |
+
tracks_converted = []
|
| 32 |
+
for track in tracks:
|
| 33 |
+
track_converted = []
|
| 34 |
+
start_frame = track['start_frame'] - 1
|
| 35 |
+
for i, bbox in enumerate(track['bboxes']):
|
| 36 |
+
track_converted.append((start_frame + i, bbox))
|
| 37 |
+
tracks_converted.append(track_converted)
|
| 38 |
+
|
| 39 |
+
return tracks_converted
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def main():
|
| 43 |
+
with open('config.yaml', 'r') as f:
|
| 44 |
+
config = yaml.load(f)
|
| 45 |
+
|
| 46 |
+
root_dir = os.path.join(config['ARTIFACTS_PATH'], DETECTIONS_ROOT)
|
| 47 |
+
detections_content = []
|
| 48 |
+
for path in glob.iglob(os.path.join(root_dir, '**', DETECTIONS_FILE_NAME), recursive=True):
|
| 49 |
+
rel_path = path[len(root_dir) + 1:]
|
| 50 |
+
detections_content.append(rel_path)
|
| 51 |
+
|
| 52 |
+
detections_content = sorted(detections_content)
|
| 53 |
+
print('Total number of videos: {}'.format(len(detections_content)))
|
| 54 |
+
|
| 55 |
+
video_to_tracks = {}
|
| 56 |
+
for rel_path in tqdm.tqdm(detections_content):
|
| 57 |
+
video = os.path.dirname(rel_path)
|
| 58 |
+
with open(os.path.join(root_dir, rel_path), 'rb') as f:
|
| 59 |
+
detections = pickle.load(f)
|
| 60 |
+
video_to_tracks[video] = get_tracks(detections)
|
| 61 |
+
|
| 62 |
+
track_count = sum([len(tracks) for tracks in video_to_tracks.values()])
|
| 63 |
+
print('Total number of tracks: {}'.format(track_count))
|
| 64 |
+
|
| 65 |
+
with open(os.path.join(config['ARTIFACTS_PATH'], TRACKS_FILE_NAME), 'wb') as f:
|
| 66 |
+
pickle.dump(video_to_tracks, f)
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
if __name__ == '__main__':
|
| 70 |
+
main()
|
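One detail worth making explicit: `track_iou` reports `start_frame` counting from 1, while the detection list it was given is 0-indexed, hence the `start_frame - 1` in `get_tracks` above. A minimal sketch with an invented track:

```python
# Toy illustration of the index bookkeeping in get_tracks(): the tracker's
# 1-based start_frame is shifted back so each bbox is paired with its 0-based
# detection-frame index. The track below is invented.
track = {'start_frame': 3, 'bboxes': [(0, 0, 10, 10), (1, 0, 11, 10)]}
start_frame = track['start_frame'] - 1
converted = [(start_frame + i, bbox) for i, bbox in enumerate(track['bboxes'])]
print(converted)  # [(2, (0, 0, 10, 10)), (3, (1, 0, 11, 10))]
```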
images/augmented_mixup.jpg
ADDED
|
Git LFS Details
|
images/clip_example.jpg
ADDED
|
images/first_and_second_model_inputs.jpg
ADDED
|
images/mixup_example.jpg
ADDED
|
Git LFS Details
|
images/pred_transform.jpg
ADDED
|
images/third_model_input.jpg
ADDED
|
models/.gitkeep
ADDED
|
File without changes
|
predict.py
ADDED
|
@@ -0,0 +1,399 @@
|
| 1 |
+
import os
|
| 2 |
+
import yaml
|
| 3 |
+
import glob
|
| 4 |
+
|
| 5 |
+
import numpy as np
|
| 6 |
+
import cv2
|
| 7 |
+
|
| 8 |
+
import torch
|
| 9 |
+
from torch import nn
|
| 10 |
+
from torch.utils.data import Dataset, DataLoader
|
| 11 |
+
|
| 12 |
+
from torchvision.models.detection.transform import GeneralizedRCNNTransform
|
| 13 |
+
|
| 14 |
+
from albumentations import Compose, SmallestMaxSize, CenterCrop, Normalize, PadIfNeeded
|
| 15 |
+
from albumentations.pytorch import ToTensor
|
| 16 |
+
|
| 17 |
+
from dsfacedetector.face_ssd_infer import SSD
|
| 18 |
+
from tracker.iou_tracker import track_iou
|
| 19 |
+
from efficientnet_pytorch.model import EfficientNet, MBConvBlock
|
| 20 |
+
|
| 21 |
+
DETECTOR_WEIGHTS_PATH = 'WIDERFace_DSFD_RES152.fp16.pth'
|
| 22 |
+
DETECTOR_THRESHOLD = 0.3
|
| 23 |
+
DETECTOR_MIN_SIZE = 512
|
| 24 |
+
DETECTOR_MAX_SIZE = 512
|
| 25 |
+
DETECTOR_MEAN = (104.0, 117.0, 123.0)
|
| 26 |
+
DETECTOR_STD = (1.0, 1.0, 1.0)
|
| 27 |
+
DETECTOR_BATCH_SIZE = 16
|
| 28 |
+
DETECTOR_STEP = 3
|
| 29 |
+
|
| 30 |
+
TRACKER_SIGMA_L = 0.3
|
| 31 |
+
TRACKER_SIGMA_H = 0.9
|
| 32 |
+
TRACKER_SIGMA_IOU = 0.3
|
| 33 |
+
TRACKER_T_MIN = 7
|
| 34 |
+
|
| 35 |
+
VIDEO_MODEL_BBOX_MULT = 1.5
|
| 36 |
+
VIDEO_MODEL_MIN_SIZE = 224
|
| 37 |
+
VIDEO_MODEL_CROP_HEIGHT = 224
|
| 38 |
+
VIDEO_MODEL_CROP_WIDTH = 192
|
| 39 |
+
VIDEO_FACE_MODEL_TRACK_STEP = 2
|
| 40 |
+
VIDEO_SEQUENCE_MODEL_SEQUENCE_LENGTH = 7
|
| 41 |
+
VIDEO_SEQUENCE_MODEL_TRACK_STEP = 14
|
| 42 |
+
|
| 43 |
+
VIDEO_SEQUENCE_MODEL_WEIGHTS_PATH = 'efficientnet-b7_ns_seq_aa-original-mstd0.5_100k_v4_cad79a/snapshot_100000.fp16.pth'
|
| 44 |
+
FIRST_VIDEO_FACE_MODEL_WEIGHTS_PATH = 'efficientnet-b7_ns_aa-original-mstd0.5_large_crop_100k_v4_cad79a/snapshot_100000.fp16.pth'
|
| 45 |
+
SECOND_VIDEO_FACE_MODEL_WEIGHTS_PATH = 'efficientnet-b7_ns_aa-original-mstd0.5_re_100k_v4_cad79a/snapshot_100000.fp16.pth'
|
| 46 |
+
|
| 47 |
+
VIDEO_BATCH_SIZE = 1
|
| 48 |
+
VIDEO_TARGET_FPS = 15
|
| 49 |
+
VIDEO_NUM_WORKERS = 0
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
class UnlabeledVideoDataset(Dataset):
|
| 53 |
+
def __init__(self, root_dir, content=None):
|
| 54 |
+
self.root_dir = os.path.normpath(root_dir)
|
| 55 |
+
if content is not None:
|
| 56 |
+
self.content = content
|
| 57 |
+
else:
|
| 58 |
+
self.content = []
|
| 59 |
+
for path in glob.iglob(os.path.join(self.root_dir, '**', '*.mp4'), recursive=True):
|
| 60 |
+
rel_path = path[len(self.root_dir) + 1:]
|
| 61 |
+
self.content.append(rel_path)
|
| 62 |
+
self.content = sorted(self.content)
|
| 63 |
+
|
| 64 |
+
def __len__(self):
|
| 65 |
+
return len(self.content)
|
| 66 |
+
|
| 67 |
+
def __getitem__(self, idx):
|
| 68 |
+
rel_path = self.content[idx]
|
| 69 |
+
path = os.path.join(self.root_dir, rel_path)
|
| 70 |
+
|
| 71 |
+
sample = {
|
| 72 |
+
'frames': [],
|
| 73 |
+
'index': idx
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
capture = cv2.VideoCapture(path)
|
| 77 |
+
frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
|
| 78 |
+
if frame_count == 0:
|
| 79 |
+
return sample
|
| 80 |
+
|
| 81 |
+
fps = int(capture.get(cv2.CAP_PROP_FPS))
|
| 82 |
+
video_step = round(fps / VIDEO_TARGET_FPS)
|
| 83 |
+
if video_step == 0:
|
| 84 |
+
return sample
|
| 85 |
+
|
| 86 |
+
for i in range(frame_count):
|
| 87 |
+
capture.grab()
|
| 88 |
+
if i % video_step != 0:
|
| 89 |
+
continue
|
| 90 |
+
ret, frame = capture.retrieve()
|
| 91 |
+
if not ret:
|
| 92 |
+
continue
|
| 93 |
+
|
| 94 |
+
sample['frames'].append(frame)
|
| 95 |
+
|
| 96 |
+
return sample
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
class Detector(object):
|
| 100 |
+
def __init__(self, weights_path):
|
| 101 |
+
self.model = SSD('test')
|
| 102 |
+
self.model.cuda().eval()
|
| 103 |
+
|
| 104 |
+
state = torch.load(weights_path, map_location=lambda storage, loc: storage)
|
| 105 |
+
state = {key: value.float() for key, value in state.items()}
|
| 106 |
+
self.model.load_state_dict(state)
|
| 107 |
+
|
| 108 |
+
self.transform = GeneralizedRCNNTransform(DETECTOR_MIN_SIZE, DETECTOR_MAX_SIZE, DETECTOR_MEAN, DETECTOR_STD)
|
| 109 |
+
self.transform.eval()
|
| 110 |
+
|
| 111 |
+
def detect(self, images):
|
| 112 |
+
images = torch.stack([torch.from_numpy(image).cuda() for image in images])
|
| 113 |
+
images = images.transpose(1, 3).transpose(2, 3).float()
|
| 114 |
+
original_image_sizes = [img.shape[-2:] for img in images]
|
| 115 |
+
images, _ = self.transform(images, None)
|
| 116 |
+
with torch.no_grad():
|
| 117 |
+
detections_batch = self.model(images.tensors).cpu().numpy()
|
| 118 |
+
result = []
|
| 119 |
+
for detections, image_size in zip(detections_batch, images.image_sizes):
|
| 120 |
+
scores = detections[1, :, 0]
|
| 121 |
+
keep_idxs = scores > DETECTOR_THRESHOLD
|
| 122 |
+
detections = detections[1, keep_idxs, :]
|
| 123 |
+
detections = detections[:, [1, 2, 3, 4, 0]]
|
| 124 |
+
detections[:, 0] *= image_size[1]
|
| 125 |
+
detections[:, 1] *= image_size[0]
|
| 126 |
+
detections[:, 2] *= image_size[1]
|
| 127 |
+
detections[:, 3] *= image_size[0]
|
| 128 |
+
result.append({
|
| 129 |
+
'scores': torch.from_numpy(detections[:, 4]),
|
| 130 |
+
'boxes': torch.from_numpy(detections[:, :4])
|
| 131 |
+
})
|
| 132 |
+
|
| 133 |
+
result = self.transform.postprocess(result, images.image_sizes, original_image_sizes)
|
| 134 |
+
return result
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
def get_tracks(detections):
|
| 138 |
+
if len(detections) == 0:
|
| 139 |
+
return []
|
| 140 |
+
|
| 141 |
+
converted_detections = []
|
| 142 |
+
frame_bbox_to_face_idx = {}
|
| 143 |
+
for i, detections_per_frame in enumerate(detections):
|
| 144 |
+
converted_detections_per_frame = []
|
| 145 |
+
for j, (bbox, score) in enumerate(zip(detections_per_frame['boxes'], detections_per_frame['scores'])):
|
| 146 |
+
bbox = tuple(bbox.tolist())
|
| 147 |
+
frame_bbox_to_face_idx[(i, bbox)] = j
|
| 148 |
+
converted_detections_per_frame.append({'bbox': bbox, 'score': score})
|
| 149 |
+
converted_detections.append(converted_detections_per_frame)
|
| 150 |
+
|
| 151 |
+
tracks = track_iou(converted_detections, TRACKER_SIGMA_L, TRACKER_SIGMA_H, TRACKER_SIGMA_IOU, TRACKER_T_MIN)
|
| 152 |
+
tracks_converted = []
|
| 153 |
+
for track in tracks:
|
| 154 |
+
start_frame = track['start_frame'] - 1
|
| 155 |
+
bboxes = np.array(track['bboxes'], dtype=np.float32)
|
| 156 |
+
frame_indices = np.arange(start_frame, start_frame + len(bboxes)) * DETECTOR_STEP
|
| 157 |
+
interp_frame_indices = np.arange(frame_indices[0], frame_indices[-1] + 1)
|
| 158 |
+
interp_bboxes = np.zeros((len(interp_frame_indices), 4), dtype=np.float32)
|
| 159 |
+
for i in range(4):
|
| 160 |
+
interp_bboxes[:, i] = np.interp(interp_frame_indices, frame_indices, bboxes[:, i])
|
| 161 |
+
|
| 162 |
+
track_converted = []
|
| 163 |
+
for frame_idx, bbox in zip(interp_frame_indices, interp_bboxes):
|
| 164 |
+
track_converted.append((frame_idx, bbox))
|
| 165 |
+
tracks_converted.append(track_converted)
|
| 166 |
+
|
| 167 |
+
return tracks_converted
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
class SeqExpandConv(nn.Module):
|
| 171 |
+
def __init__(self, in_channels, out_channels, seq_length):
|
| 172 |
+
super(SeqExpandConv, self).__init__()
|
| 173 |
+
self.conv = nn.Conv3d(in_channels, out_channels, kernel_size=(3, 1, 1), padding=(1, 0, 0), bias=False)
|
| 174 |
+
self.seq_length = seq_length
|
| 175 |
+
|
| 176 |
+
def forward(self, x):
|
| 177 |
+
batch_size, in_channels, height, width = x.shape
|
| 178 |
+
x = x.view(batch_size // self.seq_length, self.seq_length, in_channels, height, width)
|
| 179 |
+
x = self.conv(x.transpose(1, 2).contiguous()).transpose(2, 1).contiguous()
|
| 180 |
+
x = x.flatten(0, 1)
|
| 181 |
+
return x
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
class TrackSequencesClassifier(object):
|
| 185 |
+
def __init__(self, weights_path):
|
| 186 |
+
model = EfficientNet.from_name('efficientnet-b7', override_params={'num_classes': 1})
|
| 187 |
+
|
| 188 |
+
for module in model.modules():
|
| 189 |
+
if isinstance(module, MBConvBlock):
|
| 190 |
+
if module._block_args.expand_ratio != 1:
|
| 191 |
+
expand_conv = module._expand_conv
|
| 192 |
+
seq_expand_conv = SeqExpandConv(expand_conv.in_channels, expand_conv.out_channels,
|
| 193 |
+
VIDEO_SEQUENCE_MODEL_SEQUENCE_LENGTH)
|
| 194 |
+
module._expand_conv = seq_expand_conv
|
| 195 |
+
self.model = model.cuda().eval()
|
| 196 |
+
|
| 197 |
+
normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
|
| 198 |
+
self.transform = Compose(
|
| 199 |
+
[SmallestMaxSize(VIDEO_MODEL_MIN_SIZE), CenterCrop(VIDEO_MODEL_CROP_HEIGHT, VIDEO_MODEL_CROP_WIDTH),
|
| 200 |
+
normalize, ToTensor()])
|
| 201 |
+
|
| 202 |
+
state = torch.load(weights_path, map_location=lambda storage, loc: storage)
|
| 203 |
+
state = {key: value.float() for key, value in state.items()}
|
| 204 |
+
self.model.load_state_dict(state)
|
| 205 |
+
|
| 206 |
+
def classify(self, track_sequences):
|
| 207 |
+
track_sequences = [torch.stack([self.transform(image=face)['image'] for face in sequence]) for sequence in
|
| 208 |
+
track_sequences]
|
| 209 |
+
track_sequences = torch.cat(track_sequences).cuda()
|
| 210 |
+
with torch.no_grad():
|
| 211 |
+
track_probs = torch.sigmoid(self.model(track_sequences)).flatten().cpu().numpy()
|
| 212 |
+
|
| 213 |
+
return track_probs
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
class TrackFacesClassifier(object):
|
| 217 |
+
def __init__(self, first_weights_path, second_weights_path):
|
| 218 |
+
first_model = EfficientNet.from_name('efficientnet-b7', override_params={'num_classes': 1})
|
| 219 |
+
self.first_model = first_model.cuda().eval()
|
| 220 |
+
second_model = EfficientNet.from_name('efficientnet-b7', override_params={'num_classes': 1})
|
| 221 |
+
self.second_model = second_model.cuda().eval()
|
| 222 |
+
|
| 223 |
+
first_normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
|
| 224 |
+
self.first_transform = Compose(
|
| 225 |
+
[SmallestMaxSize(VIDEO_MODEL_CROP_WIDTH), PadIfNeeded(VIDEO_MODEL_CROP_HEIGHT, VIDEO_MODEL_CROP_WIDTH),
|
| 226 |
+
CenterCrop(VIDEO_MODEL_CROP_HEIGHT, VIDEO_MODEL_CROP_WIDTH), first_normalize, ToTensor()])
|
| 227 |
+
|
| 228 |
+
second_normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
|
| 229 |
+
self.second_transform = Compose(
|
| 230 |
+
[SmallestMaxSize(VIDEO_MODEL_MIN_SIZE), CenterCrop(VIDEO_MODEL_CROP_HEIGHT, VIDEO_MODEL_CROP_WIDTH),
|
| 231 |
+
second_normalize, ToTensor()])
|
| 232 |
+
|
| 233 |
+
first_state = torch.load(first_weights_path, map_location=lambda storage, loc: storage)
|
| 234 |
+
first_state = {key: value.float() for key, value in first_state.items()}
|
| 235 |
+
self.first_model.load_state_dict(first_state)
|
| 236 |
+
|
| 237 |
+
second_state = torch.load(second_weights_path, map_location=lambda storage, loc: storage)
|
| 238 |
+
second_state = {key: value.float() for key, value in second_state.items()}
|
| 239 |
+
self.second_model.load_state_dict(second_state)
|
| 240 |
+
|
| 241 |
+
def classify(self, track_faces):
|
| 242 |
+
first_track_faces = []
|
| 243 |
+
second_track_faces = []
|
| 244 |
+
for i, face in enumerate(track_faces):
|
| 245 |
+
if i % 4 < 2:
|
| 246 |
+
first_track_faces.append(self.first_transform(image=face)['image'])
|
| 247 |
+
else:
|
| 248 |
+
second_track_faces.append(self.second_transform(image=face)['image'])
|
| 249 |
+
first_track_faces = torch.stack(first_track_faces).cuda()
|
| 250 |
+
second_track_faces = torch.stack(second_track_faces).cuda()
|
| 251 |
+
with torch.no_grad():
|
| 252 |
+
first_track_probs = torch.sigmoid(self.first_model(first_track_faces)).flatten().cpu().numpy()
|
| 253 |
+
second_track_probs = torch.sigmoid(self.second_model(second_track_faces)).flatten().cpu().numpy()
|
| 254 |
+
track_probs = np.concatenate((first_track_probs, second_track_probs))
|
| 255 |
+
|
| 256 |
+
return track_probs
|
| 257 |
+
|
| 258 |
+
|
| 259 |
+
def extract_sequence(frames, start_idx, bbox, flip):
|
| 260 |
+
frame_height, frame_width, _ = frames[start_idx].shape
|
| 261 |
+
xmin, ymin, xmax, ymax = bbox
|
| 262 |
+
width = xmax - xmin
|
| 263 |
+
height = ymax - ymin
|
| 264 |
+
xcenter = xmin + width / 2
|
| 265 |
+
ycenter = ymin + height / 2
|
| 266 |
+
width = width * VIDEO_MODEL_BBOX_MULT
|
| 267 |
+
height = height * VIDEO_MODEL_BBOX_MULT
|
| 268 |
+
xmin = xcenter - width / 2
|
| 269 |
+
ymin = ycenter - height / 2
|
| 270 |
+
xmax = xmin + width
|
| 271 |
+
ymax = ymin + height
|
| 272 |
+
|
| 273 |
+
xmin = max(int(xmin), 0)
|
| 274 |
+
xmax = min(int(xmax), frame_width)
|
| 275 |
+
ymin = max(int(ymin), 0)
|
| 276 |
+
ymax = min(int(ymax), frame_height)
|
| 277 |
+
|
| 278 |
+
sequence = []
|
| 279 |
+
for i in range(VIDEO_SEQUENCE_MODEL_SEQUENCE_LENGTH):
|
| 280 |
+
face = cv2.cvtColor(frames[start_idx + i][ymin:ymax, xmin:xmax], cv2.COLOR_BGR2RGB)
|
| 281 |
+
sequence.append(face)
|
| 282 |
+
|
| 283 |
+
if flip:
|
| 284 |
+
sequence = [face[:, ::-1] for face in sequence]
|
| 285 |
+
|
| 286 |
+
return sequence
|
| 287 |
+
|
| 288 |
+
|
| 289 |
+
def extract_face(frame, bbox, flip):
|
| 290 |
+
frame_height, frame_width, _ = frame.shape
|
| 291 |
+
xmin, ymin, xmax, ymax = bbox
|
| 292 |
+
width = xmax - xmin
|
| 293 |
+
height = ymax - ymin
|
| 294 |
+
xcenter = xmin + width / 2
|
| 295 |
+
ycenter = ymin + height / 2
|
| 296 |
+
width = width * VIDEO_MODEL_BBOX_MULT
|
| 297 |
+
height = height * VIDEO_MODEL_BBOX_MULT
|
| 298 |
+
xmin = xcenter - width / 2
|
| 299 |
+
ymin = ycenter - height / 2
|
| 300 |
+
xmax = xmin + width
|
| 301 |
+
ymax = ymin + height
|
| 302 |
+
|
| 303 |
+
xmin = max(int(xmin), 0)
|
| 304 |
+
xmax = min(int(xmax), frame_width)
|
| 305 |
+
ymin = max(int(ymin), 0)
|
| 306 |
+
ymax = min(int(ymax), frame_height)
|
| 307 |
+
|
| 308 |
+
face = cv2.cvtColor(frame[ymin:ymax, xmin:xmax], cv2.COLOR_BGR2RGB)
|
| 309 |
+
if flip:
|
| 310 |
+
face = face[:, ::-1].copy()
|
| 311 |
+
|
| 312 |
+
return face
|
| 313 |
+
|
| 314 |
+
|
| 315 |
+
def main():
|
| 316 |
+
with open('config.yaml', 'r') as f:
|
| 317 |
+
config = yaml.load(f)
|
| 318 |
+
|
| 319 |
+
detector = Detector(os.path.join(config['MODELS_PATH'], DETECTOR_WEIGHTS_PATH))
|
| 320 |
+
track_sequences_classifier = TrackSequencesClassifier(os.path.join(config['MODELS_PATH'], VIDEO_SEQUENCE_MODEL_WEIGHTS_PATH))
|
| 321 |
+
track_faces_classifier = TrackFacesClassifier(os.path.join(config['MODELS_PATH'], FIRST_VIDEO_FACE_MODEL_WEIGHTS_PATH),
|
| 322 |
+
os.path.join(config['MODELS_PATH'], SECOND_VIDEO_FACE_MODEL_WEIGHTS_PATH))
|
| 323 |
+
|
| 324 |
+
dataset = UnlabeledVideoDataset(os.path.join(config['DFDC_DATA_PATH'], 'test_videos'))
|
| 325 |
+
print('Total number of videos: {}'.format(len(dataset)))
|
| 326 |
+
|
| 327 |
+
loader = DataLoader(dataset, batch_size=VIDEO_BATCH_SIZE, shuffle=False, num_workers=VIDEO_NUM_WORKERS,
|
| 328 |
+
collate_fn=lambda X: X,
|
| 329 |
+
drop_last=False)
|
| 330 |
+
|
| 331 |
+
video_name_to_score = {}
|
| 332 |
+
|
| 333 |
+
for video_sample in loader:
|
| 334 |
+
frames = video_sample[0]['frames']
|
| 335 |
+
detector_frames = frames[::DETECTOR_STEP]
|
| 336 |
+
video_idx = video_sample[0]['index']
|
| 337 |
+
video_rel_path = dataset.content[video_idx]
|
| 338 |
+
video_name = os.path.basename(video_rel_path)
|
| 339 |
+
|
| 340 |
+
if len(frames) == 0:
|
| 341 |
+
video_name_to_score[video_name] = 0.5
|
| 342 |
+
continue
|
| 343 |
+
|
| 344 |
+
detections = []
|
| 345 |
+
for start in range(0, len(detector_frames), DETECTOR_BATCH_SIZE):
|
| 346 |
+
end = min(len(detector_frames), start + DETECTOR_BATCH_SIZE)
|
| 347 |
+
detections_batch = detector.detect(detector_frames[start:end])
|
| 348 |
+
for detections_per_frame in detections_batch:
|
| 349 |
+
detections.append({key: value.cpu().numpy() for key, value in detections_per_frame.items()})
|
| 350 |
+
|
| 351 |
+
tracks = get_tracks(detections)
|
| 352 |
+
if len(tracks) == 0:
|
| 353 |
+
video_name_to_score[video_name] = 0.5
|
| 354 |
+
continue
|
| 355 |
+
|
| 356 |
+
sequence_track_scores = []
|
| 357 |
+
for track in tracks:
|
| 358 |
+
track_sequences = []
|
| 359 |
+
for i, (start_idx, _) in enumerate(
|
| 360 |
+
track[:-VIDEO_SEQUENCE_MODEL_SEQUENCE_LENGTH + 1:VIDEO_SEQUENCE_MODEL_TRACK_STEP]):
|
| 361 |
+
assert start_idx >= 0 and start_idx + VIDEO_SEQUENCE_MODEL_SEQUENCE_LENGTH <= len(frames)
|
| 362 |
+
_, bbox = track[i * VIDEO_SEQUENCE_MODEL_TRACK_STEP + VIDEO_SEQUENCE_MODEL_SEQUENCE_LENGTH // 2]
|
| 363 |
+
track_sequences.append(extract_sequence(frames, start_idx, bbox, i % 2 == 0))
|
| 364 |
+
sequence_track_scores.append(track_sequences_classifier.classify(track_sequences))
|
| 365 |
+
|
| 366 |
+
face_track_scores = []
|
| 367 |
+
for track in tracks:
|
| 368 |
+
track_faces = []
|
| 369 |
+
for i, (frame_idx, bbox) in enumerate(track[::VIDEO_FACE_MODEL_TRACK_STEP]):
|
| 370 |
+
face = extract_face(frames[frame_idx], bbox, i % 2 == 0)
|
| 371 |
+
track_faces.append(face)
|
| 372 |
+
face_track_scores.append(track_faces_classifier.classify(track_faces))
|
| 373 |
+
|
| 374 |
+
sequence_track_scores = np.concatenate(sequence_track_scores)
|
| 375 |
+
face_track_scores = np.concatenate(face_track_scores)
|
| 376 |
+
track_probs = np.concatenate((sequence_track_scores, face_track_scores))
|
| 377 |
+
|
| 378 |
+
delta = track_probs - 0.5
|
| 379 |
+
sign = np.sign(delta)
|
| 380 |
+
pos_delta = delta > 0
|
| 381 |
+
neg_delta = delta < 0
|
| 382 |
+
track_probs[pos_delta] = np.clip(0.5 + sign[pos_delta] * np.power(abs(delta[pos_delta]), 0.65), 0.01, 0.99)
|
| 383 |
+
track_probs[neg_delta] = np.clip(0.5 + sign[neg_delta] * np.power(abs(delta[neg_delta]), 0.65), 0.01, 0.99)
|
| 384 |
+
weights = np.power(abs(delta), 1.0) + 1e-4
|
| 385 |
+
video_score = float((track_probs * weights).sum() / weights.sum())
|
| 386 |
+
|
| 387 |
+
video_name_to_score[video_name] = video_score
|
| 388 |
+
print('NUM DETECTION FRAMES: {}, VIDEO SCORE: {}. {}'.format(len(detections), video_name_to_score[video_name],
|
| 389 |
+
video_rel_path))
|
| 390 |
+
|
| 391 |
+
os.makedirs(os.path.dirname(config['SUBMISSION_PATH']), exist_ok=True)
|
| 392 |
+
with open(config['SUBMISSION_PATH'], 'w') as f:
|
| 393 |
+
f.write('filename,label\n')
|
| 394 |
+
for video_name in sorted(video_name_to_score):
|
| 395 |
+
score = video_name_to_score[video_name]
|
| 396 |
+
f.write('{},{}\n'.format(video_name, score))
|
| 397 |
+
|
| 398 |
+
|
| 399 |
+
main()
|
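The final aggregation in `main` re-calibrates per-crop probabilities and then takes a confidence-weighted mean. A self-contained numpy sketch of just that step; the input probabilities are invented:

```python
# Sketch of the aggregation step: each probability is pushed away from 0.5
# via |p - 0.5| ** 0.65 (clipped to [0.01, 0.99]), and the video score is a
# mean weighted by distance from 0.5, so confident crops dominate.
import numpy as np

track_probs = np.array([0.50, 0.55, 0.80, 0.97, 0.10], dtype=np.float32)

delta = track_probs - 0.5
sign = np.sign(delta)
nonzero = delta != 0
track_probs[nonzero] = np.clip(
    0.5 + sign[nonzero] * np.abs(delta[nonzero]) ** 0.65, 0.01, 0.99)
weights = np.abs(delta) + 1e-4
video_score = float((track_probs * weights).sum() / weights.sum())
print(track_probs.round(3), round(video_score, 3))
```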
tracker/__init__.py
ADDED
|
File without changes
|
tracker/iou_tracker.py
ADDED
|
@@ -0,0 +1,58 @@
|
| 1 |
+
# Source: https://github.com/bochinski/iou-tracker
|
| 2 |
+
|
| 3 |
+
from .utils import iou
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def track_iou(detections, sigma_l, sigma_h, sigma_iou, t_min):
|
| 7 |
+
"""
|
| 8 |
+
Simple IOU based tracker.
|
| 9 |
+
See "High-Speed Tracking-by-Detection Without Using Image Information by E. Bochinski, V. Eiselein, T. Sikora" for
|
| 10 |
+
more information.
|
| 11 |
+
|
| 12 |
+
Args:
|
| 13 |
+
detections (list): list of detections per frame, usually generated by util.load_mot
|
| 14 |
+
sigma_l (float): low detection threshold.
|
| 15 |
+
sigma_h (float): high detection threshold.
|
| 16 |
+
sigma_iou (float): IOU threshold.
|
| 17 |
+
t_min (float): minimum track length in frames.
|
| 18 |
+
|
| 19 |
+
Returns:
|
| 20 |
+
list: list of tracks.
|
| 21 |
+
"""
|
| 22 |
+
|
| 23 |
+
tracks_active = []
|
| 24 |
+
tracks_finished = []
|
| 25 |
+
|
| 26 |
+
for frame_num, detections_frame in enumerate(detections, start=1):
|
| 27 |
+
# apply low threshold to detections
|
| 28 |
+
dets = [det for det in detections_frame if det['score'] >= sigma_l]
|
| 29 |
+
|
| 30 |
+
updated_tracks = []
|
| 31 |
+
for track in tracks_active:
|
| 32 |
+
if len(dets) > 0:
|
| 33 |
+
# get det with highest iou
|
| 34 |
+
best_match = max(dets, key=lambda x: iou(track['bboxes'][-1], x['bbox']))
|
| 35 |
+
if iou(track['bboxes'][-1], best_match['bbox']) >= sigma_iou:
|
| 36 |
+
track['bboxes'].append(best_match['bbox'])
|
| 37 |
+
track['max_score'] = max(track['max_score'], best_match['score'])
|
| 38 |
+
|
| 39 |
+
updated_tracks.append(track)
|
| 40 |
+
|
| 41 |
+
# remove from best matching detection from detections
|
| 42 |
+
del dets[dets.index(best_match)]
|
| 43 |
+
|
| 44 |
+
# if track was not updated
|
| 45 |
+
if len(updated_tracks) == 0 or track is not updated_tracks[-1]:
|
| 46 |
+
# finish track when the conditions are met
|
| 47 |
+
if track['max_score'] >= sigma_h and len(track['bboxes']) >= t_min:
|
| 48 |
+
tracks_finished.append(track)
|
| 49 |
+
|
| 50 |
+
# create new tracks
|
| 51 |
+
new_tracks = [{'bboxes': [det['bbox']], 'max_score': det['score'], 'start_frame': frame_num} for det in dets]
|
| 52 |
+
tracks_active = updated_tracks + new_tracks
|
| 53 |
+
|
| 54 |
+
# finish all remaining active tracks
|
| 55 |
+
tracks_finished += [track for track in tracks_active
|
| 56 |
+
if track['max_score'] >= sigma_h and len(track['bboxes']) >= t_min]
|
| 57 |
+
|
| 58 |
+
return tracks_finished
|
tracker/utils.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def iou(bbox1, bbox2):
|
| 2 |
+
"""
|
| 3 |
+
Calculates the intersection-over-union of two bounding boxes.
|
| 4 |
+
|
| 5 |
+
Args:
|
| 6 |
+
bbox1 (numpy.array, list of floats): bounding box in format x1,y1,x2,y2.
|
| 7 |
+
bbox2 (numpy.array, list of floats): bounding box in format x1,y1,x2,y2.
|
| 8 |
+
|
| 9 |
+
Returns:
|
| 10 |
+
int: intersection-over-onion of bbox1, bbox2
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
bbox1 = [float(x) for x in bbox1]
|
| 14 |
+
bbox2 = [float(x) for x in bbox2]
|
| 15 |
+
|
| 16 |
+
(x0_1, y0_1, x1_1, y1_1) = bbox1
|
| 17 |
+
(x0_2, y0_2, x1_2, y1_2) = bbox2
|
| 18 |
+
|
| 19 |
+
# get the overlap rectangle
|
| 20 |
+
overlap_x0 = max(x0_1, x0_2)
|
| 21 |
+
overlap_y0 = max(y0_1, y0_2)
|
| 22 |
+
overlap_x1 = min(x1_1, x1_2)
|
| 23 |
+
overlap_y1 = min(y1_1, y1_2)
|
| 24 |
+
|
| 25 |
+
# check if there is an overlap
|
| 26 |
+
if overlap_x1 - overlap_x0 <= 0 or overlap_y1 - overlap_y0 <= 0:
|
| 27 |
+
return 0
|
| 28 |
+
|
| 29 |
+
# if yes, calculate the ratio of the overlap to each ROI size and the unified size
|
| 30 |
+
size_1 = (x1_1 - x0_1) * (y1_1 - y0_1)
|
| 31 |
+
size_2 = (x1_2 - x0_2) * (y1_2 - y0_2)
|
| 32 |
+
size_intersection = (overlap_x1 - overlap_x0) * (overlap_y1 - overlap_y0)
|
| 33 |
+
size_union = size_1 + size_2 - size_intersection
|
| 34 |
+
|
| 35 |
+
return size_intersection / size_union
|
train_b7_ns_aa_original_large_crop_100k.py
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import yaml
|
| 2 |
+
import os
|
| 3 |
+
import random
|
| 4 |
+
import tqdm
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
from PIL import Image
|
| 8 |
+
|
| 9 |
+
import torch
|
| 10 |
+
from torch import distributions
|
| 11 |
+
from torch.nn import functional as F
|
| 12 |
+
from torch.utils.data import DataLoader
|
| 13 |
+
|
| 14 |
+
from torch.utils.tensorboard import SummaryWriter
|
| 15 |
+
|
| 16 |
+
import ffmpeg
|
| 17 |
+
|
| 18 |
+
from albumentations import ImageOnlyTransform
|
| 19 |
+
from albumentations import SmallestMaxSize, PadIfNeeded, HorizontalFlip, Normalize, Compose, RandomCrop
|
| 20 |
+
from albumentations.pytorch import ToTensor
|
| 21 |
+
from efficientnet_pytorch import EfficientNet
|
| 22 |
+
|
| 23 |
+
from timm.data.transforms_factory import transforms_imagenet_train
|
| 24 |
+
|
| 25 |
+
from datasets import TrackPairDataset
|
| 26 |
+
from extract_tracks_from_videos import TRACK_LENGTH, TRACKS_ROOT
|
| 27 |
+
from generate_track_pairs import TRACK_PAIRS_FILE_NAME
|
| 28 |
+
|
| 29 |
+
SEED = 30
|
| 30 |
+
BATCH_SIZE = 8
|
| 31 |
+
TRAIN_INDICES = [9, 13, 17, 21, 25, 29, 33, 37]
|
| 32 |
+
INITIAL_LR = 0.005
|
| 33 |
+
MOMENTUM = 0.9
|
| 34 |
+
WEIGHT_DECAY = 1e-4
|
| 35 |
+
NUM_WORKERS = 8
|
| 36 |
+
NUM_WARMUP_ITERATIONS = 100
|
| 37 |
+
SNAPSHOT_FREQUENCY = 1000
|
| 38 |
+
OUTPUT_FOLDER_NAME = 'efficientnet-b7_ns_aa-original-mstd0.5_large_crop_100k'
|
| 39 |
+
SNAPSHOT_NAME_TEMPLATE = 'snapshot_{}.pth'
|
| 40 |
+
MAX_ITERS = 100000
|
| 41 |
+
|
| 42 |
+
FPS_RANGE = (15, 30)
|
| 43 |
+
SCALE_RANGE = (0.25, 1)
|
| 44 |
+
CRF_RANGE = (17, 40)
|
| 45 |
+
TUNE_VALUES = ['film', 'animation', 'grain', 'stillimage', 'fastdecode', 'zerolatency']
|
| 46 |
+
|
| 47 |
+
CROP_HEIGHT = 224
|
| 48 |
+
CROP_WIDTH = 192
|
| 49 |
+
|
| 50 |
+
PRETRAINED_WEIGHTS_PATH = 'external_data/noisy_student_efficientnet-b7.pth'
|
| 51 |
+
SNAPSHOTS_ROOT = 'snapshots'
|
| 52 |
+
LOGS_ROOT = 'logs'
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
class TrackTransform(object):
|
| 56 |
+
def __init__(self, fps_range, scale_range, crf_range, tune_values):
|
| 57 |
+
self.fps_range = fps_range
|
| 58 |
+
self.scale_range = scale_range
|
| 59 |
+
self.crf_range = crf_range
|
| 60 |
+
self.tune_values = tune_values
|
| 61 |
+
|
| 62 |
+
def get_params(self, src_fps, src_height, src_width):
|
| 63 |
+
if random.random() > 0.5:
|
| 64 |
+
return None
|
| 65 |
+
|
| 66 |
+
dst_fps = src_fps
|
| 67 |
+
if random.random() > 0.5:
|
| 68 |
+
dst_fps = random.randrange(*self.fps_range)
|
| 69 |
+
|
| 70 |
+
scale = 1.0
|
| 71 |
+
if random.random() > 0.5:
|
| 72 |
+
scale = random.uniform(*self.scale_range)
|
| 73 |
+
|
| 74 |
+
dst_height = round(scale * src_height) // 2 * 2
|
| 75 |
+
dst_width = round(scale * src_width) // 2 * 2
|
| 76 |
+
|
| 77 |
+
crf = random.randrange(*self.crf_range)
|
| 78 |
+
tune = random.choice(self.tune_values)
|
| 79 |
+
|
| 80 |
+
return dst_fps, dst_height, dst_width, crf, tune
|
| 81 |
+
|
| 82 |
+
def __call__(self, track_path, src_fps, dst_fps, dst_height, dst_width, crf, tune):
|
| 83 |
+
out, err = (
|
| 84 |
+
ffmpeg
|
| 85 |
+
.input(os.path.join(track_path, '%d.png'), framerate=src_fps, start_number=0)
|
| 86 |
+
.filter('fps', fps=dst_fps)
|
| 87 |
+
.filter('scale', dst_width, dst_height)
|
| 88 |
+
.output('pipe:', format='h264', vcodec='libx264', crf=crf, tune=tune)
|
| 89 |
+
.run(capture_stdout=True, quiet=True)
|
| 90 |
+
)
|
| 91 |
+
out, err = (
|
| 92 |
+
ffmpeg
|
| 93 |
+
.input('pipe:', format='h264')
|
| 94 |
+
.output('pipe:', format='rawvideo', pix_fmt='rgb24')
|
| 95 |
+
.run(capture_stdout=True, input=out, quiet=True)
|
| 96 |
+
)
|
| 97 |
+
|
| 98 |
+
imgs = np.frombuffer(out, dtype=np.uint8).reshape(-1, dst_height, dst_width, 3)
|
| 99 |
+
|
| 100 |
+
return imgs
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
class VisionTransform(ImageOnlyTransform):
|
| 104 |
+
def __init__(
|
| 105 |
+
self, transform, is_tensor=True, always_apply=False, p=1.0
|
| 106 |
+
):
|
| 107 |
+
super(VisionTransform, self).__init__(always_apply, p)
|
| 108 |
+
self.transform = transform
|
| 109 |
+
self.is_tensor = is_tensor
|
| 110 |
+
|
| 111 |
+
def apply(self, image, **params):
|
| 112 |
+
if self.is_tensor:
|
| 113 |
+
return self.transform(image)
|
| 114 |
+
else:
|
| 115 |
+
return np.array(self.transform(Image.fromarray(image)))
|
| 116 |
+
|
| 117 |
+
def get_transform_init_args_names(self):
|
| 118 |
+
return ("transform")
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def set_global_seed(seed):
|
| 122 |
+
torch.manual_seed(seed)
|
| 123 |
+
if torch.cuda.is_available():
|
| 124 |
+
torch.cuda.manual_seed_all(seed)
|
| 125 |
+
random.seed(seed)
|
| 126 |
+
np.random.seed(seed)
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
def prepare_cudnn(deterministic=None, benchmark=None):
|
| 130 |
+
# https://pytorch.org/docs/stable/notes/randomness.html#cudnn
|
| 131 |
+
if deterministic is None:
|
| 132 |
+
deterministic = os.environ.get("CUDNN_DETERMINISTIC", "True") == "True"
|
| 133 |
+
torch.backends.cudnn.deterministic = deterministic
|
| 134 |
+
|
| 135 |
+
# https://discuss.pytorch.org/t/how-should-i-disable-using-cudnn-in-my-code/38053/4
|
| 136 |
+
if benchmark is None:
|
| 137 |
+
benchmark = os.environ.get("CUDNN_BENCHMARK", "True") == "True"
|
| 138 |
+
torch.backends.cudnn.benchmark = benchmark
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def main():
|
| 142 |
+
with open('config.yaml', 'r') as f:
|
| 143 |
+
config = yaml.load(f)
|
| 144 |
+
|
| 145 |
+
set_global_seed(SEED)
|
| 146 |
+
prepare_cudnn(deterministic=True, benchmark=True)
|
| 147 |
+
|
| 148 |
+
model = EfficientNet.from_name('efficientnet-b7', override_params={'num_classes': 1})
|
| 149 |
+
state = torch.load(PRETRAINED_WEIGHTS_PATH, map_location=lambda storage, loc: storage)
|
| 150 |
+
state.pop('_fc.weight')
|
| 151 |
+
state.pop('_fc.bias')
|
| 152 |
+
res = model.load_state_dict(state, strict=False)
|
| 153 |
+
assert set(res.missing_keys) == set(['_fc.weight', '_fc.bias']), 'issue loading pretrained weights'
|
| 154 |
+
model = model.cuda()
|
| 155 |
+
|
| 156 |
+
normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
|
| 157 |
+
_, rand_augment, _ = transforms_imagenet_train((CROP_HEIGHT, CROP_WIDTH), auto_augment='original-mstd0.5',
|
| 158 |
+
separate=True)
|
| 159 |
+
|
| 160 |
+
train_dataset = TrackPairDataset(os.path.join(config['ARTIFACTS_PATH'], TRACKS_ROOT),
|
| 161 |
+
os.path.join(config['ARTIFACTS_PATH'], TRACK_PAIRS_FILE_NAME),
|
| 162 |
+
TRAIN_INDICES,
|
| 163 |
+
track_length=TRACK_LENGTH,
|
| 164 |
+
track_transform=TrackTransform(FPS_RANGE, SCALE_RANGE, CRF_RANGE, TUNE_VALUES),
|
| 165 |
+
image_transform=Compose([
|
| 166 |
+
SmallestMaxSize(CROP_WIDTH),
|
| 167 |
+
PadIfNeeded(CROP_HEIGHT, CROP_WIDTH),
|
| 168 |
+
HorizontalFlip(),
|
| 169 |
+
RandomCrop(CROP_HEIGHT, CROP_WIDTH),
|
| 170 |
+
VisionTransform(rand_augment, is_tensor=False, p=0.5),
|
| 171 |
+
normalize,
|
| 172 |
+
ToTensor()
|
| 173 |
+
]), sequence_mode=False)
|
| 174 |
+
|
| 175 |
+
print('Train dataset size: {}.'.format(len(train_dataset)))
|
| 176 |
+
|
| 177 |
+
warmup_optimizer = torch.optim.SGD(model._fc.parameters(), INITIAL_LR, momentum=MOMENTUM,
|
| 178 |
+
weight_decay=WEIGHT_DECAY, nesterov=True)
|
| 179 |
+
|
| 180 |
+
full_optimizer = torch.optim.SGD(model.parameters(), INITIAL_LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY,
|
| 181 |
+
nesterov=True)
|
| 182 |
+
full_lr_scheduler = torch.optim.lr_scheduler.LambdaLR(full_optimizer,
|
| 183 |
+
lambda iteration: (MAX_ITERS - iteration) / MAX_ITERS)
|
| 184 |
+
|
| 185 |
+
snapshots_root = os.path.join(config['ARTIFACTS_PATH'], SNAPSHOTS_ROOT, OUTPUT_FOLDER_NAME)
|
| 186 |
+
os.makedirs(snapshots_root)
|
| 187 |
+
log_root = os.path.join(config['ARTIFACTS_PATH'], LOGS_ROOT, OUTPUT_FOLDER_NAME)
|
| 188 |
+
os.makedirs(log_root)
|
| 189 |
+
|
| 190 |
+
writer = SummaryWriter(log_root)
|
| 191 |
+
|
| 192 |
+
iteration = 0
|
| 193 |
+
if iteration < NUM_WARMUP_ITERATIONS:
|
| 194 |
+
print('Start {} warmup iterations'.format(NUM_WARMUP_ITERATIONS))
|
| 195 |
+
model.eval()
|
| 196 |
+
model._fc.train()
|
| 197 |
+
for param in model.parameters():
|
| 198 |
+
param.requires_grad = False
|
| 199 |
+
for param in model._fc.parameters():
|
| 200 |
+
param.requires_grad = True
|
| 201 |
+
optimizer = warmup_optimizer
|
| 202 |
+
else:
|
| 203 |
+
print('Start without warmup iterations')
|
| 204 |
+
model.train()
|
| 205 |
+
optimizer = full_optimizer
|
| 206 |
+
|
| 207 |
+
max_lr = max(param_group["lr"] for param_group in full_optimizer.param_groups)
|
| 208 |
+
writer.add_scalar('train/max_lr', max_lr, iteration)
|
| 209 |
+
|
| 210 |
+
epoch = 0
|
| 211 |
+
fake_prob_dist = distributions.beta.Beta(0.5, 0.5)
|
| 212 |
+
while True:
|
| 213 |
+
epoch += 1
|
| 214 |
+
print('Epoch {} is in progress'.format(epoch))
|
| 215 |
+
loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, drop_last=True)
|
| 216 |
+
for samples in tqdm.tqdm(loader):
|
| 217 |
+
iteration += 1
|
| 218 |
+
fake_input_tensor = torch.cat(samples['fake']).cuda()
|
| 219 |
+
real_input_tensor = torch.cat(samples['real']).cuda()
|
| 220 |
+
target_fake_prob = fake_prob_dist.sample((len(fake_input_tensor),)).float().cuda()
|
| 221 |
+
fake_weight = target_fake_prob.view(-1, 1, 1, 1)
|
| 222 |
+
|
| 223 |
+
input_tensor = (1.0 - fake_weight) * real_input_tensor + fake_weight * fake_input_tensor
|
| 224 |
+
pred = model(input_tensor).flatten()
|
| 225 |
+
|
| 226 |
+
loss = F.binary_cross_entropy_with_logits(pred, target_fake_prob)
|
| 227 |
+
|
| 228 |
+
optimizer.zero_grad()
|
| 229 |
+
loss.backward()
|
| 230 |
+
optimizer.step()
|
| 231 |
+
if iteration > NUM_WARMUP_ITERATIONS:
|
| 232 |
+
full_lr_scheduler.step()
|
| 233 |
+
max_lr = max(param_group["lr"] for param_group in full_optimizer.param_groups)
|
| 234 |
+
writer.add_scalar('train/max_lr', max_lr, iteration)
|
| 235 |
+
|
| 236 |
+
writer.add_scalar('train/loss', loss.item(), iteration)
|
| 237 |
+
|
| 238 |
+
if iteration == NUM_WARMUP_ITERATIONS:
|
| 239 |
+
print('Stop warmup iterations')
|
| 240 |
+
model.train()
|
| 241 |
+
for param in model.parameters():
|
| 242 |
+
param.requires_grad = True
|
| 243 |
+
optimizer = full_optimizer
|
| 244 |
+
|
| 245 |
+
if iteration % SNAPSHOT_FREQUENCY == 0:
|
| 246 |
+
snapshot_name = SNAPSHOT_NAME_TEMPLATE.format(iteration)
|
| 247 |
+
snapshot_path = os.path.join(snapshots_root, snapshot_name)
|
| 248 |
+
print('Saving snapshot to {}'.format(snapshot_path))
|
| 249 |
+
torch.save(model.state_dict(), snapshot_path)
|
| 250 |
+
|
| 251 |
+
if iteration >= MAX_ITERS:
|
| 252 |
+
print('Stop training due to maximum iteration exceeded')
|
| 253 |
+
return
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
if __name__ == '__main__':
|
| 257 |
+
main()
|
train_b7_ns_aa_original_re_100k.py
ADDED
|
@@ -0,0 +1,266 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import yaml
|
| 2 |
+
import os
|
| 3 |
+
import random
|
| 4 |
+
import tqdm
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
from PIL import Image
|
| 8 |
+
|
| 9 |
+
import torch
|
| 10 |
+
from torch import distributions
|
| 11 |
+
from torch.nn import functional as F
|
| 12 |
+
from torch.utils.data import DataLoader
|
| 13 |
+
|
| 14 |
+
from torch.utils.tensorboard import SummaryWriter
|
| 15 |
+
|
| 16 |
+
import ffmpeg
|
| 17 |
+
|
| 18 |
+
from albumentations import ImageOnlyTransform
|
| 19 |
+
from albumentations import SmallestMaxSize, HorizontalFlip, Normalize, Compose, RandomCrop
|
| 20 |
+
from albumentations.pytorch import ToTensor
|
| 21 |
+
from efficientnet_pytorch import EfficientNet
|
| 22 |
+
|
| 23 |
+
from timm.data.transforms_factory import transforms_imagenet_train
|
| 24 |
+
from timm.data.random_erasing import RandomErasing
|
| 25 |
+
|
| 26 |
+
from datasets import TrackPairDataset
|
| 27 |
+
from extract_tracks_from_videos import TRACK_LENGTH, TRACKS_ROOT
|
| 28 |
+
from generate_track_pairs import TRACK_PAIRS_FILE_NAME
|
| 29 |
+
|
| 30 |
+
SEED = 10
|
| 31 |
+
BATCH_SIZE = 8
|
| 32 |
+
TRAIN_INDICES = [9, 13, 17, 21, 25, 29, 33, 37]
|
| 33 |
+
INITIAL_LR = 0.005
|
| 34 |
+
MOMENTUM = 0.9
|
| 35 |
+
WEIGHT_DECAY = 1e-4
|
| 36 |
+
NUM_WORKERS = 8
|
| 37 |
+
NUM_WARMUP_ITERATIONS = 100
|
| 38 |
+
SNAPSHOT_FREQUENCY = 1000
|
| 39 |
+
OUTPUT_FOLDER_NAME = 'efficientnet-b7_ns_aa-original-mstd0.5_re_100k'
|
| 40 |
+
SNAPSHOT_NAME_TEMPLATE = 'snapshot_{}.pth'
|
| 41 |
+
MAX_ITERS = 100000
|
| 42 |
+
|
| 43 |
+
FPS_RANGE = (15, 30)
|
| 44 |
+
SCALE_RANGE = (0.25, 1)
|
| 45 |
+
CRF_RANGE = (17, 40)
|
| 46 |
+
TUNE_VALUES = ['film', 'animation', 'grain', 'stillimage', 'fastdecode', 'zerolatency']
|
| 47 |
+
|
| 48 |
+
RE_PROB = 0.2
|
| 49 |
+
RE_MODE = 'pixel'
|
| 50 |
+
RE_COUNT = 1
|
| 51 |
+
RE_NUM_SPLITS = 0
|
| 52 |
+
|
| 53 |
+
MIN_SIZE = 224
|
| 54 |
+
CROP_HEIGHT = 224
|
| 55 |
+
CROP_WIDTH = 192
|
| 56 |
+
|
| 57 |
+
PRETRAINED_WEIGHTS_PATH = 'external_data/noisy_student_efficientnet-b7.pth'
|
| 58 |
+
SNAPSHOTS_ROOT = 'snapshots'
|
| 59 |
+
LOGS_ROOT = 'logs'
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
class TrackTransform(object):
|
| 63 |
+
def __init__(self, fps_range, scale_range, crf_range, tune_values):
|
| 64 |
+
self.fps_range = fps_range
|
| 65 |
+
self.scale_range = scale_range
|
| 66 |
+
self.crf_range = crf_range
|
| 67 |
+
self.tune_values = tune_values
|
| 68 |
+
|
| 69 |
+
def get_params(self, src_fps, src_height, src_width):
|
| 70 |
+
if random.random() > 0.5:
|
| 71 |
+
return None
|
| 72 |
+
|
| 73 |
+
dst_fps = src_fps
|
| 74 |
+
if random.random() > 0.5:
|
| 75 |
+
dst_fps = random.randrange(*self.fps_range)
|
| 76 |
+
|
| 77 |
+
scale = 1.0
|
| 78 |
+
if random.random() > 0.5:
|
| 79 |
+
scale = random.uniform(*self.scale_range)
|
| 80 |
+
|
| 81 |
+
dst_height = round(scale * src_height) // 2 * 2
|
| 82 |
+
dst_width = round(scale * src_width) // 2 * 2
|
| 83 |
+
|
| 84 |
+
crf = random.randrange(*self.crf_range)
|
| 85 |
+
tune = random.choice(self.tune_values)
|
| 86 |
+
|
| 87 |
+
return dst_fps, dst_height, dst_width, crf, tune
|
| 88 |
+
|
| 89 |
+
def __call__(self, track_path, src_fps, dst_fps, dst_height, dst_width, crf, tune):
|
| 90 |
+
out, err = (
|
| 91 |
+
ffmpeg
|
| 92 |
+
.input(os.path.join(track_path, '%d.png'), framerate=src_fps, start_number=0)
|
| 93 |
+
.filter('fps', fps=dst_fps)
|
| 94 |
+
.filter('scale', dst_width, dst_height)
|
| 95 |
+
.output('pipe:', format='h264', vcodec='libx264', crf=crf, tune=tune)
|
| 96 |
+
.run(capture_stdout=True, quiet=True)
|
| 97 |
+
)
|
| 98 |
+
out, err = (
|
| 99 |
+
ffmpeg
|
| 100 |
+
.input('pipe:', format='h264')
|
| 101 |
+
.output('pipe:', format='rawvideo', pix_fmt='rgb24')
|
| 102 |
+
.run(capture_stdout=True, input=out, quiet=True)
|
| 103 |
+
)
|
| 104 |
+
|
| 105 |
+
imgs = np.frombuffer(out, dtype=np.uint8).reshape(-1, dst_height, dst_width, 3)
|
| 106 |
+
|
| 107 |
+
return imgs
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
class VisionTransform(ImageOnlyTransform):
|
| 111 |
+
def __init__(
|
| 112 |
+
self, transform, is_tensor=True, always_apply=False, p=1.0
|
| 113 |
+
):
|
| 114 |
+
super(VisionTransform, self).__init__(always_apply, p)
|
| 115 |
+
self.transform = transform
|
| 116 |
+
self.is_tensor = is_tensor
|
| 117 |
+
|
| 118 |
+
def apply(self, image, **params):
|
| 119 |
+
if self.is_tensor:
|
| 120 |
+
return self.transform(image)
|
| 121 |
+
else:
|
| 122 |
+
return np.array(self.transform(Image.fromarray(image)))
|
| 123 |
+
|
| 124 |
+
def get_transform_init_args_names(self):
|
| 125 |
+
return ("transform")
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def set_global_seed(seed):
|
| 129 |
+
torch.manual_seed(seed)
|
| 130 |
+
if torch.cuda.is_available():
|
| 131 |
+
torch.cuda.manual_seed_all(seed)
|
| 132 |
+
random.seed(seed)
|
| 133 |
+
np.random.seed(seed)
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def prepare_cudnn(deterministic=None, benchmark=None):
|
| 137 |
+
# https://pytorch.org/docs/stable/notes/randomness.html#cudnn
|
| 138 |
+
if deterministic is None:
|
| 139 |
+
deterministic = os.environ.get("CUDNN_DETERMINISTIC", "True") == "True"
|
| 140 |
+
torch.backends.cudnn.deterministic = deterministic
|
| 141 |
+
|
| 142 |
+
# https://discuss.pytorch.org/t/how-should-i-disable-using-cudnn-in-my-code/38053/4
|
| 143 |
+
if benchmark is None:
|
| 144 |
+
benchmark = os.environ.get("CUDNN_BENCHMARK", "True") == "True"
|
| 145 |
+
torch.backends.cudnn.benchmark = benchmark
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def main():
|
| 149 |
+
with open('config.yaml', 'r') as f:
|
| 150 |
+
config = yaml.load(f)
|
| 151 |
+
|
| 152 |
+
set_global_seed(SEED)
|
| 153 |
+
prepare_cudnn(deterministic=True, benchmark=True)
|
| 154 |
+
|
| 155 |
+
model = EfficientNet.from_name('efficientnet-b7', override_params={'num_classes': 1})
|
| 156 |
+
state = torch.load(PRETRAINED_WEIGHTS_PATH, map_location=lambda storage, loc: storage)
|
| 157 |
+
state.pop('_fc.weight')
|
| 158 |
+
state.pop('_fc.bias')
|
| 159 |
+
res = model.load_state_dict(state, strict=False)
|
| 160 |
+
assert set(res.missing_keys) == set(['_fc.weight', '_fc.bias']), 'issue loading pretrained weights'
|
| 161 |
+
model = model.cuda()
|
| 162 |
+
|
| 163 |
+
normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
|
| 164 |
+
_, rand_augment, _ = transforms_imagenet_train((CROP_HEIGHT, CROP_WIDTH), auto_augment='original-mstd0.5',
|
| 165 |
+
separate=True)
|
| 166 |
+
|
| 167 |
+
train_dataset = TrackPairDataset(os.path.join(config['ARTIFACTS_PATH'], TRACKS_ROOT),
|
| 168 |
+
os.path.join(config['ARTIFACTS_PATH'], TRACK_PAIRS_FILE_NAME),
|
| 169 |
+
TRAIN_INDICES,
|
| 170 |
+
track_length=TRACK_LENGTH,
|
| 171 |
+
track_transform=TrackTransform(FPS_RANGE, SCALE_RANGE, CRF_RANGE, TUNE_VALUES),
|
| 172 |
+
image_transform=Compose([
|
| 173 |
+
SmallestMaxSize(MIN_SIZE),
|
| 174 |
+
HorizontalFlip(),
|
| 175 |
+
RandomCrop(CROP_HEIGHT, CROP_WIDTH),
|
| 176 |
+
VisionTransform(rand_augment, is_tensor=False, p=0.5),
|
| 177 |
+
normalize,
|
| 178 |
+
ToTensor(),
|
| 179 |
+
VisionTransform(
|
| 180 |
+
RandomErasing(probability=RE_PROB, mode=RE_MODE, max_count=RE_COUNT,
|
| 181 |
+
num_splits=RE_NUM_SPLITS, device='cpu'), is_tensor=True)
|
| 182 |
+
]), sequence_mode=False)
|
| 183 |
+
|
| 184 |
+
print('Train dataset size: {}.'.format(len(train_dataset)))
|
| 185 |
+
|
| 186 |
+
warmup_optimizer = torch.optim.SGD(model._fc.parameters(), INITIAL_LR, momentum=MOMENTUM,
|
| 187 |
+
weight_decay=WEIGHT_DECAY, nesterov=True)
|
| 188 |
+
|
| 189 |
+
full_optimizer = torch.optim.SGD(model.parameters(), INITIAL_LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY,
|
| 190 |
+
nesterov=True)
|
| 191 |
+
full_lr_scheduler = torch.optim.lr_scheduler.LambdaLR(full_optimizer,
|
| 192 |
+
lambda iteration: (MAX_ITERS - iteration) / MAX_ITERS)
|
| 193 |
+
|
| 194 |
+
snapshots_root = os.path.join(config['ARTIFACTS_PATH'], SNAPSHOTS_ROOT, OUTPUT_FOLDER_NAME)
|
| 195 |
+
os.makedirs(snapshots_root)
|
| 196 |
+
log_root = os.path.join(config['ARTIFACTS_PATH'], LOGS_ROOT, OUTPUT_FOLDER_NAME)
|
| 197 |
+
os.makedirs(log_root)
|
| 198 |
+
|
| 199 |
+
writer = SummaryWriter(log_root)
|
| 200 |
+
|
| 201 |
+
iteration = 0
|
| 202 |
+
if iteration < NUM_WARMUP_ITERATIONS:
|
| 203 |
+
print('Start {} warmup iterations'.format(NUM_WARMUP_ITERATIONS))
|
| 204 |
+
model.eval()
|
| 205 |
+
model._fc.train()
|
| 206 |
+
for param in model.parameters():
|
| 207 |
+
param.requires_grad = False
|
| 208 |
+
for param in model._fc.parameters():
|
| 209 |
+
param.requires_grad = True
|
| 210 |
+
optimizer = warmup_optimizer
|
| 211 |
+
else:
|
| 212 |
+
print('Start without warmup iterations')
|
| 213 |
+
model.train()
|
| 214 |
+
optimizer = full_optimizer
|
| 215 |
+
|
| 216 |
+
max_lr = max(param_group["lr"] for param_group in full_optimizer.param_groups)
|
| 217 |
+
writer.add_scalar('train/max_lr', max_lr, iteration)
|
| 218 |
+
|
| 219 |
+
epoch = 0
|
| 220 |
+
fake_prob_dist = distributions.beta.Beta(0.5, 0.5)
|
| 221 |
+
while True:
|
| 222 |
+
epoch += 1
|
| 223 |
+
print('Epoch {} is in progress'.format(epoch))
|
| 224 |
+
loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, drop_last=True)
|
| 225 |
+
for samples in tqdm.tqdm(loader):
|
| 226 |
+
iteration += 1
|
| 227 |
+
fake_input_tensor = torch.cat(samples['fake']).cuda()
|
| 228 |
+
real_input_tensor = torch.cat(samples['real']).cuda()
|
| 229 |
+
target_fake_prob = fake_prob_dist.sample((len(fake_input_tensor),)).float().cuda()
|
| 230 |
+
fake_weight = target_fake_prob.view(-1, 1, 1, 1)
|
| 231 |
+
|
| 232 |
+
input_tensor = (1.0 - fake_weight) * real_input_tensor + fake_weight * fake_input_tensor
|
| 233 |
+
pred = model(input_tensor).flatten()
|
| 234 |
+
|
| 235 |
+
loss = F.binary_cross_entropy_with_logits(pred, target_fake_prob)
|
| 236 |
+
|
| 237 |
+
optimizer.zero_grad()
|
| 238 |
+
loss.backward()
|
| 239 |
+
optimizer.step()
|
| 240 |
+
if iteration > NUM_WARMUP_ITERATIONS:
|
| 241 |
+
full_lr_scheduler.step()
|
| 242 |
+
max_lr = max(param_group["lr"] for param_group in full_optimizer.param_groups)
|
| 243 |
+
writer.add_scalar('train/max_lr', max_lr, iteration)
|
| 244 |
+
|
| 245 |
+
writer.add_scalar('train/loss', loss.item(), iteration)
|
| 246 |
+
|
| 247 |
+
if iteration == NUM_WARMUP_ITERATIONS:
|
| 248 |
+
print('Stop warmup iterations')
|
| 249 |
+
model.train()
|
| 250 |
+
for param in model.parameters():
|
| 251 |
+
param.requires_grad = True
|
| 252 |
+
optimizer = full_optimizer
|
| 253 |
+
|
| 254 |
+
if iteration % SNAPSHOT_FREQUENCY == 0:
|
| 255 |
+
snapshot_name = SNAPSHOT_NAME_TEMPLATE.format(iteration)
|
| 256 |
+
snapshot_path = os.path.join(snapshots_root, snapshot_name)
|
| 257 |
+
print('Saving snapshot to {}'.format(snapshot_path))
|
| 258 |
+
torch.save(model.state_dict(), snapshot_path)
|
| 259 |
+
|
| 260 |
+
if iteration >= MAX_ITERS:
|
| 261 |
+
print('Stop training due to maximum iteration exceeded')
|
| 262 |
+
return
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
if __name__ == '__main__':
|
| 266 |
+
main()
|
train_b7_ns_seq_aa_original_100k.py
ADDED
|
@@ -0,0 +1,281 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import yaml
|
| 2 |
+
import os
|
| 3 |
+
import random
|
| 4 |
+
import tqdm
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
from PIL import Image
|
| 8 |
+
|
| 9 |
+
import torch
|
| 10 |
+
from torch import nn
|
| 11 |
+
from torch import distributions
|
| 12 |
+
from torch.nn import functional as F
|
| 13 |
+
from torch.utils.data import DataLoader
|
| 14 |
+
|
| 15 |
+
from torch.utils.tensorboard import SummaryWriter
|
| 16 |
+
|
| 17 |
+
import ffmpeg
|
| 18 |
+
|
| 19 |
+
from albumentations import ImageOnlyTransform
|
| 20 |
+
from albumentations import SmallestMaxSize, HorizontalFlip, Normalize, Compose, RandomCrop
|
| 21 |
+
from albumentations.pytorch import ToTensor
|
| 22 |
+
from efficientnet_pytorch import EfficientNet
|
| 23 |
+
from efficientnet_pytorch.model import MBConvBlock
|
| 24 |
+
|
| 25 |
+
from timm.data.transforms_factory import transforms_imagenet_train
|
| 26 |
+
|
| 27 |
+
from datasets import TrackPairDataset
|
| 28 |
+
from extract_tracks_from_videos import TRACK_LENGTH, TRACKS_ROOT
|
| 29 |
+
from generate_track_pairs import TRACK_PAIRS_FILE_NAME
|
| 30 |
+
|
| 31 |
+
SEED = 20
|
| 32 |
+
BATCH_SIZE = 8
|
| 33 |
+
TRAIN_INDICES = [19, 21, 23, 25, 27, 29, 31]
|
| 34 |
+
INITIAL_LR = 0.005
|
| 35 |
+
MOMENTUM = 0.9
|
| 36 |
+
WEIGHT_DECAY = 1e-4
|
| 37 |
+
NUM_WORKERS = 8
|
| 38 |
+
NUM_WARMUP_ITERATIONS = 100
|
| 39 |
+
SNAPSHOT_FREQUENCY = 1000
|
| 40 |
+
OUTPUT_FOLDER_NAME = 'efficientnet-b7_ns_seq_aa-original-mstd0.5_100k'
|
| 41 |
+
SNAPSHOT_NAME_TEMPLATE = 'snapshot_{}.pth'
|
| 42 |
+
FINAL_SNAPSHOT_NAME = 'final.pth'
|
| 43 |
+
MAX_ITERS = 100000
|
| 44 |
+
|
| 45 |
+
FPS_RANGE = (15, 30)
|
| 46 |
+
SCALE_RANGE = (0.25, 1)
|
| 47 |
+
CRF_RANGE = (17, 40)
|
| 48 |
+
TUNE_VALUES = ['film', 'animation', 'grain', 'stillimage', 'fastdecode', 'zerolatency']
|
| 49 |
+
|
| 50 |
+
MIN_SIZE = 224
|
| 51 |
+
CROP_HEIGHT = 224
|
| 52 |
+
CROP_WIDTH = 192
|
| 53 |
+
|
| 54 |
+
PRETRAINED_WEIGHTS_PATH = 'external_data/noisy_student_efficientnet-b7.pth'
|
| 55 |
+
SNAPSHOTS_ROOT = 'snapshots'
|
| 56 |
+
LOGS_ROOT = 'logs'
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
class SeqExpandConv(nn.Module):
|
| 60 |
+
def __init__(self, in_channels, out_channels, seq_length):
|
| 61 |
+
super(SeqExpandConv, self).__init__()
|
| 62 |
+
self.conv = nn.Conv3d(in_channels, out_channels, kernel_size=(3, 1, 1), padding=(1, 0, 0), bias=False)
|
| 63 |
+
self.seq_length = seq_length
|
| 64 |
+
|
| 65 |
+
def forward(self, x):
|
| 66 |
+
batch_size, in_channels, height, width = x.shape
|
| 67 |
+
x = x.view(batch_size // self.seq_length, self.seq_length, in_channels, height, width)
|
| 68 |
+
x = self.conv(x.transpose(1, 2).contiguous()).transpose(2, 1).contiguous()
|
| 69 |
+
x = x.flatten(0, 1)
|
| 70 |
+
return x
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
class TrackTransform(object):
|
| 74 |
+
def __init__(self, fps_range, scale_range, crf_range, tune_values):
|
| 75 |
+
self.fps_range = fps_range
|
| 76 |
+
self.scale_range = scale_range
|
| 77 |
+
self.crf_range = crf_range
|
| 78 |
+
self.tune_values = tune_values
|
| 79 |
+
|
| 80 |
+
def get_params(self, src_fps, src_height, src_width):
|
| 81 |
+
if random.random() > 0.5:
|
| 82 |
+
return None
|
| 83 |
+
|
| 84 |
+
dst_fps = src_fps
|
| 85 |
+
if random.random() > 0.5:
|
| 86 |
+
dst_fps = random.randrange(*self.fps_range)
|
| 87 |
+
|
| 88 |
+
scale = 1.0
|
| 89 |
+
if random.random() > 0.5:
|
| 90 |
+
scale = random.uniform(*self.scale_range)
|
| 91 |
+
|
| 92 |
+
dst_height = round(scale * src_height) // 2 * 2
|
| 93 |
+
dst_width = round(scale * src_width) // 2 * 2
|
| 94 |
+
|
| 95 |
+
crf = random.randrange(*self.crf_range)
|
| 96 |
+
tune = random.choice(self.tune_values)
|
| 97 |
+
|
| 98 |
+
return dst_fps, dst_height, dst_width, crf, tune
|
| 99 |
+
|
| 100 |
+
def __call__(self, track_path, src_fps, dst_fps, dst_height, dst_width, crf, tune):
|
| 101 |
+
out, err = (
|
| 102 |
+
ffmpeg
|
| 103 |
+
.input(os.path.join(track_path, '%d.png'), framerate=src_fps, start_number=0)
|
| 104 |
+
.filter('fps', fps=dst_fps)
|
| 105 |
+
.filter('scale', dst_width, dst_height)
|
| 106 |
+
.output('pipe:', format='h264', vcodec='libx264', crf=crf, tune=tune)
|
| 107 |
+
.run(capture_stdout=True, quiet=True)
|
| 108 |
+
)
|
| 109 |
+
out, err = (
|
| 110 |
+
ffmpeg
|
| 111 |
+
.input('pipe:', format='h264')
|
| 112 |
+
.output('pipe:', format='rawvideo', pix_fmt='rgb24')
|
| 113 |
+
.run(capture_stdout=True, input=out, quiet=True)
|
| 114 |
+
)
|
| 115 |
+
|
| 116 |
+
imgs = np.frombuffer(out, dtype=np.uint8).reshape(-1, dst_height, dst_width, 3)
|
| 117 |
+
|
| 118 |
+
return imgs
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
class VisionTransform(ImageOnlyTransform):
|
| 122 |
+
def __init__(
|
| 123 |
+
self, transform, always_apply=False, p=1.0
|
| 124 |
+
):
|
| 125 |
+
super(VisionTransform, self).__init__(always_apply, p)
|
| 126 |
+
self.transform = transform
|
| 127 |
+
|
| 128 |
+
def apply(self, image, **params):
|
| 129 |
+
return np.array(self.transform(Image.fromarray(image)))
|
| 130 |
+
|
| 131 |
+
def get_transform_init_args_names(self):
|
| 132 |
+
return ("transform")
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
def set_global_seed(seed):
|
| 136 |
+
torch.manual_seed(seed)
|
| 137 |
+
if torch.cuda.is_available():
|
| 138 |
+
torch.cuda.manual_seed_all(seed)
|
| 139 |
+
random.seed(seed)
|
| 140 |
+
np.random.seed(seed)
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
def prepare_cudnn(deterministic=None, benchmark=None):
|
| 144 |
+
# https://pytorch.org/docs/stable/notes/randomness.html#cudnn
|
| 145 |
+
if deterministic is None:
|
| 146 |
+
deterministic = os.environ.get("CUDNN_DETERMINISTIC", "True") == "True"
|
| 147 |
+
torch.backends.cudnn.deterministic = deterministic
|
| 148 |
+
|
| 149 |
+
# https://discuss.pytorch.org/t/how-should-i-disable-using-cudnn-in-my-code/38053/4
|
| 150 |
+
if benchmark is None:
|
| 151 |
+
benchmark = os.environ.get("CUDNN_BENCHMARK", "True") == "True"
|
| 152 |
+
torch.backends.cudnn.benchmark = benchmark
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
def main():
|
| 156 |
+
with open('config.yaml', 'r') as f:
|
| 157 |
+
config = yaml.load(f)
|
| 158 |
+
|
| 159 |
+
set_global_seed(SEED)
|
| 160 |
+
prepare_cudnn(deterministic=True, benchmark=True)
|
| 161 |
+
|
| 162 |
+
model = EfficientNet.from_name('efficientnet-b7', override_params={'num_classes': 1})
|
| 163 |
+
state = torch.load(PRETRAINED_WEIGHTS_PATH, map_location=lambda storage, loc: storage)
|
| 164 |
+
state.pop('_fc.weight')
|
| 165 |
+
state.pop('_fc.bias')
|
| 166 |
+
res = model.load_state_dict(state, strict=False)
|
| 167 |
+
assert set(res.missing_keys) == set(['_fc.weight', '_fc.bias']), 'issue loading pretrained weights'
|
| 168 |
+
|
| 169 |
+
for module in model.modules():
|
| 170 |
+
if isinstance(module, MBConvBlock):
|
| 171 |
+
if module._block_args.expand_ratio != 1:
|
| 172 |
+
expand_conv = module._expand_conv
|
| 173 |
+
seq_expand_conv = SeqExpandConv(expand_conv.in_channels, expand_conv.out_channels, len(TRAIN_INDICES))
|
| 174 |
+
seq_expand_conv.conv.weight.data[:, :, 0, :, :].copy_(expand_conv.weight.data / 3)
|
| 175 |
+
seq_expand_conv.conv.weight.data[:, :, 1, :, :].copy_(expand_conv.weight.data / 3)
|
| 176 |
+
seq_expand_conv.conv.weight.data[:, :, 2, :, :].copy_(expand_conv.weight.data / 3)
|
| 177 |
+
module._expand_conv = seq_expand_conv
|
| 178 |
+
|
| 179 |
+
model = model.cuda()
|
| 180 |
+
|
| 181 |
+
normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
|
| 182 |
+
_, rand_augment, _ = transforms_imagenet_train((CROP_HEIGHT, CROP_WIDTH), auto_augment='original-mstd0.5',
|
| 183 |
+
separate=True)
|
| 184 |
+
|
| 185 |
+
train_dataset = TrackPairDataset(os.path.join(config['ARTIFACTS_PATH'], TRACKS_ROOT),
|
| 186 |
+
os.path.join(config['ARTIFACTS_PATH'], TRACK_PAIRS_FILE_NAME),
|
| 187 |
+
TRAIN_INDICES,
|
| 188 |
+
track_length=TRACK_LENGTH,
|
| 189 |
+
track_transform=TrackTransform(FPS_RANGE, SCALE_RANGE, CRF_RANGE, TUNE_VALUES),
|
| 190 |
+
image_transform=Compose([
|
| 191 |
+
SmallestMaxSize(MIN_SIZE),
|
| 192 |
+
HorizontalFlip(),
|
| 193 |
+
RandomCrop(CROP_HEIGHT, CROP_WIDTH),
|
| 194 |
+
VisionTransform(rand_augment, p=0.5),
|
| 195 |
+
normalize,
|
| 196 |
+
ToTensor()
|
| 197 |
+
]), sequence_mode=True)
|
| 198 |
+
|
| 199 |
+
print('Train dataset size: {}.'.format(len(train_dataset)))
|
| 200 |
+
|
| 201 |
+
warmup_optimizer = torch.optim.SGD(model._fc.parameters(), INITIAL_LR, momentum=MOMENTUM,
|
| 202 |
+
weight_decay=WEIGHT_DECAY, nesterov=True)
|
| 203 |
+
|
| 204 |
+
full_optimizer = torch.optim.SGD(model.parameters(), INITIAL_LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY,
|
| 205 |
+
nesterov=True)
|
| 206 |
+
full_lr_scheduler = torch.optim.lr_scheduler.LambdaLR(full_optimizer,
|
| 207 |
+
lambda iteration: (MAX_ITERS - iteration) / MAX_ITERS)
|
| 208 |
+
|
| 209 |
+
snapshots_root = os.path.join(config['ARTIFACTS_PATH'], SNAPSHOTS_ROOT, OUTPUT_FOLDER_NAME)
|
| 210 |
+
os.makedirs(snapshots_root)
|
| 211 |
+
log_root = os.path.join(config['ARTIFACTS_PATH'], LOGS_ROOT, OUTPUT_FOLDER_NAME)
|
| 212 |
+
os.makedirs(log_root)
|
| 213 |
+
|
| 214 |
+
writer = SummaryWriter(log_root)
|
| 215 |
+
|
| 216 |
+
iteration = 0
|
| 217 |
+
if iteration < NUM_WARMUP_ITERATIONS:
|
| 218 |
+
print('Start {} warmup iterations'.format(NUM_WARMUP_ITERATIONS))
|
| 219 |
+
model.eval()
|
| 220 |
+
model._fc.train()
|
| 221 |
+
for param in model.parameters():
|
| 222 |
+
param.requires_grad = False
|
| 223 |
+
for param in model._fc.parameters():
|
| 224 |
+
param.requires_grad = True
|
| 225 |
+
optimizer = warmup_optimizer
|
| 226 |
+
else:
|
| 227 |
+
print('Start without warmup iterations')
|
| 228 |
+
model.train()
|
| 229 |
+
optimizer = full_optimizer
|
| 230 |
+
|
| 231 |
+
max_lr = max(param_group["lr"] for param_group in full_optimizer.param_groups)
|
| 232 |
+
writer.add_scalar('train/max_lr', max_lr, iteration)
|
| 233 |
+
|
| 234 |
+
epoch = 0
|
| 235 |
+
fake_prob_dist = distributions.beta.Beta(0.5, 0.5)
|
| 236 |
+
while True:
|
| 237 |
+
epoch += 1
|
| 238 |
+
print('Epoch {} is in progress'.format(epoch))
|
| 239 |
+
loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, drop_last=True)
|
| 240 |
+
for samples in tqdm.tqdm(loader):
|
| 241 |
+
iteration += 1
|
| 242 |
+
fake_input_tensor = torch.stack(samples['fake']).transpose(0, 1).cuda()
|
| 243 |
+
real_input_tensor = torch.stack(samples['real']).transpose(0, 1).cuda()
|
| 244 |
+
target_fake_prob = fake_prob_dist.sample((len(fake_input_tensor),)).float().cuda()
|
| 245 |
+
fake_weight = target_fake_prob.view(-1, 1, 1, 1, 1)
|
| 246 |
+
|
| 247 |
+
input_tensor = (1.0 - fake_weight) * real_input_tensor + fake_weight * fake_input_tensor
|
| 248 |
+
pred = model(input_tensor.flatten(0, 1)).flatten()
|
| 249 |
+
|
| 250 |
+
loss = F.binary_cross_entropy_with_logits(pred, target_fake_prob.repeat_interleave(len(TRAIN_INDICES)))
|
| 251 |
+
|
| 252 |
+
optimizer.zero_grad()
|
| 253 |
+
loss.backward()
|
| 254 |
+
optimizer.step()
|
| 255 |
+
if iteration > NUM_WARMUP_ITERATIONS:
|
| 256 |
+
full_lr_scheduler.step()
|
| 257 |
+
max_lr = max(param_group["lr"] for param_group in full_optimizer.param_groups)
|
| 258 |
+
writer.add_scalar('train/max_lr', max_lr, iteration)
|
| 259 |
+
|
| 260 |
+
writer.add_scalar('train/loss', loss.item(), iteration)
|
| 261 |
+
|
| 262 |
+
if iteration == NUM_WARMUP_ITERATIONS:
|
| 263 |
+
print('Stop warmup iterations')
|
| 264 |
+
model.train()
|
| 265 |
+
for param in model.parameters():
|
| 266 |
+
param.requires_grad = True
|
| 267 |
+
optimizer = full_optimizer
|
| 268 |
+
|
| 269 |
+
if iteration % SNAPSHOT_FREQUENCY == 0:
|
| 270 |
+
snapshot_name = SNAPSHOT_NAME_TEMPLATE.format(iteration)
|
| 271 |
+
snapshot_path = os.path.join(snapshots_root, snapshot_name)
|
| 272 |
+
print('Saving snapshot to {}'.format(snapshot_path))
|
| 273 |
+
torch.save(model.state_dict(), snapshot_path)
|
| 274 |
+
|
| 275 |
+
if iteration >= MAX_ITERS:
|
| 276 |
+
print('Stop training due to maximum iteration exceeded')
|
| 277 |
+
return
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
if __name__ == '__main__':
|
| 281 |
+
main()
|