Upload 415 files
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +8 -0
- README.md +121 -0
- Real-ESRGAN/.pre-commit-config.yaml +46 -0
- Real-ESRGAN/MANIFEST.in +8 -0
- Real-ESRGAN/README_CN.md +276 -0
- Real-ESRGAN/cog.yaml +22 -0
- Real-ESRGAN/cog_predict.py +148 -0
- Real-ESRGAN/experiments/pretrained_models/README.md +1 -0
- Real-ESRGAN/gfpgan/weights/detection_Resnet50_Final.pth +3 -0
- Real-ESRGAN/gfpgan/weights/parsing_parsenet.pth +3 -0
- Real-ESRGAN/inference_realesrgan.py +166 -0
- Real-ESRGAN/inference_realesrgan_video.py +398 -0
- Real-ESRGAN/inputs/00003.png +0 -0
- Real-ESRGAN/inputs/00017_gray.png +0 -0
- Real-ESRGAN/inputs/0014.jpg +0 -0
- Real-ESRGAN/inputs/0030.jpg +0 -0
- Real-ESRGAN/inputs/ADE_val_00000114.jpg +0 -0
- Real-ESRGAN/inputs/OST_009.png +0 -0
- Real-ESRGAN/inputs/children-alpha.png +0 -0
- Real-ESRGAN/inputs/tree_alpha_16bit.png +0 -0
- Real-ESRGAN/inputs/video/onepiece_demo.mp4 +0 -0
- Real-ESRGAN/inputs/wolf_gray.jpg +0 -0
- Real-ESRGAN/options/finetune_realesrgan_x4plus.yml +188 -0
- Real-ESRGAN/options/finetune_realesrgan_x4plus_pairdata.yml +150 -0
- Real-ESRGAN/options/train_realesrgan_x2plus.yml +186 -0
- Real-ESRGAN/options/train_realesrgan_x4plus.yml +185 -0
- Real-ESRGAN/options/train_realesrnet_x2plus.yml +145 -0
- Real-ESRGAN/options/train_realesrnet_x4plus.yml +144 -0
- Real-ESRGAN/realesrgan/__init__.py +6 -0
- Real-ESRGAN/realesrgan/__pycache__/__init__.cpython-311.pyc +0 -0
- Real-ESRGAN/realesrgan/__pycache__/utils.cpython-311.pyc +0 -0
- Real-ESRGAN/realesrgan/__pycache__/version.cpython-311.pyc +0 -0
- Real-ESRGAN/realesrgan/archs/__init__.py +10 -0
- Real-ESRGAN/realesrgan/archs/__pycache__/__init__.cpython-311.pyc +0 -0
- Real-ESRGAN/realesrgan/archs/__pycache__/discriminator_arch.cpython-311.pyc +0 -0
- Real-ESRGAN/realesrgan/archs/__pycache__/srvgg_arch.cpython-311.pyc +0 -0
- Real-ESRGAN/realesrgan/archs/discriminator_arch.py +67 -0
- Real-ESRGAN/realesrgan/archs/srvgg_arch.py +69 -0
- Real-ESRGAN/realesrgan/data/__init__.py +10 -0
- Real-ESRGAN/realesrgan/data/__pycache__/__init__.cpython-311.pyc +0 -0
- Real-ESRGAN/realesrgan/data/__pycache__/realesrgan_dataset.cpython-311.pyc +0 -0
- Real-ESRGAN/realesrgan/data/__pycache__/realesrgan_paired_dataset.cpython-311.pyc +0 -0
- Real-ESRGAN/realesrgan/data/realesrgan_dataset.py +192 -0
- Real-ESRGAN/realesrgan/data/realesrgan_paired_dataset.py +108 -0
- Real-ESRGAN/realesrgan/models/__init__.py +10 -0
- Real-ESRGAN/realesrgan/models/__pycache__/__init__.cpython-311.pyc +0 -0
- Real-ESRGAN/realesrgan/models/__pycache__/realesrgan_model.cpython-311.pyc +0 -0
- Real-ESRGAN/realesrgan/models/__pycache__/realesrnet_model.cpython-311.pyc +0 -0
- Real-ESRGAN/realesrgan/models/realesrgan_model.py +258 -0
- Real-ESRGAN/realesrgan/models/realesrnet_model.py +188 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
checkpoints/pretrained.state filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
examples/kennedy_hd.mkv filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
examples/mona_hd.jpg filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
examples/mona_hd.mkv filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
input_audios/part_000_RVC_1.wav filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
input_videos/MC.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
temp/result.avi filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
temp/temp.wav filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Wav2Lip-HD: Improving Wav2Lip to achieve High-Fidelity Videos
|
| 2 |
+
|
| 3 |
+
This repository contains code for achieving high-fidelity lip-syncing in videos, using the [Wav2Lip algorithm](https://github.com/Rudrabha/Wav2Lip) for lip-syncing and the [Real-ESRGAN algorithm](https://github.com/xinntao/Real-ESRGAN) for super-resolution. The combination of these two algorithms allows for the creation of lip-synced videos that are both highly accurate and visually stunning.
|
| 4 |
+
|
| 5 |
+
## Algorithm
|
| 6 |
+
|
| 7 |
+
The algorithm for achieving high-fidelity lip-syncing with Wav2Lip and Real-ESRGAN can be summarized as follows:
|
| 8 |
+
|
| 9 |
+
1. The input video and audio are given to `Wav2Lip` algorithm.
|
| 10 |
+
2. A Python script is used to extract frames from the video generated by Wav2Lip.
|
| 11 |
+
3. Frames are provided to Real-ESRGAN algorithm to improve quality.
|
| 12 |
+
4. Then, the high-quality frames are converted to video using ffmpeg, along with the original audio.
|
| 13 |
+
5. The result is a high-quality lip-syncing video.
|
| 14 |
+
6. The specific steps for running this algorithm are described in the [Testing Model](https://github.com/saifhassan/Wav2Lip-HD#testing-model) section of this README.
|
| 15 |
+
|
| 16 |
+
## Testing Model
|
| 17 |
+
|
| 18 |
+
To test the "Wav2Lip-HD" model, follow these steps:
|
| 19 |
+
|
| 20 |
+
1. Clone this repository and install requirements using following command (Make sure, Python and CUDA are already installed):
|
| 21 |
+
|
| 22 |
+
```
|
| 23 |
+
git clone https://github.com/saifhassan/Wav2Lip-HD.git
|
| 24 |
+
cd Wav2Lip-HD
|
| 25 |
+
pip install -r requirements.txt
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
2. Downloading weights
|
| 29 |
+
|
| 30 |
+
| Model | Directory | Download Link |
|
| 31 |
+
| :------------- |:-------------| :-----:|
|
| 32 |
+
| Wav2Lip | [checkpoints/](https://github.com/saifhassan/Wav2Lip-HD/tree/main/checkpoints) | [Link](https://drive.google.com/drive/folders/1tB_uz-TYMePRMZzrDMdShWUZZ0JK3SIZ?usp=sharing) |
|
| 33 |
+
| ESRGAN | [experiments/001_ESRGAN_x4_f64b23_custom16k_500k_B16G1_wandb/models/](https://github.com/saifhassan/Wav2Lip-HD/tree/main/experiments/001_ESRGAN_x4_f64b23_custom16k_500k_B16G1_wandb/models) | [Link](https://drive.google.com/file/d/1Al8lEpnx2K-kDX7zL2DBcAuDnSKXACPb/view?usp=sharing) |
|
| 34 |
+
| Face_Detection | [face_detection/detection/sfd/](https://github.com/saifhassan/Wav2Lip-HD/tree/main/face_detection/detection/sfd) | [Link](https://drive.google.com/file/d/1uNLYCPFFmO-og3WSHyFytJQLLYOwH5uY/view?usp=sharing) |
|
| 35 |
+
| Real-ESRGAN | Real-ESRGAN/gfpgan/weights/ | [Link](https://drive.google.com/drive/folders/1BLx6aMpHgFt41fJ27_cRmT8bt53kVAYG?usp=sharing) |
|
| 36 |
+
| Real-ESRGAN | Real-ESRGAN/weights/ | [Link](https://drive.google.com/file/d/1qNIf8cJl_dQo3ivelPJVWFkApyEAGnLi/view?usp=sharing) |
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
3. Put input video to `input_videos` directory and input audio to `input_audios` directory.
|
| 40 |
+
4. Open `run_final.sh` file and modify following parameters:
|
| 41 |
+
|
| 42 |
+
`filename=kennedy` (just video file name without extension)
|
| 43 |
+
|
| 44 |
+
`input_audio=input_audios/ai.wav` (audio filename with extension)
|
| 45 |
+
|
| 46 |
+
5. Execute `run_final.sh` using following command:
|
| 47 |
+
|
| 48 |
+
```
|
| 49 |
+
bash run_final.sh
|
| 50 |
+
```
|
| 51 |
+
|
| 52 |
+
6. Outputs
|
| 53 |
+
|
| 54 |
+
- `output_videos_wav2lip` directory contains video output generated by wav2lip algorithm.
|
| 55 |
+
- `frames_wav2lip` directory contains frames extracted from video (generated by wav2lip algorithm).
|
| 56 |
+
- `frames_hd` directory contains frames after performing super-resolution using Real-ESRGAN algorithm.
|
| 57 |
+
- `output_videos_hd` directory contains final high quality video output generated by Wav2Lip-HD.
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
## Results
|
| 61 |
+
The results produced by Wav2Lip-HD are in two forms, one is frames and other is videos. Both are shared below:
|
| 62 |
+
|
| 63 |
+
### Example output frames
|
| 64 |
+
<table>
|
| 65 |
+
<tr>
|
| 66 |
+
<td>Frame by Wav2Lip</td>
|
| 67 |
+
<td>Optimized Frame</td>
|
| 68 |
+
</tr>
|
| 69 |
+
<tr>
|
| 70 |
+
<td><img src="examples/1_low.jpg" width=500></td>
|
| 71 |
+
<td><img src="examples/1_hd.jpg" width=500></td>
|
| 72 |
+
</tr>
|
| 73 |
+
<tr>
|
| 74 |
+
<td><img src="examples/kennedy_low.jpg" width=500></td>
|
| 75 |
+
<td><img src="examples/kennedy_hd.jpg" width=500></td>
|
| 76 |
+
</tr>
|
| 77 |
+
|
| 78 |
+
</tr>
|
| 79 |
+
<tr>
|
| 80 |
+
<td><img src="examples/mona_low.jpg" width=500></td>
|
| 81 |
+
<td><img src="examples/mona_hd.jpg" width=500></td>
|
| 82 |
+
</tr>
|
| 83 |
+
</table>
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
### Example output videos
|
| 87 |
+
|
| 88 |
+
| Video by Wav2Lip | Optimized Video |
|
| 89 |
+
| ------------- | ------------- |
|
| 90 |
+
| <video src="https://user-images.githubusercontent.com/11873763/229389410-56d96244-8c67-4add-a43e-a4900aa9db88.mp4" width="500"> | <video src="https://user-images.githubusercontent.com/11873763/229389414-d5cb6d33-7772-47a7-b829-9e3d5c3945a1.mp4" width="500">|
|
| 91 |
+
| <video src="https://user-images.githubusercontent.com/11873763/229389751-507669f1-7772-4863-ab23-8df7f206a065.mp4" width="500"> | <video src="https://user-images.githubusercontent.com/11873763/229389962-5373b765-ce3a-4af2-bd6a-8be8543ee933.mp4" width="500">|
|
| 92 |
+
|
| 93 |
+
## Acknowledgements
|
| 94 |
+
|
| 95 |
+
We would like to thank the following repositories and libraries for their contributions to our work:
|
| 96 |
+
|
| 97 |
+
1. The [Wav2Lip](https://github.com/Rudrabha/Wav2Lip) repository, which is the core model of our algorithm that performs lip-sync.
|
| 98 |
+
2. The [face-parsing.PyTorch](https://github.com/zllrunning/face-parsing.PyTorch) repository, which provides us with a model for face segmentation.
|
| 99 |
+
3. The [Real-ESRGAN](https://github.com/xinntao/Real-ESRGAN) repository, which provides the super resolution component for our algorithm.
|
| 100 |
+
4. [ffmpeg](https://ffmpeg.org), which we use for converting frames to video.
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
|
Real-ESRGAN/.pre-commit-config.yaml
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
repos:
|
| 2 |
+
# flake8
|
| 3 |
+
- repo: https://github.com/PyCQA/flake8
|
| 4 |
+
rev: 3.8.3
|
| 5 |
+
hooks:
|
| 6 |
+
- id: flake8
|
| 7 |
+
args: ["--config=setup.cfg", "--ignore=W504, W503"]
|
| 8 |
+
|
| 9 |
+
# modify known_third_party
|
| 10 |
+
- repo: https://github.com/asottile/seed-isort-config
|
| 11 |
+
rev: v2.2.0
|
| 12 |
+
hooks:
|
| 13 |
+
- id: seed-isort-config
|
| 14 |
+
|
| 15 |
+
# isort
|
| 16 |
+
- repo: https://github.com/timothycrosley/isort
|
| 17 |
+
rev: 5.2.2
|
| 18 |
+
hooks:
|
| 19 |
+
- id: isort
|
| 20 |
+
|
| 21 |
+
# yapf
|
| 22 |
+
- repo: https://github.com/pre-commit/mirrors-yapf
|
| 23 |
+
rev: v0.30.0
|
| 24 |
+
hooks:
|
| 25 |
+
- id: yapf
|
| 26 |
+
|
| 27 |
+
# codespell
|
| 28 |
+
- repo: https://github.com/codespell-project/codespell
|
| 29 |
+
rev: v2.1.0
|
| 30 |
+
hooks:
|
| 31 |
+
- id: codespell
|
| 32 |
+
|
| 33 |
+
# pre-commit-hooks
|
| 34 |
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
| 35 |
+
rev: v3.2.0
|
| 36 |
+
hooks:
|
| 37 |
+
- id: trailing-whitespace # Trim trailing whitespace
|
| 38 |
+
- id: check-yaml # Attempt to load all yaml files to verify syntax
|
| 39 |
+
- id: check-merge-conflict # Check for files that contain merge conflict strings
|
| 40 |
+
- id: double-quote-string-fixer # Replace double quoted strings with single quoted strings
|
| 41 |
+
- id: end-of-file-fixer # Make sure files end in a newline and only a newline
|
| 42 |
+
- id: requirements-txt-fixer # Sort entries in requirements.txt and remove incorrect entry for pkg-resources==0.0.0
|
| 43 |
+
- id: fix-encoding-pragma # Remove the coding pragma: # -*- coding: utf-8 -*-
|
| 44 |
+
args: ["--remove"]
|
| 45 |
+
- id: mixed-line-ending # Replace or check mixed line ending
|
| 46 |
+
args: ["--fix=lf"]
|
Real-ESRGAN/MANIFEST.in
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
include assets/*
|
| 2 |
+
include inputs/*
|
| 3 |
+
include scripts/*.py
|
| 4 |
+
include inference_realesrgan.py
|
| 5 |
+
include VERSION
|
| 6 |
+
include LICENSE
|
| 7 |
+
include requirements.txt
|
| 8 |
+
include weights/README.md
|
Real-ESRGAN/README_CN.md
ADDED
|
@@ -0,0 +1,276 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<p align="center">
|
| 2 |
+
<img src="assets/realesrgan_logo.png" height=120>
|
| 3 |
+
</p>
|
| 4 |
+
|
| 5 |
+
## <div align="center"><b><a href="README.md">English</a> | <a href="README_CN.md">简体中文</a></b></div>
|
| 6 |
+
|
| 7 |
+
[](https://github.com/xinntao/Real-ESRGAN/releases)
|
| 8 |
+
[](https://pypi.org/project/realesrgan/)
|
| 9 |
+
[](https://github.com/xinntao/Real-ESRGAN/issues)
|
| 10 |
+
[](https://github.com/xinntao/Real-ESRGAN/issues)
|
| 11 |
+
[](https://github.com/xinntao/Real-ESRGAN/blob/master/LICENSE)
|
| 12 |
+
[](https://github.com/xinntao/Real-ESRGAN/blob/master/.github/workflows/pylint.yml)
|
| 13 |
+
[](https://github.com/xinntao/Real-ESRGAN/blob/master/.github/workflows/publish-pip.yml)
|
| 14 |
+
|
| 15 |
+
:fire: 更新动漫视频的小模型 **RealESRGAN AnimeVideo-v3**. 更多信息在 [[动漫视频模型介绍](docs/anime_video_model.md)] 和 [[比较](docs/anime_comparisons_CN.md)] 中.
|
| 16 |
+
|
| 17 |
+
1. Real-ESRGAN的[Colab Demo](https://colab.research.google.com/drive/1k2Zod6kSHEvraybHl50Lys0LerhyTMCo?usp=sharing) | Real-ESRGAN**动漫视频** 的[Colab Demo](https://colab.research.google.com/drive/1yNl9ORUxxlL4N0keJa2SEPB61imPQd1B?usp=sharing)
|
| 18 |
+
2. **支持Intel/AMD/Nvidia显卡**的绿色版exe文件: [Windows版](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesrgan-ncnn-vulkan-20220424-windows.zip) / [Linux版](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesrgan-ncnn-vulkan-20220424-ubuntu.zip) / [macOS版](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesrgan-ncnn-vulkan-20220424-macos.zip),详情请移步[这里](#便携版(绿色版)可执行文件)。NCNN的实现在 [Real-ESRGAN-ncnn-vulkan](https://github.com/xinntao/Real-ESRGAN-ncnn-vulkan)。
|
| 19 |
+
|
| 20 |
+
Real-ESRGAN 的目标是开发出**实用的图像/视频修复算法**。<br>
|
| 21 |
+
我们在 ESRGAN 的基础上使用纯合成的数据来进行训练,以使其能被应用于实际的图片修复的场景(顾名思义:Real-ESRGAN)。
|
| 22 |
+
|
| 23 |
+
:art: Real-ESRGAN 需要,也很欢迎你的贡献,如新功能、模型、bug修复、建议、维护等等。详情可以查看[CONTRIBUTING.md](docs/CONTRIBUTING.md),所有的贡献者都会被列在[此处](README_CN.md#hugs-感谢)。
|
| 24 |
+
|
| 25 |
+
:milky_way: 感谢大家提供了很好的反馈。这些反馈会逐步更新在 [这个文档](docs/feedback.md)。
|
| 26 |
+
|
| 27 |
+
:question: 常见的问题可以在[FAQ.md](docs/FAQ.md)中找到答案。(好吧,现在还是空白的=-=||)
|
| 28 |
+
|
| 29 |
+
---
|
| 30 |
+
|
| 31 |
+
如果 Real-ESRGAN 对你有帮助,可以给本项目一个 Star :star: ,或者推荐给你的朋友们,谢谢!:blush: <br/>
|
| 32 |
+
其他推荐的项目:<br/>
|
| 33 |
+
:arrow_forward: [GFPGAN](https://github.com/TencentARC/GFPGAN): 实用的人脸复原算法 <br>
|
| 34 |
+
:arrow_forward: [BasicSR](https://github.com/xinntao/BasicSR): 开源的图像和视频工具箱<br>
|
| 35 |
+
:arrow_forward: [facexlib](https://github.com/xinntao/facexlib): 提供与人脸相关的工具箱<br>
|
| 36 |
+
:arrow_forward: [HandyView](https://github.com/xinntao/HandyView): 基于PyQt5的图片查看器,方便查看以及比较 <br>
|
| 37 |
+
|
| 38 |
+
---
|
| 39 |
+
|
| 40 |
+
<!---------------------------------- Updates --------------------------->
|
| 41 |
+
<details>
|
| 42 |
+
<summary>🚩<b>更新</b></summary>
|
| 43 |
+
|
| 44 |
+
- ✅ 更新动漫视频的小模型 **RealESRGAN AnimeVideo-v3**. 更多信息在 [anime video models](docs/anime_video_model.md) 和 [comparisons](docs/anime_comparisons.md)中.
|
| 45 |
+
- ✅ 添加了针对动漫视频的小模型, 更多信息在 [anime video models](docs/anime_video_model.md) 中.
|
| 46 |
+
- ✅ 添加了ncnn 实现:[Real-ESRGAN-ncnn-vulkan](https://github.com/xinntao/Real-ESRGAN-ncnn-vulkan).
|
| 47 |
+
- ✅ 添加了 [*RealESRGAN_x4plus_anime_6B.pth*](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth),对二次元图片进行了优化,并减少了model的大小。详情 以及 与[waifu2x](https://github.com/nihui/waifu2x-ncnn-vulkan)的对比请查看[**anime_model.md**](docs/anime_model.md)
|
| 48 |
+
- ✅支持用户在自己的数据上进行微调 (finetune):[详情](docs/Training.md#Finetune-Real-ESRGAN-on-your-own-dataset)
|
| 49 |
+
- ✅ 支持使用[GFPGAN](https://github.com/TencentARC/GFPGAN)**增强人脸**
|
| 50 |
+
- ✅ 通过[Gradio](https://github.com/gradio-app/gradio)添加到了[Huggingface Spaces](https://huggingface.co/spaces)(一个机器学习应用的在线平台):[Gradio在线版](https://huggingface.co/spaces/akhaliq/Real-ESRGAN)。感谢[@AK391](https://github.com/AK391)
|
| 51 |
+
- ✅ 支持任意比例的缩放:`--outscale`(实际上使用`LANCZOS4`来更进一步调整输出图像的尺寸)。添加了*RealESRGAN_x2plus.pth*模型
|
| 52 |
+
- ✅ [推断脚本](inference_realesrgan.py)支持: 1) 分块处理**tile**; 2) 带**alpha通道**的图像; 3) **灰色**图像; 4) **16-bit**图像.
|
| 53 |
+
- ✅ 训练代码已经发布,具体做法可查看:[Training.md](docs/Training.md)。
|
| 54 |
+
|
| 55 |
+
</details>
|
| 56 |
+
|
| 57 |
+
<!---------------------------------- Projects that use Real-ESRGAN --------------------------->
|
| 58 |
+
<details>
|
| 59 |
+
<summary>🧩<b>使用Real-ESRGAN的项目</b></summary>
|
| 60 |
+
|
| 61 |
+
👋 如果你开发/使用/集成了Real-ESRGAN, 欢迎联系我添加
|
| 62 |
+
|
| 63 |
+
- NCNN-Android: [RealSR-NCNN-Android](https://github.com/tumuyan/RealSR-NCNN-Android) by [tumuyan](https://github.com/tumuyan)
|
| 64 |
+
- VapourSynth: [vs-realesrgan](https://github.com/HolyWu/vs-realesrgan) by [HolyWu](https://github.com/HolyWu)
|
| 65 |
+
- NCNN: [Real-ESRGAN-ncnn-vulkan](https://github.com/xinntao/Real-ESRGAN-ncnn-vulkan)
|
| 66 |
+
|
| 67 |
+
**易用的图形界面**
|
| 68 |
+
|
| 69 |
+
- [Waifu2x-Extension-GUI](https://github.com/AaronFeng753/Waifu2x-Extension-GUI) by [AaronFeng753](https://github.com/AaronFeng753)
|
| 70 |
+
- [Squirrel-RIFE](https://github.com/Justin62628/Squirrel-RIFE) by [Justin62628](https://github.com/Justin62628)
|
| 71 |
+
- [Real-GUI](https://github.com/scifx/Real-GUI) by [scifx](https://github.com/scifx)
|
| 72 |
+
- [Real-ESRGAN_GUI](https://github.com/net2cn/Real-ESRGAN_GUI) by [net2cn](https://github.com/net2cn)
|
| 73 |
+
- [Real-ESRGAN-EGUI](https://github.com/WGzeyu/Real-ESRGAN-EGUI) by [WGzeyu](https://github.com/WGzeyu)
|
| 74 |
+
- [anime_upscaler](https://github.com/shangar21/anime_upscaler) by [shangar21](https://github.com/shangar21)
|
| 75 |
+
- [RealESRGAN-GUI](https://github.com/Baiyuetribe/paper2gui/blob/main/Video%20Super%20Resolution/RealESRGAN-GUI.md) by [Baiyuetribe](https://github.com/Baiyuetribe)
|
| 76 |
+
|
| 77 |
+
</details>
|
| 78 |
+
|
| 79 |
+
<details>
|
| 80 |
+
<summary>👀<b>Demo视频(B站)</b></summary>
|
| 81 |
+
|
| 82 |
+
- [大闹天宫片段](https://www.bilibili.com/video/BV1ja41117zb)
|
| 83 |
+
|
| 84 |
+
</details>
|
| 85 |
+
|
| 86 |
+
### :book: Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data
|
| 87 |
+
|
| 88 |
+
> [[论文](https://arxiv.org/abs/2107.10833)]   [项目主页]   [[YouTube 视频](https://www.youtube.com/watch?v=fxHWoDSSvSc)]   [[B站视频](https://www.bilibili.com/video/BV1H34y1m7sS/)]   [[Poster](https://xinntao.github.io/projects/RealESRGAN_src/RealESRGAN_poster.pdf)]   [[PPT](https://docs.google.com/presentation/d/1QtW6Iy8rm8rGLsJ0Ldti6kP-7Qyzy6XL/edit?usp=sharing&ouid=109799856763657548160&rtpof=true&sd=true)]<br>
|
| 89 |
+
> [Xintao Wang](https://xinntao.github.io/), Liangbin Xie, [Chao Dong](https://scholar.google.com.hk/citations?user=OSDCB0UAAAAJ), [Ying Shan](https://scholar.google.com/citations?user=4oXBp9UAAAAJ&hl=en) <br>
|
| 90 |
+
> Tencent ARC Lab; Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences
|
| 91 |
+
|
| 92 |
+
<p align="center">
|
| 93 |
+
<img src="assets/teaser.jpg">
|
| 94 |
+
</p>
|
| 95 |
+
|
| 96 |
+
---
|
| 97 |
+
|
| 98 |
+
我们提供了一套训练好的模型(*RealESRGAN_x4plus.pth*),可以进行4倍的超分辨率。<br>
|
| 99 |
+
**现在的 Real-ESRGAN 还是有几率失败的,因为现实生活的降质过程比较复杂。**<br>
|
| 100 |
+
而且,本项目对**人脸以及文字之类**的效果还不是太好,但是我们会持续进行优化的。<br>
|
| 101 |
+
|
| 102 |
+
Real-ESRGAN 将会被长期支持,我会在空闲的时间中持续维护更新。
|
| 103 |
+
|
| 104 |
+
这些是未来计划的几个新功能:
|
| 105 |
+
|
| 106 |
+
- [ ] 优化人脸
|
| 107 |
+
- [ ] 优化文字
|
| 108 |
+
- [x] 优化动画图像
|
| 109 |
+
- [ ] 支持更多的超分辨率比例
|
| 110 |
+
- [ ] 可调节的复原
|
| 111 |
+
|
| 112 |
+
如果你有好主意或需求,欢迎在 issue 或 discussion 中提出。<br/>
|
| 113 |
+
如果你有一些 Real-ESRGAN 中有问题的照片,你也可以在 issue 或者 discussion 中发出来。我会留意(但是不一定能解决:stuck_out_tongue:)。如果有必要的话,我还会专门开一页来记录那些有待解决的图像。
|
| 114 |
+
|
| 115 |
+
---
|
| 116 |
+
|
| 117 |
+
### 便携版(绿色版)可执行文件
|
| 118 |
+
|
| 119 |
+
你可以下载**支持Intel/AMD/Nvidia显卡**的绿色版exe文件: [Windows版](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesrgan-ncnn-vulkan-20220424-windows.zip) / [Linux版](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesrgan-ncnn-vulkan-20220424-ubuntu.zip) / [macOS版](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesrgan-ncnn-vulkan-20220424-macos.zip)。
|
| 120 |
+
|
| 121 |
+
绿色版指的是这些exe你可以直接运行(放U盘里拷走都没问题),因为里面已经有所需的文件和模型了。它不需要 CUDA 或者 PyTorch运行环境。<br>
|
| 122 |
+
|
| 123 |
+
你可以通过下面这个命令来运行(Windows版本的例子,更多信息请查看对应版本的README.md):
|
| 124 |
+
|
| 125 |
+
```bash
|
| 126 |
+
./realesrgan-ncnn-vulkan.exe -i 输入图像.jpg -o 输出图像.png -n 模型名字
|
| 127 |
+
```
|
| 128 |
+
|
| 129 |
+
我们提供了五种模型:
|
| 130 |
+
|
| 131 |
+
1. realesrgan-x4plus(默认)
|
| 132 |
+
2. realesrnet-x4plus
|
| 133 |
+
3. realesrgan-x4plus-anime(针对动漫插画图像优化,有更小的体积)
|
| 134 |
+
4. realesr-animevideov3 (针对动漫视频)
|
| 135 |
+
|
| 136 |
+
你可以通过`-n`参数来使用其他模型,例如`./realesrgan-ncnn-vulkan.exe -i 二次元图片.jpg -o 二刺螈图片.png -n realesrgan-x4plus-anime`
|
| 137 |
+
|
| 138 |
+
### 可执行文件的用法
|
| 139 |
+
|
| 140 |
+
1. 更多细节可以参考 [Real-ESRGAN-ncnn-vulkan](https://github.com/xinntao/Real-ESRGAN-ncnn-vulkan#computer-usages).
|
| 141 |
+
2. 注意:可执行文件并没有支持 python 脚本 `inference_realesrgan.py` 中所有的功能,比如 `outscale` 选项) .
|
| 142 |
+
|
| 143 |
+
```console
|
| 144 |
+
Usage: realesrgan-ncnn-vulkan.exe -i infile -o outfile [options]...
|
| 145 |
+
|
| 146 |
+
-h show this help
|
| 147 |
+
-i input-path input image path (jpg/png/webp) or directory
|
| 148 |
+
-o output-path output image path (jpg/png/webp) or directory
|
| 149 |
+
-s scale upscale ratio (can be 2, 3, 4. default=4)
|
| 150 |
+
-t tile-size tile size (>=32/0=auto, default=0) can be 0,0,0 for multi-gpu
|
| 151 |
+
-m model-path folder path to the pre-trained models. default=models
|
| 152 |
+
-n model-name model name (default=realesr-animevideov3, can be realesr-animevideov3 | realesrgan-x4plus | realesrgan-x4plus-anime | realesrnet-x4plus)
|
| 153 |
+
-g gpu-id gpu device to use (default=auto) can be 0,1,2 for multi-gpu
|
| 154 |
+
-j load:proc:save thread count for load/proc/save (default=1:2:2) can be 1:2,2,2:2 for multi-gpu
|
| 155 |
+
  -x                   enable tta mode
|
| 156 |
+
-f format output image format (jpg/png/webp, default=ext/png)
|
| 157 |
+
-v verbose output
|
| 158 |
+
```
|
| 159 |
+
|
| 160 |
+
由于这些exe文件会把图像分成几个板块,然后来分别进行处理,再合成导出,输出的图像可能会有一点割裂感(而且可能跟PyTorch的输出不太一样)
|
| 161 |
+
|
| 162 |
+
---
|
| 163 |
+
|
| 164 |
+
## :wrench: 依赖以及安装
|
| 165 |
+
|
| 166 |
+
- Python >= 3.7 (推荐使用[Anaconda](https://www.anaconda.com/download/#linux)或[Miniconda](https://docs.conda.io/en/latest/miniconda.html))
|
| 167 |
+
- [PyTorch >= 1.7](https://pytorch.org/)
|
| 168 |
+
|
| 169 |
+
#### 安装
|
| 170 |
+
|
| 171 |
+
1. 把项目克隆到本地
|
| 172 |
+
|
| 173 |
+
```bash
|
| 174 |
+
git clone https://github.com/xinntao/Real-ESRGAN.git
|
| 175 |
+
cd Real-ESRGAN
|
| 176 |
+
```
|
| 177 |
+
|
| 178 |
+
2. 安装各种依赖
|
| 179 |
+
|
| 180 |
+
```bash
|
| 181 |
+
# 安装 basicsr - https://github.com/xinntao/BasicSR
|
| 182 |
+
# 我们使用BasicSR来训练以及推断
|
| 183 |
+
pip install basicsr
|
| 184 |
+
# facexlib和gfpgan是用来增强人脸的
|
| 185 |
+
pip install facexlib
|
| 186 |
+
pip install gfpgan
|
| 187 |
+
pip install -r requirements.txt
|
| 188 |
+
python setup.py develop
|
| 189 |
+
```
|
| 190 |
+
|
| 191 |
+
## :zap: 快速上手
|
| 192 |
+
|
| 193 |
+
### 普通图片
|
| 194 |
+
|
| 195 |
+
下载我们训练好的模型: [RealESRGAN_x4plus.pth](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth)
|
| 196 |
+
|
| 197 |
+
```bash
|
| 198 |
+
wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P weights
|
| 199 |
+
```
|
| 200 |
+
|
| 201 |
+
推断!
|
| 202 |
+
|
| 203 |
+
```bash
|
| 204 |
+
python inference_realesrgan.py -n RealESRGAN_x4plus -i inputs --face_enhance
|
| 205 |
+
```
|
| 206 |
+
|
| 207 |
+
结果在`results`文件夹
|
| 208 |
+
|
| 209 |
+
### 动画图片
|
| 210 |
+
|
| 211 |
+
<p align="center">
|
| 212 |
+
<img src="https://raw.githubusercontent.com/xinntao/public-figures/master/Real-ESRGAN/cmp_realesrgan_anime_1.png">
|
| 213 |
+
</p>
|
| 214 |
+
|
| 215 |
+
训练好的模型: [RealESRGAN_x4plus_anime_6B](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth)<br>
|
| 216 |
+
有关[waifu2x](https://github.com/nihui/waifu2x-ncnn-vulkan)的更多信息和对比在[**anime_model.md**](docs/anime_model.md)中。
|
| 217 |
+
|
| 218 |
+
```bash
|
| 219 |
+
# 下载模型
|
| 220 |
+
wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth -P weights
|
| 221 |
+
# 推断
|
| 222 |
+
python inference_realesrgan.py -n RealESRGAN_x4plus_anime_6B -i inputs
|
| 223 |
+
```
|
| 224 |
+
|
| 225 |
+
结果在`results`文件夹
|
| 226 |
+
|
| 227 |
+
### Python 脚本的用法
|
| 228 |
+
|
| 229 |
+
1. 虽然你使用了 X4 模型,但是你可以 **输出任意尺寸比例的图片**,只要使用了 `outscale` 参数. 程序会进一步对模型的输出图像进行缩放。
|
| 230 |
+
|
| 231 |
+
```console
|
| 232 |
+
Usage: python inference_realesrgan.py -n RealESRGAN_x4plus -i infile -o outfile [options]...
|
| 233 |
+
|
| 234 |
+
A common command: python inference_realesrgan.py -n RealESRGAN_x4plus -i infile --outscale 3.5 --face_enhance
|
| 235 |
+
|
| 236 |
+
-h show this help
|
| 237 |
+
-i --input Input image or folder. Default: inputs
|
| 238 |
+
-o --output Output folder. Default: results
|
| 239 |
+
-n --model_name Model name. Default: RealESRGAN_x4plus
|
| 240 |
+
-s, --outscale The final upsampling scale of the image. Default: 4
|
| 241 |
+
--suffix Suffix of the restored image. Default: out
|
| 242 |
+
-t, --tile Tile size, 0 for no tile during testing. Default: 0
|
| 243 |
+
--face_enhance Whether to use GFPGAN to enhance face. Default: False
|
| 244 |
+
--fp32 Whether to use half precision during inference. Default: False
|
| 245 |
+
--ext Image extension. Options: auto | jpg | png, auto means using the same extension as inputs. Default: auto
|
| 246 |
+
```
|
| 247 |
+
|
| 248 |
+
## :european_castle: 模型库
|
| 249 |
+
|
| 250 |
+
请参见 [docs/model_zoo.md](docs/model_zoo.md)
|
| 251 |
+
|
| 252 |
+
## :computer: 训练,在你的数据上微调(Fine-tune)
|
| 253 |
+
|
| 254 |
+
这里有一份详细的指南:[Training.md](docs/Training.md).
|
| 255 |
+
|
| 256 |
+
## BibTeX 引用
|
| 257 |
+
|
| 258 |
+
@Article{wang2021realesrgan,
|
| 259 |
+
title={Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data},
|
| 260 |
+
author={Xintao Wang and Liangbin Xie and Chao Dong and Ying Shan},
|
| 261 |
+
journal={arXiv:2107.10833},
|
| 262 |
+
year={2021}
|
| 263 |
+
}
|
| 264 |
+
|
| 265 |
+
## :e-mail: 联系我们
|
| 266 |
+
|
| 267 |
+
如果你有任何问题,请通过 `xintao.wang@outlook.com` 或 `xintaowang@tencent.com` 联系我们。
|
| 268 |
+
|
| 269 |
+
## :hugs: 感谢
|
| 270 |
+
|
| 271 |
+
感谢所有的贡献者大大们~
|
| 272 |
+
|
| 273 |
+
- [AK391](https://github.com/AK391): 通过[Gradio](https://github.com/gradio-app/gradio)添加到了[Huggingface Spaces](https://huggingface.co/spaces)(一个机器学习应用的在线平台):[Gradio在线版](https://huggingface.co/spaces/akhaliq/Real-ESRGAN)。
|
| 274 |
+
- [Asiimoviet](https://github.com/Asiimoviet): 把 README.md 文档 翻译成了中文。
|
| 275 |
+
- [2ji3150](https://github.com/2ji3150): 感谢详尽并且富有价值的[反馈、建议](https://github.com/xinntao/Real-ESRGAN/issues/131).
|
| 276 |
+
- [Jared-02](https://github.com/Jared-02): 把 Training.md 文档 翻译成了中文。
|
Real-ESRGAN/cog.yaml
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This file is used for constructing replicate env
|
| 2 |
+
image: "r8.im/tencentarc/realesrgan"
|
| 3 |
+
|
| 4 |
+
build:
|
| 5 |
+
gpu: true
|
| 6 |
+
python_version: "3.8"
|
| 7 |
+
system_packages:
|
| 8 |
+
- "libgl1-mesa-glx"
|
| 9 |
+
- "libglib2.0-0"
|
| 10 |
+
python_packages:
|
| 11 |
+
- "torch==1.7.1"
|
| 12 |
+
- "torchvision==0.8.2"
|
| 13 |
+
- "numpy==1.21.1"
|
| 14 |
+
- "lmdb==1.2.1"
|
| 15 |
+
- "opencv-python==4.5.3.56"
|
| 16 |
+
- "PyYAML==5.4.1"
|
| 17 |
+
- "tqdm==4.62.2"
|
| 18 |
+
- "yapf==0.31.0"
|
| 19 |
+
- "basicsr==1.4.2"
|
| 20 |
+
- "facexlib==0.2.5"
|
| 21 |
+
|
| 22 |
+
predict: "cog_predict.py:Predictor"
|
Real-ESRGAN/cog_predict.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# flake8: noqa
|
| 2 |
+
# This file is used for deploying replicate models
|
| 3 |
+
# running: cog predict -i img=@inputs/00017_gray.png -i version='General - v3' -i scale=2 -i face_enhance=True -i tile=0
|
| 4 |
+
# push: cog push r8.im/xinntao/realesrgan
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
os.system('pip install gfpgan')
|
| 9 |
+
os.system('python setup.py develop')
|
| 10 |
+
|
| 11 |
+
import cv2
|
| 12 |
+
import shutil
|
| 13 |
+
import tempfile
|
| 14 |
+
import torch
|
| 15 |
+
from basicsr.archs.rrdbnet_arch import RRDBNet
|
| 16 |
+
from basicsr.archs.srvgg_arch import SRVGGNetCompact
|
| 17 |
+
|
| 18 |
+
from realesrgan.utils import RealESRGANer
|
| 19 |
+
|
| 20 |
+
try:
|
| 21 |
+
from cog import BasePredictor, Input, Path
|
| 22 |
+
from gfpgan import GFPGANer
|
| 23 |
+
except Exception:
|
| 24 |
+
print('please install cog and realesrgan package')
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class Predictor(BasePredictor):
    """Replicate (cog) predictor that upscales an image with Real-ESRGAN,
    optionally enhancing faces with GFPGAN.
    """

    def setup(self):
        """Create the output folder and download any missing model weights."""
        os.makedirs('output', exist_ok=True)
        # download weights
        if not os.path.exists('weights/realesr-general-x4v3.pth'):
            os.system(
                'wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth -P ./weights'
            )
        if not os.path.exists('weights/GFPGANv1.4.pth'):
            os.system('wget https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth -P ./weights')
        if not os.path.exists('weights/RealESRGAN_x4plus.pth'):
            os.system(
                'wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P ./weights'
            )
        if not os.path.exists('weights/RealESRGAN_x4plus_anime_6B.pth'):
            os.system(
                'wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth -P ./weights'
            )
        if not os.path.exists('weights/realesr-animevideov3.pth'):
            os.system(
                'wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth -P ./weights'
            )

    def choose_model(self, scale, version, tile=0):
        """Build self.upsampler (and self.face_enhancer) for the requested model version.

        Args:
            scale: final user-visible rescaling factor (passed to GFPGAN).
            version: one of the UI choice strings mapped to a weight file below.
            tile: tile size for tiled inference; 0 means no tiling.
        """
        half = True if torch.cuda.is_available() else False  # fp16 only when a GPU is present
        if version == 'General - RealESRGANplus':
            model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
            model_path = 'weights/RealESRGAN_x4plus.pth'
            self.upsampler = RealESRGANer(
                scale=4, model_path=model_path, model=model, tile=tile, tile_pad=10, pre_pad=0, half=half)
        elif version == 'General - v3':
            model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
            model_path = 'weights/realesr-general-x4v3.pth'
            self.upsampler = RealESRGANer(
                scale=4, model_path=model_path, model=model, tile=tile, tile_pad=10, pre_pad=0, half=half)
        elif version == 'Anime - anime6B':
            model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4)
            model_path = 'weights/RealESRGAN_x4plus_anime_6B.pth'
            self.upsampler = RealESRGANer(
                scale=4, model_path=model_path, model=model, tile=tile, tile_pad=10, pre_pad=0, half=half)
        elif version == 'AnimeVideo - v3':
            model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu')
            model_path = 'weights/realesr-animevideov3.pth'
            self.upsampler = RealESRGANer(
                scale=4, model_path=model_path, model=model, tile=tile, tile_pad=10, pre_pad=0, half=half)

        self.face_enhancer = GFPGANer(
            model_path='weights/GFPGANv1.4.pth',
            upscale=scale,
            arch='clean',
            channel_multiplier=2,
            bg_upsampler=self.upsampler)

    def predict(
        self,
        img: Path = Input(description='Input'),
        version: str = Input(
            description='RealESRGAN version. Please see [Readme] below for more descriptions',
            choices=['General - RealESRGANplus', 'General - v3', 'Anime - anime6B', 'AnimeVideo - v3'],
            default='General - v3'),
        scale: float = Input(description='Rescaling factor', default=2),
        face_enhance: bool = Input(
            description='Enhance faces with GFPGAN. Note that it does not work for anime images/vidoes', default=False),
        tile: int = Input(
            description=
            'Tile size. Default is 0, that is no tile. When encountering the out-of-GPU-memory issue, please specify it, e.g., 400 or 200',
            default=0)
    ) -> Path:
        """Upscale *img* and return the path of the written result image."""
        # Check for None BEFORE comparing: `None <= 100` raises TypeError in Python 3.
        if tile is None or tile <= 100:
            tile = 0
        print(f'img: {img}. version: {version}. scale: {scale}. face_enhance: {face_enhance}. tile: {tile}.')
        out_path = None  # bound up-front so the final return cannot raise NameError on early failure
        try:
            # keep the extension WITHOUT the leading dot so the output is 'out.png', not 'out..png'
            extension = os.path.splitext(os.path.basename(str(img)))[1][1:]
            img = cv2.imread(str(img), cv2.IMREAD_UNCHANGED)
            if len(img.shape) == 3 and img.shape[2] == 4:
                img_mode = 'RGBA'
            elif len(img.shape) == 2:
                img_mode = None
                img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
            else:
                img_mode = None

            h, w = img.shape[0:2]
            if h < 300:
                # pre-upscale tiny inputs x2 with Lanczos before the model runs
                img = cv2.resize(img, (w * 2, h * 2), interpolation=cv2.INTER_LANCZOS4)

            self.choose_model(scale, version, tile)

            try:
                if face_enhance:
                    _, _, output = self.face_enhancer.enhance(
                        img, has_aligned=False, only_center_face=False, paste_back=True)
                else:
                    output, _ = self.upsampler.enhance(img, outscale=scale)
            except RuntimeError as error:
                print('Error', error)
                print('If you encounter CUDA out of memory, try to set "tile" to a smaller size, e.g., 400.')

            if img_mode == 'RGBA':  # RGBA images should be saved in png format
                extension = 'png'
            out_path = Path(tempfile.mkdtemp()) / f'out.{extension}'
            cv2.imwrite(str(out_path), output)
        except Exception as error:
            print('global exception: ', error)
        finally:
            clean_folder('output')
        # moved out of `finally` so a `return` there no longer masks unexpected exits
        return out_path
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def clean_folder(folder):
    """Delete everything inside *folder* while keeping the folder itself.

    Files and symlinks are unlinked; sub-directories are removed recursively.
    Entries that cannot be deleted are reported and skipped.
    """
    for entry_name in os.listdir(folder):
        entry_path = os.path.join(folder, entry_name)
        try:
            if os.path.isfile(entry_path) or os.path.islink(entry_path):
                os.unlink(entry_path)
            elif os.path.isdir(entry_path):
                shutil.rmtree(entry_path)
        except Exception as err:
            print(f'Failed to delete {entry_path}. Reason: {err}')
|
Real-ESRGAN/experiments/pretrained_models/README.md
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Put downloaded pre-trained models here
|
Real-ESRGAN/gfpgan/weights/detection_Resnet50_Final.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d1de9c2944f2ccddca5f5e010ea5ae64a39845a86311af6fdf30841b0a5a16d
|
| 3 |
+
size 109497761
|
Real-ESRGAN/gfpgan/weights/parsing_parsenet.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d558d8d0e42c20224f13cf5a29c79eba2d59913419f945545d8cf7b72920de2
|
| 3 |
+
size 85331193
|
Real-ESRGAN/inference_realesrgan.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import cv2
|
| 3 |
+
import glob
|
| 4 |
+
import os
|
| 5 |
+
from basicsr.archs.rrdbnet_arch import RRDBNet
|
| 6 |
+
from basicsr.utils.download_util import load_file_from_url
|
| 7 |
+
|
| 8 |
+
from realesrgan import RealESRGANer
|
| 9 |
+
from realesrgan.archs.srvgg_arch import SRVGGNetCompact
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def main():
    """Inference demo for Real-ESRGAN.

    Parses CLI arguments, selects (and downloads, if missing) the requested
    model weights, then restores every input image and writes the results to
    the output folder.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', type=str, default='inputs', help='Input image or folder')
    parser.add_argument(
        '-n',
        '--model_name',
        type=str,
        default='RealESRGAN_x4plus',
        help=('Model names: RealESRGAN_x4plus | RealESRNet_x4plus | RealESRGAN_x4plus_anime_6B | RealESRGAN_x2plus | '
              'realesr-animevideov3 | realesr-general-x4v3'))
    parser.add_argument('-o', '--output', type=str, default='results', help='Output folder')
    parser.add_argument(
        '-dn',
        '--denoise_strength',
        type=float,
        default=0.5,
        help=('Denoise strength. 0 for weak denoise (keep noise), 1 for strong denoise ability. '
              'Only used for the realesr-general-x4v3 model'))
    parser.add_argument('-s', '--outscale', type=float, default=4, help='The final upsampling scale of the image')
    parser.add_argument(
        '--model_path', type=str, default=None, help='[Option] Model path. Usually, you do not need to specify it')
    parser.add_argument('--suffix', type=str, default='out', help='Suffix of the restored image')
    parser.add_argument('-t', '--tile', type=int, default=0, help='Tile size, 0 for no tile during testing')
    parser.add_argument('--tile_pad', type=int, default=10, help='Tile padding')
    parser.add_argument('--pre_pad', type=int, default=0, help='Pre padding size at each border')
    parser.add_argument('--face_enhance', action='store_true', help='Use GFPGAN to enhance face')
    parser.add_argument(
        '--fp32', action='store_true', help='Use fp32 precision during inference. Default: fp16 (half precision).')
    parser.add_argument(
        '--alpha_upsampler',
        type=str,
        default='realesrgan',
        help='The upsampler for the alpha channels. Options: realesrgan | bicubic')
    parser.add_argument(
        '--ext',
        type=str,
        default='auto',
        help='Image extension. Options: auto | jpg | png, auto means using the same extension as inputs')
    parser.add_argument(
        '-g', '--gpu-id', type=int, default=None, help='gpu device to use (default=None) can be 0,1,2 for multi-gpu')

    args = parser.parse_args()

    # determine models according to model names
    args.model_name = args.model_name.split('.')[0]
    if args.model_name == 'RealESRGAN_x4plus':  # x4 RRDBNet model
        model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
        netscale = 4
        file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth']
    elif args.model_name == 'RealESRNet_x4plus':  # x4 RRDBNet model
        model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
        netscale = 4
        file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.1/RealESRNet_x4plus.pth']
    elif args.model_name == 'RealESRGAN_x4plus_anime_6B':  # x4 RRDBNet model with 6 blocks
        model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4)
        netscale = 4
        file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth']
    elif args.model_name == 'RealESRGAN_x2plus':  # x2 RRDBNet model
        model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
        netscale = 2
        file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth']
    elif args.model_name == 'realesr-animevideov3':  # x4 VGG-style model (XS size)
        model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu')
        netscale = 4
        file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth']
    elif args.model_name == 'realesr-general-x4v3':  # x4 VGG-style model (S size)
        model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
        netscale = 4
        file_url = [
            'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth',
            'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth'
        ]
    else:
        # fail fast with a clear message instead of a NameError on `model` below
        raise ValueError(f'Unknown model name: {args.model_name}')

    # determine model paths
    if args.model_path is not None:
        model_path = args.model_path
    else:
        model_path = os.path.join('weights', args.model_name + '.pth')
        if not os.path.isfile(model_path):
            ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
            for url in file_url:
                # model_path will be updated
                model_path = load_file_from_url(
                    url=url, model_dir=os.path.join(ROOT_DIR, 'weights'), progress=True, file_name=None)

    # use dni (deep network interpolation) to control the denoise strength
    dni_weight = None
    if args.model_name == 'realesr-general-x4v3' and args.denoise_strength != 1:
        wdn_model_path = model_path.replace('realesr-general-x4v3', 'realesr-general-wdn-x4v3')
        model_path = [model_path, wdn_model_path]
        dni_weight = [args.denoise_strength, 1 - args.denoise_strength]

    # restorer
    upsampler = RealESRGANer(
        scale=netscale,
        model_path=model_path,
        dni_weight=dni_weight,
        model=model,
        tile=args.tile,
        tile_pad=args.tile_pad,
        pre_pad=args.pre_pad,
        half=not args.fp32,
        gpu_id=args.gpu_id)

    if args.face_enhance:  # Use GFPGAN for face enhancement
        from gfpgan import GFPGANer
        face_enhancer = GFPGANer(
            model_path='https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth',
            upscale=args.outscale,
            arch='clean',
            channel_multiplier=2,
            bg_upsampler=upsampler)
    os.makedirs(args.output, exist_ok=True)

    if os.path.isfile(args.input):
        paths = [args.input]
    else:
        paths = sorted(glob.glob(os.path.join(args.input, '*')))

    for idx, path in enumerate(paths):
        imgname, extension = os.path.splitext(os.path.basename(path))
        print('Testing', idx, imgname)

        img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        if img is None:
            # cv2.imread returns None for unreadable/non-image files; skip instead of crashing
            print(f'Warning: cannot read {path}, skip.')
            continue
        if len(img.shape) == 3 and img.shape[2] == 4:
            img_mode = 'RGBA'
        else:
            img_mode = None

        try:
            if args.face_enhance:
                _, _, output = face_enhancer.enhance(img, has_aligned=False, only_center_face=False, paste_back=True)
            else:
                output, _ = upsampler.enhance(img, outscale=args.outscale)
        except RuntimeError as error:
            print('Error', error)
            print('If you encounter CUDA out of memory, try to set --tile with a smaller number.')
        else:
            if args.ext == 'auto':
                extension = extension[1:]
            else:
                extension = args.ext
            if img_mode == 'RGBA':  # RGBA images should be saved in png format
                extension = 'png'
            if args.suffix == '':
                save_path = os.path.join(args.output, f'{imgname}.{extension}')
            else:
                save_path = os.path.join(args.output, f'{imgname}_{args.suffix}.{extension}')
            cv2.imwrite(save_path, output)


if __name__ == '__main__':
    main()
|
Real-ESRGAN/inference_realesrgan_video.py
ADDED
|
@@ -0,0 +1,398 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import cv2
|
| 3 |
+
import glob
|
| 4 |
+
import mimetypes
|
| 5 |
+
import numpy as np
|
| 6 |
+
import os
|
| 7 |
+
import shutil
|
| 8 |
+
import subprocess
|
| 9 |
+
import torch
|
| 10 |
+
from basicsr.archs.rrdbnet_arch import RRDBNet
|
| 11 |
+
from basicsr.utils.download_util import load_file_from_url
|
| 12 |
+
from os import path as osp
|
| 13 |
+
from tqdm import tqdm
|
| 14 |
+
|
| 15 |
+
from realesrgan import RealESRGANer
|
| 16 |
+
from realesrgan.archs.srvgg_arch import SRVGGNetCompact
|
| 17 |
+
|
| 18 |
+
try:
|
| 19 |
+
import ffmpeg
|
| 20 |
+
except ImportError:
|
| 21 |
+
import pip
|
| 22 |
+
pip.main(['install', '--user', 'ffmpeg-python'])
|
| 23 |
+
import ffmpeg
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def get_video_meta_info(video_path):
    """Probe *video_path* with ffmpeg and return its metadata.

    Returns a dict with keys: width, height, fps (float), audio (an ffmpeg
    audio input node, or None when the file has no audio stream) and
    nb_frames (int).
    """
    ret = {}
    probe = ffmpeg.probe(video_path)
    video_streams = [stream for stream in probe['streams'] if stream['codec_type'] == 'video']
    has_audio = any(stream['codec_type'] == 'audio' for stream in probe['streams'])
    ret['width'] = video_streams[0]['width']
    ret['height'] = video_streams[0]['height']
    # avg_frame_rate is a rational string such as '30000/1001'; parse it
    # explicitly instead of calling eval() on external probe output.
    num, _, den = video_streams[0]['avg_frame_rate'].partition('/')
    ret['fps'] = float(num) / float(den) if den else float(num)
    ret['audio'] = ffmpeg.input(video_path).audio if has_audio else None
    ret['nb_frames'] = int(video_streams[0]['nb_frames'])
    return ret
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def get_sub_video(args, num_process, process_idx):
    """Cut the input video into the `process_idx`-th of `num_process` equal-duration parts.

    Returns the path of the extracted sub-video, or the original input path
    when only one process is used (no split needed).
    """
    if num_process == 1:
        return args.input
    meta = get_video_meta_info(args.input)
    duration = int(meta['nb_frames'] / meta['fps'])
    part_time = duration // num_process
    print(f'duration: {duration}, part_time: {part_time}')
    os.makedirs(osp.join(args.output, f'{args.video_name}_inp_tmp_videos'), exist_ok=True)
    out_path = osp.join(args.output, f'{args.video_name}_inp_tmp_videos', f'{process_idx:03d}.mp4')
    # NOTE(review): the list is joined into a single shell string below, so the
    # multi-token items ('-i <input>', '-to ...') and the empty string for the
    # last part work -- but args.input is interpolated unquoted, so paths with
    # spaces or shell metacharacters will break (and are shell-injectable).
    # Consider shell=False with a flat argument list.
    cmd = [
        args.ffmpeg_bin, f'-i {args.input}', '-ss', f'{part_time * process_idx}',
        f'-to {part_time * (process_idx + 1)}' if process_idx != num_process - 1 else '', '-async 1', out_path, '-y'
    ]
    print(' '.join(cmd))
    subprocess.call(' '.join(cmd), shell=True)
    return out_path
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
class Reader:
    """Unified frame reader over a video file, a single image, or a folder of images.

    For video inputs, frames are streamed from an ffmpeg subprocess as raw
    BGR24 bytes; for image/folder inputs, frames are read with cv2 one path
    at a time. `get_frame` returns None when the source is exhausted.
    """

    def __init__(self, args, total_workers=1, worker_idx=0):
        self.args = args
        # classify the input by MIME type; anything unguessable is treated as a folder
        input_type = mimetypes.guess_type(args.input)[0]
        self.input_type = 'folder' if input_type is None else input_type
        self.paths = []  # for image&folder type
        self.audio = None
        self.input_fps = None
        if self.input_type.startswith('video'):
            # each worker reads its own time-sliced sub-video
            video_path = get_sub_video(args, total_workers, worker_idx)
            self.stream_reader = (
                ffmpeg.input(video_path).output('pipe:', format='rawvideo', pix_fmt='bgr24',
                                                loglevel='error').run_async(
                                                    pipe_stdin=True, pipe_stdout=True, cmd=args.ffmpeg_bin))
            meta = get_video_meta_info(video_path)
            self.width = meta['width']
            self.height = meta['height']
            self.input_fps = meta['fps']
            self.audio = meta['audio']
            self.nb_frames = meta['nb_frames']

        else:
            if self.input_type.startswith('image'):
                self.paths = [args.input]
            else:
                # split the folder's frames evenly across workers
                paths = sorted(glob.glob(os.path.join(args.input, '*')))
                tot_frames = len(paths)
                num_frame_per_worker = tot_frames // total_workers + (1 if tot_frames % total_workers else 0)
                self.paths = paths[num_frame_per_worker * worker_idx:num_frame_per_worker * (worker_idx + 1)]

            self.nb_frames = len(self.paths)
            assert self.nb_frames > 0, 'empty folder'
            from PIL import Image
            tmp_img = Image.open(self.paths[0])
            self.width, self.height = tmp_img.size
        self.idx = 0

    def get_resolution(self):
        """Return (height, width) of the source frames."""
        return self.height, self.width

    def get_fps(self):
        """Return output fps: user-specified > source video fps > 24 fallback."""
        if self.args.fps is not None:
            return self.args.fps
        elif self.input_fps is not None:
            return self.input_fps
        return 24

    def get_audio(self):
        """Return the ffmpeg audio node of the source, or None when absent."""
        return self.audio

    def __len__(self):
        return self.nb_frames

    def get_frame_from_stream(self):
        """Read one raw BGR24 frame from the ffmpeg pipe; None at end of stream."""
        img_bytes = self.stream_reader.stdout.read(self.width * self.height * 3)  # 3 bytes for one pixel
        if not img_bytes:
            return None
        img = np.frombuffer(img_bytes, np.uint8).reshape([self.height, self.width, 3])
        return img

    def get_frame_from_list(self):
        """Read the next image from the path list; None when exhausted."""
        if self.idx >= self.nb_frames:
            return None
        img = cv2.imread(self.paths[self.idx])
        self.idx += 1
        return img

    def get_frame(self):
        """Return the next frame (BGR ndarray) or None when the input is exhausted."""
        if self.input_type.startswith('video'):
            return self.get_frame_from_stream()
        else:
            return self.get_frame_from_list()

    def close(self):
        """Shut down the ffmpeg subprocess for video inputs (no-op otherwise)."""
        if self.input_type.startswith('video'):
            self.stream_reader.stdin.close()
            self.stream_reader.wait()
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
class Writer:
    """Pipe enhanced BGR24 frames into an ffmpeg subprocess that encodes them
    to H.264 (yuv420p), copying the source audio stream when one exists.
    """

    def __init__(self, args, audio, height, width, video_save_path, fps):
        # output resolution after Real-ESRGAN upscaling
        out_width, out_height = int(width * args.outscale), int(height * args.outscale)
        if out_height > 2160:
            print('You are generating video that is larger than 4K, which will be very slow due to IO speed.',
                  'We highly recommend to decrease the outscale(aka, -s).')

        if audio is not None:
            # mux the (untouched) source audio alongside the re-encoded video
            self.stream_writer = (
                ffmpeg.input('pipe:', format='rawvideo', pix_fmt='bgr24', s=f'{out_width}x{out_height}',
                             framerate=fps).output(
                                 audio,
                                 video_save_path,
                                 pix_fmt='yuv420p',
                                 vcodec='libx264',
                                 loglevel='error',
                                 acodec='copy').overwrite_output().run_async(
                                     pipe_stdin=True, pipe_stdout=True, cmd=args.ffmpeg_bin))
        else:
            self.stream_writer = (
                ffmpeg.input('pipe:', format='rawvideo', pix_fmt='bgr24', s=f'{out_width}x{out_height}',
                             framerate=fps).output(
                                 video_save_path, pix_fmt='yuv420p', vcodec='libx264',
                                 loglevel='error').overwrite_output().run_async(
                                     pipe_stdin=True, pipe_stdout=True, cmd=args.ffmpeg_bin))

    def write_frame(self, frame):
        """Send one frame to ffmpeg's stdin as raw uint8 bytes."""
        frame = frame.astype(np.uint8).tobytes()
        self.stream_writer.stdin.write(frame)

    def close(self):
        """Close ffmpeg's stdin (signals end-of-stream) and wait for it to finish."""
        self.stream_writer.stdin.close()
        self.stream_writer.wait()
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
def inference_video(args, video_save_path, device=None, total_workers=1, worker_idx=0):
    """Run Real-ESRGAN (optionally with GFPGAN face enhancement) over one
    video / image sequence and write the enhanced result to *video_save_path*.

    Args:
        args: parsed CLI namespace (model_name, tile, outscale, ...).
        video_save_path: output path of the encoded video.
        device: torch device for this worker (None -> default device).
        total_workers / worker_idx: used to shard the input across processes.
    """
    # ---------------------- determine models according to model names ---------------------- #
    args.model_name = args.model_name.split('.pth')[0]
    if args.model_name == 'RealESRGAN_x4plus':  # x4 RRDBNet model
        model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
        netscale = 4
        file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth']
    elif args.model_name == 'RealESRNet_x4plus':  # x4 RRDBNet model
        model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
        netscale = 4
        file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.1/RealESRNet_x4plus.pth']
    elif args.model_name == 'RealESRGAN_x4plus_anime_6B':  # x4 RRDBNet model with 6 blocks
        model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4)
        netscale = 4
        file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth']
    elif args.model_name == 'RealESRGAN_x2plus':  # x2 RRDBNet model
        model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
        netscale = 2
        file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth']
    elif args.model_name == 'realesr-animevideov3':  # x4 VGG-style model (XS size)
        model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu')
        netscale = 4
        file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth']
    elif args.model_name == 'realesr-general-x4v3':  # x4 VGG-style model (S size)
        model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
        netscale = 4
        file_url = [
            'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth',
            'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth'
        ]

    # ---------------------- determine model paths ---------------------- #
    model_path = os.path.join('weights', args.model_name + '.pth')
    if not os.path.isfile(model_path):
        ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
        for url in file_url:
            # model_path will be updated
            model_path = load_file_from_url(
                url=url, model_dir=os.path.join(ROOT_DIR, 'weights'), progress=True, file_name=None)

    # use dni (deep network interpolation) to control the denoise strength
    dni_weight = None
    if args.model_name == 'realesr-general-x4v3' and args.denoise_strength != 1:
        wdn_model_path = model_path.replace('realesr-general-x4v3', 'realesr-general-wdn-x4v3')
        model_path = [model_path, wdn_model_path]
        dni_weight = [args.denoise_strength, 1 - args.denoise_strength]

    # restorer
    upsampler = RealESRGANer(
        scale=netscale,
        model_path=model_path,
        dni_weight=dni_weight,
        model=model,
        tile=args.tile,
        tile_pad=args.tile_pad,
        pre_pad=args.pre_pad,
        half=not args.fp32,
        device=device,
    )

    if 'anime' in args.model_name and args.face_enhance:
        print('face_enhance is not supported in anime models, we turned this option off for you. '
              'if you insist on turning it on, please manually comment the relevant lines of code.')
        args.face_enhance = False

    if args.face_enhance:  # Use GFPGAN for face enhancement
        from gfpgan import GFPGANer
        face_enhancer = GFPGANer(
            model_path='https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth',
            upscale=args.outscale,
            arch='clean',
            channel_multiplier=2,
            bg_upsampler=upsampler)  # TODO support custom device
    else:
        face_enhancer = None

    # read frames in, enhance, and stream them back out through ffmpeg
    reader = Reader(args, total_workers, worker_idx)
    audio = reader.get_audio()
    height, width = reader.get_resolution()
    fps = reader.get_fps()
    writer = Writer(args, audio, height, width, video_save_path, fps)

    pbar = tqdm(total=len(reader), unit='frame', desc='inference')
    while True:
        img = reader.get_frame()
        if img is None:
            # source exhausted
            break

        try:
            if args.face_enhance:
                _, _, output = face_enhancer.enhance(img, has_aligned=False, only_center_face=False, paste_back=True)
            else:
                output, _ = upsampler.enhance(img, outscale=args.outscale)
        except RuntimeError as error:
            print('Error', error)
            print('If you encounter CUDA out of memory, try to set --tile with a smaller number.')
        else:
            # only write the frame when enhancement succeeded
            writer.write_frame(output)

        torch.cuda.synchronize(device)
        pbar.update(1)

    reader.close()
    writer.close()
|
| 277 |
+
|
| 278 |
+
|
| 279 |
+
def run(args):
    """Upscale the input video, distributing work across GPUs/processes.

    With a single worker the video is processed inline via ``inference_video``.
    Otherwise the clip is split among ``num_gpus * num_process_per_gpu``
    workers, each writing a temporary sub-video which is finally concatenated
    with ffmpeg (stream copy). The interface (a parsed ``args`` namespace with
    ``input``/``output``/``suffix``/... attributes) is unchanged.
    """
    args.video_name = osp.splitext(os.path.basename(args.input))[0]
    video_save_path = osp.join(args.output, f'{args.video_name}_{args.suffix}.mp4')

    if args.extract_frame_first:
        tmp_frames_folder = osp.join(args.output, f'{args.video_name}_inp_tmp_frames')
        os.makedirs(tmp_frames_folder, exist_ok=True)
        # Use an argument list (no shell) instead of os.system so that paths
        # containing spaces or shell metacharacters cannot break or inject
        # into the command; matches the subprocess.call style used below.
        subprocess.call([
            args.ffmpeg_bin, '-i', args.input, '-qscale:v', '1', '-qmin', '1', '-qmax', '1', '-vsync', '0',
            osp.join(tmp_frames_folder, 'frame%08d.png')
        ])
        args.input = tmp_frames_folder

    num_gpus = torch.cuda.device_count()
    num_process = num_gpus * args.num_process_per_gpu
    if num_process == 1:
        inference_video(args, video_save_path)
        return

    # 'spawn' is required to use CUDA inside worker processes.
    ctx = torch.multiprocessing.get_context('spawn')
    pool = ctx.Pool(num_process)
    os.makedirs(osp.join(args.output, f'{args.video_name}_out_tmp_videos'), exist_ok=True)
    pbar = tqdm(total=num_process, unit='sub_video', desc='inference')
    for i in range(num_process):
        sub_video_save_path = osp.join(args.output, f'{args.video_name}_out_tmp_videos', f'{i:03d}.mp4')
        pool.apply_async(
            inference_video,
            args=(args, sub_video_save_path, torch.device(i % num_gpus), num_process, i),
            callback=lambda arg: pbar.update(1))
    pool.close()
    pool.join()

    # Combine sub videos: write the concat list, then let ffmpeg stream-copy
    # them into the final output. With `-safe 0` the relative entries are
    # resolved against the directory containing the list file (args.output).
    vidlist_path = osp.join(args.output, f'{args.video_name}_vidlist.txt')
    with open(vidlist_path, 'w') as f:
        for i in range(num_process):
            f.write(f"file '{args.video_name}_out_tmp_videos/{i:03d}.mp4'\n")

    cmd = [
        args.ffmpeg_bin, '-f', 'concat', '-safe', '0', '-i', vidlist_path, '-c',
        'copy', f'{video_save_path}'
    ]
    print(' '.join(cmd))
    subprocess.call(cmd)
    shutil.rmtree(osp.join(args.output, f'{args.video_name}_out_tmp_videos'))
    # NOTE(review): '_inp_tmp_videos' is presumably created by an upstream
    # splitting step not visible here; remove it only if it exists.
    if osp.exists(osp.join(args.output, f'{args.video_name}_inp_tmp_videos')):
        shutil.rmtree(osp.join(args.output, f'{args.video_name}_inp_tmp_videos'))
    os.remove(vidlist_path)
+
def main():
    """Inference demo for Real-ESRGAN.

    It is mainly for restoring anime videos: parse the CLI options, normalize
    the input path, optionally remux flv to mp4 and enable frame extraction,
    then hand off to :func:`run` and clean up temporary frames afterwards.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', type=str, default='inputs', help='Input video, image or folder')
    parser.add_argument(
        '-n',
        '--model_name',
        type=str,
        default='realesr-animevideov3',
        # Fixed: the original concatenation produced '...x4v3Default:realesr-...'
        # with no separator in the rendered help text.
        help=('Model names: realesr-animevideov3 | RealESRGAN_x4plus_anime_6B | RealESRGAN_x4plus | RealESRNet_x4plus |'
              ' RealESRGAN_x2plus | realesr-general-x4v3. '
              'Default: realesr-animevideov3'))
    parser.add_argument('-o', '--output', type=str, default='results', help='Output folder')
    parser.add_argument(
        '-dn',
        '--denoise_strength',
        type=float,
        default=0.5,
        help=('Denoise strength. 0 for weak denoise (keep noise), 1 for strong denoise ability. '
              'Only used for the realesr-general-x4v3 model'))
    parser.add_argument('-s', '--outscale', type=float, default=4, help='The final upsampling scale of the image')
    parser.add_argument('--suffix', type=str, default='out', help='Suffix of the restored video')
    parser.add_argument('-t', '--tile', type=int, default=0, help='Tile size, 0 for no tile during testing')
    parser.add_argument('--tile_pad', type=int, default=10, help='Tile padding')
    parser.add_argument('--pre_pad', type=int, default=0, help='Pre padding size at each border')
    parser.add_argument('--face_enhance', action='store_true', help='Use GFPGAN to enhance face')
    parser.add_argument(
        '--fp32', action='store_true', help='Use fp32 precision during inference. Default: fp16 (half precision).')
    parser.add_argument('--fps', type=float, default=None, help='FPS of the output video')
    parser.add_argument('--ffmpeg_bin', type=str, default='ffmpeg', help='The path to ffmpeg')
    parser.add_argument('--extract_frame_first', action='store_true')
    parser.add_argument('--num_process_per_gpu', type=int, default=1)

    parser.add_argument(
        '--alpha_upsampler',
        type=str,
        default='realesrgan',
        help='The upsampler for the alpha channels. Options: realesrgan | bicubic')
    parser.add_argument(
        '--ext',
        type=str,
        default='auto',
        help='Image extension. Options: auto | jpg | png, auto means using the same extension as inputs')
    args = parser.parse_args()

    # Strip a trailing path separator so basename/splitext behave as expected.
    args.input = args.input.rstrip('/').rstrip('\\')
    os.makedirs(args.output, exist_ok=True)

    # Guess the mime type once instead of calling mimetypes.guess_type twice.
    mime_type = mimetypes.guess_type(args.input)[0]
    is_video = mime_type is not None and mime_type.startswith('video')

    if is_video and args.input.endswith('.flv'):
        # Remux flv to mp4 (stream copy). Slice off the checked '.flv' suffix
        # rather than str.replace, which would rewrite every occurrence in the
        # path; use an argument list (no shell) so odd paths are safe.
        mp4_path = args.input[:-len('.flv')] + '.mp4'
        subprocess.call([args.ffmpeg_bin, '-i', args.input, '-codec', 'copy', mp4_path])
        args.input = mp4_path

    if args.extract_frame_first and not is_video:
        # Frame extraction only makes sense for video inputs.
        args.extract_frame_first = False

    run(args)

    if args.extract_frame_first:
        # Remove the temporary frames folder created inside run().
        tmp_frames_folder = osp.join(args.output, f'{args.video_name}_inp_tmp_frames')
        shutil.rmtree(tmp_frames_folder)
| 397 |
+
# Script entry point: run only when executed directly, not when imported.
if __name__ == '__main__':
    main()
|
Real-ESRGAN/inputs/00003.png
ADDED
|
Real-ESRGAN/inputs/00017_gray.png
ADDED
|
Real-ESRGAN/inputs/0014.jpg
ADDED
|
Real-ESRGAN/inputs/0030.jpg
ADDED
|
Real-ESRGAN/inputs/ADE_val_00000114.jpg
ADDED
|
Real-ESRGAN/inputs/OST_009.png
ADDED
|
Real-ESRGAN/inputs/children-alpha.png
ADDED
|
Real-ESRGAN/inputs/tree_alpha_16bit.png
ADDED
|
Real-ESRGAN/inputs/video/onepiece_demo.mp4
ADDED
|
Binary file (593 kB). View file
|
|
|
Real-ESRGAN/inputs/wolf_gray.jpg
ADDED
|
Real-ESRGAN/options/finetune_realesrgan_x4plus.yml
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# general settings
|
| 2 |
+
name: finetune_RealESRGANx4plus_400k
|
| 3 |
+
model_type: RealESRGANModel
|
| 4 |
+
scale: 4
|
| 5 |
+
num_gpu: auto
|
| 6 |
+
manual_seed: 0
|
| 7 |
+
|
| 8 |
+
# ----------------- options for synthesizing training data in RealESRGANModel ----------------- #
|
| 9 |
+
# USM the ground-truth
|
| 10 |
+
l1_gt_usm: True
|
| 11 |
+
percep_gt_usm: True
|
| 12 |
+
gan_gt_usm: False
|
| 13 |
+
|
| 14 |
+
# the first degradation process
|
| 15 |
+
resize_prob: [0.2, 0.7, 0.1] # up, down, keep
|
| 16 |
+
resize_range: [0.15, 1.5]
|
| 17 |
+
gaussian_noise_prob: 0.5
|
| 18 |
+
noise_range: [1, 30]
|
| 19 |
+
poisson_scale_range: [0.05, 3]
|
| 20 |
+
gray_noise_prob: 0.4
|
| 21 |
+
jpeg_range: [30, 95]
|
| 22 |
+
|
| 23 |
+
# the second degradation process
|
| 24 |
+
second_blur_prob: 0.8
|
| 25 |
+
resize_prob2: [0.3, 0.4, 0.3] # up, down, keep
|
| 26 |
+
resize_range2: [0.3, 1.2]
|
| 27 |
+
gaussian_noise_prob2: 0.5
|
| 28 |
+
noise_range2: [1, 25]
|
| 29 |
+
poisson_scale_range2: [0.05, 2.5]
|
| 30 |
+
gray_noise_prob2: 0.4
|
| 31 |
+
jpeg_range2: [30, 95]
|
| 32 |
+
|
| 33 |
+
gt_size: 256
|
| 34 |
+
queue_size: 180
|
| 35 |
+
|
| 36 |
+
# dataset and data loader settings
|
| 37 |
+
datasets:
|
| 38 |
+
train:
|
| 39 |
+
name: DF2K+OST
|
| 40 |
+
type: RealESRGANDataset
|
| 41 |
+
dataroot_gt: datasets/DF2K
|
| 42 |
+
meta_info: datasets/DF2K/meta_info/meta_info_DF2Kmultiscale+OST_sub.txt
|
| 43 |
+
io_backend:
|
| 44 |
+
type: disk
|
| 45 |
+
|
| 46 |
+
blur_kernel_size: 21
|
| 47 |
+
kernel_list: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
|
| 48 |
+
kernel_prob: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
|
| 49 |
+
sinc_prob: 0.1
|
| 50 |
+
blur_sigma: [0.2, 3]
|
| 51 |
+
betag_range: [0.5, 4]
|
| 52 |
+
betap_range: [1, 2]
|
| 53 |
+
|
| 54 |
+
blur_kernel_size2: 21
|
| 55 |
+
kernel_list2: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
|
| 56 |
+
kernel_prob2: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
|
| 57 |
+
sinc_prob2: 0.1
|
| 58 |
+
blur_sigma2: [0.2, 1.5]
|
| 59 |
+
betag_range2: [0.5, 4]
|
| 60 |
+
betap_range2: [1, 2]
|
| 61 |
+
|
| 62 |
+
final_sinc_prob: 0.8
|
| 63 |
+
|
| 64 |
+
gt_size: 256
|
| 65 |
+
use_hflip: True
|
| 66 |
+
use_rot: False
|
| 67 |
+
|
| 68 |
+
# data loader
|
| 69 |
+
use_shuffle: true
|
| 70 |
+
num_worker_per_gpu: 5
|
| 71 |
+
batch_size_per_gpu: 12
|
| 72 |
+
dataset_enlarge_ratio: 1
|
| 73 |
+
prefetch_mode: ~
|
| 74 |
+
|
| 75 |
+
# Uncomment these for validation
|
| 76 |
+
# val:
|
| 77 |
+
# name: validation
|
| 78 |
+
# type: PairedImageDataset
|
| 79 |
+
# dataroot_gt: path_to_gt
|
| 80 |
+
# dataroot_lq: path_to_lq
|
| 81 |
+
# io_backend:
|
| 82 |
+
# type: disk
|
| 83 |
+
|
| 84 |
+
# network structures
|
| 85 |
+
network_g:
|
| 86 |
+
type: RRDBNet
|
| 87 |
+
num_in_ch: 3
|
| 88 |
+
num_out_ch: 3
|
| 89 |
+
num_feat: 64
|
| 90 |
+
num_block: 23
|
| 91 |
+
num_grow_ch: 32
|
| 92 |
+
|
| 93 |
+
network_d:
|
| 94 |
+
type: UNetDiscriminatorSN
|
| 95 |
+
num_in_ch: 3
|
| 96 |
+
num_feat: 64
|
| 97 |
+
skip_connection: True
|
| 98 |
+
|
| 99 |
+
# path
|
| 100 |
+
path:
|
| 101 |
+
# use the pre-trained Real-ESRNet model
|
| 102 |
+
pretrain_network_g: experiments/pretrained_models/RealESRNet_x4plus.pth
|
| 103 |
+
param_key_g: params_ema
|
| 104 |
+
strict_load_g: true
|
| 105 |
+
pretrain_network_d: experiments/pretrained_models/RealESRGAN_x4plus_netD.pth
|
| 106 |
+
param_key_d: params
|
| 107 |
+
strict_load_d: true
|
| 108 |
+
resume_state: ~
|
| 109 |
+
|
| 110 |
+
# training settings
|
| 111 |
+
train:
|
| 112 |
+
ema_decay: 0.999
|
| 113 |
+
optim_g:
|
| 114 |
+
type: Adam
|
| 115 |
+
lr: !!float 1e-4
|
| 116 |
+
weight_decay: 0
|
| 117 |
+
betas: [0.9, 0.99]
|
| 118 |
+
optim_d:
|
| 119 |
+
type: Adam
|
| 120 |
+
lr: !!float 1e-4
|
| 121 |
+
weight_decay: 0
|
| 122 |
+
betas: [0.9, 0.99]
|
| 123 |
+
|
| 124 |
+
scheduler:
|
| 125 |
+
type: MultiStepLR
|
| 126 |
+
milestones: [400000]
|
| 127 |
+
gamma: 0.5
|
| 128 |
+
|
| 129 |
+
total_iter: 400000
|
| 130 |
+
warmup_iter: -1 # no warm up
|
| 131 |
+
|
| 132 |
+
# losses
|
| 133 |
+
pixel_opt:
|
| 134 |
+
type: L1Loss
|
| 135 |
+
loss_weight: 1.0
|
| 136 |
+
reduction: mean
|
| 137 |
+
# perceptual loss (content and style losses)
|
| 138 |
+
perceptual_opt:
|
| 139 |
+
type: PerceptualLoss
|
| 140 |
+
layer_weights:
|
| 141 |
+
# before relu
|
| 142 |
+
'conv1_2': 0.1
|
| 143 |
+
'conv2_2': 0.1
|
| 144 |
+
'conv3_4': 1
|
| 145 |
+
'conv4_4': 1
|
| 146 |
+
'conv5_4': 1
|
| 147 |
+
vgg_type: vgg19
|
| 148 |
+
use_input_norm: true
|
| 149 |
+
perceptual_weight: !!float 1.0
|
| 150 |
+
style_weight: 0
|
| 151 |
+
range_norm: false
|
| 152 |
+
criterion: l1
|
| 153 |
+
# gan loss
|
| 154 |
+
gan_opt:
|
| 155 |
+
type: GANLoss
|
| 156 |
+
gan_type: vanilla
|
| 157 |
+
real_label_val: 1.0
|
| 158 |
+
fake_label_val: 0.0
|
| 159 |
+
loss_weight: !!float 1e-1
|
| 160 |
+
|
| 161 |
+
net_d_iters: 1
|
| 162 |
+
net_d_init_iters: 0
|
| 163 |
+
|
| 164 |
+
# Uncomment these for validation
|
| 165 |
+
# validation settings
|
| 166 |
+
# val:
|
| 167 |
+
# val_freq: !!float 5e3
|
| 168 |
+
# save_img: True
|
| 169 |
+
|
| 170 |
+
# metrics:
|
| 171 |
+
# psnr: # metric name
|
| 172 |
+
# type: calculate_psnr
|
| 173 |
+
# crop_border: 4
|
| 174 |
+
# test_y_channel: false
|
| 175 |
+
|
| 176 |
+
# logging settings
|
| 177 |
+
logger:
|
| 178 |
+
print_freq: 100
|
| 179 |
+
save_checkpoint_freq: !!float 5e3
|
| 180 |
+
use_tb_logger: true
|
| 181 |
+
wandb:
|
| 182 |
+
project: ~
|
| 183 |
+
resume_id: ~
|
| 184 |
+
|
| 185 |
+
# dist training settings
|
| 186 |
+
dist_params:
|
| 187 |
+
backend: nccl
|
| 188 |
+
port: 29500
|
Real-ESRGAN/options/finetune_realesrgan_x4plus_pairdata.yml
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# general settings
|
| 2 |
+
name: finetune_RealESRGANx4plus_400k_pairdata
|
| 3 |
+
model_type: RealESRGANModel
|
| 4 |
+
scale: 4
|
| 5 |
+
num_gpu: auto
|
| 6 |
+
manual_seed: 0
|
| 7 |
+
|
| 8 |
+
# USM the ground-truth
|
| 9 |
+
l1_gt_usm: True
|
| 10 |
+
percep_gt_usm: True
|
| 11 |
+
gan_gt_usm: False
|
| 12 |
+
|
| 13 |
+
high_order_degradation: False # do not use the high-order degradation generation process
|
| 14 |
+
|
| 15 |
+
# dataset and data loader settings
|
| 16 |
+
datasets:
|
| 17 |
+
train:
|
| 18 |
+
name: DIV2K
|
| 19 |
+
type: RealESRGANPairedDataset
|
| 20 |
+
dataroot_gt: datasets/DF2K
|
| 21 |
+
dataroot_lq: datasets/DF2K
|
| 22 |
+
meta_info: datasets/DF2K/meta_info/meta_info_DIV2K_sub_pair.txt
|
| 23 |
+
io_backend:
|
| 24 |
+
type: disk
|
| 25 |
+
|
| 26 |
+
gt_size: 256
|
| 27 |
+
use_hflip: True
|
| 28 |
+
use_rot: False
|
| 29 |
+
|
| 30 |
+
# data loader
|
| 31 |
+
use_shuffle: true
|
| 32 |
+
num_worker_per_gpu: 5
|
| 33 |
+
batch_size_per_gpu: 12
|
| 34 |
+
dataset_enlarge_ratio: 1
|
| 35 |
+
prefetch_mode: ~
|
| 36 |
+
|
| 37 |
+
# Uncomment these for validation
|
| 38 |
+
# val:
|
| 39 |
+
# name: validation
|
| 40 |
+
# type: PairedImageDataset
|
| 41 |
+
# dataroot_gt: path_to_gt
|
| 42 |
+
# dataroot_lq: path_to_lq
|
| 43 |
+
# io_backend:
|
| 44 |
+
# type: disk
|
| 45 |
+
|
| 46 |
+
# network structures
|
| 47 |
+
network_g:
|
| 48 |
+
type: RRDBNet
|
| 49 |
+
num_in_ch: 3
|
| 50 |
+
num_out_ch: 3
|
| 51 |
+
num_feat: 64
|
| 52 |
+
num_block: 23
|
| 53 |
+
num_grow_ch: 32
|
| 54 |
+
|
| 55 |
+
network_d:
|
| 56 |
+
type: UNetDiscriminatorSN
|
| 57 |
+
num_in_ch: 3
|
| 58 |
+
num_feat: 64
|
| 59 |
+
skip_connection: True
|
| 60 |
+
|
| 61 |
+
# path
|
| 62 |
+
path:
|
| 63 |
+
# use the pre-trained Real-ESRNet model
|
| 64 |
+
pretrain_network_g: experiments/pretrained_models/RealESRNet_x4plus.pth
|
| 65 |
+
param_key_g: params_ema
|
| 66 |
+
strict_load_g: true
|
| 67 |
+
pretrain_network_d: experiments/pretrained_models/RealESRGAN_x4plus_netD.pth
|
| 68 |
+
param_key_d: params
|
| 69 |
+
strict_load_d: true
|
| 70 |
+
resume_state: ~
|
| 71 |
+
|
| 72 |
+
# training settings
|
| 73 |
+
train:
|
| 74 |
+
ema_decay: 0.999
|
| 75 |
+
optim_g:
|
| 76 |
+
type: Adam
|
| 77 |
+
lr: !!float 1e-4
|
| 78 |
+
weight_decay: 0
|
| 79 |
+
betas: [0.9, 0.99]
|
| 80 |
+
optim_d:
|
| 81 |
+
type: Adam
|
| 82 |
+
lr: !!float 1e-4
|
| 83 |
+
weight_decay: 0
|
| 84 |
+
betas: [0.9, 0.99]
|
| 85 |
+
|
| 86 |
+
scheduler:
|
| 87 |
+
type: MultiStepLR
|
| 88 |
+
milestones: [400000]
|
| 89 |
+
gamma: 0.5
|
| 90 |
+
|
| 91 |
+
total_iter: 400000
|
| 92 |
+
warmup_iter: -1 # no warm up
|
| 93 |
+
|
| 94 |
+
# losses
|
| 95 |
+
pixel_opt:
|
| 96 |
+
type: L1Loss
|
| 97 |
+
loss_weight: 1.0
|
| 98 |
+
reduction: mean
|
| 99 |
+
# perceptual loss (content and style losses)
|
| 100 |
+
perceptual_opt:
|
| 101 |
+
type: PerceptualLoss
|
| 102 |
+
layer_weights:
|
| 103 |
+
# before relu
|
| 104 |
+
'conv1_2': 0.1
|
| 105 |
+
'conv2_2': 0.1
|
| 106 |
+
'conv3_4': 1
|
| 107 |
+
'conv4_4': 1
|
| 108 |
+
'conv5_4': 1
|
| 109 |
+
vgg_type: vgg19
|
| 110 |
+
use_input_norm: true
|
| 111 |
+
perceptual_weight: !!float 1.0
|
| 112 |
+
style_weight: 0
|
| 113 |
+
range_norm: false
|
| 114 |
+
criterion: l1
|
| 115 |
+
# gan loss
|
| 116 |
+
gan_opt:
|
| 117 |
+
type: GANLoss
|
| 118 |
+
gan_type: vanilla
|
| 119 |
+
real_label_val: 1.0
|
| 120 |
+
fake_label_val: 0.0
|
| 121 |
+
loss_weight: !!float 1e-1
|
| 122 |
+
|
| 123 |
+
net_d_iters: 1
|
| 124 |
+
net_d_init_iters: 0
|
| 125 |
+
|
| 126 |
+
# Uncomment these for validation
|
| 127 |
+
# validation settings
|
| 128 |
+
# val:
|
| 129 |
+
# val_freq: !!float 5e3
|
| 130 |
+
# save_img: True
|
| 131 |
+
|
| 132 |
+
# metrics:
|
| 133 |
+
# psnr: # metric name
|
| 134 |
+
# type: calculate_psnr
|
| 135 |
+
# crop_border: 4
|
| 136 |
+
# test_y_channel: false
|
| 137 |
+
|
| 138 |
+
# logging settings
|
| 139 |
+
logger:
|
| 140 |
+
print_freq: 100
|
| 141 |
+
save_checkpoint_freq: !!float 5e3
|
| 142 |
+
use_tb_logger: true
|
| 143 |
+
wandb:
|
| 144 |
+
project: ~
|
| 145 |
+
resume_id: ~
|
| 146 |
+
|
| 147 |
+
# dist training settings
|
| 148 |
+
dist_params:
|
| 149 |
+
backend: nccl
|
| 150 |
+
port: 29500
|
Real-ESRGAN/options/train_realesrgan_x2plus.yml
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# general settings
|
| 2 |
+
name: train_RealESRGANx2plus_400k_B12G4
|
| 3 |
+
model_type: RealESRGANModel
|
| 4 |
+
scale: 2
|
| 5 |
+
num_gpu: auto # auto: can infer from your visible devices automatically. official: 4 GPUs
|
| 6 |
+
manual_seed: 0
|
| 7 |
+
|
| 8 |
+
# ----------------- options for synthesizing training data in RealESRGANModel ----------------- #
|
| 9 |
+
# USM the ground-truth
|
| 10 |
+
l1_gt_usm: True
|
| 11 |
+
percep_gt_usm: True
|
| 12 |
+
gan_gt_usm: False
|
| 13 |
+
|
| 14 |
+
# the first degradation process
|
| 15 |
+
resize_prob: [0.2, 0.7, 0.1] # up, down, keep
|
| 16 |
+
resize_range: [0.15, 1.5]
|
| 17 |
+
gaussian_noise_prob: 0.5
|
| 18 |
+
noise_range: [1, 30]
|
| 19 |
+
poisson_scale_range: [0.05, 3]
|
| 20 |
+
gray_noise_prob: 0.4
|
| 21 |
+
jpeg_range: [30, 95]
|
| 22 |
+
|
| 23 |
+
# the second degradation process
|
| 24 |
+
second_blur_prob: 0.8
|
| 25 |
+
resize_prob2: [0.3, 0.4, 0.3] # up, down, keep
|
| 26 |
+
resize_range2: [0.3, 1.2]
|
| 27 |
+
gaussian_noise_prob2: 0.5
|
| 28 |
+
noise_range2: [1, 25]
|
| 29 |
+
poisson_scale_range2: [0.05, 2.5]
|
| 30 |
+
gray_noise_prob2: 0.4
|
| 31 |
+
jpeg_range2: [30, 95]
|
| 32 |
+
|
| 33 |
+
gt_size: 256
|
| 34 |
+
queue_size: 180
|
| 35 |
+
|
| 36 |
+
# dataset and data loader settings
|
| 37 |
+
datasets:
|
| 38 |
+
train:
|
| 39 |
+
name: DF2K+OST
|
| 40 |
+
type: RealESRGANDataset
|
| 41 |
+
dataroot_gt: datasets/DF2K
|
| 42 |
+
meta_info: datasets/DF2K/meta_info/meta_info_DF2Kmultiscale+OST_sub.txt
|
| 43 |
+
io_backend:
|
| 44 |
+
type: disk
|
| 45 |
+
|
| 46 |
+
blur_kernel_size: 21
|
| 47 |
+
kernel_list: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
|
| 48 |
+
kernel_prob: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
|
| 49 |
+
sinc_prob: 0.1
|
| 50 |
+
blur_sigma: [0.2, 3]
|
| 51 |
+
betag_range: [0.5, 4]
|
| 52 |
+
betap_range: [1, 2]
|
| 53 |
+
|
| 54 |
+
blur_kernel_size2: 21
|
| 55 |
+
kernel_list2: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
|
| 56 |
+
kernel_prob2: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
|
| 57 |
+
sinc_prob2: 0.1
|
| 58 |
+
blur_sigma2: [0.2, 1.5]
|
| 59 |
+
betag_range2: [0.5, 4]
|
| 60 |
+
betap_range2: [1, 2]
|
| 61 |
+
|
| 62 |
+
final_sinc_prob: 0.8
|
| 63 |
+
|
| 64 |
+
gt_size: 256
|
| 65 |
+
use_hflip: True
|
| 66 |
+
use_rot: False
|
| 67 |
+
|
| 68 |
+
# data loader
|
| 69 |
+
use_shuffle: true
|
| 70 |
+
num_worker_per_gpu: 5
|
| 71 |
+
batch_size_per_gpu: 12
|
| 72 |
+
dataset_enlarge_ratio: 1
|
| 73 |
+
prefetch_mode: ~
|
| 74 |
+
|
| 75 |
+
# Uncomment these for validation
|
| 76 |
+
# val:
|
| 77 |
+
# name: validation
|
| 78 |
+
# type: PairedImageDataset
|
| 79 |
+
# dataroot_gt: path_to_gt
|
| 80 |
+
# dataroot_lq: path_to_lq
|
| 81 |
+
# io_backend:
|
| 82 |
+
# type: disk
|
| 83 |
+
|
| 84 |
+
# network structures
|
| 85 |
+
network_g:
|
| 86 |
+
type: RRDBNet
|
| 87 |
+
num_in_ch: 3
|
| 88 |
+
num_out_ch: 3
|
| 89 |
+
num_feat: 64
|
| 90 |
+
num_block: 23
|
| 91 |
+
num_grow_ch: 32
|
| 92 |
+
scale: 2
|
| 93 |
+
|
| 94 |
+
network_d:
|
| 95 |
+
type: UNetDiscriminatorSN
|
| 96 |
+
num_in_ch: 3
|
| 97 |
+
num_feat: 64
|
| 98 |
+
skip_connection: True
|
| 99 |
+
|
| 100 |
+
# path
|
| 101 |
+
path:
|
| 102 |
+
# use the pre-trained Real-ESRNet model
|
| 103 |
+
pretrain_network_g: experiments/pretrained_models/RealESRNet_x2plus.pth
|
| 104 |
+
param_key_g: params_ema
|
| 105 |
+
strict_load_g: true
|
| 106 |
+
resume_state: ~
|
| 107 |
+
|
| 108 |
+
# training settings
|
| 109 |
+
train:
|
| 110 |
+
ema_decay: 0.999
|
| 111 |
+
optim_g:
|
| 112 |
+
type: Adam
|
| 113 |
+
lr: !!float 1e-4
|
| 114 |
+
weight_decay: 0
|
| 115 |
+
betas: [0.9, 0.99]
|
| 116 |
+
optim_d:
|
| 117 |
+
type: Adam
|
| 118 |
+
lr: !!float 1e-4
|
| 119 |
+
weight_decay: 0
|
| 120 |
+
betas: [0.9, 0.99]
|
| 121 |
+
|
| 122 |
+
scheduler:
|
| 123 |
+
type: MultiStepLR
|
| 124 |
+
milestones: [400000]
|
| 125 |
+
gamma: 0.5
|
| 126 |
+
|
| 127 |
+
total_iter: 400000
|
| 128 |
+
warmup_iter: -1 # no warm up
|
| 129 |
+
|
| 130 |
+
# losses
|
| 131 |
+
pixel_opt:
|
| 132 |
+
type: L1Loss
|
| 133 |
+
loss_weight: 1.0
|
| 134 |
+
reduction: mean
|
| 135 |
+
# perceptual loss (content and style losses)
|
| 136 |
+
perceptual_opt:
|
| 137 |
+
type: PerceptualLoss
|
| 138 |
+
layer_weights:
|
| 139 |
+
# before relu
|
| 140 |
+
'conv1_2': 0.1
|
| 141 |
+
'conv2_2': 0.1
|
| 142 |
+
'conv3_4': 1
|
| 143 |
+
'conv4_4': 1
|
| 144 |
+
'conv5_4': 1
|
| 145 |
+
vgg_type: vgg19
|
| 146 |
+
use_input_norm: true
|
| 147 |
+
perceptual_weight: !!float 1.0
|
| 148 |
+
style_weight: 0
|
| 149 |
+
range_norm: false
|
| 150 |
+
criterion: l1
|
| 151 |
+
# gan loss
|
| 152 |
+
gan_opt:
|
| 153 |
+
type: GANLoss
|
| 154 |
+
gan_type: vanilla
|
| 155 |
+
real_label_val: 1.0
|
| 156 |
+
fake_label_val: 0.0
|
| 157 |
+
loss_weight: !!float 1e-1
|
| 158 |
+
|
| 159 |
+
net_d_iters: 1
|
| 160 |
+
net_d_init_iters: 0
|
| 161 |
+
|
| 162 |
+
# Uncomment these for validation
|
| 163 |
+
# validation settings
|
| 164 |
+
# val:
|
| 165 |
+
# val_freq: !!float 5e3
|
| 166 |
+
# save_img: True
|
| 167 |
+
|
| 168 |
+
# metrics:
|
| 169 |
+
# psnr: # metric name
|
| 170 |
+
# type: calculate_psnr
|
| 171 |
+
# crop_border: 4
|
| 172 |
+
# test_y_channel: false
|
| 173 |
+
|
| 174 |
+
# logging settings
|
| 175 |
+
logger:
|
| 176 |
+
print_freq: 100
|
| 177 |
+
save_checkpoint_freq: !!float 5e3
|
| 178 |
+
use_tb_logger: true
|
| 179 |
+
wandb:
|
| 180 |
+
project: ~
|
| 181 |
+
resume_id: ~
|
| 182 |
+
|
| 183 |
+
# dist training settings
|
| 184 |
+
dist_params:
|
| 185 |
+
backend: nccl
|
| 186 |
+
port: 29500
|
Real-ESRGAN/options/train_realesrgan_x4plus.yml
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# general settings
|
| 2 |
+
name: train_RealESRGANx4plus_400k_B12G4
|
| 3 |
+
model_type: RealESRGANModel
|
| 4 |
+
scale: 4
|
| 5 |
+
num_gpu: auto # auto: can infer from your visible devices automatically. official: 4 GPUs
|
| 6 |
+
manual_seed: 0
|
| 7 |
+
|
| 8 |
+
# ----------------- options for synthesizing training data in RealESRGANModel ----------------- #
|
| 9 |
+
# USM the ground-truth
|
| 10 |
+
l1_gt_usm: True
|
| 11 |
+
percep_gt_usm: True
|
| 12 |
+
gan_gt_usm: False
|
| 13 |
+
|
| 14 |
+
# the first degradation process
|
| 15 |
+
resize_prob: [0.2, 0.7, 0.1] # up, down, keep
|
| 16 |
+
resize_range: [0.15, 1.5]
|
| 17 |
+
gaussian_noise_prob: 0.5
|
| 18 |
+
noise_range: [1, 30]
|
| 19 |
+
poisson_scale_range: [0.05, 3]
|
| 20 |
+
gray_noise_prob: 0.4
|
| 21 |
+
jpeg_range: [30, 95]
|
| 22 |
+
|
| 23 |
+
# the second degradation process
|
| 24 |
+
second_blur_prob: 0.8
|
| 25 |
+
resize_prob2: [0.3, 0.4, 0.3] # up, down, keep
|
| 26 |
+
resize_range2: [0.3, 1.2]
|
| 27 |
+
gaussian_noise_prob2: 0.5
|
| 28 |
+
noise_range2: [1, 25]
|
| 29 |
+
poisson_scale_range2: [0.05, 2.5]
|
| 30 |
+
gray_noise_prob2: 0.4
|
| 31 |
+
jpeg_range2: [30, 95]
|
| 32 |
+
|
| 33 |
+
gt_size: 256
|
| 34 |
+
queue_size: 180
|
| 35 |
+
|
| 36 |
+
# dataset and data loader settings
|
| 37 |
+
datasets:
|
| 38 |
+
train:
|
| 39 |
+
name: DF2K+OST
|
| 40 |
+
type: RealESRGANDataset
|
| 41 |
+
dataroot_gt: datasets/DF2K
|
| 42 |
+
meta_info: datasets/DF2K/meta_info/meta_info_DF2Kmultiscale+OST_sub.txt
|
| 43 |
+
io_backend:
|
| 44 |
+
type: disk
|
| 45 |
+
|
| 46 |
+
blur_kernel_size: 21
|
| 47 |
+
kernel_list: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
|
| 48 |
+
kernel_prob: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
|
| 49 |
+
sinc_prob: 0.1
|
| 50 |
+
blur_sigma: [0.2, 3]
|
| 51 |
+
betag_range: [0.5, 4]
|
| 52 |
+
betap_range: [1, 2]
|
| 53 |
+
|
| 54 |
+
blur_kernel_size2: 21
|
| 55 |
+
kernel_list2: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
|
| 56 |
+
kernel_prob2: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
|
| 57 |
+
sinc_prob2: 0.1
|
| 58 |
+
blur_sigma2: [0.2, 1.5]
|
| 59 |
+
betag_range2: [0.5, 4]
|
| 60 |
+
betap_range2: [1, 2]
|
| 61 |
+
|
| 62 |
+
final_sinc_prob: 0.8
|
| 63 |
+
|
| 64 |
+
gt_size: 256
|
| 65 |
+
use_hflip: True
|
| 66 |
+
use_rot: False
|
| 67 |
+
|
| 68 |
+
# data loader
|
| 69 |
+
use_shuffle: true
|
| 70 |
+
num_worker_per_gpu: 5
|
| 71 |
+
batch_size_per_gpu: 12
|
| 72 |
+
dataset_enlarge_ratio: 1
|
| 73 |
+
prefetch_mode: ~
|
| 74 |
+
|
| 75 |
+
# Uncomment these for validation
|
| 76 |
+
# val:
|
| 77 |
+
# name: validation
|
| 78 |
+
# type: PairedImageDataset
|
| 79 |
+
# dataroot_gt: path_to_gt
|
| 80 |
+
# dataroot_lq: path_to_lq
|
| 81 |
+
# io_backend:
|
| 82 |
+
# type: disk
|
| 83 |
+
|
| 84 |
+
# network structures
|
| 85 |
+
network_g:
|
| 86 |
+
type: RRDBNet
|
| 87 |
+
num_in_ch: 3
|
| 88 |
+
num_out_ch: 3
|
| 89 |
+
num_feat: 64
|
| 90 |
+
num_block: 23
|
| 91 |
+
num_grow_ch: 32
|
| 92 |
+
|
| 93 |
+
network_d:
|
| 94 |
+
type: UNetDiscriminatorSN
|
| 95 |
+
num_in_ch: 3
|
| 96 |
+
num_feat: 64
|
| 97 |
+
skip_connection: True
|
| 98 |
+
|
| 99 |
+
# path
|
| 100 |
+
path:
|
| 101 |
+
# use the pre-trained Real-ESRNet model
|
| 102 |
+
pretrain_network_g: experiments/pretrained_models/RealESRNet_x4plus.pth
|
| 103 |
+
param_key_g: params_ema
|
| 104 |
+
strict_load_g: true
|
| 105 |
+
resume_state: ~
|
| 106 |
+
|
| 107 |
+
# training settings
|
| 108 |
+
train:
|
| 109 |
+
ema_decay: 0.999
|
| 110 |
+
optim_g:
|
| 111 |
+
type: Adam
|
| 112 |
+
lr: !!float 1e-4
|
| 113 |
+
weight_decay: 0
|
| 114 |
+
betas: [0.9, 0.99]
|
| 115 |
+
optim_d:
|
| 116 |
+
type: Adam
|
| 117 |
+
lr: !!float 1e-4
|
| 118 |
+
weight_decay: 0
|
| 119 |
+
betas: [0.9, 0.99]
|
| 120 |
+
|
| 121 |
+
scheduler:
|
| 122 |
+
type: MultiStepLR
|
| 123 |
+
milestones: [400000]
|
| 124 |
+
gamma: 0.5
|
| 125 |
+
|
| 126 |
+
total_iter: 400000
|
| 127 |
+
warmup_iter: -1 # no warm up
|
| 128 |
+
|
| 129 |
+
# losses
|
| 130 |
+
pixel_opt:
|
| 131 |
+
type: L1Loss
|
| 132 |
+
loss_weight: 1.0
|
| 133 |
+
reduction: mean
|
| 134 |
+
# perceptual loss (content and style losses)
|
| 135 |
+
perceptual_opt:
|
| 136 |
+
type: PerceptualLoss
|
| 137 |
+
layer_weights:
|
| 138 |
+
# before relu
|
| 139 |
+
'conv1_2': 0.1
|
| 140 |
+
'conv2_2': 0.1
|
| 141 |
+
'conv3_4': 1
|
| 142 |
+
'conv4_4': 1
|
| 143 |
+
'conv5_4': 1
|
| 144 |
+
vgg_type: vgg19
|
| 145 |
+
use_input_norm: true
|
| 146 |
+
perceptual_weight: !!float 1.0
|
| 147 |
+
style_weight: 0
|
| 148 |
+
range_norm: false
|
| 149 |
+
criterion: l1
|
| 150 |
+
# gan loss
|
| 151 |
+
gan_opt:
|
| 152 |
+
type: GANLoss
|
| 153 |
+
gan_type: vanilla
|
| 154 |
+
real_label_val: 1.0
|
| 155 |
+
fake_label_val: 0.0
|
| 156 |
+
loss_weight: !!float 1e-1
|
| 157 |
+
|
| 158 |
+
net_d_iters: 1
|
| 159 |
+
net_d_init_iters: 0
|
| 160 |
+
|
| 161 |
+
# Uncomment these for validation
|
| 162 |
+
# validation settings
|
| 163 |
+
# val:
|
| 164 |
+
# val_freq: !!float 5e3
|
| 165 |
+
# save_img: True
|
| 166 |
+
|
| 167 |
+
# metrics:
|
| 168 |
+
# psnr: # metric name
|
| 169 |
+
# type: calculate_psnr
|
| 170 |
+
# crop_border: 4
|
| 171 |
+
# test_y_channel: false
|
| 172 |
+
|
| 173 |
+
# logging settings
|
| 174 |
+
logger:
|
| 175 |
+
print_freq: 100
|
| 176 |
+
save_checkpoint_freq: !!float 5e3
|
| 177 |
+
use_tb_logger: true
|
| 178 |
+
wandb:
|
| 179 |
+
project: ~
|
| 180 |
+
resume_id: ~
|
| 181 |
+
|
| 182 |
+
# dist training settings
|
| 183 |
+
dist_params:
|
| 184 |
+
backend: nccl
|
| 185 |
+
port: 29500
|
Real-ESRGAN/options/train_realesrnet_x2plus.yml
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# general settings
name: train_RealESRNetx2plus_1000k_B12G4
model_type: RealESRNetModel
scale: 2
num_gpu: auto  # auto: can infer from your visible devices automatically. official: 4 GPUs
manual_seed: 0

# ----------------- options for synthesizing training data in RealESRNetModel ----------------- #
gt_usm: True  # USM (unsharp mask) the ground-truth

# the first degradation process
resize_prob: [0.2, 0.7, 0.1]  # up, down, keep
resize_range: [0.15, 1.5]
gaussian_noise_prob: 0.5
noise_range: [1, 30]
poisson_scale_range: [0.05, 3]
gray_noise_prob: 0.4
jpeg_range: [30, 95]

# the second degradation process
second_blur_prob: 0.8
resize_prob2: [0.3, 0.4, 0.3]  # up, down, keep
resize_range2: [0.3, 1.2]
gaussian_noise_prob2: 0.5
noise_range2: [1, 25]
poisson_scale_range2: [0.05, 2.5]
gray_noise_prob2: 0.4
jpeg_range2: [30, 95]

gt_size: 256
queue_size: 180  # size of the training-pair pool used by the model

# dataset and data loader settings
datasets:
  train:
    name: DF2K+OST
    type: RealESRGANDataset
    dataroot_gt: datasets/DF2K
    meta_info: datasets/DF2K/meta_info/meta_info_DF2Kmultiscale+OST_sub.txt
    io_backend:
      type: disk

    # blur kernel settings for the first degradation
    blur_kernel_size: 21
    kernel_list: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
    kernel_prob: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
    sinc_prob: 0.1
    blur_sigma: [0.2, 3]
    betag_range: [0.5, 4]
    betap_range: [1, 2]

    # blur kernel settings for the second degradation
    blur_kernel_size2: 21
    kernel_list2: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
    kernel_prob2: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
    sinc_prob2: 0.1
    blur_sigma2: [0.2, 1.5]
    betag_range2: [0.5, 4]
    betap_range2: [1, 2]

    final_sinc_prob: 0.8

    gt_size: 256
    use_hflip: True
    use_rot: False

    # data loader
    use_shuffle: true
    num_worker_per_gpu: 5
    batch_size_per_gpu: 12
    dataset_enlarge_ratio: 1
    prefetch_mode: ~

  # Uncomment these for validation
  # val:
  #   name: validation
  #   type: PairedImageDataset
  #   dataroot_gt: path_to_gt
  #   dataroot_lq: path_to_lq
  #   io_backend:
  #     type: disk

# network structures
network_g:
  type: RRDBNet
  num_in_ch: 3
  num_out_ch: 3
  num_feat: 64
  num_block: 23
  num_grow_ch: 32
  scale: 2

# path
path:
  # NOTE(review): the x2 model is initialized from x4 weights; strict_load_g is
  # False so mismatched parameters are skipped during loading.
  pretrain_network_g: experiments/pretrained_models/RealESRGAN_x4plus.pth
  param_key_g: params_ema
  strict_load_g: False
  resume_state: ~

# training settings
train:
  ema_decay: 0.999
  optim_g:
    type: Adam
    lr: !!float 2e-4
    weight_decay: 0
    betas: [0.9, 0.99]

  scheduler:
    type: MultiStepLR
    milestones: [1000000]
    gamma: 0.5

  total_iter: 1000000
  warmup_iter: -1  # no warm up

  # losses
  pixel_opt:
    type: L1Loss
    loss_weight: 1.0
    reduction: mean

# Uncomment these for validation
# validation settings
# val:
#   val_freq: !!float 5e3
#   save_img: True

#   metrics:
#     psnr: # metric name
#       type: calculate_psnr
#       crop_border: 4
#       test_y_channel: false

# logging settings
logger:
  print_freq: 100
  save_checkpoint_freq: !!float 5e3
  use_tb_logger: true
  wandb:
    project: ~
    resume_id: ~

# dist training settings
dist_params:
  backend: nccl
  port: 29500
|
Real-ESRGAN/options/train_realesrnet_x4plus.yml
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# general settings
name: train_RealESRNetx4plus_1000k_B12G4
model_type: RealESRNetModel
scale: 4
num_gpu: auto  # auto: can infer from your visible devices automatically. official: 4 GPUs
manual_seed: 0

# ----------------- options for synthesizing training data in RealESRNetModel ----------------- #
gt_usm: True  # USM (unsharp mask) the ground-truth

# the first degradation process
resize_prob: [0.2, 0.7, 0.1]  # up, down, keep
resize_range: [0.15, 1.5]
gaussian_noise_prob: 0.5
noise_range: [1, 30]
poisson_scale_range: [0.05, 3]
gray_noise_prob: 0.4
jpeg_range: [30, 95]

# the second degradation process
second_blur_prob: 0.8
resize_prob2: [0.3, 0.4, 0.3]  # up, down, keep
resize_range2: [0.3, 1.2]
gaussian_noise_prob2: 0.5
noise_range2: [1, 25]
poisson_scale_range2: [0.05, 2.5]
gray_noise_prob2: 0.4
jpeg_range2: [30, 95]

gt_size: 256
queue_size: 180  # size of the training-pair pool used by the model

# dataset and data loader settings
datasets:
  train:
    name: DF2K+OST
    type: RealESRGANDataset
    dataroot_gt: datasets/DF2K
    meta_info: datasets/DF2K/meta_info/meta_info_DF2Kmultiscale+OST_sub.txt
    io_backend:
      type: disk

    # blur kernel settings for the first degradation
    blur_kernel_size: 21
    kernel_list: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
    kernel_prob: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
    sinc_prob: 0.1
    blur_sigma: [0.2, 3]
    betag_range: [0.5, 4]
    betap_range: [1, 2]

    # blur kernel settings for the second degradation
    blur_kernel_size2: 21
    kernel_list2: ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
    kernel_prob2: [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
    sinc_prob2: 0.1
    blur_sigma2: [0.2, 1.5]
    betag_range2: [0.5, 4]
    betap_range2: [1, 2]

    final_sinc_prob: 0.8

    gt_size: 256
    use_hflip: True
    use_rot: False

    # data loader
    use_shuffle: true
    num_worker_per_gpu: 5
    batch_size_per_gpu: 12
    dataset_enlarge_ratio: 1
    prefetch_mode: ~

  # Uncomment these for validation
  # val:
  #   name: validation
  #   type: PairedImageDataset
  #   dataroot_gt: path_to_gt
  #   dataroot_lq: path_to_lq
  #   io_backend:
  #     type: disk

# network structures
network_g:
  type: RRDBNet
  num_in_ch: 3
  num_out_ch: 3
  num_feat: 64
  num_block: 23
  num_grow_ch: 32

# path
path:
  pretrain_network_g: experiments/pretrained_models/ESRGAN_SRx4_DF2KOST_official-ff704c30.pth
  param_key_g: params_ema
  strict_load_g: true
  resume_state: ~

# training settings
train:
  ema_decay: 0.999
  optim_g:
    type: Adam
    lr: !!float 2e-4
    weight_decay: 0
    betas: [0.9, 0.99]

  scheduler:
    type: MultiStepLR
    milestones: [1000000]
    gamma: 0.5

  total_iter: 1000000
  warmup_iter: -1  # no warm up

  # losses
  pixel_opt:
    type: L1Loss
    loss_weight: 1.0
    reduction: mean

# Uncomment these for validation
# validation settings
# val:
#   val_freq: !!float 5e3
#   save_img: True

#   metrics:
#     psnr: # metric name
#       type: calculate_psnr
#       crop_border: 4
#       test_y_channel: false

# logging settings
logger:
  print_freq: 100
  save_checkpoint_freq: !!float 5e3
  use_tb_logger: true
  wandb:
    project: ~
    resume_id: ~

# dist training settings
dist_params:
  backend: nccl
  port: 29500
|
Real-ESRGAN/realesrgan/__init__.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# flake8: noqa
|
| 2 |
+
from .archs import *
|
| 3 |
+
from .data import *
|
| 4 |
+
from .models import *
|
| 5 |
+
from .utils import *
|
| 6 |
+
from .version import *
|
Real-ESRGAN/realesrgan/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (327 Bytes). View file
|
|
|
Real-ESRGAN/realesrgan/__pycache__/utils.cpython-311.pyc
ADDED
|
Binary file (17.9 kB). View file
|
|
|
Real-ESRGAN/realesrgan/__pycache__/version.cpython-311.pyc
ADDED
|
Binary file (278 Bytes). View file
|
|
|
Real-ESRGAN/realesrgan/archs/__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import importlib
from basicsr.utils import scandir
from os import path as osp

# Automatically scan and import arch modules for the registry.
# Scan all the files under the archs folder that end with '_arch.py'; importing
# each module has the side effect of running its registration decorators so the
# architectures become available through basicsr's registry.
arch_folder = osp.dirname(osp.abspath(__file__))
arch_filenames = [osp.splitext(osp.basename(v))[0] for v in scandir(arch_folder) if v.endswith('_arch.py')]
# import all the arch modules (the list itself is only kept to hold references)
_arch_modules = [importlib.import_module(f'realesrgan.archs.{file_name}') for file_name in arch_filenames]
|
Real-ESRGAN/realesrgan/archs/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (1.18 kB). View file
|
|
|
Real-ESRGAN/realesrgan/archs/__pycache__/discriminator_arch.cpython-311.pyc
ADDED
|
Binary file (4.89 kB). View file
|
|
|
Real-ESRGAN/realesrgan/archs/__pycache__/srvgg_arch.cpython-311.pyc
ADDED
|
Binary file (4.1 kB). View file
|
|
|
Real-ESRGAN/realesrgan/archs/discriminator_arch.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from basicsr.utils.registry import ARCH_REGISTRY
|
| 2 |
+
from torch import nn as nn
|
| 3 |
+
from torch.nn import functional as F
|
| 4 |
+
from torch.nn.utils import spectral_norm
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
@ARCH_REGISTRY.register()
class UNetDiscriminatorSN(nn.Module):
    """Defines a U-Net discriminator with spectral normalization (SN)

    It is used in Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data.

    Arg:
        num_in_ch (int): Channel number of inputs. Default: 3.
        num_feat (int): Channel number of base intermediate features. Default: 64.
        skip_connection (bool): Whether to use skip connections between U-Net. Default: True.
    """

    def __init__(self, num_in_ch, num_feat=64, skip_connection=True):
        super(UNetDiscriminatorSN, self).__init__()
        self.skip_connection = skip_connection
        sn = spectral_norm
        # input stem (the very first conv is not spectrally normalized)
        self.conv0 = nn.Conv2d(num_in_ch, num_feat, kernel_size=3, stride=1, padding=1)
        # encoder: three stride-2 convolutions, doubling the channel width each step
        self.conv1 = sn(nn.Conv2d(num_feat, num_feat * 2, 4, 2, 1, bias=False))
        self.conv2 = sn(nn.Conv2d(num_feat * 2, num_feat * 4, 4, 2, 1, bias=False))
        self.conv3 = sn(nn.Conv2d(num_feat * 4, num_feat * 8, 4, 2, 1, bias=False))
        # decoder: three convolutions, each applied after a x2 bilinear upsample
        self.conv4 = sn(nn.Conv2d(num_feat * 8, num_feat * 4, 3, 1, 1, bias=False))
        self.conv5 = sn(nn.Conv2d(num_feat * 4, num_feat * 2, 3, 1, 1, bias=False))
        self.conv6 = sn(nn.Conv2d(num_feat * 2, num_feat, 3, 1, 1, bias=False))
        # two extra convolutions followed by the 1-channel prediction head
        self.conv7 = sn(nn.Conv2d(num_feat, num_feat, 3, 1, 1, bias=False))
        self.conv8 = sn(nn.Conv2d(num_feat, num_feat, 3, 1, 1, bias=False))
        self.conv9 = nn.Conv2d(num_feat, 1, 3, 1, 1)

    def forward(self, x):
        def act(t):
            # shared leaky-ReLU activation used after every conv except the head
            return F.leaky_relu(t, negative_slope=0.2, inplace=True)

        def up(t):
            # x2 bilinear upsampling on the decoder path
            return F.interpolate(t, scale_factor=2, mode='bilinear', align_corners=False)

        # encoder path
        enc0 = act(self.conv0(x))
        enc1 = act(self.conv1(enc0))
        enc2 = act(self.conv2(enc1))
        enc3 = act(self.conv3(enc2))

        # decoder path with optional U-Net skip connections to the encoder features
        dec = act(self.conv4(up(enc3)))
        if self.skip_connection:
            dec = dec + enc2
        dec = act(self.conv5(up(dec)))
        if self.skip_connection:
            dec = dec + enc1
        dec = act(self.conv6(up(dec)))
        if self.skip_connection:
            dec = dec + enc0

        # extra convolutions and the final 1-channel map
        dec = act(self.conv7(dec))
        dec = act(self.conv8(dec))
        return self.conv9(dec)
|
Real-ESRGAN/realesrgan/archs/srvgg_arch.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from basicsr.utils.registry import ARCH_REGISTRY
|
| 2 |
+
from torch import nn as nn
|
| 3 |
+
from torch.nn import functional as F
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
@ARCH_REGISTRY.register()
class SRVGGNetCompact(nn.Module):
    """A compact VGG-style network structure for super-resolution.

    It is a compact network structure, which performs upsampling in the last layer and no convolution is
    conducted on the HR feature space.

    Args:
        num_in_ch (int): Channel number of inputs. Default: 3.
        num_out_ch (int): Channel number of outputs. Default: 3.
        num_feat (int): Channel number of intermediate features. Default: 64.
        num_conv (int): Number of convolution layers in the body network. Default: 16.
        upscale (int): Upsampling factor. Default: 4.
        act_type (str): Activation type, options: 'relu', 'prelu', 'leakyrelu'. Default: prelu.

    Raises:
        ValueError: If ``act_type`` is not one of the supported options.
    """

    def __init__(self, num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu'):
        super(SRVGGNetCompact, self).__init__()
        self.num_in_ch = num_in_ch
        self.num_out_ch = num_out_ch
        self.num_feat = num_feat
        self.num_conv = num_conv
        self.upscale = upscale
        self.act_type = act_type

        self.body = nn.ModuleList()
        # the first conv
        self.body.append(nn.Conv2d(num_in_ch, num_feat, 3, 1, 1))
        # the first activation
        self.body.append(self._make_activation())

        # the body structure: num_conv conv + activation pairs
        for _ in range(num_conv):
            self.body.append(nn.Conv2d(num_feat, num_feat, 3, 1, 1))
            self.body.append(self._make_activation())

        # the last conv produces upscale^2 * num_out_ch channels for pixel shuffle
        self.body.append(nn.Conv2d(num_feat, num_out_ch * upscale * upscale, 3, 1, 1))
        # upsample
        self.upsampler = nn.PixelShuffle(upscale)

    def _make_activation(self):
        """Build a fresh activation module according to ``self.act_type``.

        A new module is created on every call because PReLU holds learnable
        per-channel parameters that must not be shared between layers.
        """
        if self.act_type == 'relu':
            return nn.ReLU(inplace=True)
        if self.act_type == 'prelu':
            return nn.PReLU(num_parameters=self.num_feat)
        if self.act_type == 'leakyrelu':
            return nn.LeakyReLU(negative_slope=0.1, inplace=True)
        # Previously an unsupported act_type left `activation` unbound and
        # caused a confusing NameError; fail fast with a clear message instead.
        raise ValueError(f"Unsupported act_type: {self.act_type!r}; expected 'relu', 'prelu' or 'leakyrelu'")

    def forward(self, x):
        out = x
        # run the whole conv/activation stack in LR space
        for layer in self.body:
            out = layer(out)

        out = self.upsampler(out)
        # add the nearest upsampled image, so that the network learns the residual
        base = F.interpolate(x, scale_factor=self.upscale, mode='nearest')
        out += base
        return out
|
Real-ESRGAN/realesrgan/data/__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import importlib
from basicsr.utils import scandir
from os import path as osp

# Automatically scan and import dataset modules for the registry.
# Scan all the files under the data folder that end with '_dataset.py'; importing
# each module has the side effect of running its registration decorators so the
# datasets become available through basicsr's registry.
data_folder = osp.dirname(osp.abspath(__file__))
dataset_filenames = [osp.splitext(osp.basename(v))[0] for v in scandir(data_folder) if v.endswith('_dataset.py')]
# import all the dataset modules (the list itself is only kept to hold references)
_dataset_modules = [importlib.import_module(f'realesrgan.data.{file_name}') for file_name in dataset_filenames]
|
Real-ESRGAN/realesrgan/data/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (1.19 kB). View file
|
|
|
Real-ESRGAN/realesrgan/data/__pycache__/realesrgan_dataset.cpython-311.pyc
ADDED
|
Binary file (11.4 kB). View file
|
|
|
Real-ESRGAN/realesrgan/data/__pycache__/realesrgan_paired_dataset.cpython-311.pyc
ADDED
|
Binary file (6.67 kB). View file
|
|
|
Real-ESRGAN/realesrgan/data/realesrgan_dataset.py
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cv2
|
| 2 |
+
import math
|
| 3 |
+
import numpy as np
|
| 4 |
+
import os
|
| 5 |
+
import os.path as osp
|
| 6 |
+
import random
|
| 7 |
+
import time
|
| 8 |
+
import torch
|
| 9 |
+
from basicsr.data.degradations import circular_lowpass_kernel, random_mixed_kernels
|
| 10 |
+
from basicsr.data.transforms import augment
|
| 11 |
+
from basicsr.utils import FileClient, get_root_logger, imfrombytes, img2tensor
|
| 12 |
+
from basicsr.utils.registry import DATASET_REGISTRY
|
| 13 |
+
from torch.utils import data as data
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
@DATASET_REGISTRY.register()
class RealESRGANDataset(data.Dataset):
    """Dataset used for Real-ESRGAN model:
    Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data.

    It loads gt (Ground-Truth) images, and augments them.
    It also generates blur kernels and sinc kernels for generating low-quality images.
    Note that the low-quality images are processed in tensors on GPUS for faster processing.

    Args:
        opt (dict): Config for train datasets. It contains the following keys:
            dataroot_gt (str): Data root path for gt.
            meta_info (str): Path for meta information file.
            io_backend (dict): IO backend type and other kwarg.
            use_hflip (bool): Use horizontal flips.
            use_rot (bool): Use rotation (use vertical flip and transposing h and w for implementation).
            Please see more options in the codes.
    """

    def __init__(self, opt):
        super(RealESRGANDataset, self).__init__()
        self.opt = opt
        self.file_client = None  # lazily created in __getitem__ (worker-safe)
        self.io_backend_opt = opt['io_backend']
        self.gt_folder = opt['dataroot_gt']

        # file client (lmdb io backend)
        if self.io_backend_opt['type'] == 'lmdb':
            self.io_backend_opt['db_paths'] = [self.gt_folder]
            self.io_backend_opt['client_keys'] = ['gt']
            if not self.gt_folder.endswith('.lmdb'):
                raise ValueError(f"'dataroot_gt' should end with '.lmdb', but received {self.gt_folder}")
            with open(osp.join(self.gt_folder, 'meta_info.txt')) as fin:
                self.paths = [line.split('.')[0] for line in fin]
        else:
            # disk backend with meta_info
            # Each line in the meta_info describes the relative path to an image
            with open(self.opt['meta_info']) as fin:
                paths = [line.strip().split(' ')[0] for line in fin]
            self.paths = [os.path.join(self.gt_folder, v) for v in paths]

        # blur settings for the first degradation
        self.blur_kernel_size = opt['blur_kernel_size']
        self.kernel_list = opt['kernel_list']
        self.kernel_prob = opt['kernel_prob']  # a list for each kernel probability
        self.blur_sigma = opt['blur_sigma']
        self.betag_range = opt['betag_range']  # betag used in generalized Gaussian blur kernels
        self.betap_range = opt['betap_range']  # betap used in plateau blur kernels
        self.sinc_prob = opt['sinc_prob']  # the probability for sinc filters

        # blur settings for the second degradation
        self.blur_kernel_size2 = opt['blur_kernel_size2']
        self.kernel_list2 = opt['kernel_list2']
        self.kernel_prob2 = opt['kernel_prob2']
        self.blur_sigma2 = opt['blur_sigma2']
        self.betag_range2 = opt['betag_range2']
        self.betap_range2 = opt['betap_range2']
        self.sinc_prob2 = opt['sinc_prob2']

        # a final sinc filter
        self.final_sinc_prob = opt['final_sinc_prob']

        self.kernel_range = [2 * v + 1 for v in range(3, 11)]  # kernel size ranges from 7 to 21
        # TODO: kernel range is now hard-coded, should be in the configure file
        self.pulse_tensor = torch.zeros(21, 21).float()  # convolving with pulse tensor brings no blurry effect
        self.pulse_tensor[10, 10] = 1

    def __getitem__(self, index):
        if self.file_client is None:
            self.file_client = FileClient(self.io_backend_opt.pop('type'), **self.io_backend_opt)

        # -------------------------------- Load gt images -------------------------------- #
        # Shape: (h, w, c); channel order: BGR; image range: [0, 1], float32.
        gt_path = self.paths[index]
        # avoid errors caused by high latency in reading files
        retry = 3
        while retry > 0:
            try:
                img_bytes = self.file_client.get(gt_path, 'gt')
            except (IOError, OSError) as e:
                logger = get_root_logger()
                # Logger.warn is deprecated; use warning() instead.
                logger.warning(f'File client error: {e}, remaining retry times: {retry - 1}')
                # change another file to read
                # randint is inclusive on both ends, so the upper bound must be
                # len - 1 (previously self.__len__() could yield an IndexError).
                index = random.randint(0, self.__len__() - 1)
                gt_path = self.paths[index]
                time.sleep(1)  # sleep 1s for occasional server congestion
            else:
                break
            finally:
                retry -= 1
        img_gt = imfrombytes(img_bytes, float32=True)

        # -------------------- Do augmentation for training: flip, rotation -------------------- #
        img_gt = augment(img_gt, self.opt['use_hflip'], self.opt['use_rot'])

        # crop or pad to 400
        # TODO: 400 is hard-coded. You may change it accordingly
        h, w = img_gt.shape[0:2]
        crop_pad_size = 400
        # pad (reflect borders so padded content stays natural-looking)
        if h < crop_pad_size or w < crop_pad_size:
            pad_h = max(0, crop_pad_size - h)
            pad_w = max(0, crop_pad_size - w)
            img_gt = cv2.copyMakeBorder(img_gt, 0, pad_h, 0, pad_w, cv2.BORDER_REFLECT_101)
        # crop
        if img_gt.shape[0] > crop_pad_size or img_gt.shape[1] > crop_pad_size:
            h, w = img_gt.shape[0:2]
            # randomly choose top and left coordinates
            top = random.randint(0, h - crop_pad_size)
            left = random.randint(0, w - crop_pad_size)
            img_gt = img_gt[top:top + crop_pad_size, left:left + crop_pad_size, ...]

        # ------------------------ Generate kernels (used in the first degradation) ------------------------ #
        kernel_size = random.choice(self.kernel_range)
        if np.random.uniform() < self.opt['sinc_prob']:
            # this sinc filter setting is for kernels ranging from [7, 21]
            if kernel_size < 13:
                omega_c = np.random.uniform(np.pi / 3, np.pi)
            else:
                omega_c = np.random.uniform(np.pi / 5, np.pi)
            kernel = circular_lowpass_kernel(omega_c, kernel_size, pad_to=False)
        else:
            kernel = random_mixed_kernels(
                self.kernel_list,
                self.kernel_prob,
                kernel_size,
                self.blur_sigma,
                self.blur_sigma, [-math.pi, math.pi],
                self.betag_range,
                self.betap_range,
                noise_range=None)
        # pad kernel to the fixed 21x21 size expected by the model
        pad_size = (21 - kernel_size) // 2
        kernel = np.pad(kernel, ((pad_size, pad_size), (pad_size, pad_size)))

        # ------------------------ Generate kernels (used in the second degradation) ------------------------ #
        kernel_size = random.choice(self.kernel_range)
        if np.random.uniform() < self.opt['sinc_prob2']:
            if kernel_size < 13:
                omega_c = np.random.uniform(np.pi / 3, np.pi)
            else:
                omega_c = np.random.uniform(np.pi / 5, np.pi)
            kernel2 = circular_lowpass_kernel(omega_c, kernel_size, pad_to=False)
        else:
            kernel2 = random_mixed_kernels(
                self.kernel_list2,
                self.kernel_prob2,
                kernel_size,
                self.blur_sigma2,
                self.blur_sigma2, [-math.pi, math.pi],
                self.betag_range2,
                self.betap_range2,
                noise_range=None)

        # pad kernel to the fixed 21x21 size expected by the model
        pad_size = (21 - kernel_size) // 2
        kernel2 = np.pad(kernel2, ((pad_size, pad_size), (pad_size, pad_size)))

        # ------------------------------------- the final sinc kernel ------------------------------------- #
        if np.random.uniform() < self.opt['final_sinc_prob']:
            kernel_size = random.choice(self.kernel_range)
            omega_c = np.random.uniform(np.pi / 3, np.pi)
            sinc_kernel = circular_lowpass_kernel(omega_c, kernel_size, pad_to=21)
            sinc_kernel = torch.FloatTensor(sinc_kernel)
        else:
            # identity (pulse) kernel: applying it leaves the image unchanged
            sinc_kernel = self.pulse_tensor

        # BGR to RGB, HWC to CHW, numpy to tensor
        img_gt = img2tensor([img_gt], bgr2rgb=True, float32=True)[0]
        kernel = torch.FloatTensor(kernel)
        kernel2 = torch.FloatTensor(kernel2)

        return_d = {'gt': img_gt, 'kernel1': kernel, 'kernel2': kernel2, 'sinc_kernel': sinc_kernel, 'gt_path': gt_path}
        return return_d

    def __len__(self):
        return len(self.paths)
|
Real-ESRGAN/realesrgan/data/realesrgan_paired_dataset.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from basicsr.data.data_util import paired_paths_from_folder, paired_paths_from_lmdb
|
| 3 |
+
from basicsr.data.transforms import augment, paired_random_crop
|
| 4 |
+
from basicsr.utils import FileClient, imfrombytes, img2tensor
|
| 5 |
+
from basicsr.utils.registry import DATASET_REGISTRY
|
| 6 |
+
from torch.utils import data as data
|
| 7 |
+
from torchvision.transforms.functional import normalize
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
@DATASET_REGISTRY.register()
class RealESRGANPairedDataset(data.Dataset):
    """Paired LQ/GT image dataset for image restoration.

    Read LQ (Low Quality, e.g. LR (Low Resolution), blurry, noisy, etc) and GT image pairs.

    There are three modes:
    1. 'lmdb': Use lmdb files.
        If opt['io_backend'] == lmdb.
    2. 'meta_info': Use meta information file to generate paths.
        If opt['io_backend'] != lmdb and opt['meta_info'] is not None.
    3. 'folder': Scan folders to generate paths.
        The rest.

    Args:
        opt (dict): Config for train datasets. It contains the following keys:
            dataroot_gt (str): Data root path for gt.
            dataroot_lq (str): Data root path for lq.
            meta_info (str): Path for meta information file.
            io_backend (dict): IO backend type and other kwarg.
            filename_tmpl (str): Template for each filename. Note that the template excludes the file extension.
                Default: '{}'.
            gt_size (int): Cropped patched size for gt patches.
            use_hflip (bool): Use horizontal flips.
            use_rot (bool): Use rotation (use vertical flip and transposing h and w for implementation).
            mean (list[float], optional): Per-channel mean for normalization. Default: None (disabled).
            std (list[float], optional): Per-channel std for normalization. Default: None (disabled).
            scale (bool): Scale, which will be added automatically.
            phase (str): 'train' or 'val'.
    """

    def __init__(self, opt):
        super(RealESRGANPairedDataset, self).__init__()
        self.opt = opt
        # The file client is created lazily in __getitem__ so that it is
        # instantiated inside each dataloader worker process.
        self.file_client = None
        self.io_backend_opt = opt['io_backend']
        # mean and std for normalizing the input images (None disables normalization)
        self.mean = opt.get('mean')
        self.std = opt.get('std')

        self.gt_folder, self.lq_folder = opt['dataroot_gt'], opt['dataroot_lq']
        self.filename_tmpl = opt.get('filename_tmpl', '{}')

        # file client (lmdb io backend)
        if self.io_backend_opt['type'] == 'lmdb':
            self.io_backend_opt['db_paths'] = [self.lq_folder, self.gt_folder]
            self.io_backend_opt['client_keys'] = ['lq', 'gt']
            self.paths = paired_paths_from_lmdb([self.lq_folder, self.gt_folder], ['lq', 'gt'])
        elif self.opt.get('meta_info') is not None:
            # disk backend with meta_info
            # Each line in the meta_info file holds 'gt_relpath, lq_relpath'
            with open(self.opt['meta_info']) as fin:
                pairs = [line.strip().split(', ') for line in fin]
            self.paths = [{
                'gt_path': os.path.join(self.gt_folder, gt_path),
                'lq_path': os.path.join(self.lq_folder, lq_path)
            } for gt_path, lq_path in pairs]
        else:
            # disk backend
            # it will scan the whole folder to get meta info
            # it will be time-consuming for folders with too many files. It is recommended using an extra meta txt file
            self.paths = paired_paths_from_folder([self.lq_folder, self.gt_folder], ['lq', 'gt'], self.filename_tmpl)

    def __getitem__(self, index):
        """Return one LQ/GT pair as CHW RGB float32 tensors in [0, 1] (before optional normalization)."""
        if self.file_client is None:
            self.file_client = FileClient(self.io_backend_opt.pop('type'), **self.io_backend_opt)

        scale = self.opt['scale']

        # Load gt and lq images. Dimension order: HWC; channel order: BGR;
        # image range: [0, 1], float32.
        gt_path = self.paths[index]['gt_path']
        img_bytes = self.file_client.get(gt_path, 'gt')
        img_gt = imfrombytes(img_bytes, float32=True)
        lq_path = self.paths[index]['lq_path']
        img_bytes = self.file_client.get(lq_path, 'lq')
        img_lq = imfrombytes(img_bytes, float32=True)

        # augmentation for training
        if self.opt['phase'] == 'train':
            gt_size = self.opt['gt_size']
            # random crop
            img_gt, img_lq = paired_random_crop(img_gt, img_lq, gt_size, scale, gt_path)
            # flip, rotation
            img_gt, img_lq = augment([img_gt, img_lq], self.opt['use_hflip'], self.opt['use_rot'])

        # BGR to RGB, HWC to CHW, numpy to tensor
        img_gt, img_lq = img2tensor([img_gt, img_lq], bgr2rgb=True, float32=True)
        # normalize
        if self.mean is not None or self.std is not None:
            normalize(img_lq, self.mean, self.std, inplace=True)
            normalize(img_gt, self.mean, self.std, inplace=True)

        return {'lq': img_lq, 'gt': img_gt, 'lq_path': lq_path, 'gt_path': gt_path}

    def __len__(self):
        return len(self.paths)
|
Real-ESRGAN/realesrgan/models/__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import importlib
|
| 2 |
+
from basicsr.utils import scandir
|
| 3 |
+
from os import path as osp
|
| 4 |
+
|
| 5 |
+
# Automatically scan this package directory and import every module whose
# filename ends with '_model.py', so that the registry decorators inside
# them execute when the package is imported.
model_folder = osp.dirname(osp.abspath(__file__))
model_filenames = []
for _file in scandir(model_folder):
    if _file.endswith('_model.py'):
        model_filenames.append(osp.splitext(osp.basename(_file))[0])
# import all the model modules
_model_modules = [importlib.import_module(f'realesrgan.models.{_name}') for _name in model_filenames]
|
Real-ESRGAN/realesrgan/models/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (1.19 kB). View file
|
|
|
Real-ESRGAN/realesrgan/models/__pycache__/realesrgan_model.cpython-311.pyc
ADDED
|
Binary file (15.2 kB). View file
|
|
|
Real-ESRGAN/realesrgan/models/__pycache__/realesrnet_model.cpython-311.pyc
ADDED
|
Binary file (11.9 kB). View file
|
|
|
Real-ESRGAN/realesrgan/models/realesrgan_model.py
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import random
|
| 3 |
+
import torch
|
| 4 |
+
from basicsr.data.degradations import random_add_gaussian_noise_pt, random_add_poisson_noise_pt
|
| 5 |
+
from basicsr.data.transforms import paired_random_crop
|
| 6 |
+
from basicsr.models.srgan_model import SRGANModel
|
| 7 |
+
from basicsr.utils import DiffJPEG, USMSharp
|
| 8 |
+
from basicsr.utils.img_process_util import filter2D
|
| 9 |
+
from basicsr.utils.registry import MODEL_REGISTRY
|
| 10 |
+
from collections import OrderedDict
|
| 11 |
+
from torch.nn import functional as F
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@MODEL_REGISTRY.register()
class RealESRGANModel(SRGANModel):
    """RealESRGAN Model for Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data.

    It mainly performs:
    1. randomly synthesize LQ images in GPU tensors
    2. optimize the networks with GAN training.
    """

    def __init__(self, opt):
        """Initialize the model.

        Args:
            opt (dict): Full training configuration. Besides the keys consumed by
                SRGANModel, this class reads the degradation options (blur, resize,
                noise and JPEG ranges for both stages) and the optional
                'queue_size' (default: 180).
        """
        super(RealESRGANModel, self).__init__(opt)
        self.jpeger = DiffJPEG(differentiable=False).cuda()  # simulate JPEG compression artifacts
        self.usm_sharpener = USMSharp().cuda()  # do usm sharpening
        self.queue_size = opt.get('queue_size', 180)

    @torch.no_grad()
    def _dequeue_and_enqueue(self):
        """It is the training pair pool for increasing the diversity in a batch.

        Batch processing limits the diversity of synthetic degradations in a batch. For example, samples in a
        batch could not have different resize scaling factors. Therefore, we employ this training pair pool
        to increase the degradation diversity in a batch.

        Note: the queue size must be divisible by the batch size (asserted below),
        and self.lq / self.gt are replaced in place when the pool is full.
        """
        # initialize
        b, c, h, w = self.lq.size()
        if not hasattr(self, 'queue_lr'):
            assert self.queue_size % b == 0, f'queue size {self.queue_size} should be divisible by batch size {b}'
            self.queue_lr = torch.zeros(self.queue_size, c, h, w).cuda()
            _, c, h, w = self.gt.size()
            self.queue_gt = torch.zeros(self.queue_size, c, h, w).cuda()
            self.queue_ptr = 0
        if self.queue_ptr == self.queue_size:  # the pool is full
            # do dequeue and enqueue
            # shuffle
            idx = torch.randperm(self.queue_size)
            self.queue_lr = self.queue_lr[idx]
            self.queue_gt = self.queue_gt[idx]
            # get first b samples
            lq_dequeue = self.queue_lr[0:b, :, :, :].clone()
            gt_dequeue = self.queue_gt[0:b, :, :, :].clone()
            # update the queue
            self.queue_lr[0:b, :, :, :] = self.lq.clone()
            self.queue_gt[0:b, :, :, :] = self.gt.clone()

            self.lq = lq_dequeue
            self.gt = gt_dequeue
        else:
            # only do enqueue
            self.queue_lr[self.queue_ptr:self.queue_ptr + b, :, :, :] = self.lq.clone()
            self.queue_gt[self.queue_ptr:self.queue_ptr + b, :, :, :] = self.gt.clone()
            self.queue_ptr = self.queue_ptr + b

    @torch.no_grad()
    def feed_data(self, data):
        """Accept data from dataloader, and then add two-order degradations to obtain LQ images.

        During training (with 'high_order_degradation' enabled) the LQ batch is
        synthesized on the GPU from the GT batch via two degradation stages
        (blur -> random resize -> noise -> JPEG), followed by a final
        [resize back + sinc filter] and JPEG step applied in random order.
        Otherwise the 'lq' (and optional 'gt') tensors are used directly.

        Args:
            data (dict): Expects keys 'gt', 'kernel1', 'kernel2', 'sinc_kernel'
                on the synthesis path; 'lq' (and optionally 'gt') otherwise.
        """
        if self.is_train and self.opt.get('high_order_degradation', True):
            # training data synthesis
            self.gt = data['gt'].to(self.device)
            self.gt_usm = self.usm_sharpener(self.gt)

            self.kernel1 = data['kernel1'].to(self.device)
            self.kernel2 = data['kernel2'].to(self.device)
            self.sinc_kernel = data['sinc_kernel'].to(self.device)

            ori_h, ori_w = self.gt.size()[2:4]

            # ----------------------- The first degradation process ----------------------- #
            # blur
            out = filter2D(self.gt_usm, self.kernel1)
            # random resize
            updown_type = random.choices(['up', 'down', 'keep'], self.opt['resize_prob'])[0]
            if updown_type == 'up':
                scale = np.random.uniform(1, self.opt['resize_range'][1])
            elif updown_type == 'down':
                scale = np.random.uniform(self.opt['resize_range'][0], 1)
            else:
                scale = 1
            mode = random.choice(['area', 'bilinear', 'bicubic'])
            out = F.interpolate(out, scale_factor=scale, mode=mode)
            # add noise (gaussian or poisson, chosen at random)
            gray_noise_prob = self.opt['gray_noise_prob']
            if np.random.uniform() < self.opt['gaussian_noise_prob']:
                out = random_add_gaussian_noise_pt(
                    out, sigma_range=self.opt['noise_range'], clip=True, rounds=False, gray_prob=gray_noise_prob)
            else:
                out = random_add_poisson_noise_pt(
                    out,
                    scale_range=self.opt['poisson_scale_range'],
                    gray_prob=gray_noise_prob,
                    clip=True,
                    rounds=False)
            # JPEG compression
            jpeg_p = out.new_zeros(out.size(0)).uniform_(*self.opt['jpeg_range'])
            out = torch.clamp(out, 0, 1)  # clamp to [0, 1], otherwise JPEGer will result in unpleasant artifacts
            out = self.jpeger(out, quality=jpeg_p)

            # ----------------------- The second degradation process ----------------------- #
            # blur
            if np.random.uniform() < self.opt['second_blur_prob']:
                out = filter2D(out, self.kernel2)
            # random resize
            updown_type = random.choices(['up', 'down', 'keep'], self.opt['resize_prob2'])[0]
            if updown_type == 'up':
                scale = np.random.uniform(1, self.opt['resize_range2'][1])
            elif updown_type == 'down':
                scale = np.random.uniform(self.opt['resize_range2'][0], 1)
            else:
                scale = 1
            mode = random.choice(['area', 'bilinear', 'bicubic'])
            out = F.interpolate(
                out, size=(int(ori_h / self.opt['scale'] * scale), int(ori_w / self.opt['scale'] * scale)), mode=mode)
            # add noise
            gray_noise_prob = self.opt['gray_noise_prob2']
            if np.random.uniform() < self.opt['gaussian_noise_prob2']:
                out = random_add_gaussian_noise_pt(
                    out, sigma_range=self.opt['noise_range2'], clip=True, rounds=False, gray_prob=gray_noise_prob)
            else:
                out = random_add_poisson_noise_pt(
                    out,
                    scale_range=self.opt['poisson_scale_range2'],
                    gray_prob=gray_noise_prob,
                    clip=True,
                    rounds=False)

            # JPEG compression + the final sinc filter
            # We also need to resize images to desired sizes. We group [resize back + sinc filter] together
            # as one operation.
            # We consider two orders:
            #   1. [resize back + sinc filter] + JPEG compression
            #   2. JPEG compression + [resize back + sinc filter]
            # Empirically, we find other combinations (sinc + JPEG + Resize) will introduce twisted lines.
            if np.random.uniform() < 0.5:
                # resize back + the final sinc filter
                mode = random.choice(['area', 'bilinear', 'bicubic'])
                out = F.interpolate(out, size=(ori_h // self.opt['scale'], ori_w // self.opt['scale']), mode=mode)
                out = filter2D(out, self.sinc_kernel)
                # JPEG compression
                jpeg_p = out.new_zeros(out.size(0)).uniform_(*self.opt['jpeg_range2'])
                out = torch.clamp(out, 0, 1)
                out = self.jpeger(out, quality=jpeg_p)
            else:
                # JPEG compression
                jpeg_p = out.new_zeros(out.size(0)).uniform_(*self.opt['jpeg_range2'])
                out = torch.clamp(out, 0, 1)
                out = self.jpeger(out, quality=jpeg_p)
                # resize back + the final sinc filter
                mode = random.choice(['area', 'bilinear', 'bicubic'])
                out = F.interpolate(out, size=(ori_h // self.opt['scale'], ori_w // self.opt['scale']), mode=mode)
                out = filter2D(out, self.sinc_kernel)

            # clamp and round (quantize to 8-bit levels like a saved image would be)
            self.lq = torch.clamp((out * 255.0).round(), 0, 255) / 255.

            # random crop
            gt_size = self.opt['gt_size']
            (self.gt, self.gt_usm), self.lq = paired_random_crop([self.gt, self.gt_usm], self.lq, gt_size,
                                                                 self.opt['scale'])

            # training pair pool
            self._dequeue_and_enqueue()
            # sharpen self.gt again, as we have changed the self.gt with self._dequeue_and_enqueue
            self.gt_usm = self.usm_sharpener(self.gt)
            self.lq = self.lq.contiguous()  # for the warning: grad and param do not obey the gradient layout contract
        else:
            # for paired training or validation
            self.lq = data['lq'].to(self.device)
            if 'gt' in data:
                self.gt = data['gt'].to(self.device)
                self.gt_usm = self.usm_sharpener(self.gt)

    def nondist_validation(self, dataloader, current_iter, tb_logger, save_img):
        # Temporarily clear is_train so feed_data skips the synthetic degradation
        # pipeline during validation, then restore it.
        self.is_train = False
        super(RealESRGANModel, self).nondist_validation(dataloader, current_iter, tb_logger, save_img)
        self.is_train = True

    def optimize_parameters(self, current_iter):
        """Run one optimization step for the generator and the discriminator.

        The USM-sharpened GT is used as the target by default; the pixel,
        perceptual and GAN losses can each opt back to the plain GT via the
        'l1_gt_usm', 'percep_gt_usm' and 'gan_gt_usm' options.
        """
        # usm sharpening
        l1_gt = self.gt_usm
        percep_gt = self.gt_usm
        gan_gt = self.gt_usm
        if self.opt['l1_gt_usm'] is False:
            l1_gt = self.gt
        if self.opt['percep_gt_usm'] is False:
            percep_gt = self.gt
        if self.opt['gan_gt_usm'] is False:
            gan_gt = self.gt

        # optimize net_g (freeze the discriminator while updating the generator)
        for p in self.net_d.parameters():
            p.requires_grad = False

        self.optimizer_g.zero_grad()
        self.output = self.net_g(self.lq)

        l_g_total = 0
        loss_dict = OrderedDict()
        # the generator is updated only every net_d_iters iterations, after the
        # discriminator warm-up period
        if (current_iter % self.net_d_iters == 0 and current_iter > self.net_d_init_iters):
            # pixel loss
            if self.cri_pix:
                l_g_pix = self.cri_pix(self.output, l1_gt)
                l_g_total += l_g_pix
                loss_dict['l_g_pix'] = l_g_pix
            # perceptual loss
            if self.cri_perceptual:
                l_g_percep, l_g_style = self.cri_perceptual(self.output, percep_gt)
                if l_g_percep is not None:
                    l_g_total += l_g_percep
                    loss_dict['l_g_percep'] = l_g_percep
                if l_g_style is not None:
                    l_g_total += l_g_style
                    loss_dict['l_g_style'] = l_g_style
            # gan loss
            fake_g_pred = self.net_d(self.output)
            l_g_gan = self.cri_gan(fake_g_pred, True, is_disc=False)
            l_g_total += l_g_gan
            loss_dict['l_g_gan'] = l_g_gan

            l_g_total.backward()
            self.optimizer_g.step()

        # optimize net_d
        for p in self.net_d.parameters():
            p.requires_grad = True

        self.optimizer_d.zero_grad()
        # real
        real_d_pred = self.net_d(gan_gt)
        l_d_real = self.cri_gan(real_d_pred, True, is_disc=True)
        loss_dict['l_d_real'] = l_d_real
        loss_dict['out_d_real'] = torch.mean(real_d_pred.detach())
        l_d_real.backward()
        # fake
        fake_d_pred = self.net_d(self.output.detach().clone())  # clone for pt1.9
        l_d_fake = self.cri_gan(fake_d_pred, False, is_disc=True)
        loss_dict['l_d_fake'] = l_d_fake
        loss_dict['out_d_fake'] = torch.mean(fake_d_pred.detach())
        l_d_fake.backward()
        self.optimizer_d.step()

        if self.ema_decay > 0:
            self.model_ema(decay=self.ema_decay)

        self.log_dict = self.reduce_loss_dict(loss_dict)
|
Real-ESRGAN/realesrgan/models/realesrnet_model.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import random
|
| 3 |
+
import torch
|
| 4 |
+
from basicsr.data.degradations import random_add_gaussian_noise_pt, random_add_poisson_noise_pt
|
| 5 |
+
from basicsr.data.transforms import paired_random_crop
|
| 6 |
+
from basicsr.models.sr_model import SRModel
|
| 7 |
+
from basicsr.utils import DiffJPEG, USMSharp
|
| 8 |
+
from basicsr.utils.img_process_util import filter2D
|
| 9 |
+
from basicsr.utils.registry import MODEL_REGISTRY
|
| 10 |
+
from torch.nn import functional as F
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@MODEL_REGISTRY.register()
class RealESRNetModel(SRModel):
    """RealESRNet Model for Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data.

    It is trained without GAN losses.
    It mainly performs:
    1. randomly synthesize LQ images in GPU tensors
    2. optimize the network with the losses of SRModel (no GAN training).
    """

    def __init__(self, opt):
        """Initialize the model.

        Args:
            opt (dict): Full training configuration. Besides the keys consumed by
                SRModel, this class reads the degradation options (blur, resize,
                noise and JPEG ranges for both stages), 'gt_usm', and the
                optional 'queue_size' (default: 180).
        """
        super(RealESRNetModel, self).__init__(opt)
        self.jpeger = DiffJPEG(differentiable=False).cuda()  # simulate JPEG compression artifacts
        self.usm_sharpener = USMSharp().cuda()  # do usm sharpening
        self.queue_size = opt.get('queue_size', 180)

    @torch.no_grad()
    def _dequeue_and_enqueue(self):
        """It is the training pair pool for increasing the diversity in a batch.

        Batch processing limits the diversity of synthetic degradations in a batch. For example, samples in a
        batch could not have different resize scaling factors. Therefore, we employ this training pair pool
        to increase the degradation diversity in a batch.

        Note: the queue size must be divisible by the batch size (asserted below),
        and self.lq / self.gt are replaced in place when the pool is full.
        """
        # initialize
        b, c, h, w = self.lq.size()
        if not hasattr(self, 'queue_lr'):
            assert self.queue_size % b == 0, f'queue size {self.queue_size} should be divisible by batch size {b}'
            self.queue_lr = torch.zeros(self.queue_size, c, h, w).cuda()
            _, c, h, w = self.gt.size()
            self.queue_gt = torch.zeros(self.queue_size, c, h, w).cuda()
            self.queue_ptr = 0
        if self.queue_ptr == self.queue_size:  # the pool is full
            # do dequeue and enqueue
            # shuffle
            idx = torch.randperm(self.queue_size)
            self.queue_lr = self.queue_lr[idx]
            self.queue_gt = self.queue_gt[idx]
            # get first b samples
            lq_dequeue = self.queue_lr[0:b, :, :, :].clone()
            gt_dequeue = self.queue_gt[0:b, :, :, :].clone()
            # update the queue
            self.queue_lr[0:b, :, :, :] = self.lq.clone()
            self.queue_gt[0:b, :, :, :] = self.gt.clone()

            self.lq = lq_dequeue
            self.gt = gt_dequeue
        else:
            # only do enqueue
            self.queue_lr[self.queue_ptr:self.queue_ptr + b, :, :, :] = self.lq.clone()
            self.queue_gt[self.queue_ptr:self.queue_ptr + b, :, :, :] = self.gt.clone()
            self.queue_ptr = self.queue_ptr + b

    @torch.no_grad()
    def feed_data(self, data):
        """Accept data from dataloader, and then add two-order degradations to obtain LQ images.

        During training (with 'high_order_degradation' enabled) the LQ batch is
        synthesized on the GPU from the GT batch via two degradation stages
        (blur -> random resize -> noise -> JPEG), followed by a final
        [resize back + sinc filter] and JPEG step applied in random order.
        Otherwise the 'lq' (and optional 'gt') tensors are used directly.

        Args:
            data (dict): Expects keys 'gt', 'kernel1', 'kernel2', 'sinc_kernel'
                on the synthesis path; 'lq' (and optionally 'gt') otherwise.
        """
        if self.is_train and self.opt.get('high_order_degradation', True):
            # training data synthesis
            self.gt = data['gt'].to(self.device)
            # USM sharpen the GT images
            if self.opt['gt_usm'] is True:
                self.gt = self.usm_sharpener(self.gt)

            self.kernel1 = data['kernel1'].to(self.device)
            self.kernel2 = data['kernel2'].to(self.device)
            self.sinc_kernel = data['sinc_kernel'].to(self.device)

            ori_h, ori_w = self.gt.size()[2:4]

            # ----------------------- The first degradation process ----------------------- #
            # blur
            out = filter2D(self.gt, self.kernel1)
            # random resize
            updown_type = random.choices(['up', 'down', 'keep'], self.opt['resize_prob'])[0]
            if updown_type == 'up':
                scale = np.random.uniform(1, self.opt['resize_range'][1])
            elif updown_type == 'down':
                scale = np.random.uniform(self.opt['resize_range'][0], 1)
            else:
                scale = 1
            mode = random.choice(['area', 'bilinear', 'bicubic'])
            out = F.interpolate(out, scale_factor=scale, mode=mode)
            # add noise (gaussian or poisson, chosen at random)
            gray_noise_prob = self.opt['gray_noise_prob']
            if np.random.uniform() < self.opt['gaussian_noise_prob']:
                out = random_add_gaussian_noise_pt(
                    out, sigma_range=self.opt['noise_range'], clip=True, rounds=False, gray_prob=gray_noise_prob)
            else:
                out = random_add_poisson_noise_pt(
                    out,
                    scale_range=self.opt['poisson_scale_range'],
                    gray_prob=gray_noise_prob,
                    clip=True,
                    rounds=False)
            # JPEG compression
            jpeg_p = out.new_zeros(out.size(0)).uniform_(*self.opt['jpeg_range'])
            out = torch.clamp(out, 0, 1)  # clamp to [0, 1], otherwise JPEGer will result in unpleasant artifacts
            out = self.jpeger(out, quality=jpeg_p)

            # ----------------------- The second degradation process ----------------------- #
            # blur
            if np.random.uniform() < self.opt['second_blur_prob']:
                out = filter2D(out, self.kernel2)
            # random resize
            updown_type = random.choices(['up', 'down', 'keep'], self.opt['resize_prob2'])[0]
            if updown_type == 'up':
                scale = np.random.uniform(1, self.opt['resize_range2'][1])
            elif updown_type == 'down':
                scale = np.random.uniform(self.opt['resize_range2'][0], 1)
            else:
                scale = 1
            mode = random.choice(['area', 'bilinear', 'bicubic'])
            out = F.interpolate(
                out, size=(int(ori_h / self.opt['scale'] * scale), int(ori_w / self.opt['scale'] * scale)), mode=mode)
            # add noise
            gray_noise_prob = self.opt['gray_noise_prob2']
            if np.random.uniform() < self.opt['gaussian_noise_prob2']:
                out = random_add_gaussian_noise_pt(
                    out, sigma_range=self.opt['noise_range2'], clip=True, rounds=False, gray_prob=gray_noise_prob)
            else:
                out = random_add_poisson_noise_pt(
                    out,
                    scale_range=self.opt['poisson_scale_range2'],
                    gray_prob=gray_noise_prob,
                    clip=True,
                    rounds=False)

            # JPEG compression + the final sinc filter
            # We also need to resize images to desired sizes. We group [resize back + sinc filter] together
            # as one operation.
            # We consider two orders:
            #   1. [resize back + sinc filter] + JPEG compression
            #   2. JPEG compression + [resize back + sinc filter]
            # Empirically, we find other combinations (sinc + JPEG + Resize) will introduce twisted lines.
            if np.random.uniform() < 0.5:
                # resize back + the final sinc filter
                mode = random.choice(['area', 'bilinear', 'bicubic'])
                out = F.interpolate(out, size=(ori_h // self.opt['scale'], ori_w // self.opt['scale']), mode=mode)
                out = filter2D(out, self.sinc_kernel)
                # JPEG compression
                jpeg_p = out.new_zeros(out.size(0)).uniform_(*self.opt['jpeg_range2'])
                out = torch.clamp(out, 0, 1)
                out = self.jpeger(out, quality=jpeg_p)
            else:
                # JPEG compression
                jpeg_p = out.new_zeros(out.size(0)).uniform_(*self.opt['jpeg_range2'])
                out = torch.clamp(out, 0, 1)
                out = self.jpeger(out, quality=jpeg_p)
                # resize back + the final sinc filter
                mode = random.choice(['area', 'bilinear', 'bicubic'])
                out = F.interpolate(out, size=(ori_h // self.opt['scale'], ori_w // self.opt['scale']), mode=mode)
                out = filter2D(out, self.sinc_kernel)

            # clamp and round (quantize to 8-bit levels like a saved image would be)
            self.lq = torch.clamp((out * 255.0).round(), 0, 255) / 255.

            # random crop
            gt_size = self.opt['gt_size']
            self.gt, self.lq = paired_random_crop(self.gt, self.lq, gt_size, self.opt['scale'])

            # training pair pool
            self._dequeue_and_enqueue()
            self.lq = self.lq.contiguous()  # for the warning: grad and param do not obey the gradient layout contract
        else:
            # for paired training or validation
            self.lq = data['lq'].to(self.device)
            if 'gt' in data:
                self.gt = data['gt'].to(self.device)
                # NOTE(review): gt_usm is not read anywhere else in this class —
                # presumably kept for interface parity with RealESRGANModel; confirm.
                self.gt_usm = self.usm_sharpener(self.gt)

    def nondist_validation(self, dataloader, current_iter, tb_logger, save_img):
        # Temporarily clear is_train so feed_data skips the synthetic degradation
        # pipeline during validation, then restore it.
        self.is_train = False
        super(RealESRNetModel, self).nondist_validation(dataloader, current_iter, tb_logger, save_img)
        self.is_train = True
|