umzi committed on
Commit
4f763cc
·
verified ·
1 Parent(s): ea4a206

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ figs/FIDSR.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+ #poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ #pdm.lock
116
+ #pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ #pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # SageMath parsed files
135
+ *.sage.py
136
+
137
+ # Environments
138
+ .env
139
+ .envrc
140
+ .venv
141
+ env/
142
+ venv/
143
+ ENV/
144
+ env.bak/
145
+ venv.bak/
146
+
147
+ # Spyder project settings
148
+ .spyderproject
149
+ .spyproject
150
+
151
+ # Rope project settings
152
+ .ropeproject
153
+
154
+ # mkdocs documentation
155
+ /site
156
+
157
+ # mypy
158
+ .mypy_cache/
159
+ .dmypy.json
160
+ dmypy.json
161
+
162
+ # Pyre type checker
163
+ .pyre/
164
+
165
+ # pytype static type analyzer
166
+ .pytype/
167
+
168
+ # Cython debug symbols
169
+ cython_debug/
170
+
171
+ # PyCharm
172
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
175
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176
+ #.idea/
177
+
178
+ # Abstra
179
+ # Abstra is an AI-powered process automation framework.
180
+ # Ignore directories containing user credentials, local state, and settings.
181
+ # Learn more at https://abstra.io/docs
182
+ .abstra/
183
+
184
+ # Visual Studio Code
185
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
188
+ # you could uncomment the following to ignore the entire vscode folder
189
+ # .vscode/
190
+
191
+ # Ruff stuff:
192
+ .ruff_cache/
193
+
194
+ # PyPI configuration file
195
+ .pypirc
196
+
197
+ # Cursor
198
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
200
+ # refer to https://docs.cursor.com/context/ignore-files
201
+ .cursorignore
202
+ .cursorindexingignore
203
+
204
+ # Marimo
205
+ marimo/_static/
206
+ marimo/_lsp/
207
+ __marimo__/
.idea/.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
.idea/FIGSR3.iml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="jdk" jdkName="Python 3.13 virtualenv at /run/media/umzi/H/resseltrr/.venv" jdkType="Python SDK" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ </module>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="Black">
4
+ <option name="sdkName" value="Python 3.13 virtualenv at /run/media/umzi/H/resseltrr/.venv" />
5
+ </component>
6
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13 virtualenv at /run/media/umzi/H/resseltrr/.venv" project-jdk-type="Python SDK" />
7
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/FIGSR3.iml" filepath="$PROJECT_DIR$/.idea/FIGSR3.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="" vcs="Git" />
5
+ </component>
6
+ </project>
.idea/workspace.xml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ChangeListManager">
4
+ <list default="true" id="bd19aad7-30f1-41f2-8970-657e29732baf" name="Changes" comment="" />
5
+ <option name="SHOW_DIALOG" value="false" />
6
+ <option name="HIGHLIGHT_CONFLICTS" value="true" />
7
+ <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
8
+ <option name="LAST_RESOLUTION" value="IGNORE" />
9
+ </component>
10
+ <component name="Git.Settings">
11
+ <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
12
+ </component>
13
+ <component name="ProjectColorInfo"><![CDATA[{
14
+ "associatedIndex": 2
15
+ }]]></component>
16
+ <component name="ProjectId" id="39WMPB6Zm6p9HESSvY2vaN0KQUO" />
17
+ <component name="ProjectViewState">
18
+ <option name="hideEmptyMiddlePackages" value="true" />
19
+ <option name="showLibraryContents" value="true" />
20
+ </component>
21
+ <component name="PropertiesComponent"><![CDATA[{
22
+ "keyToString": {
23
+ "RunOnceActivity.ShowReadmeOnStart": "true",
24
+ "RunOnceActivity.git.unshallow": "true",
25
+ "git-widget-placeholder": "main",
26
+ "last_opened_file_path": "/run/media/umzi/H/FIGSR3/weights"
27
+ }
28
+ }]]></component>
29
+ <component name="RecentsManager">
30
+ <key name="CopyFile.RECENT_KEYS">
31
+ <recent name="$PROJECT_DIR$/weights" />
32
+ <recent name="$PROJECT_DIR$" />
33
+ </key>
34
+ <key name="MoveFile.RECENT_KEYS">
35
+ <recent name="$PROJECT_DIR$/weights" />
36
+ </key>
37
+ </component>
38
+ <component name="SharedIndexes">
39
+ <attachedChunks>
40
+ <set>
41
+ <option value="bundled-python-sdk-164cda30dcd9-0af03a5fa574-com.jetbrains.pycharm.pro.sharedIndexes.bundled-PY-252.26830.99" />
42
+ </set>
43
+ </attachedChunks>
44
+ </component>
45
+ <component name="TaskManager">
46
+ <task active="true" id="Default" summary="Default task">
47
+ <changelist id="bd19aad7-30f1-41f2-8970-657e29732baf" name="Changes" comment="" />
48
+ <created>1770807346896</created>
49
+ <option name="number" value="Default" />
50
+ <option name="presentableId" value="Default" />
51
+ <updated>1770807346896</updated>
52
+ </task>
53
+ <servers />
54
+ </component>
55
+ </project>
README.md CHANGED
@@ -1,3 +1,109 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Fourier Inception Gated Super Resolution
2
+
3
+ The main idea of the model is to integrate the [FourierUnit](https://github.com/deng-ai-lab/SFHformer/blob/1f7994112b9ced9153edc7187e320e0383a9dfd3/models/SFHformer.py#L143) into the [GatedCNN](https://github.com/yuweihao/MambaOut/blob/main/models/mambaout.py#L119) pipeline in order to strengthen the model’s global perception with minimal computational overhead.
4
+
5
+ The FourierUnit adds feature processing in the frequency domain, expanding the effective receptive field, while the GatedCNN provides efficient local modeling and control of information flow through a gating mechanism. Their combination allows merging global context and computational efficiency within a compact SISR architecture.
6
+
7
+ ---
8
+ # TODO:
9
+ + [ ] Fix trt inference
10
+ ---
11
+ ## Showcase:
12
+ [show pics](https://slow.pics/s/fPvcS3P0?image-fit=contain)
13
+
14
+ [gdrive](https://drive.google.com/drive/u/1/folders/1ofJo5CCgrOtLdVm9psmlJv15Z3aP4Aiz)
15
+
16
+ ---
17
+ ## Model structure:
18
+
19
+ ### figsr
20
+
21
+ <img src="figs/figsr.png" width="600"/>
22
+
23
+ ### GDB FU
24
+
25
+ <img src="figs/gdb_and_FU.png" width="600"/>
26
+
27
+ ---
28
+
29
+ ### Main blocks and their changes relative to the originals:
30
+
31
+ * [GatedCNN](https://github.com/yuweihao/MambaOut/blob/main/models/mambaout.py#L119) — borrowed from the [MambaOut](https://github.com/yuweihao/MambaOut/blob/main/models/mambaout.py#L119) repository with the following changes:
32
+
33
+ * `Linear` replaced with `Conv` to avoid unnecessary `permute` operations;
34
+ * one of the linear layers replaced with a `Conv 3×3`, which improves quality without a significant increase in computational cost;
35
+ * `LayerNorm` replaced with `RMSNorm` for speed and greater stability;
36
+ * `DConv` replaced with `InceptionConv`.
37
+
38
+ * [InceptionConv](https://huggingface.co/enhancr-dev/figsr/blob/main/figsr_arch.py#L627) — a modified version of the block from [InceptionNeXt](https://github.com/sail-sg/inceptionnext/blob/main/models/inceptionnext.py#L19):
39
+
40
+ * `DConv` replaced with standard convolutions;
41
+ * kernel sizes increased following the findings of [PLKSR](https://github.com/dslisleedh/PLKSR);
42
+ * the shortcut replaced with `FourierUnit`, which improves convergence because a residual connection is already present inside `GatedCNN`.
43
+
44
+ * [FourierUnit](https://huggingface.co/enhancr-dev/figsr/blob/main/figsr_arch.py#L585) — a modified version of the block from [SFHformer](https://github.com/deng-ai-lab/SFHformer/blob/1f7994112b9ced9153edc7187e320e0383a9dfd3/models/SFHformer.py#L143):
45
+
46
+ * `BatchNorm` replaced with `RMSNorm`, which works better with the small batch sizes typical for SISR;
47
+ * structural changes made for correct export to ONNX;
48
+ * post-normalization added, since without it training instability and `NaN` values were observed in the context of `GatedCNN`.
49
+
50
+ ---
51
+
52
+ ## Metrics:
53
+ * Metrics were computed using [PyIQA](https://github.com/chaofengc/IQA-PyTorch/tree/main), except for those starting with “bs”, which were calculated using BasicSR.
54
+ ### [Esrgan DF2K](https://drive.google.com/file/d/1mSJ6Z40weL-dnPvi390xDd3uZBCFMeqr/view?usp=sharing):
55
+ | Dataset | SSIM-Y | PSNR-Y | TOPIQ | bs_ssim_y | bs_psnr_y |
56
+ | ------------- | ------ | ------ | ------ | --------- | --------- |
57
+ | BHI100 | 0.7150 | 22.84 | 0.5694 | 0.7279 | 24.1636 |
58
+ | psisrd_val125 | 0.7881 | 27.01 | 0.6043 | 0.8034 | 28.3273 |
59
+ | set14 | 0.7730 | 27.67 | 0.6905 | 0.7915 | 28.9969 |
60
+ | urban100 | 0.8025 | 25.71 | 0.6701 | 0.8152 | 27.0282 |
61
+ ### [FIGSR BHI](https://huggingface.co/enhancr-dev/figsr/blob/main/weight/v1.0.0):
62
+ | Dataset | SSIM-Y | PSNR-Y | TOPIQ | bs_ssim_y | bs_psnr_y |
63
+ | ------------- | ------ | ------ | ------ | --------- | --------- |
64
+ | BHI100 | 0.7196 | 22.83 | 0.5723 | 0.7327 | 24.1549 |
65
+ | psisrd_val125 | 0.7911 | 26.97 | 0.6095 | 0.8065 | 28.2946 |
66
+ | set14 | 0.7769 | 27.70 | 0.7036 | 0.7952 | 29.0221 |
67
+ | urban100 | 0.8056 | 25.80 | 0.6725 | 0.8185 | 27.1170 |
68
+
69
+ ---
70
+
71
+ ## Performance 3060 12gb:
72
+ | Model | input_size | params ↓ | avg_inference ↓ | fps ↑ | memory_use ↓ |
73
+ |--------| ---------- | -------- |-----------------| ------------------ | ------------ |
74
+ | ESRGAN | 1024x1024 | ~16.6m | ~2.8s | 0.3483220866736526 | 8.29GB |
75
+ | FIGSR | 1024x1024 | ~4.4m | ~1.64s | 0.6081749253740837 | 2.26GB |
76
+
77
+ ## Training
78
+
79
+ To train, choose one of the frameworks and place the model file in the `archs` folder:
80
+
81
+ * **[NeoSR](https://github.com/neosr-project/neosr)** — `figsr_arch.py` → `neosr/archs/figsr_arch.py`. [Config](configs/neosr.toml)
82
+
83
+ * Uncomment lines [14–17](https://huggingface.co/enhancr-dev/figsr/blob/main/figsr_arch.py#L14-L17), [694](https://huggingface.co/enhancr-dev/figsr/blob/main/figsr_arch.py#L694) and [705](https://huggingface.co/enhancr-dev/figsr/blob/main/figsr_arch.py#L705).
84
+ * Comment out line [703](https://huggingface.co/enhancr-dev/figsr/blob/main/figsr_arch.py#L703).
85
+
86
+ * **[traiNNer-redux](https://github.com/the-database/traiNNer-redux)** — `figsr_arch.py` → `traiNNer/archs/figsr_arch.py`. [Config](configs/trainner-redux.yml)
87
+
88
+ * Uncomment lines [11](https://huggingface.co/enhancr-dev/figsr/blob/main/figsr_arch.py#L11) and [694](https://huggingface.co/enhancr-dev/figsr/blob/main/figsr_arch.py#L694).
89
+
90
+ * **[BasicSR](https://github.com/XPixelGroup/BasicSR/tree/master/basicsr/archs)** — `figsr_arch.py` → `basicsr/archs/figsr_arch.py`. [Config](configs/basicsr.yml)
91
+
92
+ * Uncomment lines [19](https://huggingface.co/enhancr-dev/figsr/blob/main/figsr_arch.py#L19) and [694](https://huggingface.co/enhancr-dev/figsr/blob/main/figsr_arch.py#L694).
93
+
94
+ ---
95
+
96
+ ## Inference:
97
+ ### Resselt install
98
+ ```shell
99
+ uv venv --python=3.12
100
+ source .venv/bin/activate
101
+ uv pip install "resselt==1.3.1" "pepeline==1.2.3"
102
+ ```
103
+ ### main.py
104
+ ```shell
105
+ python main.py --input_dir urban/x4 --output_dir urban/x4_scale --weights 4x_FIGSR.safetensors
106
+ ```
107
+ ---
108
+ ## Contacts:
109
+ [discord](https://discord.gg/xwZfWWMwBq)
configs/basicsr.yml ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # general settings
2
+ name: 4x_figsr
3
+ model_type: SRModel
4
+ scale: 4
5
+ num_gpu: 1 # set num_gpu: 0 for cpu mode
6
+ manual_seed: 1024
7
+
8
+ # dataset and data loader settings
9
+ datasets:
10
+ train:
11
+ name: BHI
12
+ type: PairedImageDataset
13
+ dataroot_gt: datasets/BHI
14
+ dataroot_lq: datasets/BHI_lq
15
+
16
+ filename_tmpl: '{}'
17
+ io_backend:
18
+ type: disk
19
+
20
+ gt_size: 256
21
+ use_hflip: true
22
+ use_rot: true
23
+
24
+ # data loader
25
+ num_worker_per_gpu: 6
26
+ batch_size_per_gpu: 64
27
+ dataset_enlarge_ratio: 1
28
+ prefetch_mode: ~
29
+
30
+ val:
31
+ name: Set5
32
+ type: PairedImageDataset
33
+ dataroot_gt: datasets/Set5/GTmod12
34
+ dataroot_lq: datasets/Set5/LRbicx4
35
+ io_backend:
36
+ type: disk
37
+
38
+ # network structures
39
+ network_g:
40
+ type: FIGSR
41
+
42
+ # path
43
+ path:
44
+ # pretrain_network_g: ""
45
+ strict_load_g: false
46
+ resume_state: ~
47
+
48
+ # training settings
49
+ train:
50
+ ema_decay: 0.999
51
+ optim_g:
52
+ type: Adam
53
+ lr: !!float 5e-4
54
+ weight_decay: 0
55
+ betas: [0.9, 0.99]
56
+
57
+ scheduler:
58
+ type: MultiStepLR
59
+ milestones: [200000,400000,600000,800000]
60
+ gamma: 0.5
61
+
62
+ total_iter: 1000000
63
+ warmup_iter: -1 # no warm up
64
+
65
+ # losses
66
+ pixel_opt:
67
+ type: CharbonnierLoss
68
+ loss_weight: 1.0
69
+ reduction: mean
70
+
71
+ # validation settings
72
+ val:
73
+ val_freq: !!float 5e3
74
+ save_img: true
75
+
76
+ metrics:
77
+ psnr: # metric name, can be arbitrary
78
+ type: calculate_psnr
79
+ crop_border: 4
80
+ test_y_channel: false
81
+
82
+ # logging settings
83
+ logger:
84
+ print_freq: 100
85
+ save_checkpoint_freq: !!float 5e3
86
+ use_tb_logger: true
87
+ wandb:
88
+ project: ~
89
+ resume_id: ~
90
+
91
+ # dist training settings
92
+ dist_params:
93
+ backend: nccl
94
+ port: 29500
configs/neosr.toml ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ name = "4x_figsr"
3
+ model_type = "image"
4
+ scale = 4
5
+ use_amp = true
6
+ bfloat16 = false
7
+ fast_matmul = false
8
+ #compile = true
9
+ manual_seed = 1024
10
+
11
+ [datasets.train]
12
+ type = "paired"
13
+ dataroot_gt = 'datasets/BHI'
14
+ dataroot_lq = 'datasets/BHI_lq'
15
+ patch_size = 64
16
+ batch_size = 64
17
+ #accumulate = 1
18
+
19
+ [datasets.val]
20
+ name = "val"
21
+ type = "paired"
22
+ dataroot_gt = 'C:\datasets\val\gt\'
23
+ dataroot_lq = 'C:\datasets\val\lq\'
24
+ [val]
25
+ val_freq = 5000
26
+ #tile = 200
27
+ [val.metrics.psnr]
28
+ type = "calculate_psnr"
29
+ [val.metrics.ssim]
30
+ type = "calculate_ssim"
31
+ #[val.metrics.dists]
32
+ #type = "calculate_dists"
33
+ #better = "lower"
34
+ #[val.metrics.topiq]
35
+ #type = "calculate_topiq"
36
+
37
+ [path]
38
+ #pretrain_network_g = 'experiments\pretrain_g.pth'
39
+ #pretrain_network_d = 'experiments\pretrain_d.pth'
40
+
41
+ [network_g]
42
+ type = "FIGSR"
43
+
44
+
45
+ [train]
46
+ grad_clip = false
47
+ ema = 0.999
48
+ wavelet_guided = false
49
+ #wavelet_init = 80000
50
+ #sam = "fsam"
51
+ #sam_init = 1000
52
+ #eco = true
53
+ #eco_init = 15000
54
+ #match_lq_colors = true
55
+
56
+ [train.optim_g]
57
+ type = "adamw"
58
+ lr = 5e-4
59
+ betas = [0.9, 0.99]
60
+ weight_decay = 0.01
61
+
62
+ [train.scheduler]
63
+ type = "multisteplr"
64
+ milestones = [200000,400000,600000,800000]
65
+ gamma = 0.5
66
+
67
+
68
+ # losses
69
+ [train.mssim_opt]
70
+ type = "huber_loss"
71
+ loss_weight = 1.0
72
+
73
+ [logger]
74
+ total_iter = 1000000
75
+ save_checkpoint_freq = 5000
76
+ use_tb_logger = true
77
+ #save_tb_img = true
78
+ #print_freq = 100
configs/trainner-redux.yml ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # yaml-language-server: $schema=https://raw.githubusercontent.com/the-database/traiNNer-redux/refs/heads/master/schemas/redux-config.schema.json
2
+ #########################################################################################
3
+ # General Settings
4
+ # https://trainner-redux.readthedocs.io/en/latest/config_reference.html#top-level-options
5
+ #########################################################################################
6
+ name: 4x_figsr
7
+ scale: 4 # 1, 2, 3, 4, 8
8
+ use_amp: true # Speed up training and reduce VRAM usage. NVIDIA only.
9
+ amp_bf16: false # Use bf16 instead of fp16 for AMP, RTX 3000 series or newer only. Only recommended if fp16 doesn't work.
10
+ use_channels_last: true # Enable channels last memory format while using AMP. Reduces VRAM and speeds up training for most architectures, but some architectures are slower with channels last.
11
+ fast_matmul: false # Trade precision for performance.
12
+ use_compile: false # Enable torch.compile for generator. Takes time on startup to compile the model, but can speed up training after the model is compiled.
13
+ compile_mode: default # Mode to use with torch.compile. See https://docs.pytorch.org/docs/stable/generated/torch.compile.html for more info.
14
+ num_gpu: auto
15
+ # manual_seed: 1024 # Random seed for training, useful for removing randomness when testing the effect of different settings.
16
+
17
+
18
+ ########################################################################################################################
19
+ # Dataset and Dataloader Settings
20
+ # https://trainner-redux.readthedocs.io/en/latest/config_reference.html#dataset-options-datasets-train-and-datasets-val
21
+ ########################################################################################################################
22
+ datasets:
23
+ # Settings for the training dataset.
24
+ train:
25
+ name: Train Dataset
26
+ type: pairedimagedataset
27
+ # Path to the HR (high res) images in your training dataset. Specify one or multiple folders, separated by commas.
28
+ dataroot_gt: [datasets/BHI]
29
+ dataroot_lq: [datasets/BHI_lq]
30
+ # meta_info: data/meta_info/dataset1.txt
31
+
32
+
33
+ lq_size: 64 # During training, a square of this size is cropped from LR images. Larger is usually better but uses more VRAM. Previously gt_size, use lq_size = gt_size / scale to convert. Use multiple of 8 for best performance with AMP.
34
+ use_hflip: true # Randomly flip the images horizontally.
35
+ use_rot: true # Randomly rotate the images.
36
+
37
+ num_worker_per_gpu: 8
38
+ batch_size_per_gpu: 64 # recommended: 64 # Increasing stabilizes training but with diminishing returns. Use multiple of 8 for best performance with AMP.
39
+ accum_iter: 1 # Using values larger than 1 simulates higher batch size by trading performance for reduced VRAM usage. If accum_iter = 4 and batch_size_per_gpu = 6 then effective batch size = 4 * 6 = 24 but performance may be as much as 4 times as slow.
40
+ # Settings for your validation dataset (optional). These settings will
41
+ # be ignored if val_enabled is false in the Validation section below.
42
+ val:
43
+ name: Val Dataset
44
+ type: pairedimagedataset
45
+ dataroot_gt: [
46
+ datasets/val/dataset1/hr,
47
+ datasets/val/dataset1/hr2,
48
+ ]
49
+ dataroot_lq: [
50
+ datasets/val/dataset1/lr,
51
+ datasets/val/dataset1/lr2
52
+ ]
53
+
54
+ #####################################################################
55
+ # Network Settings
56
+ # https://trainner-redux.readthedocs.io/en/latest/arch_reference.html
57
+ #####################################################################
58
+ # Generator model settings
59
+ network_g:
60
+ type: FIGSR
61
+
62
+ #########################################################################################
63
+ # Pretrain and Resume Paths
64
+ # https://trainner-redux.readthedocs.io/en/latest/config_reference.html#path-options-path
65
+ #########################################################################################
66
+ path:
67
+ # pretrain_network_g: experiments/pretrained_models/pretrain.pth
68
+ param_key_g: ~
69
+ strict_load_g: true # Disable strict loading to partially load a pretrain model with a different scale
70
+ resume_state: ~
71
+
72
+ ###########################################################################################
73
+ # Training Settings
74
+ # https://trainner-redux.readthedocs.io/en/latest/config_reference.html#train-options-train
75
+ ###########################################################################################
76
+ train:
77
+ ema_decay: 0.999
78
+ ema_power: 0.75 # Gradually warm up ema decay when training from scratch
79
+ grad_clip: false # Gradient clipping allows more stable training when using higher learning rates.
80
+ # Optimizer for generator model
81
+ optim_g:
82
+ type: AdamW
83
+ lr: !!float 5e-4
84
+ weight_decay: 0
85
+ betas: [0.9, 0.99]
86
+
87
+ scheduler:
88
+ type: MultiStepLR
89
+ milestones: [200000, 400000, 600000, 800000]
90
+ gamma: 0.5
91
+
92
+ total_iter: 1000000 # Total number of iterations.
93
+ warmup_iter: -1 # Gradually ramp up learning rates until this iteration, to stabilize early training. Use -1 to disable.
94
+
95
+ # Losses - for any loss set the loss_weight to 0 to disable it.
96
+ # https://trainner-redux.readthedocs.io/en/latest/loss_reference.html
97
+ losses:
98
+ # Charbonnier loss
99
+ - type: charbonnierloss
100
+ loss_weight: 1.0
101
+
102
+ ##############################################################################################
103
+ # Validation
104
+ # https://trainner-redux.readthedocs.io/en/latest/config_reference.html#validation-options-val
105
+ ##############################################################################################
106
+ val:
107
+ val_enabled: true # Whether to enable validations. If disabled, all validation settings below are ignored.
108
+ val_freq: 5000 # How often to run validations, in iterations.
109
+ save_img: true # Whether to save the validation images during validation, in the experiments/<name>/visualization folder.
110
+ tile_size: 0 # Tile size of input, reduce VRAM usage but slower inference. 0 to disable.
111
+ tile_overlap: 8 # Number of pixels to overlap tiles by, larger is slower but reduces tile seams.
112
+
113
+ metrics_enabled: true # Whether to run metrics calculations during validation.
114
+ metrics:
115
+ psnr:
116
+ type: calculate_psnr
117
+ crop_border: 4
118
+ test_y_channel: true
119
+ ssim:
120
+ type: calculate_ssim
121
+ crop_border: 4 # Whether to crop border during validation.
122
+ test_y_channel: true # Whether to convert to Y(CbCr) for validation.
123
+ #topiq:
124
+ #type: calculate_topiq
125
+ #lpips:
126
+ #type: calculate_lpips
127
+ #better: lower
128
+ #dists:
129
+ #type: calculate_dists
130
+ #better: lower
131
+
132
+ ##############################################################################################
133
+ # Logging
134
+ # https://trainner-redux.readthedocs.io/en/latest/config_reference.html#logging-options-logger
135
+ ##############################################################################################
136
+ logger:
137
+ print_freq: 100
138
+ save_checkpoint_freq: 5000
139
+ save_checkpoint_format: safetensors
140
+ use_tb_logger: true
figs/FIDSR.png ADDED

Git LFS Details

  • SHA256: 4312b4443e8848f950a36c65521dcecaade497247c0351ef60d5335f75aecc46
  • Pointer size: 132 Bytes
  • Size of remote file: 1.5 MB
figs/gdb_and_FU.png ADDED
figsr_arch.py ADDED
@@ -0,0 +1,769 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ from typing import Literal
3
+
4
+ import numpy as np
5
+ import torch
6
+ import torch.nn.functional as F
7
+ from einops import rearrange
8
+ from torch import Tensor, nn
9
+
10
+ # trainner-redux https://github.com/the-database/traiNNer-redux
11
+ # from traiNNer.utils.registry import ARCH_REGISTRY
12
+
13
+ # neosr https://github.com/neosr-project/neosr/tree/master
14
+ # from neosr.archs.arch_util import net_opt
15
+ # from neosr.utils.registry import ARCH_REGISTRY
16
+ #
17
+ # upscale, __ = net_opt()
18
+ # basic sr https://github.com/XPixelGroup/BasicSR/tree/master
19
+ # from basicsr.utils.registry import ARCH_REGISTRY
20
# Closed set of upsampler-variant names. Presumably consumed as the
# `upsampler`/sampler selection argument elsewhere in this architecture file —
# the consuming code is outside this chunk, so confirm against the full file.
SampleMods = Literal[
    "conv",
    "pixelshuffledirect",
    "pixelshuffle",
    "nearest+conv",
    "dysample",
    "transpose+conv",
    "lda",
    "pa_up",
]
30
+
31
+
32
def ICNR(tensor, initializer, upscale_factor=2, *args, **kwargs):
    """ICNR-style weight construction for sub-pixel (PixelShuffle) convs.

    A reduced kernel with ``tensor.shape[0] // upscale_factor**2`` output
    channels is initialised with ``initializer`` and then each of its
    kernels is replicated ``upscale_factor**2`` times consecutively along
    dim 0, so channel groups feeding one upsampled pixel start identical.

    Args:
        tensor: template weight tensor; only its shape is used. Its first
            dimension must be divisible by ``upscale_factor**2``.
        initializer: in-place init function applied to the reduced kernel
            (e.g. ``nn.init.kaiming_normal_``); ``*args``/``**kwargs`` are
            forwarded to it.
        upscale_factor: the PixelShuffle ratio ``r``.

    Returns:
        A freshly allocated tensor with ``tensor``'s shape holding the
        replicated weights.
    """
    factor_sq = upscale_factor * upscale_factor
    out_channels = tensor.shape[0]
    assert out_channels % factor_sq == 0, (
        "The size of the first dimension: "
        f"tensor.shape[0] = {out_channels}"
        " is not divisible by square of upscale_factor: "
        f"upscale_factor = {upscale_factor}"
    )
    # Build and initialise the reduced kernel (1/r^2 of the output channels).
    reduced = torch.empty(out_channels // factor_sq, *tensor.shape[1:])
    reduced = initializer(reduced, *args, **kwargs)
    # Expand each reduced kernel into r^2 consecutive identical copies.
    return reduced.repeat_interleave(factor_sq, dim=0)
45
+
46
+
47
class DySample(nn.Module):
    """Adapted from 'Learning to Upsample by Learning to Sample':
    https://arxiv.org/abs/2308.15085
    https://github.com/tiny-smart/dysample

    Content-aware upsampler: predicts per-pixel sampling offsets, gated by
    a learned "scope", and resamples the input with grid_sample.
    """

    def __init__(
        self,
        in_channels: int = 64,
        out_ch: int = 3,
        scale: int = 2,
        groups: int = 4,
        end_convolution: bool = True,
        end_kernel=1,
    ) -> None:
        super().__init__()

        if in_channels <= groups or in_channels % groups != 0:
            msg = "Incorrect in_channels and groups values."
            raise ValueError(msg)

        # Two offset coordinates per group per upsampled sub-pixel.
        out_channels = 2 * groups * scale**2
        self.scale = scale
        self.groups = groups
        self.end_convolution = end_convolution
        if end_convolution:
            self.end_conv = nn.Conv2d(
                in_channels, out_ch, end_kernel, 1, end_kernel // 2
            )
        self.offset = nn.Conv2d(in_channels, out_channels, 1)
        self.scope = nn.Conv2d(in_channels, out_channels, 1, bias=False)
        # A freshly constructed module is in training mode, so this runs at
        # construction; the guard only skips re-init for eval-mode rebuilds.
        if self.training:
            nn.init.trunc_normal_(self.offset.weight, std=0.02)
            nn.init.constant_(self.scope.weight, val=0)

        self.register_buffer("init_pos", self._init_pos())

    def _init_pos(self) -> Tensor:
        """Static sub-pixel offsets of the scale x scale output grid,
        repeated per group, shaped (1, 2*groups*scale^2, 1, 1)."""
        h = torch.arange((-self.scale + 1) / 2, (self.scale - 1) / 2 + 1) / self.scale
        return (
            torch.stack(torch.meshgrid([h, h], indexing="ij"))
            .transpose(1, 2)
            .repeat(1, self.groups, 1)
            .reshape(1, -1, 1, 1)
        )

    def forward(self, x: Tensor) -> Tensor:
        # Predicted offsets, modulated by a sigmoid "scope" gate, around the
        # static sub-pixel positions.
        offset = self.offset(x) * self.scope(x).sigmoid() * 0.5 + self.init_pos
        B, _, H, W = offset.shape
        offset = offset.view(B, 2, -1, H, W)
        # Pixel-center coordinates of the input grid.
        coords_h = torch.arange(H) + 0.5
        coords_w = torch.arange(W) + 0.5

        coords = (
            torch.stack(torch.meshgrid([coords_w, coords_h], indexing="ij"))
            .transpose(1, 2)
            .unsqueeze(1)
            .unsqueeze(0)
            .type(x.dtype)
            .to(x.device, non_blocking=True)
        )
        # FIX: removed pin_memory=True here. Pinned memory is valid only for
        # CPU tensors; torch.tensor(..., device=cuda, pin_memory=True) raises
        # a RuntimeError, and pinning a 2-element tensor has no benefit.
        normalizer = torch.tensor([W, H], dtype=x.dtype, device=x.device).view(
            1, 2, 1, 1, 1
        )
        # Normalize sampling positions to grid_sample's [-1, 1] range.
        coords = 2 * (coords + offset) / normalizer - 1

        coords = (
            F.pixel_shuffle(coords.reshape(B, -1, H, W), self.scale)
            .view(B, 2, -1, self.scale * H, self.scale * W)
            .permute(0, 2, 3, 4, 1)
            .contiguous()
            .flatten(0, 1)
        )
        output = F.grid_sample(
            x.reshape(B * self.groups, -1, H, W),
            coords,
            mode="bilinear",
            align_corners=False,
            padding_mode="border",
        ).view(B, -1, self.scale * H, self.scale * W)

        if self.end_convolution:
            output = self.end_conv(output)

        return output
132
+
133
+
134
class LayerNorm(nn.Module):
    """LayerNorm over the channel dimension of NCHW feature maps.

    Takes the fused F.layer_norm fast path when the input is stored in
    channels_last memory format; otherwise normalizes dim 1 manually.
    """

    def __init__(self, dim: int = 64, eps: float = 1e-6) -> None:
        super().__init__()
        self.weight = nn.Parameter(torch.ones(dim))
        self.bias = nn.Parameter(torch.zeros(dim))
        self.eps = eps
        self.dim = (dim,)

    def forward(self, x):
        if x.is_contiguous(memory_format=torch.channels_last):
            # NHWC view lets the fused kernel normalize the last axis.
            nhwc = x.permute(0, 2, 3, 1)
            normed = F.layer_norm(nhwc, self.dim, self.weight, self.bias, self.eps)
            return normed.permute(0, 3, 1, 2)
        mean = x.mean(1, keepdim=True)
        var = (x - mean).pow(2).mean(1, keepdim=True)
        normed = (x - mean) / torch.sqrt(var + self.eps)
        return self.weight[:, None, None] * normed + self.bias[:, None, None]
151
+
152
+
153
class LDA_AQU(nn.Module):
    """Local Deformable Attention upsampler (LDA-AQU).

    Each output pixel attends over a k_u x k_u local neighborhood whose
    sampling locations are shifted by learned, content-dependent offsets.
    Queries come from a bilinearly upsampled projection of the input;
    keys/values are gathered with grid_sample at the deformed positions.
    """

    def __init__(
        self,
        in_channels=48,
        reduction_factor=4,
        nh=1,
        scale_factor=2.0,
        k_e=3,
        k_u=3,
        n_groups=2,
        range_factor=11,
        rpb=True,
    ) -> None:
        super().__init__()
        self.k_u = k_u
        self.num_head = nh
        self.scale_factor = scale_factor
        self.n_groups = n_groups
        self.offset_range_factor = range_factor

        self.attn_dim = in_channels // (reduction_factor * self.num_head)
        self.scale = self.attn_dim**-0.5  # attention logit scaling, 1/sqrt(d)
        self.rpb = rpb  # whether keys get a learned relative position bias
        self.hidden_dim = in_channels // reduction_factor
        self.proj_q = nn.Conv2d(
            in_channels, self.hidden_dim, kernel_size=1, stride=1, padding=0, bias=False
        )

        self.proj_k = nn.Conv2d(
            in_channels, self.hidden_dim, kernel_size=1, stride=1, padding=0, bias=False
        )

        self.group_channel = in_channels // (reduction_factor * self.n_groups)
        # Offset head: depthwise 3x3 -> LayerNorm -> SiLU -> k_e x k_e conv
        # predicting 2 (y, x) offsets for each of the k_u^2 neighbors.
        self.conv_offset = nn.Sequential(
            nn.Conv2d(
                self.group_channel,
                self.group_channel,
                3,
                1,
                1,
                groups=self.group_channel,
                bias=False,
            ),
            LayerNorm(self.group_channel),
            nn.SiLU(),
            nn.Conv2d(self.group_channel, 2 * k_u**2, k_e, 1, k_e // 2),
        )
        # FIX: removed leftover debug `print(2 * k_u**2)`.
        self.layer_norm = LayerNorm(in_channels)

        # Base (undeformed) k_u x k_u neighborhood offsets, (y, x) interleaved.
        self.pad = int((self.k_u - 1) / 2)
        base = np.arange(-self.pad, self.pad + 1).astype(np.float32)
        base_y = np.repeat(base, self.k_u)
        base_x = np.tile(base, self.k_u)
        base_offset = np.stack([base_y, base_x], axis=1).flatten()
        base_offset = torch.tensor(base_offset).view(1, -1, 1, 1)
        self.register_buffer("base_offset", base_offset, persistent=False)

        if self.rpb:
            self.relative_position_bias_table = nn.Parameter(
                torch.zeros(
                    1, self.num_head, 1, self.k_u**2, self.hidden_dim // self.num_head
                )
            )
            nn.init.trunc_normal_(self.relative_position_bias_table, std=0.02)

    def init_weights(self) -> None:
        """Re-initialize convs (Xavier) and zero the offset predictor."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # FIX: was `nn.init.xavier_uniform(m)`, which passed the
                # module instead of its weight tensor (raising at call time)
                # and used the deprecated non-inplace name.
                nn.init.xavier_uniform_(m.weight)
            elif isinstance(m, nn.LayerNorm):
                nn.init.constant_(m.bias, 0)
                nn.init.constant_(m.weight, 1.0)
        # Zero-init offsets so sampling starts at the undeformed grid.
        nn.init.constant_(self.conv_offset[-1].weight, 0)
        nn.init.constant_(self.conv_offset[-1].bias, 0)

    def get_offset(self, offset, Hout, Wout):
        """Turn per-pixel (y, x) offsets into a normalized grid_sample grid
        of shape (B, k_u*Hout, k_u*Wout, 2) in [-1, 1], (x, y) order."""
        B, _, _, _ = offset.shape
        device = offset.device
        row_indices = torch.arange(Hout, device=device)
        col_indices = torch.arange(Wout, device=device)
        # indexing="ij" is the historical default; stated explicitly to keep
        # behavior identical while silencing the deprecation warning.
        row_indices, col_indices = torch.meshgrid(
            row_indices, col_indices, indexing="ij"
        )
        index_tensor = torch.stack((row_indices, col_indices), dim=-1).view(
            1, Hout, Wout, 2
        )
        offset = rearrange(
            offset, "b (kh kw d) h w -> b kh h kw w d", kh=self.k_u, kw=self.k_u
        )
        # Add absolute output-grid positions to the relative offsets.
        offset = offset + index_tensor.view(1, 1, Hout, 1, Wout, 2)
        offset = offset.contiguous().view(B, self.k_u * Hout, self.k_u * Wout, 2)

        # Normalize to [-1, 1] and flip (y, x) -> (x, y) for grid_sample.
        offset[..., 0] = 2 * offset[..., 0] / (Hout - 1) - 1
        offset[..., 1] = 2 * offset[..., 1] / (Wout - 1) - 1
        offset = offset.flip(-1)
        return offset

    def extract_feats(self, x, offset, ks=3):
        """Sample ks*ks neighborhood features at the deformed locations,
        returning (b, ks*ks, c, h, w)."""
        out = nn.functional.grid_sample(
            x, offset, mode="bilinear", padding_mode="zeros", align_corners=True
        )
        out = rearrange(out, "b c (ksh h) (ksw w) -> b (ksh ksw) c h w", ksh=ks, ksw=ks)
        return out

    def forward(self, x):
        B, C, H, W = x.shape
        out_H, out_W = int(H * self.scale_factor), int(W * self.scale_factor)
        v = x  # values are the raw (pre-norm) features
        x = self.layer_norm(x)
        q = self.proj_q(x)
        k = self.proj_k(x)

        # Queries live on the output-resolution grid.
        q = torch.nn.functional.interpolate(
            q, (out_H, out_W), mode="bilinear", align_corners=True
        )
        q_off = q.view(B * self.n_groups, -1, out_H, out_W)
        pred_offset = self.conv_offset(q_off)
        # tanh bounds learned offsets to +/- offset_range_factor pixels.
        offset = pred_offset.tanh().mul(self.offset_range_factor) + self.base_offset.to(
            x.dtype
        )

        k = k.view(B * self.n_groups, self.hidden_dim // self.n_groups, H, W)
        v = v.view(B * self.n_groups, C // self.n_groups, H, W)
        offset = self.get_offset(offset, out_H, out_W)
        # FIX: pass ks=self.k_u; the grid is built with k_u but extract_feats
        # previously always used its hard-coded default of 3, which broke any
        # configuration with k_u != 3. Identical behavior for k_u == 3.
        k = self.extract_feats(k, offset=offset, ks=self.k_u)
        v = self.extract_feats(v, offset=offset, ks=self.k_u)

        q = rearrange(q, "b (nh c) h w -> b nh (h w) () c", nh=self.num_head)
        k = rearrange(k, "(b g) n c h w -> b (h w) n (g c)", g=self.n_groups)
        v = rearrange(v, "(b g) n c h w -> b (h w) n (g c)", g=self.n_groups)
        k = rearrange(k, "b n1 n (nh c) -> b nh n1 n c", nh=self.num_head)
        v = rearrange(v, "b n1 n (nh c) -> b nh n1 n c", nh=self.num_head)

        if self.rpb:
            k = k + self.relative_position_bias_table

        q = q * self.scale
        attn = q @ k.transpose(-1, -2)
        attn = attn.softmax(dim=-1)
        out = attn @ v

        out = rearrange(out, "b nh (h w) t c -> b (nh c) (t h) w", h=out_H)
        return out
296
+
297
+
298
class PA(nn.Module):
    """Pixel attention: gates the input by a sigmoid of a 1x1 conv of itself."""

    def __init__(self, dim) -> None:
        super().__init__()
        self.conv = nn.Sequential(nn.Conv2d(dim, dim, 1), nn.Sigmoid())

    def forward(self, x):
        gate = self.conv(x)
        return x * gate
305
+
306
+
307
class UniUpsampleV3(nn.Sequential):
    """Unified upsampling head.

    Builds one of the SampleMods variants as a Sequential pipeline taking
    in_dim channels to out_dim channels at the given integer scale. A uint8
    ``MetaUpsample`` buffer records the configuration inside the state dict
    so external loaders/converters can identify how the head was built.

    NOTE(review): the metadata is stored as uint8, so in_dim/mid_dim values
    above 255 would wrap — confirm against the loaders that consume it.
    """

    def __init__(
        self,
        upsample: SampleMods = "pa_up",
        scale: int = 2,
        in_dim: int = 48,
        out_dim: int = 3,
        mid_dim: int = 48,
        group: int = 4,  # Only DySample
        dysample_end_kernel=1,  # needed only for compatibility with version 2
    ) -> None:
        # Layers are collected in `m` and handed to nn.Sequential at the end.
        m = []

        if scale == 1 or upsample == "conv":
            # No spatial upsampling: plain 3x3 projection.
            m.append(nn.Conv2d(in_dim, out_dim, 3, 1, 1))
        elif upsample == "pixelshuffledirect":
            # Single conv straight to out_dim * scale^2, then one shuffle.
            m.extend(
                [nn.Conv2d(in_dim, out_dim * scale**2, 3, 1, 1), nn.PixelShuffle(scale)]
            )
        elif upsample == "pixelshuffle":
            # Project to mid_dim, then shuffle up in x2 (or one x3) stages.
            m.extend([nn.Conv2d(in_dim, mid_dim, 3, 1, 1), nn.LeakyReLU(inplace=True)])
            if (scale & (scale - 1)) == 0:  # scale = 2^n
                for _ in range(int(math.log2(scale))):
                    m.extend(
                        [nn.Conv2d(mid_dim, 4 * mid_dim, 3, 1, 1), nn.PixelShuffle(2)]
                    )
            elif scale == 3:
                m.extend([nn.Conv2d(mid_dim, 9 * mid_dim, 3, 1, 1), nn.PixelShuffle(3)])
            else:
                raise ValueError(
                    f"scale {scale} is not supported. Supported scales: 2^n and 3."
                )
            m.append(nn.Conv2d(mid_dim, out_dim, 3, 1, 1))
        elif upsample == "nearest+conv":
            # Nearest-neighbor upsampling interleaved with convs.
            if (scale & (scale - 1)) == 0:
                for _ in range(int(math.log2(scale))):
                    m.extend(
                        (
                            nn.Conv2d(in_dim, in_dim, 3, 1, 1),
                            nn.Upsample(scale_factor=2),
                            nn.LeakyReLU(negative_slope=0.2, inplace=True),
                        )
                    )
                m.extend(
                    (
                        nn.Conv2d(in_dim, in_dim, 3, 1, 1),
                        nn.LeakyReLU(negative_slope=0.2, inplace=True),
                    )
                )
            elif scale == 3:
                m.extend(
                    (
                        nn.Conv2d(in_dim, in_dim, 3, 1, 1),
                        nn.Upsample(scale_factor=scale),
                        nn.LeakyReLU(negative_slope=0.2, inplace=True),
                        nn.Conv2d(in_dim, in_dim, 3, 1, 1),
                        nn.LeakyReLU(negative_slope=0.2, inplace=True),
                    )
                )
            else:
                raise ValueError(
                    f"scale {scale} is not supported. Supported scales: 2^n and 3."
                )
            m.append(nn.Conv2d(in_dim, out_dim, 3, 1, 1))
        elif upsample == "dysample":
            # Optional projection so DySample runs at mid_dim width.
            if mid_dim != in_dim:
                m.extend(
                    [nn.Conv2d(in_dim, mid_dim, 3, 1, 1), nn.LeakyReLU(inplace=True)]
                )
            m.append(
                DySample(mid_dim, out_dim, scale, group, end_kernel=dysample_end_kernel)
            )
            # m.append(nn.Conv2d(mid_dim, out_dim, dysample_end_kernel, 1, dysample_end_kernel//2)) # kernel 1 causes chromatic artifacts
        elif upsample == "transpose+conv":
            # Transposed convolutions; only scales 2, 3, 4 are expressible.
            if scale == 2:
                m.append(nn.ConvTranspose2d(in_dim, out_dim, 4, 2, 1))
            elif scale == 3:
                m.append(nn.ConvTranspose2d(in_dim, out_dim, 3, 3, 0))
            elif scale == 4:
                m.extend(
                    [
                        nn.ConvTranspose2d(in_dim, in_dim, 4, 2, 1),
                        nn.GELU(),
                        nn.ConvTranspose2d(in_dim, out_dim, 4, 2, 1),
                    ]
                )
            else:
                raise ValueError(
                    f"scale {scale} is not supported. Supported scales: 2, 3, 4"
                )
            # Final 3x3 smoothing conv at the output width.
            m.append(nn.Conv2d(out_dim, out_dim, 3, 1, 1))
        elif upsample == "lda":
            if mid_dim != in_dim:
                m.extend(
                    [nn.Conv2d(in_dim, mid_dim, 3, 1, 1), nn.LeakyReLU(inplace=True)]
                )
            m.append(LDA_AQU(mid_dim, scale_factor=scale))
            m.append(nn.Conv2d(mid_dim, out_dim, 3, 1, 1))
        elif upsample == "pa_up":
            # Upsample + conv + pixel-attention stages.
            if (scale & (scale - 1)) == 0:
                for _ in range(int(math.log2(scale))):
                    m.extend(
                        [
                            nn.Upsample(scale_factor=2),
                            nn.Conv2d(in_dim, mid_dim, 3, 1, 1),
                            PA(mid_dim),
                            nn.LeakyReLU(negative_slope=0.2, inplace=True),
                            nn.Conv2d(mid_dim, mid_dim, 3, 1, 1),
                            nn.LeakyReLU(negative_slope=0.2, inplace=True),
                        ]
                    )
                    # After the first stage, subsequent stages read mid_dim.
                    in_dim = mid_dim
            elif scale == 3:
                m.extend(
                    [
                        nn.Upsample(scale_factor=3),
                        nn.Conv2d(in_dim, mid_dim, 3, 1, 1),
                        PA(mid_dim),
                        nn.LeakyReLU(negative_slope=0.2, inplace=True),
                        nn.Conv2d(mid_dim, mid_dim, 3, 1, 1),
                        nn.LeakyReLU(negative_slope=0.2, inplace=True),
                    ]
                )
            else:
                raise ValueError(
                    f"scale {scale} is not supported. Supported scales: 2^n and 3."
                )
            m.append(nn.Conv2d(mid_dim, out_dim, 3, 1, 1))
        else:
            raise ValueError(
                f"An invalid Upsample was selected. Please choose one of {SampleMods}"
            )
        super().__init__(*m)

        # Configuration metadata carried in the state dict (not a weight).
        self.register_buffer(
            "MetaUpsample",
            torch.tensor(
                [
                    3,  # Block version, if you change something, please number from the end so that you can distinguish between authorized changes and third parties
                    list(SampleMods.__args__).index(upsample),  # UpSample method index
                    scale,
                    in_dim,
                    out_dim,
                    mid_dim,
                    group,
                ],
                dtype=torch.uint8,
            ),
        )
456
+
457
+
458
class RMSNorm(nn.Module):
    """RMS normalization over the channel dim of NCHW tensors.

    ``eps`` and the 1/sqrt(dim) factor are stored as frozen parameters so
    they travel with the state dict and device moves.
    """

    def __init__(self, dim: int, eps: float = 1e-6) -> None:
        super().__init__()
        self.scale = nn.Parameter(torch.ones(dim))
        self.offset = nn.Parameter(torch.zeros(dim))
        self.eps = nn.Parameter(torch.Tensor(torch.ones(1) * eps), requires_grad=False)
        self.rms = nn.Parameter(
            torch.Tensor(torch.ones(1) * (dim**-0.5)), requires_grad=False
        )

    def forward(self, x: Tensor) -> Tensor:
        # Denominator: eps + ||x||_2 * dim**-0.5 per spatial position.
        l2 = x.norm(2, dim=1, keepdim=True)
        norm_x = self.eps + l2 * self.rms
        normalized = x / norm_x
        # Affine: offset + normalized * scale, broadcast over H and W.
        return self.offset[:, None, None] + normalized * self.scale[:, None, None]
473
+
474
+
475
class CustomRFFT2(torch.autograd.Function):
    """Forward-only rfft2 with a custom ONNX ``symbolic`` lowering.

    forward: orthonormal 2-D real FFT over dims (2, 3), returned as a real
    tensor with a trailing size-2 (real, imag) axis.
    symbolic: equivalent graph built from ONNX ``DFT`` ops — a onesided DFT
    along W followed by a full DFT along H, each divided by sqrt(N) to
    reproduce norm="ortho". No backward is defined (inference/export only).
    """

    @staticmethod
    def forward(ctx, x: torch.Tensor):
        y = torch.fft.rfft2(x, dim=(2, 3), norm="ortho")
        # view_as_real appends a size-2 axis holding (real, imag).
        return torch.view_as_real(y)

    @staticmethod
    def symbolic(g, x: torch.Value):
        # H and W are taken dynamically from the input shape.
        shp = g.op("Shape", x)
        iH = g.op("Constant", value_t=torch.tensor([2], dtype=torch.int64))
        iW = g.op("Constant", value_t=torch.tensor([3], dtype=torch.int64))
        nH = g.op("Gather", shp, iH, axis_i=0)
        nW = g.op("Gather", shp, iW, axis_i=0)

        # Append an explicit zero imaginary channel: [..., (re, im=0)].
        axes_last = g.op("Constant", value_t=torch.tensor([4], dtype=torch.int64))
        x_u = g.op("Unsqueeze", x, axes_last)
        zero = g.op("Sub", x_u, x_u)  # zeros with x's dtype and shape
        x_c = g.op("Concat", x_u, zero, axis_i=4)

        Hf = g.op("Cast", nH, to_i=torch.onnx.TensorProtoDataType.FLOAT)
        Wf = g.op("Cast", nW, to_i=torch.onnx.TensorProtoDataType.FLOAT)

        # Onesided DFT along W, scaled by 1/sqrt(W) for "ortho".
        y = g.op("DFT", x_c, nW, axis_i=3, onesided_i=1)
        y = g.op("Div", y, g.op("Sqrt", Wf))

        # Full DFT along H, scaled by 1/sqrt(H).
        y = g.op("DFT", y, nH, axis_i=2, onesided_i=0)
        y = g.op("Div", y, g.op("Sqrt", Hf))

        return y
504
+
505
+
506
class CustomIRFFT2(torch.autograd.Function):
    """Forward-only irfft2 with a custom ONNX ``symbolic`` lowering.

    forward: inverse of CustomRFFT2 — interprets the trailing size-2 axis
    as (real, imag) and applies orthonormal irfft2 over dims (2, 3).
    symbolic: inverse DFT along H, Hermitian mirroring to rebuild the full
    W spectrum from the onesided half, inverse DFT along W, then the real
    channel is sliced out. No backward is defined (inference/export only).
    """

    @staticmethod
    def forward(ctx, x_ri: torch.Tensor):
        x_c = torch.view_as_complex(x_ri)
        return torch.fft.irfft2(x_c, dim=(2, 3), norm="ortho")

    @staticmethod
    def symbolic(g, x: torch.Value):
        # nWr is the onesided width (W//2 + 1); full width W = (nWr - 1) * 2.
        shp = g.op("Shape", x)
        iH = g.op("Constant", value_t=torch.tensor([2], dtype=torch.int64))
        iWr = g.op("Constant", value_t=torch.tensor([3], dtype=torch.int64))
        nH = g.op("Gather", shp, iH, axis_i=0)
        nWr = g.op("Gather", shp, iWr, axis_i=0)

        one = g.op("Constant", value_t=torch.tensor(1, dtype=torch.int64))
        two = g.op("Constant", value_t=torch.tensor(2, dtype=torch.int64))
        nW = g.op("Mul", g.op("Sub", nWr, one), two)
        Hf = g.op("Cast", nH, to_i=torch.onnx.TensorProtoDataType.FLOAT)
        Wf = g.op("Cast", nW, to_i=torch.onnx.TensorProtoDataType.FLOAT)

        # Inverse DFT along H; multiply by sqrt(H) to undo "ortho" scaling.
        yH = g.op("DFT", x, nH, axis_i=2, inverse_i=1, onesided_i=0)
        yH = g.op("Mul", yH, g.op("Sqrt", Hf))

        # Indices nWr-2 .. 1 select the interior bins to mirror.
        start = g.op("Sub", nWr, two)
        start = g.op(
            "Squeeze",
            start,
            g.op("Constant", value_t=torch.tensor([0], dtype=torch.int64)),
        )
        limit = g.op("Constant", value_t=torch.tensor(0, dtype=torch.int64))
        step = g.op("Constant", value_t=torch.tensor(-1, dtype=torch.int64))
        idx_r = g.op("Range", start, limit, step)

        # Hermitian symmetry: mirrored bins are complex conjugates, so the
        # imaginary part is negated via the (1, -1) mask on the last axis.
        mirW = g.op("Gather", yH, idx_r, axis_i=3)
        maskW = g.op("Constant", value_t=torch.tensor([1.0, -1.0], dtype=torch.float32))
        maskW = g.op(
            "Unsqueeze",
            maskW,
            g.op("Constant", value_t=torch.tensor([0, 1, 2, 3], dtype=torch.int64)),
        )
        mirWc = g.op("Mul", mirW, maskW)
        x_full = g.op("Concat", yH, mirWc, axis_i=3)

        # Inverse DFT along the reconstructed full W axis.
        y = g.op("DFT", x_full, nW, axis_i=3, inverse_i=1, onesided_i=0)
        y = g.op("Mul", y, g.op("Sqrt", Wf))

        # Keep only the real component and drop the trailing axis.
        s0 = g.op("Constant", value_t=torch.tensor([0], dtype=torch.int64))
        s1 = g.op("Constant", value_t=torch.tensor([1], dtype=torch.int64))
        axC = g.op("Constant", value_t=torch.tensor([4], dtype=torch.int64))
        y = g.op("Slice", y, s0, s1, axC)
        y = g.op("Squeeze", y, axC)

        return y
559
+
560
+
561
class CustomRfft2Wrap(nn.Module):
    """rfft2 wrapper: native torch.fft in training mode, the
    ONNX-exportable CustomRFFT2 autograd function otherwise."""

    def __init__(self) -> None:
        super().__init__()

    def forward(self, x):
        if not self.training:
            return CustomRFFT2().apply(x)
        spectrum = torch.fft.rfft2(x, dim=(2, 3), norm="ortho")
        return torch.view_as_real(spectrum)
571
+
572
+
573
class CustomIrfft2Wrap(nn.Module):
    """irfft2 wrapper: native torch.fft in training mode, the
    ONNX-exportable CustomIRFFT2 autograd function otherwise."""

    def __init__(self) -> None:
        super().__init__()

    def forward(self, x):
        if not self.training:
            return CustomIRFFT2().apply(x)
        # Trailing size-2 axis is (real, imag): [B,C,H,Wr,2] -> [B,C,H,W]
        spectrum = torch.view_as_complex(x)
        return torch.fft.irfft2(spectrum, dim=(2, 3), norm="ortho")
583
+
584
+
585
class FourierUnit(nn.Module):
    """Spectral unit: rfft2 -> convs on stacked (real, imag) channels -> irfft2.

    The FFTs always run in float32; the frequency-domain convs run in the
    caller's dtype (presumably to stay safe under autocast/half precision —
    not verified here).

    NOTE(review): self.rn is sized out_channels * 2 but is applied to a
    tensor with in_channels * 2 channels — only consistent when
    in_channels == out_channels (the only usage in this file).
    """

    def __init__(self, in_channels: int = 48, out_channels: int = 48) -> None:
        super().__init__()
        self.rn = RMSNorm(out_channels * 2)  # norm over stacked re/im channels
        self.post_norm = RMSNorm(out_channels)  # norm back in the spatial domain

        # 1x1 conv mixing frequency-domain channels.
        self.fdc = nn.Conv2d(
            in_channels=in_channels * 2,
            out_channels=out_channels * 2,
            kernel_size=1,
            bias=True,
        )

        # Depthwise 3x3 conv acting as a positional encoding in frequency space.
        self.fpe = nn.Conv2d(
            in_channels=in_channels * 2,
            out_channels=in_channels * 2,
            kernel_size=3,
            padding=1,
            groups=in_channels * 2,
            bias=True,
        )
        self.gelu = nn.GELU()
        self.irfft2 = CustomIrfft2Wrap()
        self.rfft2 = CustomRfft2Wrap()

    def forward(self, x: Tensor) -> Tensor:
        orig_dtype = x.dtype
        x = x.to(torch.float32)  # FFT computed in float32
        b, c, h, w = x.shape
        ffted = self.rfft2(x)  # (b, c, h, w//2+1, 2) — trailing axis is (re, im)
        # Move the (re, im) axis next to channels and fold: (b, 2*c, h, wr).
        ffted = ffted.permute(0, 4, 1, 2, 3).contiguous()
        ffted = ffted.view(b, c * 2, h, -1).to(orig_dtype)
        ffted = self.rn(ffted)
        ffted = self.fpe(ffted) + ffted  # residual depthwise conv
        ffted = self.fdc(ffted)
        ffted = self.gelu(ffted)
        # Unfold back to (b, c, h, wr, 2) for the inverse FFT, in float32.
        ffted = ffted.view(b, c, 2, h, -1).permute(0, 1, 3, 4, 2).contiguous().float()
        out = self.irfft2(ffted)
        out = self.post_norm(out.to(orig_dtype))
        return out
625
+
626
+
627
class InceptionConv2d(nn.Module):
    """Inception convolution: four parallel branches — a Fourier unit, a
    square conv, a horizontal band conv, and a vertical band conv — each
    applied to its own channel slice."""

    def __init__(
        self,
        fu_dim: int = 24,
        gc: int = 8,
        square_kernel_size: int = 13,
        band_kernel_size: int = 17,
    ) -> None:
        super().__init__()
        square_pad = square_kernel_size // 2
        band_pad = band_kernel_size // 2

        self.fu = FourierUnit(fu_dim, fu_dim)
        self.convhw = nn.Conv2d(gc, gc, square_kernel_size, padding=square_pad)
        self.convw = nn.Conv2d(
            gc,
            gc,
            kernel_size=(1, band_kernel_size),
            padding=(0, band_pad),
        )
        self.convh = nn.Conv2d(
            gc,
            gc,
            kernel_size=(band_kernel_size, 1),
            padding=(band_pad, 0),
        )

    def forward(
        self, x: Tensor, x_hw: Tensor, x_w: Tensor, xh: Tensor
    ) -> tuple[Tensor, Tensor, Tensor, Tensor]:
        fu_out = self.fu(x)
        hw_out = self.convhw(x_hw)
        w_out = self.convw(x_w)
        h_out = self.convh(xh)
        return fu_out, hw_out, w_out, h_out
660
+
661
+
662
class GatedCNNBlock(nn.Module):
    """Gated CNN block with inception-style branch convolutions.

    fc1 expands to 2*hidden channels split into a gate and a value part;
    only ``dim`` of the value channels go through InceptionConv2d, the
    remaining ``hidden - dim`` pass through untouched.
    """

    def __init__(
        self,
        dim: int = 64,
        expansion_ratio: float = 8 / 3,
        gc: int = 8,
        square_kernel_size: int = 13,
        band_kernel_size: int = 17,
    ) -> None:
        super().__init__()
        # Hidden width, rounded down to a multiple of 8.
        hidden = int(expansion_ratio * dim) // 8 * 8
        self.norm = RMSNorm(dim)
        self.fc1 = nn.Conv2d(dim, hidden * 2, 3, 1, 1)
        self.act = nn.SiLU()
        # Split of fc1's 2*hidden output channels:
        #   gate (hidden) + identity (hidden - dim) + fourier (dim - 3*gc)
        #   + three conv branches (gc each) = 2*hidden.
        # Implicitly requires hidden >= dim and dim >= 3*gc.
        self.split_indices = [hidden, hidden - dim, dim - gc * 3, gc, gc, gc]
        self.conv = InceptionConv2d(
            dim - gc * 3, gc, square_kernel_size, band_kernel_size
        )
        self.fc2 = nn.Conv2d(hidden, dim, 3, 1, 1)

    def gated_forward(self, x: Tensor) -> Tensor:
        """Norm -> expand -> split -> branch convs -> gate -> project."""
        x = self.norm(x)
        x = self.fc1(x)
        g, i, c, c_hw, c_w, c_h = torch.split(x, self.split_indices, dim=1)
        c, c_hw, c_w, c_h = self.conv(c, c_hw, c_w, c_h)
        # SiLU(gate) elementwise-multiplies the recombined value channels.
        x = self.fc2(self.act(g) * torch.cat((i, c, c_hw, c_w, c_h), dim=1))
        return x

    def forward(self, x: Tensor) -> Tensor:
        # Residual connection around the gated body.
        return self.gated_forward(x) + x
692
+
693
+
694
+ # @ARCH_REGISTRY.register()
695
class FIGSR(nn.Module):
    """Fourier Inception Gated Super Resolution.

    Pipeline: learnable input normalization -> reflect pad to a multiple of
    2 -> shallow conv -> two halves of GatedCNNBlock stacks -> 1x1 fusion of
    [deep, shallow, mid] features -> UniUpsampleV3 head -> crop -> denorm.
    """

    def __init__(
        self,
        in_nc: int = 3,
        dim: int = 48,
        expansion_ratio: float = 8 / 3,
        scale: int = 4,
        # neosr style:
        # scale=upscale
        out_nc: int = 3,
        upsampler: SampleMods = "pixelshuffledirect",
        mid_dim: int = 32,
        n_blocks: int = 24,
        gc: int = 8,
        square_kernel_size: int = 13,
        band_kernel_size: int = 17,
        **kwargs,
    ) -> None:
        super().__init__()
        self.in_to_dim = nn.Conv2d(in_nc, dim, 3, 1, 1)
        # Input H/W are reflect-padded to a multiple of this in forward().
        self.pad = 2
        # First half of the residual body.
        self.gfisr_body_half = nn.Sequential(
            *[
                GatedCNNBlock(
                    dim, expansion_ratio, gc, square_kernel_size, band_kernel_size
                )
                for _ in range(n_blocks // 2)
            ]
        )
        # Second half, followed by a 3x3 fusion conv.
        self.gfisr_body_half_2 = nn.Sequential(
            *[
                GatedCNNBlock(
                    dim, expansion_ratio, gc, square_kernel_size, band_kernel_size
                )
                for _ in range(n_blocks - n_blocks // 2)
            ]
            + [nn.Conv2d(dim, dim, 3, 1, 1)]
        )
        # Fuses [deep x1, shallow x, mid x0] features back to dim channels.
        self.cat_to_dim = nn.Conv2d(dim * 3, dim, 1)
        self.upscale = UniUpsampleV3(
            upsampler, scale, dim, out_nc, mid_dim, dysample_end_kernel=3
        )
        if upsampler == "pixelshuffledirect":
            # ICNR init of the sub-pixel conv to reduce checkerboard artifacts.
            weight = ICNR(
                self.upscale[0].weight,
                initializer=nn.init.kaiming_normal_,
                upscale_factor=scale,
            )
            self.upscale[0].weight.data.copy_(weight)

        self.scale = scale
        # Learnable input normalization: x -> (x - shift) / scale_norm,
        # inverted at the output.
        self.shift = nn.Parameter(torch.ones(1, 3, 1, 1) * 0.5, requires_grad=True)
        self.scale_norm = nn.Parameter(torch.ones(1, 3, 1, 1) / 6, requires_grad=True)

    def load_state_dict(self, state_dict, strict=True, assign=True):
        # Overwrite the incoming metadata buffer with this instance's own so
        # checkpoints whose MetaUpsample differs (e.g. older head versions)
        # still load. NOTE(review): `assign` defaults to True here, unlike
        # nn.Module's default of False — confirm this is intentional.
        state_dict["upscale.MetaUpsample"] = self.upscale.MetaUpsample
        return super().load_state_dict(state_dict, strict, assign)

    def forward(self, x: Tensor) -> Tensor:
        x = (x - self.shift) / self.scale_norm

        # Reflect-pad H and W up to a multiple of self.pad.
        _, _, H, W = x.shape
        mod_pad_h = (self.pad - H % self.pad) % self.pad
        mod_pad_w = (self.pad - W % self.pad) % self.pad
        x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), "reflect")

        x = self.in_to_dim(x)
        x0 = self.gfisr_body_half(x)
        x1 = self.gfisr_body_half_2(x0)

        x = self.cat_to_dim(torch.cat([x1, x, x0], dim=1))
        # Upscale, then crop away the padding's contribution.
        x = self.upscale(x)[:, :, : H * self.scale, : W * self.scale]
        return x * self.scale_norm + self.shift
inference.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import argparse
3
+ import torch
4
+
5
+ from resselt import load_from_file
6
+ from pepeline import read, save, ImgColor, ImgFormat
7
+
8
+
9
def parse_args():
    """Parse command-line arguments for the batch upscaling script."""
    parser = argparse.ArgumentParser(description="Batch image upscaling script")
    required_opts = (
        ("--input_dir", "Path to input images"),
        ("--output_dir", "Path to save results"),
        ("--weights", "Path to model weights"),
    )
    for flag, help_text in required_opts:
        parser.add_argument(flag, type=str, required=True, help=help_text)
    parser.add_argument("--device", type=str, default=None, help="cuda or cpu")
    return parser.parse_args()
18
+
19
+
20
def load_model(weights_path: str, device: torch.device):
    """Load a model from `weights_path`, move it to `device`, return it in eval mode."""
    model = load_from_file(weights_path).to(
        device,
        memory_format=torch.preserve_format,
        non_blocking=True,
    )
    return model.eval()
28
+
29
+
30
def process_image(model, img_path: str, device: torch.device):
    """Upscale one image; returns the result as an HWC float numpy array."""
    # Read as RGB float32 and reorder HWC -> CHW.
    chw = read(img_path, ImgColor.RGB, ImgFormat.F32).transpose(2, 0, 1)
    batch = (
        torch.tensor(chw)
        .to(
            device,
            memory_format=torch.preserve_format,
            non_blocking=True,
        )
        .unsqueeze(0)
    )

    # fp16 autocast + inference mode for speed; gradients are never needed.
    with torch.autocast(device.type, torch.float16), torch.inference_mode():
        output = model(batch)

    # NCHW -> HWC, drop the batch dim, move to host memory.
    return output.permute(0, 2, 3, 1).detach().cpu().numpy()[0]
48
+
49
+
50
def main():
    """Upscale every image in --input_dir and write results to --output_dir."""
    args = parse_args()

    if args.device:
        device = torch.device(args.device)
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    os.makedirs(args.output_dir, exist_ok=True)

    model = load_model(args.weights, device)

    names = os.listdir(args.input_dir)
    total = len(names)

    for index, img_name in enumerate(names, start=1):
        # Single-line progress indicator (carriage return, no newline).
        print(
            f"\rProcessing {index}/{total} | {img_name}",
            end="",
            flush=True,
        )

        src_path = os.path.join(args.input_dir, img_name)
        result = process_image(model, src_path, device)

        save(result.copy(), os.path.join(args.output_dir, img_name))

    print("\nDone.")
77
+
78
+
79
# Script entry point.
if __name__ == "__main__":
    main()
weights/4x_FIGSR.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86a88ec488726cb6b95642dc846cc7aeff583ed93bc5036d56ff08fd5ac9fb1f
3
+ size 18504754
weights/4x_FIGSR.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d48ee05490c6b63f043cc4b5c7aa546b0661016a7d61fc0deec14c3019e0e5c1
3
+ size 17913930
weights/4x_FIGSR.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98b239859ff3ca726ddc570eb0fb2c1a2f618b86c2d065a2f8cc9eee8e289c82
3
+ size 17763135