Upload folder using huggingface_hub
- .gitattributes +1 -0
- README.md +94 -0
- checkpoints/best.pth +3 -0
- configs/depth.yaml +64 -0
- configs/training.yaml +57 -0
- configs/training_sol.yaml +59 -0
- export_manifest.json +21 -0
- logs/training_history.json +212 -0
- logs/training_history_depth.json +22 -0
- onnx/nott_v1.onnx +3 -0
- onnx/nott_v1.onnx.data +3 -0
- onnx/nott_v2.onnx +3 -0
- pytorch/nott_v1.pth +3 -0
- pytorch/nott_v1.safetensors +3 -0
- pytorch/nott_v2.pth +3 -0
- pytorch/nott_v2.safetensors +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+onnx/nott_v1.onnx.data filter=lfs diff=lfs merge=lfs -text
README.md
ADDED
---
tags:
- robotics
- anima
- thermal-slam
- depth-estimation
- thermal-refinement
- uav
- robot-flow-labs
library_name: pytorch
pipeline_tag: image-to-image
license: apache-2.0
---

# NÓTT — Thermal Image Refinement for Monocular ORB-SLAM3

Part of the [ANIMA Perception Suite](https://github.com/RobotFlow-Labs) by Robot Flow Labs.

## Paper

**Thermal Image Refinement with Depth Estimation using Recurrent Networks for Monocular ORB-SLAM3**
Hürkan Şahin, Huy Xuan Pham, Van Huyen Dang, Alper Yegenoglu, Erdal Kayacan
[arXiv:2603.14998](https://arxiv.org/abs/2603.14998) (2026)

## Architecture

**T-RefNet** — a lightweight U-Net encoder-decoder with a ConvGRU recurrent bottleneck:

- Encoder: 3 levels (32 → 64 → 128 channels), BatchNorm + ReLU + MaxPool
- Bottleneck: 2x ConvGRU cells for temporal coherence
- Decoder: 3 levels with skip connections + bilinear upsampling
- Output: sigmoid-activated refined thermal image

**Parameters:** 2,048,320 (~8 MB)
**Input:** single-channel thermal (1, H, W), tested at 256x320

## Results

| Metric | Value | Paper Target |
|--------|-------|--------------|
| Val Loss (L1+SSIM) | **0.037** | — |
| Absolute Relative Error | **0.090** | < 0.10 |

Trained on the VIVID++ dataset (71,917 paired thermal/depth frames, 24 sequences).
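The Absolute Relative Error reported above is not defined in this upload; under the standard depth-estimation definition (an assumption here), it is the mean of |prediction − target| / target over valid pixels:

```python
import numpy as np

def absolute_relative_error(pred: np.ndarray, target: np.ndarray) -> float:
    """Mean of |pred - target| / target over pixels with a valid (positive) target."""
    mask = target > 0  # ignore invalid / zero-depth pixels
    return float(np.mean(np.abs(pred[mask] - target[mask]) / target[mask]))

# Toy check: a uniform 10% over-prediction gives ARE of about 0.10.
target = np.full((4, 4), 2.0)
pred = target * 1.1
are = absolute_relative_error(pred, target)
```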
## Exported Formats

| Format | File | Size | Use Case |
|--------|------|------|----------|
| PyTorch (.pth) | `pytorch/nott_v2.pth` | 8.2 MB | Training, fine-tuning |
| SafeTensors | `pytorch/nott_v2.safetensors` | 8.2 MB | Fast, safe loading |
| ONNX | `onnx/nott_v2.onnx` | 8.2 MB | Cross-platform inference |
| Checkpoint | `checkpoints/best.pth` | 24 MB | Resume training |

## Usage

```python
import torch
from anima_nott.thermal_refinement import ThermalRefinementNet

model = ThermalRefinementNet(in_channels=1, base_channels=32, num_levels=3, gru_layers=2)
state = torch.load("pytorch/nott_v2.pth", weights_only=True)
model.load_state_dict(state)
model.eval()

thermal = torch.randn(1, 1, 256, 320)  # normalized [0, 1]
refined, hidden = model(thermal)
```
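The model expects inputs normalized to [0, 1]. This upload does not ship a preprocessing utility, so the sketch below is illustrative only, assuming simple per-frame min-max normalization of raw 16-bit thermal counts:

```python
import numpy as np

def normalize_thermal(frame: np.ndarray) -> np.ndarray:
    """Min-max normalize a raw thermal frame (e.g. uint16 counts) to float32 in [0, 1]."""
    frame = frame.astype(np.float32)
    lo, hi = frame.min(), frame.max()
    if hi - lo < 1e-6:  # flat frame: avoid divide-by-zero
        return np.zeros_like(frame)
    return (frame - lo) / (hi - lo)

raw = np.random.randint(0, 2**16, size=(256, 320), dtype=np.uint16)
x = normalize_thermal(raw)[None, None]  # shape (1, 1, 256, 320), ready for the model
```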
## Training

- **Dataset:** VIVID++ (FLIR Boson+ thermal, 24 sequences; bright/dark/dim/aggressive conditions)
- **Hardware:** NVIDIA L4 (23 GB), bf16 mixed precision
- **Optimizer:** Adam (lr=1e-3, weight_decay=1e-5)
- **Schedule:** Cosine annealing with linear warmup
- **Loss:** L1 + 0.1 x SSIM
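The schedule above (linear warmup into cosine annealing) can be sketched as a pure-Python function. The `warmup_fraction` and `min_lr` names follow the fields used in the configs in this upload; the exact implementation in the training code is not shown here and may differ:

```python
import math

def lr_at(step: int, total_steps: int, base_lr: float = 1e-3,
          warmup_fraction: float = 0.05, min_lr: float = 1e-6) -> float:
    """Linear warmup to base_lr, then cosine annealing down to min_lr."""
    warmup_steps = int(total_steps * warmup_fraction)
    if step < warmup_steps:
        return base_lr * (step + 1) / warmup_steps  # linear ramp
    t = (step - warmup_steps) / max(1, total_steps - warmup_steps)  # progress in [0, 1]
    return min_lr + 0.5 * (base_lr - min_lr) * (1 + math.cos(math.pi * t))
```

At the end of warmup the rate equals `base_lr`, and it decays toward `min_lr` over the remaining steps.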
## Defense Application

Low-cost thermal SLAM for GPS-denied, low-light UAV navigation using non-radiometric thermal cameras (~$150 FLIR Lepton 3.5). Targets: <0.4 m trajectory error, 25+ FPS on a Jetson Xavier.

## Citation

```bibtex
@article{sahin2026thermal,
  title={Thermal Image Refinement with Depth Estimation using Recurrent Networks for Monocular ORB-SLAM3},
  author={Sahin, Hurkan and Pham, Huy Xuan and Dang, Van Huyen and Yegenoglu, Alper and Kayacan, Erdal},
  journal={arXiv preprint arXiv:2603.14998},
  year={2026}
}
```

## License

Apache-2.0 — Robot Flow Labs / AIFLOW LABS LIMITED
checkpoints/best.pth
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:7974cde31862f10d606e51d23292a1a5db4de5d4410ab5dbbc3d851ce5d5af1d
size 24623721
configs/depth.yaml
ADDED
model:
  encoder_channels: [32, 64, 128]
  depth_range: [0.1, 10.0]
  predict_uncertainty: false

training:
  epochs: 150
  batch_size: 32
  num_workers: 4
  pin_memory: true
  seed: 42
  mixed_precision: true
  mode: joint
  alpha_refine: 1.0
  beta_depth: 0.5

optimizer:
  type: adam
  lr: 1.0e-3
  betas: [0.9, 0.999]
  weight_decay: 1.0e-5
  gradient_clip_norm: 1.0

scheduler:
  type: cosine_warmup
  warmup_fraction: 0.05
  min_lr: 1.0e-6

loss:
  lambda_l1: 1.0
  lambda_ssim: 0.5
  use_uncertainty: false

checkpointing:
  save_every: 5
  save_best: true
  keep_last_n: 2
  checkpoint_dir: /mnt/artifacts-datai/checkpoints/project_nott

early_stopping:
  enabled: true
  patience: 10
  min_delta: 1.0e-4

logging:
  log_every: 10
  tensorboard_dir: /mnt/artifacts-datai/tensorboard/project_nott
  log_dir: /mnt/artifacts-datai/logs/project_nott

transfer:
  pretrained_checkpoint: /mnt/artifacts-datai/checkpoints/project_nott/
  freeze_encoder: false
  finetune_lr: 1.0e-4
  finetune_epochs: 50

data:
  dataset: vivid_plus_plus
  root: /mnt/forge-data/datasets/vivid_plus_plus
  modality: depth
  resolution: null  # native 256x320
  max_depth: 10.0
  noise_sigma: 0.03
  augmentation: true
  seed: 42
configs/training.yaml
ADDED
# NÓTT Training Config — Paper-aligned (arXiv:2603.14998)
# Loss weights from Section III-B: 0.9·L_SIlog + 0.4·L_SSIM + 0.1·L_ord + 0.1·L_sm

training:
  epochs: 150
  batch_size: 220  # Peak 20.3GB/23GB (88%) — tested with bf16 forward+backward
  num_workers: 2
  pin_memory: true
  seed: 42
  mixed_precision: true  # bf16 on CUDA

optimizer:
  type: adam
  lr: 5.0e-4  # Mid-range — v15 epoch 0 worked at ~3e-4; cosine decays to 1e-6
  betas: [0.9, 0.999]
  weight_decay: 1.0e-5
  gradient_clip_norm: 1.0

scheduler:
  type: cosine_warmup
  warmup_fraction: 0.0  # No warmup — resuming from good weights
  min_lr: 1.0e-6

loss:
  lambda_l1: 1.0
  lambda_perceptual: 0.0
  lambda_ssim: 0.1
  use_perceptual: false

checkpointing:
  save_every: 5  # Periodic checkpoint every 5 epochs (for resume)
  save_best: true
  keep_last_n: 2  # Keep top 2 checkpoints by val_loss
  checkpoint_dir: /mnt/artifacts-datai/checkpoints/project_nott

early_stopping:
  enabled: true
  patience: 30
  min_delta: 1.0e-4

logging:
  backend: console
  project: anima-nott
  log_every: 10
  log_images_every: 50
  tensorboard_dir: /mnt/artifacts-datai/tensorboard/project_nott
  log_dir: /mnt/artifacts-datai/logs/project_nott

data:
  dataset: vivid_plus_plus
  root: /mnt/forge-data/datasets/vivid_plus_plus
  modality: refinement
  resolution: null  # native 256x320
  max_depth: 10.0
  noise_sigma: 0.03
  augmentation: true
  seed: 42
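The `L_SIlog` term cited in the comment at the top of `configs/training.yaml` is not implemented in this upload; in the standard scale-invariant log formulation (Eigen et al. style, assumed here) it is computed from log-depth differences:

```python
import numpy as np

def silog_loss(pred: np.ndarray, target: np.ndarray, lam: float = 0.85) -> float:
    """Scale-invariant log loss: mean(d^2) - lam * mean(d)^2,
    where d = log(pred) - log(target) over valid (positive) depths.
    The lam value is a common default, not taken from this repo."""
    mask = (pred > 0) & (target > 0)
    d = np.log(pred[mask]) - np.log(target[mask])
    return float(np.mean(d ** 2) - lam * np.mean(d) ** 2)

# A perfect prediction gives exactly zero loss.
depth = np.random.uniform(0.1, 10.0, size=(256, 320))
loss = silog_loss(depth, depth)
```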
configs/training_sol.yaml
ADDED
# NÓTT Fine-tuning Config — SOL Thermal Synthetic + VIVID++ Combined
# Resume from VIVID++ checkpoint, fine-tune on the combined dataset
# DDP-safe: BatchNorm frozen (no running-stat divergence)

training:
  epochs: 50  # Fine-tuning, not from scratch
  batch_size: 220  # Per-GPU (same as proven single-GPU config)
  num_workers: 4  # Per process
  pin_memory: true
  seed: 42
  mixed_precision: true
  freeze_bn: true  # CRITICAL: freeze BatchNorm for DDP compatibility

optimizer:
  type: adam
  lr: 1.0e-4  # 10x lower than pretraining — gentle fine-tuning
  betas: [0.9, 0.999]
  weight_decay: 1.0e-5
  gradient_clip_norm: 1.0

scheduler:
  type: cosine_warmup
  warmup_fraction: 0.04  # ~2 epochs warmup
  min_lr: 1.0e-7

loss:
  lambda_l1: 1.0
  lambda_perceptual: 0.0
  lambda_ssim: 0.1
  use_perceptual: false

checkpointing:
  save_every: 5
  save_best: true
  keep_last_n: 2
  checkpoint_dir: /mnt/artifacts-datai/checkpoints/project_nott

early_stopping:
  enabled: true
  patience: 15
  min_delta: 1.0e-4

logging:
  backend: console
  project: anima-nott-sol
  log_every: 10
  tensorboard_dir: /mnt/artifacts-datai/tensorboard/project_nott_sol
  log_dir: /mnt/artifacts-datai/logs/project_nott

data:
  dataset: combined
  vivid_root: /mnt/forge-data/datasets/vivid_plus_plus
  sol_root: /mnt/artifacts-datai/datasets/sol_thermal_synthetic
  modality: refinement
  resolution: null  # native 256x320
  max_depth: 10.0
  noise_sigma: 0.03
  augmentation: true
  seed: 42
export_manifest.json
ADDED
{
  "module": "project_nott",
  "version": "v2",
  "paper": "arXiv:2603.14998",
  "best_checkpoint": "v15_epoch0",
  "val_loss": 0.0366,
  "val_ARE": 0.0899,
  "architecture": "T-RefNet (ConvGRU encoder-decoder)",
  "parameters": 2048320,
  "input_shape": [1, 1, 256, 320],
  "formats": ["pth", "safetensors", "onnx"]
}
logs/training_history.json
ADDED
[
  {"epoch": 1, "train_loss": 0.08995985405735013, "val_loss": 0.11606864236733493, "val_are": 0.24683485486928156, "lr": 0.00055},
  {"epoch": 2, "train_loss": 0.12767553086183508, "val_loss": 0.10721209130304701, "val_are": 0.2314927643712829, "lr": 0.0007750000000000001},
  {"epoch": 3, "train_loss": 0.12751515166593247, "val_loss": 0.10882455242030761, "val_are": 0.2113809901125291, "lr": 0.001},
  {"epoch": 4, "train_loss": 0.12657773051233517, "val_loss": 0.10856413468718529, "val_are": 0.21922510774696574, "lr": 0.0009998843667389555},
  {"epoch": 5, "train_loss": 0.12593891963261325, "val_loss": 0.10219317510285798, "val_are": 0.23196267468087814, "lr": 0.0009995375204935638},
  {"epoch": 6, "train_loss": 0.12425504074919791, "val_loss": 0.10901784069617004, "val_are": 0.23595025083597967, "lr": 0.0009989596218522635},
  {"epoch": 7, "train_loss": 0.12599383525195576, "val_loss": 0.10752283779027708, "val_are": 0.19789807936724493, "lr": 0.0009981509383798367},
  {"epoch": 8, "train_loss": 0.12460020296022195, "val_loss": 0.11346339686390232, "val_are": 0.22122144085519455, "lr": 0.000997111844493529},
  {"epoch": 9, "train_loss": 0.12462475721021088, "val_loss": 0.11580582938211806, "val_are": 0.252922221141703, "lr": 0.0009958428212896954},
  {"epoch": 10, "train_loss": 0.1240629677717783, "val_loss": 0.11680787418256788, "val_are": 0.29679954051971436, "lr": 0.0009943444563210542},
  {"epoch": 11, "train_loss": 0.1235267119798936, "val_loss": 0.13542297526317484, "val_are": 0.40350808641489816, "lr": 0.0009926174433246525},
  {"epoch": 12, "train_loss": 0.12494298311419227, "val_loss": 0.10566405962933512, "val_are": 0.20771524529246724, "lr": 0.000990662581900669},
  {"epoch": 13, "train_loss": 0.1240543836892462, "val_loss": 0.11496848021360005, "val_are": 0.2263655903584817, "lr": 0.0009884807771422025},
  {"epoch": 14, "train_loss": 0.12463467063851097, "val_loss": 0.11092703652513378, "val_are": 0.21460236871943755, "lr": 0.0009860730392162163},
  {"epoch": 15, "train_loss": 0.12369378600396266, "val_loss": 0.10656596610651296, "val_are": 0.20960291957153993, "lr": 0.000983440482895836},
  {"epoch": 16, "train_loss": 0.12360544479927238, "val_loss": 0.10888551471426207, "val_are": 0.2213323598398882, "lr": 0.0009805843270442142},
  {"epoch": 17, "train_loss": 0.1213240637158861, "val_loss": 0.11186534889480647, "val_are": 0.27077462743310365, "lr": 0.0009775058940502},
  {"epoch": 18, "train_loss": 0.12229459575649833, "val_loss": 0.11027245683705106, "val_are": 0.24529138558051167, "lr": 0.0009742066092160797},
  {"epoch": 19, "train_loss": 0.12246036856454245, "val_loss": 0.11329149583573728, "val_are": 0.22803892940282822, "lr": 0.0009706880000976672},
  {"epoch": 20, "train_loss": 0.1211167235865074, "val_loss": 0.106539951856522, "val_are": 0.21184978108195698, "lr": 0.0009669516957970512},
  {"epoch": 21, "train_loss": 0.12227428233136936, "val_loss": 0.10863438640337657, "val_are": 0.21262053531758926, "lr": 0.0009629994262083282},
  {"epoch": 22, "train_loss": 0.12078849018431034, "val_loss": 0.10464029233245288, "val_are": 0.20529338323018131, "lr": 0.0009588330212166673},
  {"epoch": 23, "train_loss": 0.12076749657692552, "val_loss": 0.10519357121494763, "val_are": 0.1972528174519539, "lr": 0.0009544544098510819},
  {"epoch": 24, "train_loss": 0.1205340421625546, "val_loss": 0.11209521026295774, "val_are": 0.23057624040281072, "lr": 0.0009498656193912957},
  {"epoch": 25, "train_loss": 0.12104494641630018, "val_loss": 0.10841717346407034, "val_are": 0.21237122092176886, "lr": 0.0009450687744291213},
  {"epoch": 26, "train_loss": 0.12009178565777077, "val_loss": 0.11279275761369396, "val_are": 0.2372470002840547, "lr": 0.0009400660958847813},
  {"epoch": 27, "train_loss": 0.12139157487117515, "val_loss": 0.10750346915686831, "val_are": 0.22311256606789195, "lr": 0.0009348598999786324},
  {"epoch": 28, "train_loss": 0.12134746196014541, "val_loss": 0.10868307148270748, "val_are": 0.2574108821504256, "lr": 0.0009294525971587638},
  {"epoch": 29, "train_loss": 0.12045418344387392, "val_loss": 0.11045770336161642, "val_are": 0.2446950358503005, "lr": 0.0009238466909849694},
  {"epoch": 30, "train_loss": 0.11914502350347382, "val_loss": 0.10676391389878358, "val_are": 0.2178314670043833, "lr": 0.0009180447769696094}
]
logs/training_history_depth.json
ADDED
[
  {"epoch": 0, "train_loss": 1.5525195367874638, "train_refine": 0.22647310408853716, "train_depth": 2.6520928798183316, "val_loss": 1.5210160527910506, "val_are_refine": 0.22593858412333898, "val_are_depth": 0.8450886181422642, "lr": 0.001},
  {"epoch": 1, "train_loss": 1.658759770854827, "train_refine": 0.4102027195115243, "train_depth": 2.497114127682101, "val_loss": 1.7499962193625314, "val_are_refine": 0.6625208514077323, "val_are_depth": 0.793292156287602, "lr": 1e-06}
]
onnx/nott_v1.onnx
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:10e25c93cff20ec650b4dada8617f000ab34698b22f5e99e5bdd23c5383a5dae
size 9035361
onnx/nott_v1.onnx.data
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:f2059c0151c8870b2c0acba3b7de2cb8c84c8ffdc74d0d542dfd5d0303f05017
size 9043968
onnx/nott_v2.onnx
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:7b3ecabd4b6d5b92b6e0ac53a369276969e4fd5c7b2ac376dd318db78c588e41
size 8207290
pytorch/nott_v1.pth
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:b95d48205e45ab83f355c465388d8f17ea06509d82b16b5442423c00b3d55d90
size 9030828
pytorch/nott_v1.safetensors
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:6723765aa74b399ae5679ff540bd08af3d8e03b589206f50e644cf5cde5a57a3
size 9015116
pytorch/nott_v2.pth
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:b241697b2acb980901ca91458ab8fde78b037225c32d76b9b41f9643a49fdb63
size 8209243
pytorch/nott_v2.safetensors
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:7c0fd337dc9bd8d437cfef565a27492b49b89b8962bae06ffb3dfcb278e80026
size 8199512