Push model using huggingface_hub.

Browse files

Files changed (3) hide show

README.md +12 -0
config.json +233 -0
model.safetensors +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,12 @@

+---
+library_name: VeloDepth
+tags:
+- model_hub_mixin
+- monocular-metric-3D-estimation
+- pytorch_model_hub_mixin
+---
+This model has been pushed to the Hub using the [PyTorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration:
+- Code: https://github.com/lpiccinelli-eth/VeloDepth
+- Paper: [More Information Needed]
+- Docs: [More Information Needed]

config.json ADDED Viewed

	@@ -0,0 +1,233 @@

+{
+  "data": {
+    "augmentations": {
+      "affine_p": 0.0,
+      "blur_p": 0.1,
+      "cut_p": 0.0,
+      "flip_p": 0.5,
+      "flipt_p": 0.3,
+      "gamma_p": 0.5,
+      "grayscale_p": 0.1,
+      "invert_p": 0.0,
+      "jitter_p": 0.5,
+      "noise_pad": 1.0,
+      "only_zoom": true,
+      "random_blur": 2.0,
+      "random_gamma": 0.2,
+      "random_jitter": 0.1,
+      "random_rotation": 0.0,
+      "random_scale": 2.0,
+      "random_shear": 0.0,
+      "random_translate_x": 0.04,
+      "random_translate_y": 0.01,
+      "rotation_p": 0.0,
+      "scale_p": 0.0,
+      "shape_constraints": {
+        "height_min": 15,
+        "pixels_max": 600000.0,
+        "pixels_min": 200000.0,
+        "ratio_bounds": [
+          0.5,
+          2.5
+        ],
+        "sample": true,
+        "shape_mult": 14,
+        "width_min": 15
+      },
+      "shape_mult": 14,
+      "test_context": 1.0,
+      "translate_p": 0.0
+    },
+    "crop": "garg",
+    "data_root": "datasets",
+    "flow": "of",
+    "image_shape": [
+      518,
+      518
+    ],
+    "keepGT": 0,
+    "mini": 1.0,
+    "normalization": "imagenet",
+    "num_frames": 2,
+    "pair": 1,
+    "resize_method": "contextcrop",
+    "sampling": {},
+    "shape_constraints": {
+      "height_min": 15,
+      "pixels_max": 600000.0,
+      "pixels_min": 200000.0,
+      "ratio_bounds": [
+        0.5,
+        2.5
+      ],
+      "sample": true,
+      "shape_mult": 14,
+      "width_min": 15
+    },
+    "train_datasets": [],
+    "val_datasets": [
+      "ScanNetVid",
+      "VKITTI",
+      "Bonn",
+      "TUM",
+      "Sintel"
+    ]
+  },
+  "eps": 1e-06,
+  "generic": {
+    "deterministic": true,
+    "name_page": "velodepth",
+    "seed": 42
+  },
+  "model": {
+    "expansion": 4,
+    "flow_encoder": {
+      "embed_dims": [
+        80,
+        160
+      ],
+      "frozen_stages": -1,
+      "name": "convnextv2_nano",
+      "num_levels": 2,
+      "pretrained": "timm"
+    },
+    "layer_scale": 1.0,
+    "name": "VeloDepth",
+    "num_heads": 8,
+    "pixel_decoder": {
+      "depths": [
+        2,
+        2,
+        2
+      ],
+      "dropout": 0.0,
+      "hidden_dim": 512,
+      "kernel_size": 3,
+      "name": "Decoder",
+      "num_fusion_block": 1,
+      "num_prompt_blocks": 1,
+      "out_dim": 64
+    },
+    "pixel_encoder": {
+      "cls_token_embed_dims": [
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024
+      ],
+      "depths": [
+        6,
+        12,
+        18,
+        24
+      ],
+      "embed_dim": 1024,
+      "embed_dims": [
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024,
+        1024
+      ],
+      "freeze_norm": true,
+      "frozen_stages": 0,
+      "lr": 3e-06,
+      "name": "dinov2_vitl14",
+      "num_register_tokens": 0,
+      "output_idx": [
+        6,
+        12,
+        18,
+        24
+      ],
+      "patch_size": 14,
+      "pretrained": null,
+      "stacking_fn": "last",
+      "use_norm": true,
+      "wd": 0.1
+    },
+    "residual_encoder": {
+      "embed_dim": 96,
+      "embed_dims": [
+        96,
+        192,
+        384,
+        768
+      ],
+      "frozen_stages": 0,
+      "lr": 0.0001,
+      "name": "convnextv2_tiny",
+      "num_levels": 1,
+      "pretrained": "timm",
+      "wd": 0.01
+    }
+  },
+  "training": {
+    "f16": "f16",
+    "losses": {
+      "camera": {
+        "name": "Dummy",
+        "weight": 1.0
+      },
+      "depth": {
+        "name": "Dummy",
+        "weight": 1.0
+      },
+      "edge": {
+        "name": "Dummy",
+        "weight": 1.0
+      },
+      "features": {
+        "name": "Dummy",
+        "weight": 1.0
+      },
+      "flow": {
+        "name": "Dummy",
+        "weight": 1.0
+      },
+      "self": {
+        "name": "Dummy",
+        "weight": 1.0
+      }
+    }
+  }
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:615335cd006663cd567e0a1c74bef7ed622ea733dbff016844ab64e3da22ce3e
+size 1559113196