lpiccinelli committed on
Commit
2b2ee9f
·
verified ·
1 Parent(s): 4be6857

Push model using huggingface_hub.

Browse files
Files changed (3) hide show
  1. README.md +12 -0
  2. config.json +233 -0
  3. model.safetensors +3 -0
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: VeloDepth
3
+ tags:
4
+ - model_hub_mixin
5
+ - monocular-metric-3D-estimation
6
+ - pytorch_model_hub_mixin
7
+ ---
8
+
9
+ This model has been pushed to the Hub using the [PyTorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration:
10
+ - Code: https://github.com/lpiccinelli-eth/VeloDepth
11
+ - Paper: [More Information Needed]
12
+ - Docs: [More Information Needed]
config.json ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data": {
3
+ "augmentations": {
4
+ "affine_p": 0.0,
5
+ "blur_p": 0.1,
6
+ "cut_p": 0.0,
7
+ "flip_p": 0.5,
8
+ "flipt_p": 0.3,
9
+ "gamma_p": 0.5,
10
+ "grayscale_p": 0.1,
11
+ "invert_p": 0.0,
12
+ "jitter_p": 0.5,
13
+ "noise_pad": 1.0,
14
+ "only_zoom": true,
15
+ "random_blur": 2.0,
16
+ "random_gamma": 0.2,
17
+ "random_jitter": 0.1,
18
+ "random_rotation": 0.0,
19
+ "random_scale": 2.0,
20
+ "random_shear": 0.0,
21
+ "random_translate_x": 0.04,
22
+ "random_translate_y": 0.01,
23
+ "rotation_p": 0.0,
24
+ "scale_p": 0.0,
25
+ "shape_constraints": {
26
+ "height_min": 15,
27
+ "pixels_max": 600000.0,
28
+ "pixels_min": 200000.0,
29
+ "ratio_bounds": [
30
+ 0.5,
31
+ 2.5
32
+ ],
33
+ "sample": true,
34
+ "shape_mult": 14,
35
+ "width_min": 15
36
+ },
37
+ "shape_mult": 14,
38
+ "test_context": 1.0,
39
+ "translate_p": 0.0
40
+ },
41
+ "crop": "garg",
42
+ "data_root": "datasets",
43
+ "flow": "of",
44
+ "image_shape": [
45
+ 518,
46
+ 518
47
+ ],
48
+ "keepGT": 0,
49
+ "mini": 1.0,
50
+ "normalization": "imagenet",
51
+ "num_frames": 2,
52
+ "pair": 1,
53
+ "resize_method": "contextcrop",
54
+ "sampling": {},
55
+ "shape_constraints": {
56
+ "height_min": 15,
57
+ "pixels_max": 600000.0,
58
+ "pixels_min": 200000.0,
59
+ "ratio_bounds": [
60
+ 0.5,
61
+ 2.5
62
+ ],
63
+ "sample": true,
64
+ "shape_mult": 14,
65
+ "width_min": 15
66
+ },
67
+ "train_datasets": [],
68
+ "val_datasets": [
69
+ "ScanNetVid",
70
+ "VKITTI",
71
+ "Bonn",
72
+ "TUM",
73
+ "Sintel"
74
+ ]
75
+ },
76
+ "eps": 1e-06,
77
+ "generic": {
78
+ "deterministic": true,
79
+ "name_page": "velodepth",
80
+ "seed": 42
81
+ },
82
+ "model": {
83
+ "expansion": 4,
84
+ "flow_encoder": {
85
+ "embed_dims": [
86
+ 80,
87
+ 160
88
+ ],
89
+ "frozen_stages": -1,
90
+ "name": "convnextv2_nano",
91
+ "num_levels": 2,
92
+ "pretrained": "timm"
93
+ },
94
+ "layer_scale": 1.0,
95
+ "name": "VeloDepth",
96
+ "num_heads": 8,
97
+ "pixel_decoder": {
98
+ "depths": [
99
+ 2,
100
+ 2,
101
+ 2
102
+ ],
103
+ "dropout": 0.0,
104
+ "hidden_dim": 512,
105
+ "kernel_size": 3,
106
+ "name": "Decoder",
107
+ "num_fusion_block": 1,
108
+ "num_prompt_blocks": 1,
109
+ "out_dim": 64
110
+ },
111
+ "pixel_encoder": {
112
+ "cls_token_embed_dims": [
113
+ 1024,
114
+ 1024,
115
+ 1024,
116
+ 1024,
117
+ 1024,
118
+ 1024,
119
+ 1024,
120
+ 1024,
121
+ 1024,
122
+ 1024,
123
+ 1024,
124
+ 1024,
125
+ 1024,
126
+ 1024,
127
+ 1024,
128
+ 1024,
129
+ 1024,
130
+ 1024,
131
+ 1024,
132
+ 1024,
133
+ 1024,
134
+ 1024,
135
+ 1024,
136
+ 1024
137
+ ],
138
+ "depths": [
139
+ 6,
140
+ 12,
141
+ 18,
142
+ 24
143
+ ],
144
+ "embed_dim": 1024,
145
+ "embed_dims": [
146
+ 1024,
147
+ 1024,
148
+ 1024,
149
+ 1024,
150
+ 1024,
151
+ 1024,
152
+ 1024,
153
+ 1024,
154
+ 1024,
155
+ 1024,
156
+ 1024,
157
+ 1024,
158
+ 1024,
159
+ 1024,
160
+ 1024,
161
+ 1024,
162
+ 1024,
163
+ 1024,
164
+ 1024,
165
+ 1024,
166
+ 1024,
167
+ 1024,
168
+ 1024,
169
+ 1024
170
+ ],
171
+ "freeze_norm": true,
172
+ "frozen_stages": 0,
173
+ "lr": 3e-06,
174
+ "name": "dinov2_vitl14",
175
+ "num_register_tokens": 0,
176
+ "output_idx": [
177
+ 6,
178
+ 12,
179
+ 18,
180
+ 24
181
+ ],
182
+ "patch_size": 14,
183
+ "pretrained": null,
184
+ "stacking_fn": "last",
185
+ "use_norm": true,
186
+ "wd": 0.1
187
+ },
188
+ "residual_encoder": {
189
+ "embed_dim": 96,
190
+ "embed_dims": [
191
+ 96,
192
+ 192,
193
+ 384,
194
+ 768
195
+ ],
196
+ "frozen_stages": 0,
197
+ "lr": 0.0001,
198
+ "name": "convnextv2_tiny",
199
+ "num_levels": 1,
200
+ "pretrained": "timm",
201
+ "wd": 0.01
202
+ }
203
+ },
204
+ "training": {
205
+ "f16": "f16",
206
+ "losses": {
207
+ "camera": {
208
+ "name": "Dummy",
209
+ "weight": 1.0
210
+ },
211
+ "depth": {
212
+ "name": "Dummy",
213
+ "weight": 1.0
214
+ },
215
+ "edge": {
216
+ "name": "Dummy",
217
+ "weight": 1.0
218
+ },
219
+ "features": {
220
+ "name": "Dummy",
221
+ "weight": 1.0
222
+ },
223
+ "flow": {
224
+ "name": "Dummy",
225
+ "weight": 1.0
226
+ },
227
+ "self": {
228
+ "name": "Dummy",
229
+ "weight": 1.0
230
+ }
231
+ }
232
+ }
233
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:615335cd006663cd567e0a1c74bef7ed622ea733dbff016844ab64e3da22ce3e
3
+ size 1559113196