tusharsangam commited on
Commit
9205b56
·
verified ·
1 Parent(s): 30f1ff0

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +281 -0
  2. LICENSE.md +83 -0
  3. README.md +230 -0
  4. arguments/__init__.py +116 -0
  5. arguments/__pycache__/__init__.cpython-38.pyc +0 -0
  6. assets/teaser.png +3 -0
  7. autoencoder/__pycache__/dataset.cpython-38.pyc +0 -0
  8. autoencoder/__pycache__/model.cpython-38.pyc +0 -0
  9. autoencoder/ckpt/office_scene_50/best_ckpt.pth +3 -0
  10. autoencoder/ckpt/office_scene_50/events.out.tfevents.1760944914.a100-st-p4de24xlarge-7.434065.0 +3 -0
  11. autoencoder/ckpt/office_scene_50/events.out.tfevents.1760945127.a100-st-p4de24xlarge-7.441630.0 +3 -0
  12. autoencoder/dataset.py +26 -0
  13. autoencoder/model.py +46 -0
  14. autoencoder/test.py +77 -0
  15. autoencoder/train.py +110 -0
  16. ckpts/sam_vit_b_01ec64.pth +3 -0
  17. ckpts/sam_vit_h_4b8939.pth +3 -0
  18. ckpts/sam_vit_l_0b3195.pth +3 -0
  19. convert.py +124 -0
  20. data/40753679.mov +3 -0
  21. data/Tushar.zip +3 -0
  22. data/colmap_scene.zip +3 -0
  23. data/examples/office_scene_50/depths/frame_00059.png +3 -0
  24. data/examples/office_scene_50/depths/frame_00119.png +3 -0
  25. data/examples/office_scene_50/depths/frame_00179.png +3 -0
  26. data/examples/office_scene_50/depths/frame_00239.png +3 -0
  27. data/examples/office_scene_50/depths/frame_00299.png +3 -0
  28. data/examples/office_scene_50/depths/frame_00359.png +3 -0
  29. data/examples/office_scene_50/depths/frame_00419.png +3 -0
  30. data/examples/office_scene_50/depths/frame_00479.png +3 -0
  31. data/examples/office_scene_50/depths/frame_00539.png +3 -0
  32. data/examples/office_scene_50/depths/frame_00599.png +3 -0
  33. data/examples/office_scene_50/depths/frame_00659.png +3 -0
  34. data/examples/office_scene_50/depths/frame_00719.png +3 -0
  35. data/examples/office_scene_50/depths/frame_00779.png +3 -0
  36. data/examples/office_scene_50/depths/frame_00839.png +3 -0
  37. data/examples/office_scene_50/depths/frame_00899.png +3 -0
  38. data/examples/office_scene_50/depths/frame_00959.png +3 -0
  39. data/examples/office_scene_50/depths/frame_01019.png +3 -0
  40. data/examples/office_scene_50/depths/frame_01079.png +3 -0
  41. data/examples/office_scene_50/depths/frame_01139.png +3 -0
  42. data/examples/office_scene_50/depths/frame_01199.png +3 -0
  43. data/examples/office_scene_50/depths/frame_01259.png +3 -0
  44. data/examples/office_scene_50/depths/frame_01319.png +3 -0
  45. data/examples/office_scene_50/depths/frame_01379.png +3 -0
  46. data/examples/office_scene_50/depths/frame_01439.png +3 -0
  47. data/examples/office_scene_50/depths/frame_01499.png +3 -0
  48. data/examples/office_scene_50/depths/frame_01559.png +3 -0
  49. data/examples/office_scene_50/depths/frame_01619.png +3 -0
  50. data/examples/office_scene_50/depths/frame_01679.png +3 -0
.gitattributes CHANGED
@@ -33,3 +33,284 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/teaser.png filter=lfs diff=lfs merge=lfs -text
37
+ data/40753679.mov filter=lfs diff=lfs merge=lfs -text
38
+ data/examples/office_scene_50/depths/frame_00059.png filter=lfs diff=lfs merge=lfs -text
39
+ data/examples/office_scene_50/depths/frame_00119.png filter=lfs diff=lfs merge=lfs -text
40
+ data/examples/office_scene_50/depths/frame_00179.png filter=lfs diff=lfs merge=lfs -text
41
+ data/examples/office_scene_50/depths/frame_00239.png filter=lfs diff=lfs merge=lfs -text
42
+ data/examples/office_scene_50/depths/frame_00299.png filter=lfs diff=lfs merge=lfs -text
43
+ data/examples/office_scene_50/depths/frame_00359.png filter=lfs diff=lfs merge=lfs -text
44
+ data/examples/office_scene_50/depths/frame_00419.png filter=lfs diff=lfs merge=lfs -text
45
+ data/examples/office_scene_50/depths/frame_00479.png filter=lfs diff=lfs merge=lfs -text
46
+ data/examples/office_scene_50/depths/frame_00539.png filter=lfs diff=lfs merge=lfs -text
47
+ data/examples/office_scene_50/depths/frame_00599.png filter=lfs diff=lfs merge=lfs -text
48
+ data/examples/office_scene_50/depths/frame_00659.png filter=lfs diff=lfs merge=lfs -text
49
+ data/examples/office_scene_50/depths/frame_00719.png filter=lfs diff=lfs merge=lfs -text
50
+ data/examples/office_scene_50/depths/frame_00779.png filter=lfs diff=lfs merge=lfs -text
51
+ data/examples/office_scene_50/depths/frame_00839.png filter=lfs diff=lfs merge=lfs -text
52
+ data/examples/office_scene_50/depths/frame_00899.png filter=lfs diff=lfs merge=lfs -text
53
+ data/examples/office_scene_50/depths/frame_00959.png filter=lfs diff=lfs merge=lfs -text
54
+ data/examples/office_scene_50/depths/frame_01019.png filter=lfs diff=lfs merge=lfs -text
55
+ data/examples/office_scene_50/depths/frame_01079.png filter=lfs diff=lfs merge=lfs -text
56
+ data/examples/office_scene_50/depths/frame_01139.png filter=lfs diff=lfs merge=lfs -text
57
+ data/examples/office_scene_50/depths/frame_01199.png filter=lfs diff=lfs merge=lfs -text
58
+ data/examples/office_scene_50/depths/frame_01259.png filter=lfs diff=lfs merge=lfs -text
59
+ data/examples/office_scene_50/depths/frame_01319.png filter=lfs diff=lfs merge=lfs -text
60
+ data/examples/office_scene_50/depths/frame_01379.png filter=lfs diff=lfs merge=lfs -text
61
+ data/examples/office_scene_50/depths/frame_01439.png filter=lfs diff=lfs merge=lfs -text
62
+ data/examples/office_scene_50/depths/frame_01499.png filter=lfs diff=lfs merge=lfs -text
63
+ data/examples/office_scene_50/depths/frame_01559.png filter=lfs diff=lfs merge=lfs -text
64
+ data/examples/office_scene_50/depths/frame_01619.png filter=lfs diff=lfs merge=lfs -text
65
+ data/examples/office_scene_50/depths/frame_01679.png filter=lfs diff=lfs merge=lfs -text
66
+ data/examples/office_scene_50/depths/frame_01739.png filter=lfs diff=lfs merge=lfs -text
67
+ data/examples/office_scene_50/depths/frame_01799.png filter=lfs diff=lfs merge=lfs -text
68
+ data/examples/office_scene_50/depths/frame_01859.png filter=lfs diff=lfs merge=lfs -text
69
+ data/examples/office_scene_50/depths/frame_01919.png filter=lfs diff=lfs merge=lfs -text
70
+ data/examples/office_scene_50/depths/frame_01979.png filter=lfs diff=lfs merge=lfs -text
71
+ data/examples/office_scene_50/depths/frame_02039.png filter=lfs diff=lfs merge=lfs -text
72
+ data/examples/office_scene_50/depths/frame_02099.png filter=lfs diff=lfs merge=lfs -text
73
+ data/examples/office_scene_50/depths/frame_02159.png filter=lfs diff=lfs merge=lfs -text
74
+ data/examples/office_scene_50/depths/frame_02219.png filter=lfs diff=lfs merge=lfs -text
75
+ data/examples/office_scene_50/depths/frame_02279.png filter=lfs diff=lfs merge=lfs -text
76
+ data/examples/office_scene_50/depths/frame_02339.png filter=lfs diff=lfs merge=lfs -text
77
+ data/examples/office_scene_50/depths/frame_02459.png filter=lfs diff=lfs merge=lfs -text
78
+ data/examples/office_scene_50/depths/frame_02519.png filter=lfs diff=lfs merge=lfs -text
79
+ data/examples/office_scene_50/depths/frame_02579.png filter=lfs diff=lfs merge=lfs -text
80
+ data/examples/office_scene_50/depths/frame_02639.png filter=lfs diff=lfs merge=lfs -text
81
+ data/examples/office_scene_50/depths/frame_02699.png filter=lfs diff=lfs merge=lfs -text
82
+ data/examples/office_scene_50/depths/frame_02759.png filter=lfs diff=lfs merge=lfs -text
83
+ data/examples/office_scene_50/depths/frame_02819.png filter=lfs diff=lfs merge=lfs -text
84
+ data/examples/office_scene_50/depths/frame_02879.png filter=lfs diff=lfs merge=lfs -text
85
+ data/examples/office_scene_50/depths/frame_02939.png filter=lfs diff=lfs merge=lfs -text
86
+ data/examples/office_scene_50/depths/frame_02999.png filter=lfs diff=lfs merge=lfs -text
87
+ data/examples/office_scene_50/images/frame_00059.png filter=lfs diff=lfs merge=lfs -text
88
+ data/examples/office_scene_50/images/frame_00119.png filter=lfs diff=lfs merge=lfs -text
89
+ data/examples/office_scene_50/images/frame_00179.png filter=lfs diff=lfs merge=lfs -text
90
+ data/examples/office_scene_50/images/frame_00239.png filter=lfs diff=lfs merge=lfs -text
91
+ data/examples/office_scene_50/images/frame_00299.png filter=lfs diff=lfs merge=lfs -text
92
+ data/examples/office_scene_50/images/frame_00359.png filter=lfs diff=lfs merge=lfs -text
93
+ data/examples/office_scene_50/images/frame_00419.png filter=lfs diff=lfs merge=lfs -text
94
+ data/examples/office_scene_50/images/frame_00479.png filter=lfs diff=lfs merge=lfs -text
95
+ data/examples/office_scene_50/images/frame_00539.png filter=lfs diff=lfs merge=lfs -text
96
+ data/examples/office_scene_50/images/frame_00599.png filter=lfs diff=lfs merge=lfs -text
97
+ data/examples/office_scene_50/images/frame_00659.png filter=lfs diff=lfs merge=lfs -text
98
+ data/examples/office_scene_50/images/frame_00719.png filter=lfs diff=lfs merge=lfs -text
99
+ data/examples/office_scene_50/images/frame_00779.png filter=lfs diff=lfs merge=lfs -text
100
+ data/examples/office_scene_50/images/frame_00839.png filter=lfs diff=lfs merge=lfs -text
101
+ data/examples/office_scene_50/images/frame_00899.png filter=lfs diff=lfs merge=lfs -text
102
+ data/examples/office_scene_50/images/frame_00959.png filter=lfs diff=lfs merge=lfs -text
103
+ data/examples/office_scene_50/images/frame_01019.png filter=lfs diff=lfs merge=lfs -text
104
+ data/examples/office_scene_50/images/frame_01079.png filter=lfs diff=lfs merge=lfs -text
105
+ data/examples/office_scene_50/images/frame_01139.png filter=lfs diff=lfs merge=lfs -text
106
+ data/examples/office_scene_50/images/frame_01199.png filter=lfs diff=lfs merge=lfs -text
107
+ data/examples/office_scene_50/images/frame_01259.png filter=lfs diff=lfs merge=lfs -text
108
+ data/examples/office_scene_50/images/frame_01319.png filter=lfs diff=lfs merge=lfs -text
109
+ data/examples/office_scene_50/images/frame_01379.png filter=lfs diff=lfs merge=lfs -text
110
+ data/examples/office_scene_50/images/frame_01439.png filter=lfs diff=lfs merge=lfs -text
111
+ data/examples/office_scene_50/images/frame_01499.png filter=lfs diff=lfs merge=lfs -text
112
+ data/examples/office_scene_50/images/frame_01559.png filter=lfs diff=lfs merge=lfs -text
113
+ data/examples/office_scene_50/images/frame_01619.png filter=lfs diff=lfs merge=lfs -text
114
+ data/examples/office_scene_50/images/frame_01679.png filter=lfs diff=lfs merge=lfs -text
115
+ data/examples/office_scene_50/images/frame_01739.png filter=lfs diff=lfs merge=lfs -text
116
+ data/examples/office_scene_50/images/frame_01799.png filter=lfs diff=lfs merge=lfs -text
117
+ data/examples/office_scene_50/images/frame_01859.png filter=lfs diff=lfs merge=lfs -text
118
+ data/examples/office_scene_50/images/frame_01919.png filter=lfs diff=lfs merge=lfs -text
119
+ data/examples/office_scene_50/images/frame_01979.png filter=lfs diff=lfs merge=lfs -text
120
+ data/examples/office_scene_50/images/frame_02039.png filter=lfs diff=lfs merge=lfs -text
121
+ data/examples/office_scene_50/images/frame_02099.png filter=lfs diff=lfs merge=lfs -text
122
+ data/examples/office_scene_50/images/frame_02159.png filter=lfs diff=lfs merge=lfs -text
123
+ data/examples/office_scene_50/images/frame_02219.png filter=lfs diff=lfs merge=lfs -text
124
+ data/examples/office_scene_50/images/frame_02279.png filter=lfs diff=lfs merge=lfs -text
125
+ data/examples/office_scene_50/images/frame_02339.png filter=lfs diff=lfs merge=lfs -text
126
+ data/examples/office_scene_50/images/frame_02399.png filter=lfs diff=lfs merge=lfs -text
127
+ data/examples/office_scene_50/images/frame_02459.png filter=lfs diff=lfs merge=lfs -text
128
+ data/examples/office_scene_50/images/frame_02519.png filter=lfs diff=lfs merge=lfs -text
129
+ data/examples/office_scene_50/images/frame_02579.png filter=lfs diff=lfs merge=lfs -text
130
+ data/examples/office_scene_50/images/frame_02639.png filter=lfs diff=lfs merge=lfs -text
131
+ data/examples/office_scene_50/images/frame_02699.png filter=lfs diff=lfs merge=lfs -text
132
+ data/examples/office_scene_50/images/frame_02759.png filter=lfs diff=lfs merge=lfs -text
133
+ data/examples/office_scene_50/images/frame_02819.png filter=lfs diff=lfs merge=lfs -text
134
+ data/examples/office_scene_50/images/frame_02879.png filter=lfs diff=lfs merge=lfs -text
135
+ data/examples/office_scene_50/images/frame_02939.png filter=lfs diff=lfs merge=lfs -text
136
+ data/examples/office_scene_50/images/frame_02999.png filter=lfs diff=lfs merge=lfs -text
137
+ data/examples/office_scene_50/office_scene_50/input.ply filter=lfs diff=lfs merge=lfs -text
138
+ data/examples/office_scene_50/office_scene_50/point_cloud/iteration_30000/point_cloud.ply filter=lfs diff=lfs merge=lfs -text
139
+ data/examples/office_scene_50/office_scene_50/point_cloud/iteration_7000/point_cloud.ply filter=lfs diff=lfs merge=lfs -text
140
+ data/examples/office_scene_50/sparse/0/points.ply filter=lfs diff=lfs merge=lfs -text
141
+ data/examples/office_scene_50/sparse/0/points3D.ply filter=lfs diff=lfs merge=lfs -text
142
+ output/office_scene_50_1/input.ply filter=lfs diff=lfs merge=lfs -text
143
+ output/office_scene_50_1/point_cloud/iteration_30000/point_cloud.ply filter=lfs diff=lfs merge=lfs -text
144
+ output/office_scene_50_1/point_cloud/iteration_7000/point_cloud.ply filter=lfs diff=lfs merge=lfs -text
145
+ output/office_scene_50_1/train/ours_None/renders/00000.png filter=lfs diff=lfs merge=lfs -text
146
+ output/office_scene_50_1/train/ours_None/renders/00002.png filter=lfs diff=lfs merge=lfs -text
147
+ output/office_scene_50_1/train/ours_None/renders/00003.png filter=lfs diff=lfs merge=lfs -text
148
+ output/office_scene_50_1/train/ours_None/renders/00004.png filter=lfs diff=lfs merge=lfs -text
149
+ output/office_scene_50_1/train/ours_None/renders/00005.png filter=lfs diff=lfs merge=lfs -text
150
+ output/office_scene_50_1/train/ours_None/renders/00006.png filter=lfs diff=lfs merge=lfs -text
151
+ output/office_scene_50_1/train/ours_None/renders/00007.png filter=lfs diff=lfs merge=lfs -text
152
+ output/office_scene_50_1/train/ours_None/renders/00008.png filter=lfs diff=lfs merge=lfs -text
153
+ output/office_scene_50_1/train/ours_None/renders/00009.png filter=lfs diff=lfs merge=lfs -text
154
+ output/office_scene_50_1/train/ours_None/renders/00010.png filter=lfs diff=lfs merge=lfs -text
155
+ output/office_scene_50_1/train/ours_None/renders/00011.png filter=lfs diff=lfs merge=lfs -text
156
+ output/office_scene_50_1/train/ours_None/renders/00012.png filter=lfs diff=lfs merge=lfs -text
157
+ output/office_scene_50_1/train/ours_None/renders/00013.png filter=lfs diff=lfs merge=lfs -text
158
+ output/office_scene_50_1/train/ours_None/renders/00014.png filter=lfs diff=lfs merge=lfs -text
159
+ output/office_scene_50_1/train/ours_None/renders/00015.png filter=lfs diff=lfs merge=lfs -text
160
+ output/office_scene_50_1/train/ours_None/renders/00016.png filter=lfs diff=lfs merge=lfs -text
161
+ output/office_scene_50_1/train/ours_None/renders/00017.png filter=lfs diff=lfs merge=lfs -text
162
+ output/office_scene_50_1/train/ours_None/renders/00018.png filter=lfs diff=lfs merge=lfs -text
163
+ output/office_scene_50_1/train/ours_None/renders/00019.png filter=lfs diff=lfs merge=lfs -text
164
+ output/office_scene_50_1/train/ours_None/renders/00020.png filter=lfs diff=lfs merge=lfs -text
165
+ output/office_scene_50_1/train/ours_None/renders/00021.png filter=lfs diff=lfs merge=lfs -text
166
+ output/office_scene_50_1/train/ours_None/renders/00022.png filter=lfs diff=lfs merge=lfs -text
167
+ output/office_scene_50_1/train/ours_None/renders/00023.png filter=lfs diff=lfs merge=lfs -text
168
+ output/office_scene_50_1/train/ours_None/renders/00024.png filter=lfs diff=lfs merge=lfs -text
169
+ output/office_scene_50_1/train/ours_None/renders/00025.png filter=lfs diff=lfs merge=lfs -text
170
+ output/office_scene_50_1/train/ours_None/renders/00026.png filter=lfs diff=lfs merge=lfs -text
171
+ output/office_scene_50_1/train/ours_None/renders/00027.png filter=lfs diff=lfs merge=lfs -text
172
+ output/office_scene_50_1/train/ours_None/renders/00028.png filter=lfs diff=lfs merge=lfs -text
173
+ output/office_scene_50_1/train/ours_None/renders/00029.png filter=lfs diff=lfs merge=lfs -text
174
+ output/office_scene_50_1/train/ours_None/renders/00030.png filter=lfs diff=lfs merge=lfs -text
175
+ output/office_scene_50_1/train/ours_None/renders/00031.png filter=lfs diff=lfs merge=lfs -text
176
+ output/office_scene_50_1/train/ours_None/renders/00032.png filter=lfs diff=lfs merge=lfs -text
177
+ output/office_scene_50_1/train/ours_None/renders/00033.png filter=lfs diff=lfs merge=lfs -text
178
+ output/office_scene_50_1/train/ours_None/renders/00034.png filter=lfs diff=lfs merge=lfs -text
179
+ output/office_scene_50_1/train/ours_None/renders/00035.png filter=lfs diff=lfs merge=lfs -text
180
+ output/office_scene_50_1/train/ours_None/renders/00036.png filter=lfs diff=lfs merge=lfs -text
181
+ output/office_scene_50_1/train/ours_None/renders/00037.png filter=lfs diff=lfs merge=lfs -text
182
+ output/office_scene_50_1/train/ours_None/renders/00038.png filter=lfs diff=lfs merge=lfs -text
183
+ output/office_scene_50_1/train/ours_None/renders/00039.png filter=lfs diff=lfs merge=lfs -text
184
+ output/office_scene_50_1/train/ours_None/renders/00040.png filter=lfs diff=lfs merge=lfs -text
185
+ output/office_scene_50_1/train/ours_None/renders/00041.png filter=lfs diff=lfs merge=lfs -text
186
+ output/office_scene_50_1/train/ours_None/renders/00042.png filter=lfs diff=lfs merge=lfs -text
187
+ output/office_scene_50_1/train/ours_None/renders/00043.png filter=lfs diff=lfs merge=lfs -text
188
+ output/office_scene_50_1/train/ours_None/renders/00044.png filter=lfs diff=lfs merge=lfs -text
189
+ output/office_scene_50_1/train/ours_None/renders/00045.png filter=lfs diff=lfs merge=lfs -text
190
+ output/office_scene_50_1/train/ours_None/renders/00046.png filter=lfs diff=lfs merge=lfs -text
191
+ output/office_scene_50_1/train/ours_None/renders/00047.png filter=lfs diff=lfs merge=lfs -text
192
+ output/office_scene_50_1/train/ours_None/renders/00048.png filter=lfs diff=lfs merge=lfs -text
193
+ output/office_scene_50_1/train/ours_None/renders/00049.png filter=lfs diff=lfs merge=lfs -text
194
+ output/office_scene_50_2/input.ply filter=lfs diff=lfs merge=lfs -text
195
+ output/office_scene_50_2/point_cloud/iteration_30000/point_cloud.ply filter=lfs diff=lfs merge=lfs -text
196
+ output/office_scene_50_2/point_cloud/iteration_7000/point_cloud.ply filter=lfs diff=lfs merge=lfs -text
197
+ output/office_scene_50_2/train/ours_None/renders/00000.png filter=lfs diff=lfs merge=lfs -text
198
+ output/office_scene_50_2/train/ours_None/renders/00002.png filter=lfs diff=lfs merge=lfs -text
199
+ output/office_scene_50_2/train/ours_None/renders/00003.png filter=lfs diff=lfs merge=lfs -text
200
+ output/office_scene_50_2/train/ours_None/renders/00004.png filter=lfs diff=lfs merge=lfs -text
201
+ output/office_scene_50_2/train/ours_None/renders/00005.png filter=lfs diff=lfs merge=lfs -text
202
+ output/office_scene_50_2/train/ours_None/renders/00006.png filter=lfs diff=lfs merge=lfs -text
203
+ output/office_scene_50_2/train/ours_None/renders/00007.png filter=lfs diff=lfs merge=lfs -text
204
+ output/office_scene_50_2/train/ours_None/renders/00008.png filter=lfs diff=lfs merge=lfs -text
205
+ output/office_scene_50_2/train/ours_None/renders/00009.png filter=lfs diff=lfs merge=lfs -text
206
+ output/office_scene_50_2/train/ours_None/renders/00010.png filter=lfs diff=lfs merge=lfs -text
207
+ output/office_scene_50_2/train/ours_None/renders/00011.png filter=lfs diff=lfs merge=lfs -text
208
+ output/office_scene_50_2/train/ours_None/renders/00012.png filter=lfs diff=lfs merge=lfs -text
209
+ output/office_scene_50_2/train/ours_None/renders/00013.png filter=lfs diff=lfs merge=lfs -text
210
+ output/office_scene_50_2/train/ours_None/renders/00014.png filter=lfs diff=lfs merge=lfs -text
211
+ output/office_scene_50_2/train/ours_None/renders/00015.png filter=lfs diff=lfs merge=lfs -text
212
+ output/office_scene_50_2/train/ours_None/renders/00016.png filter=lfs diff=lfs merge=lfs -text
213
+ output/office_scene_50_2/train/ours_None/renders/00017.png filter=lfs diff=lfs merge=lfs -text
214
+ output/office_scene_50_2/train/ours_None/renders/00018.png filter=lfs diff=lfs merge=lfs -text
215
+ output/office_scene_50_2/train/ours_None/renders/00019.png filter=lfs diff=lfs merge=lfs -text
216
+ output/office_scene_50_2/train/ours_None/renders/00020.png filter=lfs diff=lfs merge=lfs -text
217
+ output/office_scene_50_2/train/ours_None/renders/00021.png filter=lfs diff=lfs merge=lfs -text
218
+ output/office_scene_50_2/train/ours_None/renders/00022.png filter=lfs diff=lfs merge=lfs -text
219
+ output/office_scene_50_2/train/ours_None/renders/00023.png filter=lfs diff=lfs merge=lfs -text
220
+ output/office_scene_50_2/train/ours_None/renders/00024.png filter=lfs diff=lfs merge=lfs -text
221
+ output/office_scene_50_2/train/ours_None/renders/00025.png filter=lfs diff=lfs merge=lfs -text
222
+ output/office_scene_50_2/train/ours_None/renders/00026.png filter=lfs diff=lfs merge=lfs -text
223
+ output/office_scene_50_2/train/ours_None/renders/00027.png filter=lfs diff=lfs merge=lfs -text
224
+ output/office_scene_50_2/train/ours_None/renders/00028.png filter=lfs diff=lfs merge=lfs -text
225
+ output/office_scene_50_2/train/ours_None/renders/00029.png filter=lfs diff=lfs merge=lfs -text
226
+ output/office_scene_50_2/train/ours_None/renders/00030.png filter=lfs diff=lfs merge=lfs -text
227
+ output/office_scene_50_2/train/ours_None/renders/00031.png filter=lfs diff=lfs merge=lfs -text
228
+ output/office_scene_50_2/train/ours_None/renders/00032.png filter=lfs diff=lfs merge=lfs -text
229
+ output/office_scene_50_2/train/ours_None/renders/00033.png filter=lfs diff=lfs merge=lfs -text
230
+ output/office_scene_50_2/train/ours_None/renders/00034.png filter=lfs diff=lfs merge=lfs -text
231
+ output/office_scene_50_2/train/ours_None/renders/00035.png filter=lfs diff=lfs merge=lfs -text
232
+ output/office_scene_50_2/train/ours_None/renders/00036.png filter=lfs diff=lfs merge=lfs -text
233
+ output/office_scene_50_2/train/ours_None/renders/00037.png filter=lfs diff=lfs merge=lfs -text
234
+ output/office_scene_50_2/train/ours_None/renders/00038.png filter=lfs diff=lfs merge=lfs -text
235
+ output/office_scene_50_2/train/ours_None/renders/00039.png filter=lfs diff=lfs merge=lfs -text
236
+ output/office_scene_50_2/train/ours_None/renders/00040.png filter=lfs diff=lfs merge=lfs -text
237
+ output/office_scene_50_2/train/ours_None/renders/00041.png filter=lfs diff=lfs merge=lfs -text
238
+ output/office_scene_50_2/train/ours_None/renders/00042.png filter=lfs diff=lfs merge=lfs -text
239
+ output/office_scene_50_2/train/ours_None/renders/00043.png filter=lfs diff=lfs merge=lfs -text
240
+ output/office_scene_50_2/train/ours_None/renders/00044.png filter=lfs diff=lfs merge=lfs -text
241
+ output/office_scene_50_2/train/ours_None/renders/00045.png filter=lfs diff=lfs merge=lfs -text
242
+ output/office_scene_50_2/train/ours_None/renders/00046.png filter=lfs diff=lfs merge=lfs -text
243
+ output/office_scene_50_2/train/ours_None/renders/00047.png filter=lfs diff=lfs merge=lfs -text
244
+ output/office_scene_50_2/train/ours_None/renders/00048.png filter=lfs diff=lfs merge=lfs -text
245
+ output/office_scene_50_2/train/ours_None/renders/00049.png filter=lfs diff=lfs merge=lfs -text
246
+ output/office_scene_50_3/input.ply filter=lfs diff=lfs merge=lfs -text
247
+ output/office_scene_50_3/point_cloud/iteration_30000/point_cloud.ply filter=lfs diff=lfs merge=lfs -text
248
+ output/office_scene_50_3/point_cloud/iteration_7000/point_cloud.ply filter=lfs diff=lfs merge=lfs -text
249
+ output/office_scene_50_3/train/ours_None/renders/00000.png filter=lfs diff=lfs merge=lfs -text
250
+ output/office_scene_50_3/train/ours_None/renders/00002.png filter=lfs diff=lfs merge=lfs -text
251
+ output/office_scene_50_3/train/ours_None/renders/00003.png filter=lfs diff=lfs merge=lfs -text
252
+ output/office_scene_50_3/train/ours_None/renders/00004.png filter=lfs diff=lfs merge=lfs -text
253
+ output/office_scene_50_3/train/ours_None/renders/00005.png filter=lfs diff=lfs merge=lfs -text
254
+ output/office_scene_50_3/train/ours_None/renders/00006.png filter=lfs diff=lfs merge=lfs -text
255
+ output/office_scene_50_3/train/ours_None/renders/00007.png filter=lfs diff=lfs merge=lfs -text
256
+ output/office_scene_50_3/train/ours_None/renders/00008.png filter=lfs diff=lfs merge=lfs -text
257
+ output/office_scene_50_3/train/ours_None/renders/00009.png filter=lfs diff=lfs merge=lfs -text
258
+ output/office_scene_50_3/train/ours_None/renders/00010.png filter=lfs diff=lfs merge=lfs -text
259
+ output/office_scene_50_3/train/ours_None/renders/00011.png filter=lfs diff=lfs merge=lfs -text
260
+ output/office_scene_50_3/train/ours_None/renders/00012.png filter=lfs diff=lfs merge=lfs -text
261
+ output/office_scene_50_3/train/ours_None/renders/00013.png filter=lfs diff=lfs merge=lfs -text
262
+ output/office_scene_50_3/train/ours_None/renders/00014.png filter=lfs diff=lfs merge=lfs -text
263
+ output/office_scene_50_3/train/ours_None/renders/00015.png filter=lfs diff=lfs merge=lfs -text
264
+ output/office_scene_50_3/train/ours_None/renders/00016.png filter=lfs diff=lfs merge=lfs -text
265
+ output/office_scene_50_3/train/ours_None/renders/00017.png filter=lfs diff=lfs merge=lfs -text
266
+ output/office_scene_50_3/train/ours_None/renders/00018.png filter=lfs diff=lfs merge=lfs -text
267
+ output/office_scene_50_3/train/ours_None/renders/00019.png filter=lfs diff=lfs merge=lfs -text
268
+ output/office_scene_50_3/train/ours_None/renders/00020.png filter=lfs diff=lfs merge=lfs -text
269
+ output/office_scene_50_3/train/ours_None/renders/00021.png filter=lfs diff=lfs merge=lfs -text
270
+ output/office_scene_50_3/train/ours_None/renders/00022.png filter=lfs diff=lfs merge=lfs -text
271
+ output/office_scene_50_3/train/ours_None/renders/00023.png filter=lfs diff=lfs merge=lfs -text
272
+ output/office_scene_50_3/train/ours_None/renders/00024.png filter=lfs diff=lfs merge=lfs -text
273
+ output/office_scene_50_3/train/ours_None/renders/00025.png filter=lfs diff=lfs merge=lfs -text
274
+ output/office_scene_50_3/train/ours_None/renders/00026.png filter=lfs diff=lfs merge=lfs -text
275
+ output/office_scene_50_3/train/ours_None/renders/00027.png filter=lfs diff=lfs merge=lfs -text
276
+ output/office_scene_50_3/train/ours_None/renders/00028.png filter=lfs diff=lfs merge=lfs -text
277
+ output/office_scene_50_3/train/ours_None/renders/00029.png filter=lfs diff=lfs merge=lfs -text
278
+ output/office_scene_50_3/train/ours_None/renders/00030.png filter=lfs diff=lfs merge=lfs -text
279
+ output/office_scene_50_3/train/ours_None/renders/00031.png filter=lfs diff=lfs merge=lfs -text
280
+ output/office_scene_50_3/train/ours_None/renders/00032.png filter=lfs diff=lfs merge=lfs -text
281
+ output/office_scene_50_3/train/ours_None/renders/00033.png filter=lfs diff=lfs merge=lfs -text
282
+ output/office_scene_50_3/train/ours_None/renders/00034.png filter=lfs diff=lfs merge=lfs -text
283
+ output/office_scene_50_3/train/ours_None/renders/00035.png filter=lfs diff=lfs merge=lfs -text
284
+ output/office_scene_50_3/train/ours_None/renders/00036.png filter=lfs diff=lfs merge=lfs -text
285
+ output/office_scene_50_3/train/ours_None/renders/00037.png filter=lfs diff=lfs merge=lfs -text
286
+ output/office_scene_50_3/train/ours_None/renders/00038.png filter=lfs diff=lfs merge=lfs -text
287
+ output/office_scene_50_3/train/ours_None/renders/00039.png filter=lfs diff=lfs merge=lfs -text
288
+ output/office_scene_50_3/train/ours_None/renders/00040.png filter=lfs diff=lfs merge=lfs -text
289
+ output/office_scene_50_3/train/ours_None/renders/00041.png filter=lfs diff=lfs merge=lfs -text
290
+ output/office_scene_50_3/train/ours_None/renders/00042.png filter=lfs diff=lfs merge=lfs -text
291
+ output/office_scene_50_3/train/ours_None/renders/00043.png filter=lfs diff=lfs merge=lfs -text
292
+ output/office_scene_50_3/train/ours_None/renders/00044.png filter=lfs diff=lfs merge=lfs -text
293
+ output/office_scene_50_3/train/ours_None/renders/00045.png filter=lfs diff=lfs merge=lfs -text
294
+ output/office_scene_50_3/train/ours_None/renders/00046.png filter=lfs diff=lfs merge=lfs -text
295
+ output/office_scene_50_3/train/ours_None/renders/00047.png filter=lfs diff=lfs merge=lfs -text
296
+ output/office_scene_50_3/train/ours_None/renders/00048.png filter=lfs diff=lfs merge=lfs -text
297
+ output/office_scene_50_3/train/ours_None/renders/00049.png filter=lfs diff=lfs merge=lfs -text
298
+ submodules/langsplat-rasterization/build/lib.linux-x86_64-cpython-37/diff_gaussian_rasterization/_C.cpython-37m-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
299
+ submodules/langsplat-rasterization/build/lib.linux-x86_64-cpython-38/diff_gaussian_rasterization/_C.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
300
+ submodules/langsplat-rasterization/build/temp.linux-x86_64-cpython-37/cuda_rasterizer/rasterizer_impl.o filter=lfs diff=lfs merge=lfs -text
301
+ submodules/langsplat-rasterization/build/temp.linux-x86_64-cpython-37/ext.o filter=lfs diff=lfs merge=lfs -text
302
+ submodules/langsplat-rasterization/build/temp.linux-x86_64-cpython-37/rasterize_points.o filter=lfs diff=lfs merge=lfs -text
303
+ submodules/langsplat-rasterization/build/temp.linux-x86_64-cpython-38/cuda_rasterizer/rasterizer_impl.o filter=lfs diff=lfs merge=lfs -text
304
+ submodules/langsplat-rasterization/build/temp.linux-x86_64-cpython-38/ext.o filter=lfs diff=lfs merge=lfs -text
305
+ submodules/langsplat-rasterization/build/temp.linux-x86_64-cpython-38/rasterize_points.o filter=lfs diff=lfs merge=lfs -text
306
+ submodules/langsplat-rasterization/third_party/glm/doc/manual/frontpage1.png filter=lfs diff=lfs merge=lfs -text
307
+ submodules/langsplat-rasterization/third_party/glm/doc/manual/frontpage2.png filter=lfs diff=lfs merge=lfs -text
308
+ submodules/langsplat-rasterization/third_party/glm/doc/manual.pdf filter=lfs diff=lfs merge=lfs -text
309
+ submodules/simple-knn/build/lib.linux-x86_64-cpython-37/simple_knn/_C.cpython-37m-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
310
+ submodules/simple-knn/build/lib.linux-x86_64-cpython-38/simple_knn/_C.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
311
+ submodules/simple-knn/build/temp.linux-x86_64-cpython-37/ext.o filter=lfs diff=lfs merge=lfs -text
312
+ submodules/simple-knn/build/temp.linux-x86_64-cpython-37/simple_knn.o filter=lfs diff=lfs merge=lfs -text
313
+ submodules/simple-knn/build/temp.linux-x86_64-cpython-37/spatial.o filter=lfs diff=lfs merge=lfs -text
314
+ submodules/simple-knn/build/temp.linux-x86_64-cpython-38/ext.o filter=lfs diff=lfs merge=lfs -text
315
+ submodules/simple-knn/build/temp.linux-x86_64-cpython-38/simple_knn.o filter=lfs diff=lfs merge=lfs -text
316
+ submodules/simple-knn/build/temp.linux-x86_64-cpython-38/spatial.o filter=lfs diff=lfs merge=lfs -text
LICENSE.md ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Gaussian-Splatting License
2
+ ===========================
3
+
4
+ **Inria** and **the Max Planck Institut for Informatik (MPII)** hold all the ownership rights on the *Software* named **gaussian-splatting**.
5
+ The *Software* is in the process of being registered with the Agence pour la Protection des
6
+ Programmes (APP).
7
+
8
+ The *Software* is still being developed by the *Licensor*.
9
+
10
+ *Licensor*'s goal is to allow the research community to use, test and evaluate
11
+ the *Software*.
12
+
13
+ ## 1. Definitions
14
+
15
+ *Licensee* means any person or entity that uses the *Software* and distributes
16
+ its *Work*.
17
+
18
+ *Licensor* means the owners of the *Software*, i.e Inria and MPII
19
+
20
+ *Software* means the original work of authorship made available under this
21
+ License ie gaussian-splatting.
22
+
23
+ *Work* means the *Software* and any additions to or derivative works of the
24
+ *Software* that are made available under this License.
25
+
26
+
27
+ ## 2. Purpose
28
+ This license is intended to define the rights granted to the *Licensee* by
29
+ Licensors under the *Software*.
30
+
31
+ ## 3. Rights granted
32
+
33
+ For the above reasons Licensors have decided to distribute the *Software*.
34
+ Licensors grant non-exclusive rights to use the *Software* for research purposes
35
+ to research users (both academic and industrial), free of charge, without right
36
+ to sublicense.. The *Software* may be used "non-commercially", i.e., for research
37
+ and/or evaluation purposes only.
38
+
39
+ Subject to the terms and conditions of this License, you are granted a
40
+ non-exclusive, royalty-free, license to reproduce, prepare derivative works of,
41
+ publicly display, publicly perform and distribute its *Work* and any resulting
42
+ derivative works in any form.
43
+
44
+ ## 4. Limitations
45
+
46
+ **4.1 Redistribution.** You may reproduce or distribute the *Work* only if (a) you do
47
+ so under this License, (b) you include a complete copy of this License with
48
+ your distribution, and (c) you retain without modification any copyright,
49
+ patent, trademark, or attribution notices that are present in the *Work*.
50
+
51
+ **4.2 Derivative Works.** You may specify that additional or different terms apply
52
+ to the use, reproduction, and distribution of your derivative works of the *Work*
53
+ ("Your Terms") only if (a) Your Terms provide that the use limitation in
54
+ Section 2 applies to your derivative works, and (b) you identify the specific
55
+ derivative works that are subject to Your Terms. Notwithstanding Your Terms,
56
+ this License (including the redistribution requirements in Section 3.1) will
57
+ continue to apply to the *Work* itself.
58
+
59
+ **4.3** Any other use without of prior consent of Licensors is prohibited. Research
60
+ users explicitly acknowledge having received from Licensors all information
61
+ allowing to appreciate the adequacy between of the *Software* and their needs and
62
+ to undertake all necessary precautions for its execution and use.
63
+
64
+ **4.4** The *Software* is provided both as a compiled library file and as source
65
+ code. In case of using the *Software* for a publication or other results obtained
66
+ through the use of the *Software*, users are strongly encouraged to cite the
67
+ corresponding publications as explained in the documentation of the *Software*.
68
+
69
+ ## 5. Disclaimer
70
+
71
+ THE USER CANNOT USE, EXPLOIT OR DISTRIBUTE THE *SOFTWARE* FOR COMMERCIAL PURPOSES
72
+ WITHOUT PRIOR AND EXPLICIT CONSENT OF LICENSORS. YOU MUST CONTACT INRIA FOR ANY
73
+ UNAUTHORIZED USE: stip-sophia.transfert@inria.fr . ANY SUCH ACTION WILL
74
+ CONSTITUTE A FORGERY. THIS *SOFTWARE* IS PROVIDED "AS IS" WITHOUT ANY WARRANTIES
75
+ OF ANY NATURE AND ANY EXPRESS OR IMPLIED WARRANTIES, WITH REGARDS TO COMMERCIAL
76
+ USE, PROFESSIONNAL USE, LEGAL OR NOT, OR OTHER, OR COMMERCIALISATION OR
77
+ ADAPTATION. UNLESS EXPLICITLY PROVIDED BY LAW, IN NO EVENT, SHALL INRIA OR THE
78
+ AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
79
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
80
+ GOODS OR SERVICES, LOSS OF USE, DATA, OR PROFITS OR BUSINESS INTERRUPTION)
81
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
82
+ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING FROM, OUT OF OR
83
+ IN CONNECTION WITH THE *SOFTWARE* OR THE USE OR OTHER DEALINGS IN THE *SOFTWARE*.
README.md ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # [CVPR2024 Highlight] LangSplat: 3D Language Gaussian Splatting
2
+ [Minghan Qin*](https://minghanqin.github.io/), [Wanhua Li*†](https://li-wanhua.github.io/), [Jiawei Zhou*](https://latitudezhou.github.io/), [Haoqian Wang†](https://www.sigs.tsinghua.edu.cn/whq_en/main.htm), [Hanspeter Pfister](https://seas.harvard.edu/person/hanspeter-pfister)<br>(\* indicates equal contribution, † means Co-corresponding author)<br>| [Webpage](https://langsplat.github.io/) | [Full Paper](https://arxiv.org/pdf/2312.16084.pdf) | [Video](https://www.youtube.com/watch?v=XMlyjsei-Es) |<br>
3
+ | Preprocessed Dataset | [BaiduWangpan](https://pan.baidu.com/s/1S_cdmN9EFOlCQ3z1GZR3EA?pwd=lfea) | [GoogleDrive](https://drive.google.com/drive/folders/1Icw5AcQkY_2L_k7ddXrGCJ3z4laa4jg5?usp=sharing) |<br>
4
+ | Pre-trained Models | [BaiduWangpan](https://pan.baidu.com/s/12L83uEi5KlF9ViAZqp0B4w?pwd=dl22) | [GoogleDrive](https://drive.google.com/drive/folders/1ASFXWOwaXP_aSXV2iMDmEfILaDXQXlrE?usp=sharing) |<br>
5
+ | [Datasets](https://drive.google.com/file/d/1QF1Po5p5DwTjFHu6tnTeYs_G0egMVmHt/view?usp=sharing) |<br>
6
+
7
+ ![Teaser image](assets/teaser.png)
8
+
9
+ This repository contains the official authors implementation associated with the paper "LangSplat: 3D Language Gaussian Splatting" (CVPR 2024), which can be found [here](https://arxiv.org/pdf/2312.16084.pdf). We further provide the preprocessed datasets 3D-OVS with language feature, as well as pre-trained models.
10
+
11
+ <section class="section" id="BibTeX">
12
+ <div class="container is-max-desktop content">
13
+ <h2 class="title">😊LangSplat Family</h2>
14
+ <pre><code>@inproceedings{qin2024langsplat,
15
+ title={Langsplat: 3d language gaussian splatting},
16
+ author={Qin, Minghan and Li, Wanhua and Zhou, Jiawei and Wang, Haoqian and Pfister, Hanspeter},
17
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
18
+ pages={20051--20060},
19
+ year={2024}
20
+ }</code></pre>
21
+
22
+ <p><strong>🎉 We have released LangSplat V2!</strong>
23
+ The new version significantly improves performance, achieving over <strong>450+ FPS</strong> in rendering. <a href="https://langsplat-v2.github.io/" target="_blank" style="text-decoration: underline;">[NeurIPS 2025] LangSplat V2</a>
24
+ </p>
25
+
26
+ <pre><code>@article{li2025langsplatv2,
27
+ title={LangSplatV2: High-dimensional 3D Language Gaussian Splatting with 450+ FPS},
28
+ author={Li, Wanhua and Zhao, Yujie and Qin, Minghan and Liu, Yang and Cai, Yuanhao and Gan, Chuang and Pfister, Hanspeter},
29
+ journal={arXiv preprint arXiv:2507.07136},
30
+ year={2025}
31
+ }</code></pre>
32
+
33
+ <p>🎉We also invite everyone to check out our <a href="https://4d-langsplat.github.io/" target="_blank" style="text-decoration: underline;">[CVPR 2025] 4D LangSplat</a>, which is a multimodal, object-wise video prompting approach combined with a status deformable network to learn 4D language fields.
34
+ </p>
35
+
36
+ <pre><code>@inproceedings{li20254d,
37
+ title={4d langsplat: 4d language gaussian splatting via multimodal large language models},
38
+ author={Li, Wanhua and Zhou, Renping and Zhou, Jiawei and Song, Yingwei and Herter, Johannes and Qin, Minghan and Huang, Gao and Pfister, Hanspeter},
39
+ booktitle={Proceedings of the Computer Vision and Pattern Recognition Conference},
40
+ pages={22001--22011},
41
+ year={2025}
42
+ }</code></pre>
43
+
44
+ </div>
45
+ </section>
46
+
47
+
48
+ </div>
49
+ </section>
50
+
51
+ ## Cloning the Repository
52
+
53
+ The repository contains submodules, thus please check it out with
54
+ ```shell
55
+ # SSH
56
+ git clone git@github.com:minghanqin/LangSplat.git --recursive
57
+ ```
58
+ or
59
+ ```shell
60
+ # HTTPS
61
+ git clone https://github.com/minghanqin/LangSplat.git --recursive
62
+ ```
63
+
64
+ ## Overview
65
+
66
+ The codebase has 3 main components:
67
+ - A PyTorch-based optimizer to produce a LangSplat model from SfM datasets with language feature inputs
68
+ - A scene-wise language autoencoder to alleviate the substantial memory demands imposed by explicit modeling.
69
+ - A script to help you turn your own images into optimization-ready SfM datasets with language features
70
+
71
+ The components have been tested on Ubuntu Linux 18.04. Instructions for setting up and running each of them are found in the sections below.
72
+
73
+ ## Datasets
74
+ In the experiments section of our paper, we primarily utilized two datasets: the 3D-OVS dataset and the LERF dataset.
75
+
76
+ The 3D-OVS dataset is accessible for download via the following link: [Download 3D-OVS Dataset](https://drive.google.com/drive/folders/1kdV14Gu5nZX6WOPbccG7t7obP_aXkOuC?usp=sharing) .
77
+
78
+ For the LERF dataset, we have expanded upon its existing collection and also provided the corresponding COLMAP data. These resources can be accessed through this link: [Download Expanded LERF Dataset and COLMAP Data](https://drive.google.com/file/d/1QF1Po5p5DwTjFHu6tnTeYs_G0egMVmHt/view?usp=sharing).
79
+
80
+ ## Optimizer
81
+
82
+ The optimizer uses PyTorch and CUDA extensions in a Python environment to produce trained models.
83
+
84
+ ### Hardware Requirements
85
+
86
+ - CUDA-ready GPU with Compute Capability 7.0+
87
+ - 24 GB VRAM (to train to paper evaluation quality)
88
+
89
+ ### Software Requirements
90
+ - Conda (recommended for easy setup)
91
+ - C++ Compiler for PyTorch extensions (we used VS Code)
92
+ - CUDA SDK 11 for PyTorch extensions (we used 11.8)
93
+ - C++ Compiler and CUDA SDK must be compatible
94
+
95
+ ### Setup
96
+
97
+ #### Environment Setup
98
+
99
+ Our default, provided install method is based on Conda package and environment management:
100
+ ```shell
101
+ conda env create --file environment.yml
102
+ conda activate langsplat
103
+ ```
104
+
105
+ ### QuickStart
106
+
107
+ Download the pretrained model to ```output/```, then simply use
108
+
109
+ ```shell
110
+ python render.py -m output/$CASENAME --include_feature
111
+ ```
112
+
113
+
114
+ ## Processing your own Scenes
115
+
116
+ ### Before getting started
117
+ Firstly, put your images into the data dir.
118
+ ```
119
+ <dataset_name>
120
+ |---input
121
+ | |---<image 0>
122
+ | |---<image 1>
123
+ | |---...
124
+ ```
125
+ Secondly, you need to acquire the following dataset format and a pre-trained RGB model follow the [3dgs](https://github.com/graphdeco-inria/gaussian-splatting) repository.
126
+
127
+ ```
128
+ <dataset_name>
129
+ |---images
130
+ | |---<image 0>
131
+ | |---<image 1>
132
+ | |---...
133
+ |---input
134
+ | |---<image 0>
135
+ | |---<image 1>
136
+ | |---...
137
+ |---output
138
+ | |---<dataset_name>
139
+ | | |---point_cloud/iteration_30000/point_cloud.ply
140
+ | | |---cameras.json
141
+ | | |---cfg_args
142
+ | | |---chkpnt30000.pth
143
+ | | |---input.ply
144
+ |---sparse
145
+ |---0
146
+ |---cameras.bin
147
+ |---images.bin
148
+ |---points3D.bin
149
+ ```
150
+
151
+
152
+ ### Environment setup.
153
+ Please install [segment-anything-langsplat](https://github.com/minghanqin/segment-anything-langsplat) and download the checkpoints of SAM from [here](https://github.com/facebookresearch/segment-anything) to ```ckpts/```.
154
+ ### Pipeline
155
+ Follow the ```process.sh``` and train LangSplat on your own scenes.
156
+ - **Step 1: Generate Language Feature of the Scenes.**
157
+ Put the image data into the "input" directory under the ```<dataset_name>/```, then run the following code.
158
+ ```
159
+ python preprocess.py --dataset_path $dataset_path
160
+ ```
161
+ - **Step 2: Train the Autoencoder and get the lower-dims Feature.**
162
+ ```
163
+ # train the autoencoder
164
+ cd autoencoder
165
+ python train.py --dataset_name $dataset_path --encoder_dims 256 128 64 32 3 --decoder_dims 16 32 64 128 256 256 512 --lr 0.0007 --output ae_ckpt
166
+ # get the 3-dims language feature of the scene
167
+ python test.py --dataset_name $dataset_path --output
168
+ ```
169
+
170
+ Our model expects the following dataset structure in the source path location:
171
+ ```
172
+ <dataset_name>
173
+ |---images
174
+ | |---<image 0>
175
+ | |---<image 1>
176
+ | |---...
177
+ |---language_feature
178
+ | |---00_f.npy
179
+ | |---00_s.npy
180
+ | |---...
181
+ |---language_feature_dim3
182
+ | |---00_f.npy
183
+ | |---00_s.npy
184
+ | |---...
185
+ |---output
186
+ | |---<dataset_name>
187
+ | | |---point_cloud/iteration_30000/point_cloud.ply
188
+ | | |---cameras.json
189
+ | | |---cfg_args
190
+ | | |---chkpnt30000.pth
191
+ | | |---input.ply
192
+ |---sparse
193
+ |---0
194
+ |---cameras.bin
195
+ |---images.bin
196
+ |---points3D.bin
197
+ ```
198
+ - **Step 3: Train the LangSplat.**
199
+ ```
200
+ python train.py -s dataset_path -m output/${casename} --start_checkpoint $dataset_path/output/$casename/chkpnt30000.pth --feature_level ${level}
201
+ ```
202
+ - **Step 4: Render the LangSplat.**
203
+ ```
204
+ python render.py -s dataset_path -m output/${casename} --feature_level ${level}
205
+ ```
206
+ - **Step 5: Eval.**
207
+ First, we generate the 3-dim language feature map through Step 4. Subsequently, the decoder elevates the features from 3 dimensions to 512 dimensions. For further operations and detailed explanations, please refer to the [supplementary materials](https://arxiv.org/pdf/2312.16084.pdf).
208
+
209
+ - 3D Object Localization on LERF and 3D Semantic Segmentation on LERF. Our eval code is based on [LERF](https://github.com/kerrj/lerf) and [NerfStudio](https://github.com/nerfstudio-project/nerfstudio), thanks for these impressive open-source projects!
210
+
211
+ - Please download the [lerf_ovs]((https://drive.google.com/file/d/1QF1Po5p5DwTjFHu6tnTeYs_G0egMVmHt/view?usp=sharing)) first.
212
+
213
+ - Set the ```gt_folder``` as the path to lerf_ovs/label.
214
+
215
+ - Make sure to finish **Step 4** before you run the eval code.
216
+ ```
217
+ cd eval
218
+ sh eval.sh
219
+ ```
220
+
221
+ ## TODO list:
222
+ - [x] release the code of the optimizer
223
+ - [x] release the code of the autoencoder
224
+ - [x] release the code of the segment-anything-langsplat
225
+ - [x] update the arxiv link
226
+ - [x] release the preprocessed dataset and the pretrained model
227
+ - [x] release more preprocessed dataset and the pretrained model (coming soon)
228
+ - [x] release the code of the eval
229
+
230
+ This project is still under development. Please feel free to raise issues or submit pull requests to contribute to our codebase.
arguments/__init__.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Copyright (C) 2023, Inria
3
+ # GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ # All rights reserved.
5
+ #
6
+ # This software is free for non-commercial, research and evaluation use
7
+ # under the terms of the LICENSE.md file.
8
+ #
9
+ # For inquiries contact george.drettakis@inria.fr
10
+ #
11
+
12
+ from argparse import ArgumentParser, Namespace
13
+ import sys
14
+ import os
15
+
16
class GroupParams:
    """Empty attribute container; ``ParamGroup.extract`` fills one of these
    with the parsed command-line values that belong to a parameter group."""
    pass
18
+
19
class ParamGroup:
    """Reflectively registers subclass attributes as argparse arguments.

    Subclasses assign their defaults as instance attributes *before* calling
    this initializer; every attribute then becomes a ``--<name>`` option in a
    dedicated argument group. An attribute named ``_foo`` is registered as
    ``--foo`` plus a one-letter shorthand ``-f``. Bool attributes become
    ``store_true`` flags.
    """

    def __init__(self, parser: ArgumentParser, name : str, fill_none = False):
        group = parser.add_argument_group(name)
        for key, value in vars(self).items():
            shorthand = False
            if key.startswith("_"):
                # Leading underscore requests an additional one-letter flag;
                # the underscore itself is stripped from the option name.
                shorthand = True
                key = key[1:]
            t = type(value)
            # fill_none is used to detect "not given on the command line"
            # later (see get_combined_args merging).
            value = value if not fill_none else None
            if shorthand:
                if t == bool:
                    # NOTE(review): with action="store_true", a True default
                    # can never be switched off from the CLI — confirm intended.
                    group.add_argument("--" + key, ("-" + key[0:1]), default=value, action="store_true")
                else:
                    group.add_argument("--" + key, ("-" + key[0:1]), default=value, type=t)
            else:
                if t == bool:
                    group.add_argument("--" + key, default=value, action="store_true")
                else:
                    group.add_argument("--" + key, default=value, type=t)

    def extract(self, args):
        # Copy back only the parsed args that correspond to attributes of this
        # group (with or without the underscore prefix).
        group = GroupParams()
        for arg in vars(args).items():
            if arg[0] in vars(self) or ("_" + arg[0]) in vars(self):
                setattr(group, arg[0], arg[1])
        return group
46
+
47
class ModelParams(ParamGroup):
    """Scene/model loading options.

    Underscored attributes additionally get a one-letter shorthand via
    ``ParamGroup`` (e.g. ``_source_path`` -> ``-s``, ``_model_path`` -> ``-m``).
    """

    def __init__(self, parser, sentinel=False):
        self.sh_degree = 3                                        # spherical-harmonics degree (3DGS convention — TODO confirm)
        self._source_path = ""                                    # -s: dataset root (COLMAP layout)
        self._model_path = ""                                     # -m: trained model / output directory
        self._language_features_name = "language_features_dim3"   # -l: subdir holding compressed language features
        self._images = "images"                                   # -i: image subdirectory name
        self._resolution = -1                                     # -r: presumably -1 keeps native resolution — TODO confirm
        self._white_background = False                            # -w
        self._feature_level = -1                                  # -f: LangSplat SAM feature level — TODO confirm
        self.data_device = "cuda"
        self.eval = False                                         # NOTE(review): likely enables a held-out test split — confirm
        super().__init__(parser, "Loading Parameters", sentinel)

    def extract(self, args):
        # Resolve the source path and derive the language-feature directory
        # from it so downstream code gets absolute paths.
        g = super().extract(args)
        g.source_path = os.path.abspath(g.source_path)
        g.lf_path = os.path.join(g.source_path, g.language_features_name)
        return g
66
+
67
class PipelineParams(ParamGroup):
    """Rendering-pipeline toggles."""

    def __init__(self, parser):
        self.convert_SHs_python = False    # presumably: evaluate SHs in Python instead of CUDA — TODO confirm
        self.compute_cov3D_python = False  # presumably: compute 3D covariances in Python — TODO confirm
        self.debug = False
        super().__init__(parser, "Pipeline Parameters")
73
+
74
class OptimizationParams(ParamGroup):
    """Training hyper-parameters (learning rates, densification schedule)."""

    def __init__(self, parser):
        self.iterations = 30_000
        # Position (xyz) learning-rate schedule endpoints.
        self.position_lr_init = 0.00016
        self.position_lr_final = 0.0000016
        self.position_lr_delay_mult = 0.01
        self.position_lr_max_steps = 30_000
        self.feature_lr = 0.0025
        self.opacity_lr = 0.05
        self.language_feature_lr = 0.0025 # TODO: update
        self.include_feature = True # Set to False if train the original gs
        self.scaling_lr = 0.005
        self.rotation_lr = 0.001
        self.percent_dense = 0.01
        self.lambda_dssim = 0.2           # SSIM term weight — TODO confirm against loss code
        # Densification/pruning cadence (iterations).
        self.densification_interval = 100
        self.opacity_reset_interval = 3000
        self.densify_from_iter = 500
        self.densify_until_iter = 15_000
        self.densify_grad_threshold = 0.0002
        super().__init__(parser, "Optimization Parameters")
95
+
96
def get_combined_args(parser : ArgumentParser):
    """Merge command-line arguments with the ``cfg_args`` file saved in the
    model directory.

    The training-time config (a repr'd ``Namespace``) provides defaults; any
    command-line value that is not None overrides it.

    Returns:
        argparse.Namespace with the merged values.
    """
    cmdlne_string = sys.argv[1:]
    cfgfile_string = "Namespace()"
    args_cmdline = parser.parse_args(cmdlne_string)

    try:
        cfgfilepath = os.path.join(args_cmdline.model_path, "cfg_args")
        print("Looking for config file in", cfgfilepath)
        with open(cfgfilepath) as cfg_file:
            print("Config file found: {}".format(cfgfilepath))
            cfgfile_string = cfg_file.read()
    except TypeError:
        # model_path was None -> os.path.join raised.
        print("Config file not found at", args_cmdline.model_path)
    except FileNotFoundError:
        # Fix: the original only caught TypeError, so a missing cfg_args file
        # crashed instead of falling back to command-line values.
        print("Config file not found at", cfgfilepath)

    # NOTE(review): eval() of a repo-generated file — do not point
    # --model_path at untrusted directories.
    args_cfgfile = eval(cfgfile_string)

    merged_dict = vars(args_cfgfile).copy()
    for k, v in vars(args_cmdline).items():
        if v is not None:
            merged_dict[k] = v
    return Namespace(**merged_dict)
arguments/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (4.24 kB). View file
 
assets/teaser.png ADDED

Git LFS Details

  • SHA256: a6f81aad75adea1f2df5fad18ec78e3d09c7d0adc0ea72bbfd2fdc710bc80094
  • Pointer size: 133 Bytes
  • Size of remote file: 21.9 MB
autoencoder/__pycache__/dataset.cpython-38.pyc ADDED
Binary file (1.24 kB). View file
 
autoencoder/__pycache__/model.cpython-38.pyc ADDED
Binary file (1.64 kB). View file
 
autoencoder/ckpt/office_scene_50/best_ckpt.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3a93d4b1ad809b23e9a27c72ea54d10dfe10b96d268ee5b9905947da9196872
3
+ size 1685665
autoencoder/ckpt/office_scene_50/events.out.tfevents.1760944914.a100-st-p4de24xlarge-7.434065.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:def6ab9b62040824c06233c1ca9c7eabd7c81ce910c3e64e820f871383ee6a8b
3
+ size 254
autoencoder/ckpt/office_scene_50/events.out.tfevents.1760945127.a100-st-p4de24xlarge-7.441630.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c769f358ebf36d5373d8f41c37899a683622e425fd29308443a4be425fea3f19
3
+ size 58836284
autoencoder/dataset.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import glob
3
+ import numpy as np
4
+ import torch
5
+ from torch.utils.data import Dataset
6
+
7
class Autoencoder_dataset(Dataset):
    """Flat dataset over the scene's 512-dim language feature files.

    Loads every ``*f.npy`` file in ``data_dir`` and concatenates them along
    axis 0 into one array. ``data_dic`` maps each file's base name (without
    extension) to its row count, so per-file slices can be reconstructed
    later (see autoencoder/test.py).
    """

    def __init__(self, data_dir):
        data_names = glob.glob(os.path.join(data_dir, '*f.npy'))
        self.data_dic = {}
        chunks = []
        for path in data_names:
            features = np.load(path)
            # Fix: os.path.basename instead of splitting on '/', which broke
            # on Windows path separators.
            name = os.path.basename(path).split('.')[0]
            self.data_dic[name] = features.shape[0]
            chunks.append(features)
        # Fix: single concatenate instead of repeated pairwise np.concatenate
        # inside the loop, which was quadratic in the number of files.
        self.data = np.concatenate(chunks, axis=0)

    def __getitem__(self, index):
        # Return one feature row as a torch tensor.
        return torch.tensor(self.data[index])

    def __len__(self):
        return self.data.shape[0]
autoencoder/model.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+
5
class Autoencoder(nn.Module):
    """MLP autoencoder compressing 512-dim language features to a small code.

    The encoder is ``Linear(512 -> dims[0])`` followed by
    ``[BatchNorm1d, ReLU, Linear]`` stacks; the decoder is a plain MLP with a
    ReLU between consecutive layers. Both the code and the reconstruction are
    L2-normalized along the last dimension.
    """

    def __init__(self, encoder_hidden_dims, decoder_hidden_dims):
        super(Autoencoder, self).__init__()

        encoder_modules = []
        for idx, width in enumerate(encoder_hidden_dims):
            if idx == 0:
                encoder_modules.append(nn.Linear(512, width))
            else:
                previous = encoder_hidden_dims[idx - 1]
                encoder_modules.append(torch.nn.BatchNorm1d(previous))
                encoder_modules.append(nn.ReLU())
                encoder_modules.append(nn.Linear(previous, width))
        self.encoder = nn.ModuleList(encoder_modules)

        decoder_modules = []
        for idx, width in enumerate(decoder_hidden_dims):
            if idx == 0:
                decoder_modules.append(nn.Linear(encoder_hidden_dims[-1], width))
            else:
                decoder_modules.append(nn.ReLU())
                decoder_modules.append(nn.Linear(decoder_hidden_dims[idx - 1], width))
        self.decoder = nn.ModuleList(decoder_modules)
        print(self.encoder, self.decoder)

    @staticmethod
    def _unit(t):
        # Project onto the unit hypersphere (L2-normalize the last dim).
        return t / t.norm(dim=-1, keepdim=True)

    def forward(self, x):
        # Encode then decode; each stage normalizes its own output.
        return self.decode(self.encode(x))

    def encode(self, x):
        for layer in self.encoder:
            x = layer(x)
        return self._unit(x)

    def decode(self, x):
        for layer in self.decoder:
            x = layer(x)
        return self._unit(x)
autoencoder/test.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import torch
4
+ import argparse
5
+ import shutil
6
+ from torch.utils.data import DataLoader
7
+ from tqdm import tqdm
8
+ from dataset import Autoencoder_dataset
9
+ from model import Autoencoder
10
+
11
if __name__ == '__main__':
    # Compress the scene's 512-dim language features to 3 dims with the
    # trained autoencoder and write them next to the originals
    # (language_features -> language_features_dim3).
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset_path', type=str, required=True)
    parser.add_argument('--dataset_name', type=str, required=True)
    parser.add_argument('--encoder_dims',
                        nargs='+',
                        type=int,
                        default=[256, 128, 64, 32, 3],
                        )
    parser.add_argument('--decoder_dims',
                        nargs='+',
                        type=int,
                        default=[16, 32, 64, 128, 256, 256, 512],
                        )
    args = parser.parse_args()

    dataset_name = args.dataset_name
    encoder_hidden_dims = args.encoder_dims
    decoder_hidden_dims = args.decoder_dims
    dataset_path = args.dataset_path
    ckpt_path = f"ckpt/{dataset_name}/best_ckpt.pth"

    data_dir = f"{dataset_path}/language_features"
    output_dir = f"{dataset_path}/language_features_dim3"
    os.makedirs(output_dir, exist_ok=True)

    # The segmentation maps (*_s.npy) are not compressed; copy them through.
    for filename in os.listdir(data_dir):
        if filename.endswith("_s.npy"):
            source_path = os.path.join(data_dir, filename)
            target_path = os.path.join(output_dir, filename)
            shutil.copy(source_path, target_path)

    # Fix: map_location avoids a hard crash when the checkpoint was saved on
    # a different GPU id; weights are moved to the model's device below.
    checkpoint = torch.load(ckpt_path, map_location="cpu")
    test_dataset = Autoencoder_dataset(data_dir)

    test_loader = DataLoader(
        dataset=test_dataset,
        batch_size=256,
        shuffle=False,
        num_workers=16,
        drop_last=False
    )

    model = Autoencoder(encoder_hidden_dims, decoder_hidden_dims).to("cuda:0")
    model.load_state_dict(checkpoint)
    model.eval()

    # Fix: collect per-batch outputs and concatenate once at the end; the
    # original called np.concatenate inside the loop (quadratic).
    outputs = []
    with torch.no_grad():
        for feature in tqdm(test_loader):
            data = feature.to("cuda:0")
            outputs.append(model.encode(data).to("cpu").numpy())
    features = np.concatenate(outputs, axis=0)

    # Split the flat feature array back into one .npy per input file, using
    # the per-file row counts recorded by the dataset.
    start = 0
    for k, v in test_dataset.data_dic.items():
        path = os.path.join(output_dir, k)
        np.save(path, features[start:start + v])
        start += v
autoencoder/train.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import torch.nn.functional as F
4
+ from torch.utils.data import Dataset, DataLoader
5
+ from tqdm import tqdm
6
+ from dataset import Autoencoder_dataset
7
+ from model import Autoencoder
8
+ from torch.utils.tensorboard import SummaryWriter
9
+ import argparse
10
+
11
+ torch.autograd.set_detect_anomaly(True)
12
+
13
def l2_loss(network_output, gt):
    """Mean squared error over every element of the prediction."""
    residual = network_output - gt
    return residual.pow(2).mean()
15
+
16
def cos_loss(network_output, gt):
    """One minus the mean cosine similarity between prediction and target.

    NOTE(review): dim=0 computes similarity across the batch dimension rather
    than per feature vector — confirm this is intended.
    """
    similarity = F.cosine_similarity(network_output, gt, dim=0)
    return 1 - similarity.mean()
18
+
19
+
20
if __name__ == '__main__':
    # Train the scene-wise autoencoder that compresses 512-dim language
    # features to a low-dimensional (default 3) code.
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset_path', type=str, required=True)
    parser.add_argument('--num_epochs', type=int, default=100)
    parser.add_argument('--lr', type=float, default=0.0001)
    parser.add_argument('--encoder_dims',
                        nargs='+',
                        type=int,
                        default=[256, 128, 64, 32, 3],
                        )
    parser.add_argument('--decoder_dims',
                        nargs='+',
                        type=int,
                        default=[16, 32, 64, 128, 256, 256, 512],
                        )
    parser.add_argument('--dataset_name', type=str, required=True)
    args = parser.parse_args()
    dataset_path = args.dataset_path
    num_epochs = args.num_epochs
    data_dir = f"{dataset_path}/language_features"
    os.makedirs(f'ckpt/{args.dataset_name}', exist_ok=True)
    train_dataset = Autoencoder_dataset(data_dir)
    train_loader = DataLoader(
        dataset=train_dataset,
        batch_size=64,
        shuffle=True,
        num_workers=16,
        drop_last=False
    )

    # Evaluation reuses the training features (reconstruction quality only).
    test_loader = DataLoader(
        dataset=train_dataset,
        batch_size=256,
        shuffle=False,
        num_workers=16,
        drop_last=False
    )

    encoder_hidden_dims = args.encoder_dims
    decoder_hidden_dims = args.decoder_dims

    model = Autoencoder(encoder_hidden_dims, decoder_hidden_dims).to("cuda:0")

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    logdir = f'ckpt/{args.dataset_name}'
    tb_writer = SummaryWriter(logdir)

    best_eval_loss = 100.0
    best_epoch = 0
    for epoch in tqdm(range(num_epochs)):
        model.train()
        for idx, feature in enumerate(train_loader):
            data = feature.to("cuda:0")
            outputs_dim3 = model.encode(data)
            outputs = model.decode(outputs_dim3)

            # Reconstruction loss: MSE plus a small cosine term.
            l2loss = l2_loss(outputs, data)
            cosloss = cos_loss(outputs, data)
            loss = l2loss + cosloss * 0.001

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            global_iter = epoch * len(train_loader) + idx
            tb_writer.add_scalar('train_loss/l2_loss', l2loss.item(), global_iter)
            tb_writer.add_scalar('train_loss/cos_loss', cosloss.item(), global_iter)
            tb_writer.add_scalar('train_loss/total_loss', loss.item(), global_iter)
            tb_writer.add_histogram("feat", outputs, global_iter)

        # Evaluate (and track the best checkpoint) over the final 4 epochs.
        # Fix: this was hard-coded `epoch > 95`, which never fired for
        # num_epochs <= 95 and left best_ckpt.pth missing; the new condition
        # is identical at the default num_epochs=100.
        if epoch > num_epochs - 5:
            eval_loss = 0.0
            model.eval()
            for idx, feature in enumerate(test_loader):
                data = feature.to("cuda:0")
                with torch.no_grad():
                    outputs = model(data)
                loss = l2_loss(outputs, data) + cos_loss(outputs, data)
                # Weight by batch size so the mean is over samples.
                eval_loss += loss.item() * len(feature)
            eval_loss = eval_loss / len(train_dataset)
            print("eval_loss:{:.8f}".format(eval_loss))
            if eval_loss < best_eval_loss:
                best_eval_loss = eval_loss
                best_epoch = epoch
                torch.save(model.state_dict(), f'ckpt/{args.dataset_name}/best_ckpt.pth')

        if epoch % 10 == 0:
            torch.save(model.state_dict(), f'ckpt/{args.dataset_name}/{epoch}_ckpt.pth')

    print(f"best_epoch: {best_epoch}")
    print("best_loss: {:.8f}".format(best_eval_loss))
ckpts/sam_vit_b_01ec64.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec2df62732614e57411cdcf32a23ffdf28910380d03139ee0f4fcbe91eb8c912
3
+ size 375042383
ckpts/sam_vit_h_4b8939.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7bf3b02f3ebf1267aba913ff637d9a2d5c33d3173bb679e46d9f338c26f262e
3
+ size 2564550879
ckpts/sam_vit_l_0b3195.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3adcc4315b642a4d2101128f611684e8734c41232a17c648ed1693702a49a622
3
+ size 1249524607
convert.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#
# Copyright (C) 2023, Inria
# GRAPHDECO research group, https://team.inria.fr/graphdeco
# All rights reserved.
#
# This software is free for non-commercial, research and evaluation use
# under the terms of the LICENSE.md file.
#
# For inquiries contact george.drettakis@inria.fr
#

import os
import logging
from argparse import ArgumentParser
import shutil

# This Python script is based on the shell converter script provided in the MipNerF 360 repository.
# Pipeline: COLMAP feature extraction -> exhaustive matching -> mapping (SfM) ->
# image undistortion into ideal pinhole intrinsics, then optional downscaled image pyramids.
parser = ArgumentParser("Colmap converter")
parser.add_argument("--no_gpu", action='store_true')
parser.add_argument("--skip_matching", action='store_true')
parser.add_argument("--source_path", "-s", required=True, type=str)
parser.add_argument("--camera", default="SIMPLE_PINHOLE", type=str)
parser.add_argument("--colmap_executable", default="", type=str)
parser.add_argument("--resize", action="store_true")
parser.add_argument("--magick_executable", default="", type=str)
args = parser.parse_args()
# Fall back to binaries on PATH when no explicit executable is given.
colmap_command = '"{}"'.format(args.colmap_executable) if len(args.colmap_executable) > 0 else "colmap"
magick_command = '"{}"'.format(args.magick_executable) if len(args.magick_executable) > 0 else "magick"
use_gpu = 1 if not args.no_gpu else 0

if not args.skip_matching:
    os.makedirs(args.source_path + "/distorted/sparse", exist_ok=True)

    ## Feature extraction
    feat_extraction_cmd = colmap_command + " feature_extractor "\
        "--database_path " + args.source_path + "/distorted/database.db \
        --image_path " + args.source_path + "/input \
        --ImageReader.single_camera 1 \
        --ImageReader.camera_model " + args.camera + " \
        --SiftExtraction.use_gpu " + str(use_gpu)
    exit_code = os.system(feat_extraction_cmd)
    if exit_code != 0:
        logging.error(f"Feature extraction failed with code {exit_code}. Exiting.")
        exit(exit_code)

    ## Feature matching
    feat_matching_cmd = colmap_command + " exhaustive_matcher \
        --database_path " + args.source_path + "/distorted/database.db \
        --SiftMatching.use_gpu " + str(use_gpu)
    exit_code = os.system(feat_matching_cmd)
    if exit_code != 0:
        logging.error(f"Feature matching failed with code {exit_code}. Exiting.")
        exit(exit_code)

    ### Bundle adjustment
    # The default Mapper tolerance is unnecessarily large,
    # decreasing it speeds up bundle adjustment steps.
    mapper_cmd = (colmap_command + " mapper \
        --database_path " + args.source_path + "/distorted/database.db \
        --image_path "  + args.source_path + "/input \
        --output_path "  + args.source_path + "/distorted/sparse \
        --Mapper.ba_global_function_tolerance=0.000001")
    exit_code = os.system(mapper_cmd)
    if exit_code != 0:
        logging.error(f"Mapper failed with code {exit_code}. Exiting.")
        exit(exit_code)

### Image undistortion
## We need to undistort our images into ideal pinhole intrinsics.
img_undist_cmd = (colmap_command + " image_undistorter \
    --image_path " + args.source_path + "/input \
    --input_path " + args.source_path + "/distorted/sparse/0 \
    --output_path " + args.source_path + "\
    --output_type COLMAP")
exit_code = os.system(img_undist_cmd)
if exit_code != 0:
    # Bug fix: this step runs image_undistorter, not the mapper — report it accurately.
    logging.error(f"Image undistortion failed with code {exit_code}. Exiting.")
    exit(exit_code)

# The undistorter writes its model files directly under <source>/sparse;
# downstream loaders expect them in <source>/sparse/0, so move them there.
files = os.listdir(args.source_path + "/sparse")
os.makedirs(args.source_path + "/sparse/0", exist_ok=True)
for file in files:
    if file == '0':
        continue  # skip the destination directory itself
    source_file = os.path.join(args.source_path, "sparse", file)
    destination_file = os.path.join(args.source_path, "sparse", "0", file)
    shutil.move(source_file, destination_file)

if(args.resize):
    print("Copying and resizing...")

    # Build downscaled image pyramids (1/2, 1/4, 1/8) for multi-resolution training.
    # Each stage copies the full-res image then shrinks it in place with ImageMagick.
    resize_stages = [("images_2", "50%"), ("images_4", "25%"), ("images_8", "12.5%")]
    for folder, _ in resize_stages:
        os.makedirs(os.path.join(args.source_path, folder), exist_ok=True)

    files = os.listdir(args.source_path + "/images")
    for file in files:
        source_file = os.path.join(args.source_path, "images", file)
        for folder, percent in resize_stages:
            destination_file = os.path.join(args.source_path, folder, file)
            shutil.copy2(source_file, destination_file)
            exit_code = os.system(magick_command + " mogrify -resize " + percent + " " + destination_file)
            if exit_code != 0:
                logging.error(f"{percent} resize failed with code {exit_code}. Exiting.")
                exit(exit_code)

print("Done.")
data/40753679.mov ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e5f609239e3aca78d5eae85416e33f490b6ff2dfcafc64f6f65d7be3bdd982b
3
+ size 496585965
data/Tushar.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aee8e7c6cfdddd997dcf65cbee259c1eef7f0870ffd49a477f9e7cdeca1b046c
3
+ size 588993143
data/colmap_scene.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb1436c1ff09c89c8405d8f5863b211aa1e38c9ec6b9bfb4965250e0fb30349b
3
+ size 92406880
data/examples/office_scene_50/depths/frame_00059.png ADDED

Git LFS Details

  • SHA256: 2a188110d2283104bfc65c01cb0956b25a12bdd43a1ac4ca3b1fbde3df426e18
  • Pointer size: 131 Bytes
  • Size of remote file: 490 kB
data/examples/office_scene_50/depths/frame_00119.png ADDED

Git LFS Details

  • SHA256: f1b3efcad35c8723b68fda54661942ba31cd3bd144616185c8eb9df366bb0c10
  • Pointer size: 131 Bytes
  • Size of remote file: 596 kB
data/examples/office_scene_50/depths/frame_00179.png ADDED

Git LFS Details

  • SHA256: b3a9b9aab138ee6d625ad907a346e00c49adc36a91ff782a278382b921b67ee0
  • Pointer size: 131 Bytes
  • Size of remote file: 457 kB
data/examples/office_scene_50/depths/frame_00239.png ADDED

Git LFS Details

  • SHA256: bab2ddde3bf73a29b29578bd41ff29abb45fdf331a6042b2de4d75fe0f1fa443
  • Pointer size: 131 Bytes
  • Size of remote file: 551 kB
data/examples/office_scene_50/depths/frame_00299.png ADDED

Git LFS Details

  • SHA256: 31839b2d5072fdc05a93dcffe72e967d020a5e094d11142df00a2cc2272b4062
  • Pointer size: 131 Bytes
  • Size of remote file: 453 kB
data/examples/office_scene_50/depths/frame_00359.png ADDED

Git LFS Details

  • SHA256: 3f8d18ff4bd03b831dd1d96454b6b017b90362c380932b408813dd501e5b3894
  • Pointer size: 131 Bytes
  • Size of remote file: 406 kB
data/examples/office_scene_50/depths/frame_00419.png ADDED

Git LFS Details

  • SHA256: 8b8469d2adbc8c8af94339c73cb98002459e1b197a844d02f68a77e0fc05e0a0
  • Pointer size: 131 Bytes
  • Size of remote file: 217 kB
data/examples/office_scene_50/depths/frame_00479.png ADDED

Git LFS Details

  • SHA256: e10396593efda33003dee42df63dc261c6c2ac22173f1eb463a9361e49c65d9c
  • Pointer size: 131 Bytes
  • Size of remote file: 349 kB
data/examples/office_scene_50/depths/frame_00539.png ADDED

Git LFS Details

  • SHA256: 494774e15bb336cfb4d0daeb1979a94258ff766fb1bec75c818c32ff7ac9f129
  • Pointer size: 131 Bytes
  • Size of remote file: 351 kB
data/examples/office_scene_50/depths/frame_00599.png ADDED

Git LFS Details

  • SHA256: 0a3226f83320129231d19d517bfb7d884e6a4836400d5718396f19095d47ebad
  • Pointer size: 131 Bytes
  • Size of remote file: 393 kB
data/examples/office_scene_50/depths/frame_00659.png ADDED

Git LFS Details

  • SHA256: 4beadad83f5fd58e8cf0317fb3730af6b4891294e83f753d012057008913a425
  • Pointer size: 131 Bytes
  • Size of remote file: 481 kB
data/examples/office_scene_50/depths/frame_00719.png ADDED

Git LFS Details

  • SHA256: efd656138429f6d010a6cb495d0ea5fb489a2d6934596fbc786624e88b51c4ec
  • Pointer size: 131 Bytes
  • Size of remote file: 562 kB
data/examples/office_scene_50/depths/frame_00779.png ADDED

Git LFS Details

  • SHA256: 325f4ebb1afe42bdb715938792f0bc0644dd7b62345485f86c8c6afea6d9f47a
  • Pointer size: 131 Bytes
  • Size of remote file: 419 kB
data/examples/office_scene_50/depths/frame_00839.png ADDED

Git LFS Details

  • SHA256: dced31332aa7d7e5a34e5412f4320c4412099750bcaf3e0b4650957c05712ebf
  • Pointer size: 131 Bytes
  • Size of remote file: 422 kB
data/examples/office_scene_50/depths/frame_00899.png ADDED

Git LFS Details

  • SHA256: ab22031f246d131ab44dc484e3b569e2128a07f4de48d8aca31d7b142c3abf1d
  • Pointer size: 131 Bytes
  • Size of remote file: 580 kB
data/examples/office_scene_50/depths/frame_00959.png ADDED

Git LFS Details

  • SHA256: 7003ccedc40c8b259c5354420766f49769e2f30438a7a5dc86eb695a7c93b063
  • Pointer size: 131 Bytes
  • Size of remote file: 448 kB
data/examples/office_scene_50/depths/frame_01019.png ADDED

Git LFS Details

  • SHA256: 5ed874cb1160ecb891bb7c3cf37e5652d37655292fc474dae5485c7d1f513650
  • Pointer size: 131 Bytes
  • Size of remote file: 517 kB
data/examples/office_scene_50/depths/frame_01079.png ADDED

Git LFS Details

  • SHA256: db3b756ae042ca19f49f9952465b6f8ec6df568ffaed9bf55285ed16f4760b72
  • Pointer size: 131 Bytes
  • Size of remote file: 526 kB
data/examples/office_scene_50/depths/frame_01139.png ADDED

Git LFS Details

  • SHA256: b65c7a6a57c910aa2e602776d2de13e61dc0dac307c2753dce2bae934c0d5eec
  • Pointer size: 131 Bytes
  • Size of remote file: 440 kB
data/examples/office_scene_50/depths/frame_01199.png ADDED

Git LFS Details

  • SHA256: 4d8a5e5e259561c02d61b80fdb4cf92da47b079dfa1ae13750ccc2a421d86d8a
  • Pointer size: 131 Bytes
  • Size of remote file: 448 kB
data/examples/office_scene_50/depths/frame_01259.png ADDED

Git LFS Details

  • SHA256: 2c37869750ccefcc6c6903bbdafecd04ff678adbc0de6489abb8ba3edd2b93ec
  • Pointer size: 131 Bytes
  • Size of remote file: 320 kB
data/examples/office_scene_50/depths/frame_01319.png ADDED

Git LFS Details

  • SHA256: e71c814cdc1d5c58cbf0b16eeecff3911924288872507c7b7910629f0d05b67c
  • Pointer size: 131 Bytes
  • Size of remote file: 288 kB
data/examples/office_scene_50/depths/frame_01379.png ADDED

Git LFS Details

  • SHA256: e76c755ed187953a47333c700a5ee9fa6eb8355e8727dc665c641ebf0af5e022
  • Pointer size: 131 Bytes
  • Size of remote file: 430 kB
data/examples/office_scene_50/depths/frame_01439.png ADDED

Git LFS Details

  • SHA256: 818a06e2daa04507d43259fbf5038f9df571714b017bb3ead9e66b2a7ff38595
  • Pointer size: 131 Bytes
  • Size of remote file: 303 kB
data/examples/office_scene_50/depths/frame_01499.png ADDED

Git LFS Details

  • SHA256: 299924297f7f363dec15e3471a4128090f4ace34b8bec0f9f75e40b63f3154cf
  • Pointer size: 131 Bytes
  • Size of remote file: 436 kB
data/examples/office_scene_50/depths/frame_01559.png ADDED

Git LFS Details

  • SHA256: 36871ae984f3d65af63130115766eef27026759bc6acac701649dbd26ffec3a2
  • Pointer size: 131 Bytes
  • Size of remote file: 290 kB
data/examples/office_scene_50/depths/frame_01619.png ADDED

Git LFS Details

  • SHA256: 16f703dd658019856a7e4da4be61207a692d6f0197b4fd2edf19a5e533aed4ea
  • Pointer size: 131 Bytes
  • Size of remote file: 250 kB
data/examples/office_scene_50/depths/frame_01679.png ADDED

Git LFS Details

  • SHA256: 927abb00815cffce2c9eeeb7991b781ba3815ce075a1030315bbec53c71c772b
  • Pointer size: 131 Bytes
  • Size of remote file: 456 kB