Image-to-Text
Transformers
PyTorch
English
Geo-Localization
kevinloeffler committed on
Commit
085b09f
·
verified ·
1 Parent(s): 005270a

upload model

Browse files
Files changed (2) hide show
  1. model-vit-v1-ep16.pt +3 -0
  2. run_config.json +197 -0
model-vit-v1-ep16.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6c126aa158301fe8b0bfbb94dc4893b9d63e8aa8595d8e7dc665901b8dffe83
3
+ size 1292521336
run_config.json ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "ResumeFromPreviousRun": "/home/tobias.rothlin/data/TrainingSnapshots/Regression_2",
3
+ "DatasetConfig": {
4
+ "base_model": "openai/clip-vit-large-patch14-336",
5
+ "augmentaion_pipeline": [
6
+ {
7
+ "name": "RandomRotation",
8
+ "params": {
9
+ "degrees": 10
10
+ }
11
+ },
12
+ {
13
+ "name": "ColorJitter",
14
+ "params": {
15
+ "brightness": 0.5,
16
+ "contrast": 0.2,
17
+ "saturation": 0.4,
18
+ "hue": 0.01
19
+ }
20
+ },
21
+ {
22
+ "name": "RandomPerspective",
23
+ "params": {
24
+ "distortion_scale": 0.2,
25
+ "p": 0.5
26
+ }
27
+ }
28
+ ],
29
+ "normalize_labels": true,
30
+ "use_cached_dataloader": false,
31
+ "load_for_contrast_learning": false,
32
+ "use_pre_calculated_embeddings": false,
33
+ "load_pooling_output": false,
34
+ "use_gaussian_smoothing": true,
35
+ "workers": 4
36
+ },
37
+ "DataLoaderConfig": {
38
+ "Train": {
39
+ "batch_size": 64,
40
+ "shuffle": true,
41
+ "num_workers": 8,
42
+ "pin_memory": true,
43
+ "prefetch_factor": 20
44
+ },
45
+ "Test": {
46
+ "batch_size": 64,
47
+ "shuffle": true,
48
+ "num_workers": 8,
49
+ "pin_memory": true,
50
+ "prefetch_factor": 20
51
+ }
52
+ },
53
+ "ModelConfig": {
54
+ "use_location_head": true,
55
+ "use_similarity_head": false,
56
+ "freeze_base_model": true,
57
+ "LocationHeadClip": {
58
+ "mean_locatation_head_output": false,
59
+ "layers": [
60
+ {
61
+ "d_model": 1024,
62
+ "nhead": 8
63
+ },
64
+ {
65
+ "d_model": 1024,
66
+ "nhead": 8
67
+ }
68
+ ],
69
+ "linear_layer_mapping": {
70
+ "in_features": 1024,
71
+ "out_features": 1024
72
+ }
73
+ },
74
+ "RegressionHead": {
75
+ "layer_group": [
76
+ [
77
+ {
78
+ "type": "Linear",
79
+ "in_features": 1024,
80
+ "out_features": 1024
81
+ },
82
+ {
83
+ "type": "Dropout",
84
+ "p": 0.3
85
+ },
86
+ {
87
+ "type": "LayerNorm",
88
+ "normalized_shape": 1024
89
+ },
90
+ {
91
+ "type": "ReLU"
92
+ }
93
+ ],
94
+ [
95
+ {
96
+ "type": "Linear",
97
+ "in_features": 1024,
98
+ "out_features": 512
99
+ },
100
+ {
101
+ "type": "Dropout",
102
+ "p": 0.2
103
+ },
104
+ {
105
+ "type": "LayerNorm",
106
+ "normalized_shape": 512
107
+ },
108
+ {
109
+ "type": "ReLU"
110
+ }
111
+ ],
112
+ [
113
+ {
114
+ "type": "Linear",
115
+ "in_features": 512,
116
+ "out_features": 256
117
+ },
118
+ {
119
+ "type": "Dropout",
120
+ "p": 0.1
121
+ },
122
+ {
123
+ "type": "LayerNorm",
124
+ "normalized_shape": 256
125
+ },
126
+ {
127
+ "type": "ReLU"
128
+ }
129
+ ],
130
+ [
131
+ {
132
+ "type": "Linear",
133
+ "in_features": 256,
134
+ "out_features": 64
135
+ },
136
+ {
137
+ "type": "Dropout",
138
+ "p": 0.1
139
+ },
140
+ {
141
+ "type": "LayerNorm",
142
+ "normalized_shape": 64
143
+ },
144
+ {
145
+ "type": "ReLU"
146
+ }
147
+ ],
148
+ [
149
+ {
150
+ "type": "Linear",
151
+ "in_features": 64,
152
+ "out_features": 32
153
+ },
154
+ {
155
+ "type": "Dropout",
156
+ "p": 0.1
157
+ },
158
+ {
159
+ "type": "LayerNorm",
160
+ "normalized_shape": 32
161
+ },
162
+ {
163
+ "type": "ReLU"
164
+ }
165
+ ],
166
+ [
167
+ {
168
+ "type": "Linear",
169
+ "in_features": 32,
170
+ "out_features": 2
171
+ },
172
+ {
173
+ "type": "Tanh"
174
+ }
175
+ ]
176
+ ]
177
+ }
178
+ },
179
+ "TrainingConfig": {
180
+ "Epochs": 6,
181
+ "SaveEvery": 10000,
182
+ "RunName": "Regression_Best_Long",
183
+ "SnapshotPath": "/home/tobias.rothlin/data/TrainingSnapshots",
184
+ "LogMLFlow": false,
185
+ "MLFlowExperimentName": "ClipLocationDecoder",
186
+ "GradientAccumulationSteps": 1,
187
+ "ContrastLearningStrategy": null,
188
+ "LearningRate": 5e-05,
189
+ "Amsgrad": true,
190
+ "WeightDecay": 0.0001,
191
+ "Betas": [
192
+ 0.9,
193
+ 0.98
194
+ ],
195
+ "Gamma": 0.9
196
+ }
197
+ }