OSTswiss
/

ReGeo

+{
+    "ResumeFromPreviousRun": "/home/tobias.rothlin/data/TrainingSnapshots/Regression_2",
+    "DatasetConfig": {
+        "base_model": "openai/clip-vit-large-patch14-336",
+        "augmentaion_pipeline": [
+            {
+                "name": "RandomRotation",
+                "params": {
+                    "degrees": 10
+                }
+            },
+            {
+                "name": "ColorJitter",
+                "params": {
+                    "brightness": 0.5,
+                    "contrast": 0.2,
+                    "saturation": 0.4,
+                    "hue": 0.01
+                }
+            },
+            {
+                "name": "RandomPerspective",
+                "params": {
+                    "distortion_scale": 0.2,
+                    "p": 0.5
+                }
+            }
+        ],
+        "normalize_labels": true,
+        "use_cached_dataloader": false,
+        "load_for_contrast_learning": false,
+        "use_pre_calculated_embeddings": false,
+        "load_pooling_output": false,
+        "use_gaussian_smoothing": true,
+        "workers": 4
+    },
+    "DataLoaderConfig": {
+        "Train": {
+            "batch_size": 64,
+            "shuffle": true,
+            "num_workers": 8,
+            "pin_memory": true,
+            "prefetch_factor": 20
+        },
+        "Test": {
+            "batch_size": 64,
+            "shuffle": true,
+            "num_workers": 8,
+            "pin_memory": true,
+            "prefetch_factor": 20
+        }
+    },
+    "ModelConfig": {
+        "use_location_head": true,
+        "use_similarity_head": false,
+        "freeze_base_model": true,
+        "LocationHeadClip": {
+            "mean_locatation_head_output": false,
+            "layers": [
+                {
+                    "d_model": 1024,
+                    "nhead": 8
+                },
+                {
+                    "d_model": 1024,
+                    "nhead": 8
+                }
+            ],
+            "linear_layer_mapping": {
+                "in_features": 1024,
+                "out_features": 1024
+            }
+        },
+        "RegressionHead": {
+            "layer_group": [
+                [
+                    {
+                        "type": "Linear",
+                        "in_features": 1024,
+                        "out_features": 1024
+                    },
+                    {
+                        "type": "Dropout",
+                        "p": 0.3
+                    },
+                    {
+                        "type": "LayerNorm",
+                        "normalized_shape": 1024
+                    },
+                    {
+                        "type": "ReLU"
+                    }
+                ],
+                [
+                    {
+                        "type": "Linear",
+                        "in_features": 1024,
+                        "out_features": 512
+                    },
+                    {
+                        "type": "Dropout",
+                        "p": 0.2
+                    },
+                    {
+                        "type": "LayerNorm",
+                        "normalized_shape": 512
+                    },
+                    {
+                        "type": "ReLU"
+                    }
+                ],
+                [
+                    {
+                        "type": "Linear",
+                        "in_features": 512,
+                        "out_features": 256
+                    },
+                    {
+                        "type": "Dropout",
+                        "p": 0.1
+                    },
+                    {
+                        "type": "LayerNorm",
+                        "normalized_shape": 256
+                    },
+                    {
+                        "type": "ReLU"
+                    }
+                ],
+                [
+                    {
+                        "type": "Linear",
+                        "in_features": 256,
+                        "out_features": 64
+                    },
+                    {
+                        "type": "Dropout",
+                        "p": 0.1
+                    },
+                    {
+                        "type": "LayerNorm",
+                        "normalized_shape": 64
+                    },
+                    {
+                        "type": "ReLU"
+                    }
+                ],
+                [
+                    {
+                        "type": "Linear",
+                        "in_features": 64,
+                        "out_features": 32
+                    },
+                    {
+                        "type": "Dropout",
+                        "p": 0.1
+                    },
+                    {
+                        "type": "LayerNorm",
+                        "normalized_shape": 32
+                    },
+                    {
+                        "type": "ReLU"
+                    }
+                ],
+                [
+                    {
+                        "type": "Linear",
+                        "in_features": 32,
+                        "out_features": 2
+                    },
+                    {
+                        "type": "Tanh"
+                    }
+                ]
+            ]
+        }
+    },
+    "TrainingConfig": {
+        "Epochs": 6,
+        "SaveEvery": 10000,
+        "RunName": "Regression_Best_Long",
+        "SnapshotPath": "/home/tobias.rothlin/data/TrainingSnapshots",
+        "LogMLFlow": false,
+        "MLFlowExperimentName": "ClipLocationDecoder",
+        "GradientAccumulationSteps": 1,
+        "ContrastLearningStrategy": null,
+        "LearningRate": 5e-05,
+        "Amsgrad": true,
+        "WeightDecay": 0.0001,
+        "Betas": [
+            0.9,
+            0.98
+        ],
+        "Gamma": 0.9
+    }
+}