Image-to-Text
Transformers
PyTorch
English
Geo-Localization
ReGeo / config.json
kevinloeffler's picture
Update config.json
96d4ee7 verified
{
"ResumeFromPreviousRun": "/home/tobias.rothlin/data/TrainingSnapshots/Regression_2",
"DatasetConfig": {
"base_model": "openai/clip-vit-large-patch14-336",
"augmentaion_pipeline": [
{
"name": "RandomRotation",
"params": {
"degrees": 10
}
},
{
"name": "ColorJitter",
"params": {
"brightness": 0.5,
"contrast": 0.2,
"saturation": 0.4,
"hue": 0.01
}
},
{
"name": "RandomPerspective",
"params": {
"distortion_scale": 0.2,
"p": 0.5
}
}
],
"normalize_labels": true,
"use_cached_dataloader": false,
"load_for_contrast_learning": false,
"use_pre_calculated_embeddings": false,
"load_pooling_output": false,
"use_gaussian_smoothing": true,
"workers": 4
},
"DataLoaderConfig": {
"Train": {
"batch_size": 64,
"shuffle": true,
"num_workers": 8,
"pin_memory": true,
"prefetch_factor": 20
},
"Test": {
"batch_size": 64,
"shuffle": true,
"num_workers": 8,
"pin_memory": true,
"prefetch_factor": 20
}
},
"ModelConfig": {
"use_location_head": true,
"use_similarity_head": false,
"freeze_base_model": true,
"LocationHeadClip": {
"mean_locatation_head_output": false,
"layers": [
{
"d_model": 1024,
"nhead": 8
},
{
"d_model": 1024,
"nhead": 8
}
],
"linear_layer_mapping": {
"in_features": 1024,
"out_features": 1024
}
},
"RegressionHead": {
"layer_group": [
[
{
"type": "Linear",
"in_features": 1024,
"out_features": 1024
},
{
"type": "Dropout",
"p": 0.3
},
{
"type": "LayerNorm",
"normalized_shape": 1024
},
{
"type": "ReLU"
}
],
[
{
"type": "Linear",
"in_features": 1024,
"out_features": 512
},
{
"type": "Dropout",
"p": 0.2
},
{
"type": "LayerNorm",
"normalized_shape": 512
},
{
"type": "ReLU"
}
],
[
{
"type": "Linear",
"in_features": 512,
"out_features": 256
},
{
"type": "Dropout",
"p": 0.1
},
{
"type": "LayerNorm",
"normalized_shape": 256
},
{
"type": "ReLU"
}
],
[
{
"type": "Linear",
"in_features": 256,
"out_features": 64
},
{
"type": "Dropout",
"p": 0.1
},
{
"type": "LayerNorm",
"normalized_shape": 64
},
{
"type": "ReLU"
}
],
[
{
"type": "Linear",
"in_features": 64,
"out_features": 32
},
{
"type": "Dropout",
"p": 0.1
},
{
"type": "LayerNorm",
"normalized_shape": 32
},
{
"type": "ReLU"
}
],
[
{
"type": "Linear",
"in_features": 32,
"out_features": 2
},
{
"type": "Tanh"
}
]
]
}
},
"TrainingConfig": {
"Epochs": 6,
"SaveEvery": 10000,
"RunName": "Regression_Best_Long",
"SnapshotPath": "/home/tobias.rothlin/data/TrainingSnapshots",
"LogMLFlow": false,
"MLFlowExperimentName": "ClipLocationDecoder",
"GradientAccumulationSteps": 1,
"ContrastLearningStrategy": null,
"LearningRate": 5e-05,
"Amsgrad": true,
"WeightDecay": 0.0001,
"Betas": [
0.9,
0.98
],
"Gamma": 0.9
}
}