jialicheng commited on
Commit
c58068c
·
verified ·
1 Parent(s): b481770

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. checkpoint_best.pth +3 -0
  2. evaluate.txt +6 -0
  3. log.txt +133 -0
checkpoint_best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb79d93443190b143a46300721cba4d74d695c7f4d2f0409487b57e9c604f1fb
3
+ size 2650029567
evaluate.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"txt_r1": 77.26, "txt_r5": 93.48, "txt_r10": 96.86, "txt_r_mean": 89.2, "img_r1": 60.035985605757695, "img_r5": 83.80247900839665, "img_r10": 90.76369452219113, "img_r_mean": 78.20071971211514, "r_mean": 83.70035985605757, "agg_metrics": 89.2}
2
+ {"txt_r1": 77.8, "txt_r5": 93.72, "txt_r10": 97.12, "txt_r_mean": 89.54666666666667, "img_r1": 60.519792083166735, "img_r5": 84.406237504998, "img_r10": 90.89964014394242, "img_r_mean": 78.60855657736904, "r_mean": 84.07761162201786, "agg_metrics": 89.54666666666667}
3
+ {"txt_r1": 77.62, "txt_r5": 94.3, "txt_r10": 97.24, "txt_r_mean": 89.72000000000001, "img_r1": 60.59576169532187, "img_r5": 84.43822471011596, "img_r10": 91.05557776889245, "img_r_mean": 78.69652139144343, "r_mean": 84.20826069572172, "agg_metrics": 89.72000000000001}
4
+ {"txt_r1": 78.56, "txt_r5": 94.1, "txt_r10": 97.24, "txt_r_mean": 89.96666666666665, "img_r1": 61.12754898040784, "img_r5": 84.5701719312275, "img_r10": 91.1595361855258, "img_r_mean": 78.95241903238706, "r_mean": 84.45954284952685, "agg_metrics": 89.96666666666665}
5
+ {"txt_r1": 78.36, "txt_r5": 94.36, "txt_r10": 97.36, "txt_r_mean": 90.02666666666666, "img_r1": 61.10355857656937, "img_r5": 84.41423430627749, "img_r10": 91.06357457017194, "img_r_mean": 78.86045581767293, "r_mean": 84.44356124216979, "agg_metrics": 90.02666666666666}
6
+ {"txt_r1": 77.46, "txt_r5": 94.16, "txt_r10": 97.52, "txt_r_mean": 89.71333333333332, "img_r1": 60.57177129148341, "img_r5": 84.12235105957618, "img_r10": 90.51179528188725, "img_r_mean": 78.4019725443156, "r_mean": 84.05765293882446, "agg_metrics": 89.71333333333332}
log.txt ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "run": {
3
+ "task": "retrieval",
4
+ "lr_sched": "linear_warmup_cosine_lr",
5
+ "init_lr": 1e-05,
6
+ "min_lr": 1e-06,
7
+ "weight_decay": 0.05,
8
+ "train_splits": [
9
+ "train"
10
+ ],
11
+ "valid_splits": [
12
+ "val"
13
+ ],
14
+ "test_splits": [
15
+ "test"
16
+ ],
17
+ "k_test": 256,
18
+ "output_dir": "./output/original/albef/retrieval_coco",
19
+ "max_epoch": 5,
20
+ "log_freq": 2000,
21
+ "num_workers": 8,
22
+ "batch_size_train": 32,
23
+ "batch_size_eval": 64,
24
+ "device": "cuda",
25
+ "world_size": 2,
26
+ "dist_url": "env://",
27
+ "distributed": true,
28
+ "use_dist_eval_sampler": false,
29
+ "seed": 42,
30
+ "amp": false,
31
+ "resume_ckpt_path": null,
32
+ "evaluate": false,
33
+ "rank": 0,
34
+ "gpu": 0,
35
+ "dist_backend": "nccl"
36
+ },
37
+ "model": {
38
+ "arch": "albef_retrieval",
39
+ "load_finetuned": false,
40
+ "pretrained": "https://storage.googleapis.com/sfr-pcl-data-research/ALBEF/ALBEF.pth",
41
+ "finetuned": "https://storage.googleapis.com/sfr-vision-language-research/LAVIS/models/ALBEF/albef_coco_retrieval_lavis.pt",
42
+ "queue_size": 65536,
43
+ "vit_type": "base",
44
+ "image_size": 384,
45
+ "vit_ckpt_layer": 0,
46
+ "vit_drop_path_rate": 0,
47
+ "vit_layer_norm_epsilon": 1e-06,
48
+ "vit_grad_ckpt": false,
49
+ "med_config_path": "configs/models/med_config_albef.json",
50
+ "embed_dim": 256,
51
+ "momentum": 0.995,
52
+ "alpha": 0.4,
53
+ "temp": 0.07,
54
+ "use_distill": true,
55
+ "max_txt_len": 30,
56
+ "model_type": "coco"
57
+ },
58
+ "preprocess": {
59
+ "vis_processor": {
60
+ "train": {
61
+ "name": "blip_image_train",
62
+ "image_size": 384
63
+ },
64
+ "eval": {
65
+ "name": "blip_image_eval",
66
+ "image_size": 384
67
+ }
68
+ },
69
+ "text_processor": {
70
+ "train": {
71
+ "name": "blip_caption"
72
+ },
73
+ "eval": {
74
+ "name": "blip_caption"
75
+ }
76
+ }
77
+ },
78
+ "datasets": {
79
+ "coco_retrieval": {
80
+ "data_type": "images",
81
+ "build_info": {
82
+ "annotations": {
83
+ "train": {
84
+ "url": "https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_train.json",
85
+ "md5": "aa31ac474cf6250ebb81d18348a07ed8",
86
+ "storage": "coco/annotations/coco_karpathy_train.json"
87
+ },
88
+ "val": {
89
+ "url": "https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_val.json",
90
+ "md5": "b273847456ef5580e33713b1f7de52a0",
91
+ "storage": "coco/annotations/coco_karpathy_val.json"
92
+ },
93
+ "test": {
94
+ "url": "https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_test.json",
95
+ "md5": "3ff34b0ef2db02d01c37399f6a2a6cd1",
96
+ "storage": "coco/annotations/coco_karpathy_test.json"
97
+ }
98
+ },
99
+ "images": {
100
+ "storage": "coco/images/"
101
+ }
102
+ },
103
+ "vis_processor": {
104
+ "train": {
105
+ "name": "blip_image_train",
106
+ "image_size": 384
107
+ },
108
+ "eval": {
109
+ "name": "blip_image_eval",
110
+ "image_size": 384
111
+ }
112
+ },
113
+ "text_processor": {
114
+ "train": {
115
+ "name": "blip_caption"
116
+ },
117
+ "eval": {
118
+ "name": "blip_caption"
119
+ }
120
+ }
121
+ }
122
+ }
123
+ }
124
+ {"train_lr": "0.000", "train_loss": "2.636"}
125
+ {"val_txt_r1": 77.26, "val_txt_r5": 93.48, "val_txt_r10": 96.86, "val_txt_r_mean": 89.2, "val_img_r1": 60.035985605757695, "val_img_r5": 83.80247900839665, "val_img_r10": 90.76369452219113, "val_img_r_mean": 78.20071971211514, "val_r_mean": 83.70035985605757, "val_agg_metrics": 89.2, "val_best_epoch": 0}
126
+ {"train_lr": "0.000", "train_loss": "2.808"}
127
+ {"val_txt_r1": 77.8, "val_txt_r5": 93.72, "val_txt_r10": 97.12, "val_txt_r_mean": 89.54666666666667, "val_img_r1": 60.519792083166735, "val_img_r5": 84.406237504998, "val_img_r10": 90.89964014394242, "val_img_r_mean": 78.60855657736904, "val_r_mean": 84.07761162201786, "val_agg_metrics": 89.54666666666667, "val_best_epoch": 1}
128
+ {"train_lr": "0.000", "train_loss": "2.842"}
129
+ {"val_txt_r1": 77.62, "val_txt_r5": 94.3, "val_txt_r10": 97.24, "val_txt_r_mean": 89.72000000000001, "val_img_r1": 60.59576169532187, "val_img_r5": 84.43822471011596, "val_img_r10": 91.05557776889245, "val_img_r_mean": 78.69652139144343, "val_r_mean": 84.20826069572172, "val_agg_metrics": 89.72000000000001, "val_best_epoch": 2}
130
+ {"train_lr": "0.000", "train_loss": "2.833"}
131
+ {"val_txt_r1": 78.56, "val_txt_r5": 94.1, "val_txt_r10": 97.24, "val_txt_r_mean": 89.96666666666665, "val_img_r1": 61.12754898040784, "val_img_r5": 84.5701719312275, "val_img_r10": 91.1595361855258, "val_img_r_mean": 78.95241903238706, "val_r_mean": 84.45954284952685, "val_agg_metrics": 89.96666666666665, "val_best_epoch": 3}
132
+ {"train_lr": "0.000", "train_loss": "2.738"}
133
+ {"val_txt_r1": 78.36, "val_txt_r5": 94.36, "val_txt_r10": 97.36, "val_txt_r_mean": 90.02666666666666, "val_img_r1": 61.10355857656937, "val_img_r5": 84.41423430627749, "val_img_r10": 91.06357457017194, "val_img_r_mean": 78.86045581767293, "val_r_mean": 84.44356124216979, "val_agg_metrics": 90.02666666666666, "val_best_epoch": 4}