updated results with flickr ablation inline
Browse files- dinov2_bert_large_results.txt +94 -71
dinov2_bert_large_results.txt
CHANGED
|
@@ -4,10 +4,13 @@ STAGE 2: FULL VALIDATION BATTERY
|
|
| 4 |
|
| 5 |
Loading cached datasets...
|
| 6 |
Loading dataset from disk: 100%
|
| 7 |
-
49/49 [00:00<00:00,
|
| 8 |
Loading dataset from disk: 100%
|
| 9 |
-
49/49 [00:00<00:00,
|
| 10 |
COCO train: 40504, COCO test: 40775
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
======================================================================
|
| 13 |
EXP 1: MAIN (5 seeds, FULL train)
|
|
@@ -15,38 +18,43 @@ EXP 1: MAIN (5 seeds, FULL train)
|
|
| 15 |
|
| 16 |
Seed 42:
|
| 17 |
Loading dataset from disk: 100%
|
| 18 |
-
49/49 [00:00<00:00,
|
| 19 |
-
seed_42 pretrain val (N=6076): R@1=0.
|
| 20 |
-
Ep 1: loss=0.
|
| 21 |
-
Seed 42 (N=40775): R@1=1.0000 R@5=1.0000 cos=0.
|
|
|
|
| 22 |
|
| 23 |
Seed 123:
|
| 24 |
Loading dataset from disk: 100%
|
| 25 |
-
49/49 [00:00<00:00,
|
| 26 |
-
seed_123 pretrain val (N=6076): R@1=0.
|
| 27 |
-
Ep 1: loss=0.
|
| 28 |
-
Seed 123 (N=40775): R@1=1.0000 R@5=1.0000 cos=0.
|
|
|
|
| 29 |
|
| 30 |
Seed 456:
|
| 31 |
Loading dataset from disk: 100%
|
| 32 |
-
49/49 [00:00<00:00,
|
| 33 |
-
seed_456 pretrain val (N=6076): R@1=0.0000 R@5=0.0005 cos=0.
|
| 34 |
-
Ep 1: loss=0.
|
| 35 |
-
Seed 456 (N=40775): R@1=1.0000 R@5=1.0000 cos=0.
|
|
|
|
| 36 |
|
| 37 |
Seed 789:
|
| 38 |
Loading dataset from disk: 100%
|
| 39 |
-
49/49 [00:00<00:00,
|
| 40 |
-
seed_789 pretrain val (N=6076): R@1=0.
|
| 41 |
-
Ep 1: loss=0.
|
| 42 |
-
Seed 789 (N=40775): R@1=1.0000 R@5=1.0000 cos=0.
|
|
|
|
| 43 |
|
| 44 |
Seed 2024:
|
| 45 |
Loading dataset from disk: 100%
|
| 46 |
-
49/49 [00:00<00:00,
|
| 47 |
-
seed_2024 pretrain val (N=6076): R@1=0.
|
| 48 |
-
Ep 1: loss=0.
|
| 49 |
-
Seed 2024 (N=40775): R@1=1.0000 R@5=1.0000 cos=0.
|
|
|
|
| 50 |
|
| 51 |
R@1: 1.0000 ± 0.0000
|
| 52 |
|
|
@@ -54,10 +62,10 @@ Loading dataset from disk: 100%
|
|
| 54 |
EXP 2: CONTRASTIVE ONLY
|
| 55 |
======================================================================
|
| 56 |
Loading dataset from disk: 100%
|
| 57 |
-
49/49 [00:00<00:00,
|
| 58 |
-
contrastive_only pretrain val (N=6076): R@1=0.
|
| 59 |
-
Ep 1: loss=0.
|
| 60 |
-
Contrastive only (N=40775): R@1=1.0000 R@5=1.0000 cos=0.
|
| 61 |
|
| 62 |
======================================================================
|
| 63 |
EXP 3: DEPTH (1,2,4,6 layers)
|
|
@@ -65,79 +73,89 @@ EXP 3: DEPTH (1,2,4,6 layers)
|
|
| 65 |
|
| 66 |
1 layers:
|
| 67 |
Loading dataset from disk: 100%
|
| 68 |
-
49/49 [00:00<00:00,
|
| 69 |
-
depth_1 pretrain val (N=6076): R@1=0.
|
| 70 |
-
Ep 1: loss=0.
|
| 71 |
-
1L (N=40775): R@1=1.0000 R@5=1.0000 cos=0.
|
| 72 |
|
| 73 |
2 layers:
|
| 74 |
Loading dataset from disk: 100%
|
| 75 |
-
49/49 [00:00<00:00,
|
| 76 |
-
depth_2 pretrain val (N=6076): R@1=0.
|
| 77 |
-
Ep 1: loss=0.
|
| 78 |
-
2L (N=40775): R@1=1.0000 R@5=1.0000 cos=0.
|
| 79 |
|
| 80 |
4 layers:
|
| 81 |
Loading dataset from disk: 100%
|
| 82 |
-
49/49 [00:00<00:00,
|
| 83 |
-
depth_4 pretrain val (N=6076): R@1=0.
|
| 84 |
-
Ep 1: loss=0.
|
| 85 |
-
4L (N=40775): R@1=1.0000 R@5=1.0000 cos=0.
|
| 86 |
|
| 87 |
6 layers:
|
| 88 |
Loading dataset from disk: 100%
|
| 89 |
-
49/49 [00:00<00:00,
|
| 90 |
-
depth_6 pretrain val (N=6076): R@1=0.
|
| 91 |
-
Ep 1: loss=0.
|
| 92 |
-
6L (N=40775): R@1=1.0000 R@5=1.0000 cos=0.
|
| 93 |
|
| 94 |
======================================================================
|
| 95 |
EXP 4: SCALE (1K → FULL)
|
| 96 |
======================================================================
|
| 97 |
Loading dataset from disk: 100%
|
| 98 |
-
49/49 [00:00<00:00,
|
| 99 |
|
| 100 |
1000 pairs:
|
| 101 |
Loading dataset from disk: 100%
|
| 102 |
-
49/49 [00:00<00:00,
|
| 103 |
-
scale_1000 pretrain val (N=151): R@1=0.0066 R@5=0.
|
| 104 |
-
Ep 1: loss=4.
|
| 105 |
-
1000 (N=40775): R@1=0.
|
| 106 |
|
| 107 |
2000 pairs:
|
| 108 |
Loading dataset from disk: 100%
|
| 109 |
-
49/49 [00:00<00:00,
|
| 110 |
-
scale_2000 pretrain val (N=301): R@1=0.0000 R@5=0.
|
| 111 |
-
Ep 1: loss=3.
|
| 112 |
-
2000 (N=40775): R@1=0.
|
| 113 |
|
| 114 |
5000 pairs:
|
| 115 |
Loading dataset from disk: 100%
|
| 116 |
-
49/49 [00:00<00:00,
|
| 117 |
-
scale_5000 pretrain val (N=751): R@1=0.
|
| 118 |
-
Ep 1: loss=
|
| 119 |
-
5000 (N=40775): R@1=
|
| 120 |
|
| 121 |
10000 pairs:
|
| 122 |
Loading dataset from disk: 100%
|
| 123 |
-
49/49 [00:00<00:00,
|
| 124 |
-
scale_10000 pretrain val (N=1501): R@1=0.0003 R@5=0.
|
| 125 |
-
Ep 1: loss=1.
|
| 126 |
-
10000 (N=40775): R@1=
|
| 127 |
|
| 128 |
20000 pairs:
|
| 129 |
Loading dataset from disk: 100%
|
| 130 |
-
49/49 [00:00<00:00,
|
| 131 |
-
scale_20000 pretrain val (N=3001): R@1=0.0000 R@5=0.
|
| 132 |
-
Ep 1: loss=0.
|
| 133 |
-
20000 (N=40775): R@1=1.0000 R@5=1.0000 cos=0.
|
| 134 |
|
| 135 |
40504 pairs:
|
| 136 |
Loading dataset from disk: 100%
|
| 137 |
-
49/49 [00:00<00:00,
|
| 138 |
-
scale_40504 pretrain val (N=6076): R@1=0.
|
| 139 |
-
Ep 1: loss=0.
|
| 140 |
-
40504 (N=40775): R@1=1.0000 R@5=1.0000 cos=0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
|
| 142 |
Saved to /home/claude/geo_results.json
|
| 143 |
|
|
@@ -147,6 +165,7 @@ SUMMARY
|
|
| 147 |
|
| 148 |
EXP 1 — Multi-seed:
|
| 149 |
R@1: 1.0000 ± 0.0000
|
|
|
|
| 150 |
|
| 151 |
EXP 2 — Ablation:
|
| 152 |
Contrastive only: R@1=1.0000
|
|
@@ -159,12 +178,16 @@ SUMMARY
|
|
| 159 |
6L (60,971,008): R@1=1.0000
|
| 160 |
|
| 161 |
EXP 4 — Scale:
|
| 162 |
-
1000: R@1=0.
|
| 163 |
-
2000: R@1=0.
|
| 164 |
-
5000: R@1=
|
| 165 |
-
10000: R@1=
|
| 166 |
20000: R@1=1.0000
|
| 167 |
40504: R@1=1.0000
|
| 168 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
Ref: chance=0.000025, CLIP~0.60 (400M pairs)
|
| 170 |
Done.
|
|
|
|
| 4 |
|
| 5 |
Loading cached datasets...
|
| 6 |
Loading dataset from disk: 100%
|
| 7 |
+
49/49 [00:00<00:00, 14609.11it/s]
|
| 8 |
Loading dataset from disk: 100%
|
| 9 |
+
49/49 [00:00<00:00, 13073.85it/s]
|
| 10 |
COCO train: 40504, COCO test: 40775
|
| 11 |
+
Loading dataset from disk: 100%
|
| 12 |
+
37/37 [00:00<00:00, 9806.59it/s]
|
| 13 |
+
Flickr30k: 31014
|
| 14 |
|
| 15 |
======================================================================
|
| 16 |
EXP 1: MAIN (5 seeds, FULL train)
|
|
|
|
| 18 |
|
| 19 |
Seed 42:
|
| 20 |
Loading dataset from disk: 100%
|
| 21 |
+
49/49 [00:00<00:00, 15620.65it/s]
|
| 22 |
+
seed_42 pretrain val (N=6076): R@1=0.0002 R@5=0.0016 cos=-0.015 CVj=0.149
|
| 23 |
+
Ep 1: loss=0.3604 c=0.3112 p=0.0003 a=0.9845 val_R@1=1.0000 val_R@5=1.0000 temp=14.543 (38s)
|
| 24 |
+
Seed 42 (N=40775): R@1=1.0000 R@5=1.0000 cos=0.980 CVj=0.198
|
| 25 |
+
Seed 42 Flickr (N=31014): R@1=1.0000 R@5=1.0000 cos=0.981 CVj=0.248
|
| 26 |
|
| 27 |
Seed 123:
|
| 28 |
Loading dataset from disk: 100%
|
| 29 |
+
49/49 [00:00<00:00, 13374.17it/s]
|
| 30 |
+
seed_123 pretrain val (N=6076): R@1=0.0002 R@5=0.0009 cos=-0.005 CVj=0.161
|
| 31 |
+
Ep 1: loss=0.3313 c=0.2821 p=0.0002 a=0.9845 val_R@1=1.0000 val_R@5=1.0000 temp=14.537 (38s)
|
| 32 |
+
Seed 123 (N=40775): R@1=1.0000 R@5=1.0000 cos=0.978 CVj=0.193
|
| 33 |
+
Seed 123 Flickr (N=31014): R@1=1.0000 R@5=1.0000 cos=0.980 CVj=0.207
|
| 34 |
|
| 35 |
Seed 456:
|
| 36 |
Loading dataset from disk: 100%
|
| 37 |
+
49/49 [00:00<00:00, 13673.14it/s]
|
| 38 |
+
seed_456 pretrain val (N=6076): R@1=0.0000 R@5=0.0005 cos=0.011 CVj=0.161
|
| 39 |
+
Ep 1: loss=0.3464 c=0.2972 p=0.0002 a=0.9845 val_R@1=1.0000 val_R@5=1.0000 temp=14.533 (38s)
|
| 40 |
+
Seed 456 (N=40775): R@1=1.0000 R@5=1.0000 cos=0.980 CVj=0.192
|
| 41 |
+
Seed 456 Flickr (N=31014): R@1=1.0000 R@5=1.0000 cos=0.981 CVj=0.237
|
| 42 |
|
| 43 |
Seed 789:
|
| 44 |
Loading dataset from disk: 100%
|
| 45 |
+
49/49 [00:00<00:00, 13702.31it/s]
|
| 46 |
+
seed_789 pretrain val (N=6076): R@1=0.0001 R@5=0.0005 cos=-0.013 CVj=0.153
|
| 47 |
+
Ep 1: loss=0.3281 c=0.2789 p=0.0003 a=0.9845 val_R@1=1.0000 val_R@5=1.0000 temp=14.534 (38s)
|
| 48 |
+
Seed 789 (N=40775): R@1=1.0000 R@5=1.0000 cos=0.979 CVj=0.186
|
| 49 |
+
Seed 789 Flickr (N=31014): R@1=1.0000 R@5=1.0000 cos=0.980 CVj=0.225
|
| 50 |
|
| 51 |
Seed 2024:
|
| 52 |
Loading dataset from disk: 100%
|
| 53 |
+
49/49 [00:00<00:00, 13214.23it/s]
|
| 54 |
+
seed_2024 pretrain val (N=6076): R@1=0.0003 R@5=0.0010 cos=0.024 CVj=0.157
|
| 55 |
+
Ep 1: loss=0.3103 c=0.2610 p=0.0002 a=0.9845 val_R@1=1.0000 val_R@5=1.0000 temp=14.532 (38s)
|
| 56 |
+
Seed 2024 (N=40775): R@1=1.0000 R@5=1.0000 cos=0.980 CVj=0.207
|
| 57 |
+
Seed 2024 Flickr (N=31014): R@1=1.0000 R@5=1.0000 cos=0.981 CVj=0.219
|
| 58 |
|
| 59 |
R@1: 1.0000 ± 0.0000
|
| 60 |
|
|
|
|
| 62 |
EXP 2: CONTRASTIVE ONLY
|
| 63 |
======================================================================
|
| 64 |
Loading dataset from disk: 100%
|
| 65 |
+
49/49 [00:00<00:00, 13378.52it/s]
|
| 66 |
+
contrastive_only pretrain val (N=6076): R@1=0.0001 R@5=0.0007 cos=-0.026 CVj=0.161
|
| 67 |
+
Ep 1: loss=0.3136 c=0.3136 p=0.0000 a=0.0000 val_R@1=1.0000 val_R@5=1.0000 temp=14.548 (22s)
|
| 68 |
+
Contrastive only (N=40775): R@1=1.0000 R@5=1.0000 cos=0.976 CVj=0.191
|
| 69 |
|
| 70 |
======================================================================
|
| 71 |
EXP 3: DEPTH (1,2,4,6 layers)
|
|
|
|
| 73 |
|
| 74 |
1 layers:
|
| 75 |
Loading dataset from disk: 100%
|
| 76 |
+
49/49 [00:00<00:00, 12061.79it/s]
|
| 77 |
+
depth_1 pretrain val (N=6076): R@1=0.0002 R@5=0.0012 cos=-0.002 CVj=0.158
|
| 78 |
+
Ep 1: loss=0.2866 c=0.2374 p=0.0002 a=0.9845 val_R@1=1.0000 val_R@5=1.0000 temp=14.499 (37s)
|
| 79 |
+
1L (N=40775): R@1=1.0000 R@5=1.0000 cos=0.979 CVj=0.162
|
| 80 |
|
| 81 |
2 layers:
|
| 82 |
Loading dataset from disk: 100%
|
| 83 |
+
49/49 [00:00<00:00, 13452.96it/s]
|
| 84 |
+
depth_2 pretrain val (N=6076): R@1=0.0002 R@5=0.0010 cos=-0.014 CVj=0.150
|
| 85 |
+
Ep 1: loss=0.2941 c=0.2449 p=0.0002 a=0.9845 val_R@1=1.0000 val_R@5=1.0000 temp=14.509 (37s)
|
| 86 |
+
2L (N=40775): R@1=1.0000 R@5=1.0000 cos=0.976 CVj=0.181
|
| 87 |
|
| 88 |
4 layers:
|
| 89 |
Loading dataset from disk: 100%
|
| 90 |
+
49/49 [00:00<00:00, 14176.79it/s]
|
| 91 |
+
depth_4 pretrain val (N=6076): R@1=0.0000 R@5=0.0002 cos=0.005 CVj=0.146
|
| 92 |
+
Ep 1: loss=0.3524 c=0.3031 p=0.0003 a=0.9845 val_R@1=1.0000 val_R@5=1.0000 temp=14.542 (38s)
|
| 93 |
+
4L (N=40775): R@1=1.0000 R@5=1.0000 cos=0.980 CVj=0.193
|
| 94 |
|
| 95 |
6 layers:
|
| 96 |
Loading dataset from disk: 100%
|
| 97 |
+
49/49 [00:00<00:00, 13204.04it/s]
|
| 98 |
+
depth_6 pretrain val (N=6076): R@1=0.0002 R@5=0.0014 cos=0.011 CVj=0.143
|
| 99 |
+
Ep 1: loss=0.3972 c=0.3479 p=0.0003 a=0.9844 val_R@1=1.0000 val_R@5=1.0000 temp=14.558 (39s)
|
| 100 |
+
6L (N=40775): R@1=1.0000 R@5=1.0000 cos=0.968 CVj=0.169
|
| 101 |
|
| 102 |
======================================================================
|
| 103 |
EXP 4: SCALE (1K → FULL)
|
| 104 |
======================================================================
|
| 105 |
Loading dataset from disk: 100%
|
| 106 |
+
49/49 [00:00<00:00, 12187.68it/s]
|
| 107 |
|
| 108 |
1000 pairs:
|
| 109 |
Loading dataset from disk: 100%
|
| 110 |
+
49/49 [00:00<00:00, 14658.08it/s]
|
| 111 |
+
scale_1000 pretrain val (N=151): R@1=0.0066 R@5=0.0364 cos=0.015 CVj=0.231
|
| 112 |
+
Ep 1: loss=4.7240 c=4.6747 p=0.0000 a=0.9849 val_R@1=0.7053 val_R@5=0.9139 temp=14.284 (3s)
|
| 113 |
+
1000 (N=40775): R@1=0.1257 R@5=0.2825 cos=0.551 CVj=0.417
|
| 114 |
|
| 115 |
2000 pairs:
|
| 116 |
Loading dataset from disk: 100%
|
| 117 |
+
49/49 [00:00<00:00, 14147.51it/s]
|
| 118 |
+
scale_2000 pretrain val (N=301): R@1=0.0000 R@5=0.0116 cos=-0.014 CVj=0.193
|
| 119 |
+
Ep 1: loss=3.6022 c=3.5529 p=0.0001 a=0.9847 val_R@1=0.9651 val_R@5=1.0000 temp=14.287 (5s)
|
| 120 |
+
2000 (N=40775): R@1=0.4119 R@5=0.7025 cos=0.804 CVj=0.618
|
| 121 |
|
| 122 |
5000 pairs:
|
| 123 |
Loading dataset from disk: 100%
|
| 124 |
+
49/49 [00:00<00:00, 13074.68it/s]
|
| 125 |
+
scale_5000 pretrain val (N=751): R@1=0.0020 R@5=0.0113 cos=0.014 CVj=0.204
|
| 126 |
+
Ep 1: loss=2.0504 c=2.0012 p=0.0003 a=0.9846 val_R@1=1.0000 val_R@5=1.0000 temp=14.306 (8s)
|
| 127 |
+
5000 (N=40775): R@1=0.9942 R@5=1.0000 cos=0.907 CVj=0.364
|
| 128 |
|
| 129 |
10000 pairs:
|
| 130 |
Loading dataset from disk: 100%
|
| 131 |
+
49/49 [00:00<00:00, 14572.85it/s]
|
| 132 |
+
scale_10000 pretrain val (N=1501): R@1=0.0003 R@5=0.0050 cos=0.004 CVj=0.179
|
| 133 |
+
Ep 1: loss=1.1547 c=1.1055 p=0.0003 a=0.9845 val_R@1=1.0000 val_R@5=1.0000 temp=14.341 (12s)
|
| 134 |
+
10000 (N=40775): R@1=0.9996 R@5=1.0000 cos=0.959 CVj=0.351
|
| 135 |
|
| 136 |
20000 pairs:
|
| 137 |
Loading dataset from disk: 100%
|
| 138 |
+
49/49 [00:00<00:00, 13250.01it/s]
|
| 139 |
+
scale_20000 pretrain val (N=3001): R@1=0.0000 R@5=0.0005 cos=-0.011 CVj=0.177
|
| 140 |
+
Ep 1: loss=0.5679 c=0.5186 p=0.0003 a=0.9845 val_R@1=1.0000 val_R@5=1.0000 temp=14.415 (21s)
|
| 141 |
+
20000 (N=40775): R@1=1.0000 R@5=1.0000 cos=0.977 CVj=0.271
|
| 142 |
|
| 143 |
40504 pairs:
|
| 144 |
Loading dataset from disk: 100%
|
| 145 |
+
49/49 [00:00<00:00, 13208.28it/s]
|
| 146 |
+
scale_40504 pretrain val (N=6076): R@1=0.0002 R@5=0.0005 cos=0.001 CVj=0.153
|
| 147 |
+
Ep 1: loss=0.3731 c=0.3238 p=0.0003 a=0.9845 val_R@1=1.0000 val_R@5=1.0000 temp=14.552 (38s)
|
| 148 |
+
40504 (N=40775): R@1=1.0000 R@5=1.0000 cos=0.979 CVj=0.198
|
| 149 |
+
|
| 150 |
+
======================================================================
|
| 151 |
+
EXP 5: CROSS-DATASET
|
| 152 |
+
======================================================================
|
| 153 |
+
Loading dataset from disk: 100%
|
| 154 |
+
49/49 [00:00<00:00, 12989.56it/s]
|
| 155 |
+
cross_dataset pretrain val (N=6076): R@1=0.0000 R@5=0.0002 cos=0.005 CVj=0.146
|
| 156 |
+
Ep 1: loss=0.3524 c=0.3031 p=0.0003 a=0.9845 val_R@1=1.0000 val_R@5=1.0000 temp=14.542 (38s)
|
| 157 |
+
COCO test (full) (N=40775): R@1=1.0000 R@5=1.0000 cos=0.980 CVj=0.193
|
| 158 |
+
Flickr30k (zero-shot, full) (N=31014): R@1=1.0000 R@5=1.0000 cos=0.980 CVj=0.236
|
| 159 |
|
| 160 |
Saved to /home/claude/geo_results.json
|
| 161 |
|
|
|
|
| 165 |
|
| 166 |
EXP 1 — Multi-seed:
|
| 167 |
R@1: 1.0000 ± 0.0000
|
| 168 |
+
Flickr R@1: 1.0000 ± 0.0000
|
| 169 |
|
| 170 |
EXP 2 — Ablation:
|
| 171 |
Contrastive only: R@1=1.0000
|
|
|
|
| 178 |
6L (60,971,008): R@1=1.0000
|
| 179 |
|
| 180 |
EXP 4 — Scale:
|
| 181 |
+
1000: R@1=0.1257
|
| 182 |
+
2000: R@1=0.4119
|
| 183 |
+
5000: R@1=0.9942
|
| 184 |
+
10000: R@1=0.9996
|
| 185 |
20000: R@1=1.0000
|
| 186 |
40504: R@1=1.0000
|
| 187 |
|
| 188 |
+
EXP 5 — Transfer:
|
| 189 |
+
COCO: R@1=1.0000
|
| 190 |
+
Flickr: R@1=1.0000
|
| 191 |
+
|
| 192 |
Ref: chance=0.000025, CLIP~0.60 (400M pairs)
|
| 193 |
Done.
|