added more trained models
Browse files- breaking_0.2_trained/20_most_difficult/checkpoints/epoch_5.pt +3 -0
- breaking_0.2_trained/20_most_difficult/checkpoints/epoch_latest.pt +3 -0
- breaking_0.2_trained/20_most_difficult/info.pkl +3 -0
- breaking_0.2_trained/20_most_difficult/out.log +497 -0
- breaking_0.2_trained/20_most_difficult/params.txt +91 -0
- breaking_0.3_trained/30_most_difficult/checkpoints/epoch_5.pt +3 -0
- breaking_0.3_trained/30_most_difficult/checkpoints/epoch_latest.pt +3 -0
- breaking_0.3_trained/30_most_difficult/info.pkl +3 -0
- breaking_0.3_trained/30_most_difficult/out.log +497 -0
- breaking_0.3_trained/30_most_difficult/params.txt +91 -0
- breaking_0.7_trained/70_most_difficult/checkpoints/epoch_5.pt +3 -0
- breaking_0.7_trained/70_most_difficult/checkpoints/epoch_latest.pt +3 -0
- breaking_0.7_trained/70_most_difficult/info.pkl +3 -0
- breaking_0.7_trained/70_most_difficult/out.log +497 -0
- breaking_0.7_trained/70_most_difficult/params.txt +91 -0
- breaking_0.9_trained/90_most_difficult/checkpoints/epoch_5.pt +3 -0
- breaking_0.9_trained/90_most_difficult/checkpoints/epoch_latest.pt +3 -0
- breaking_0.9_trained/90_most_difficult/info.pkl +3 -0
- breaking_0.9_trained/90_most_difficult/out.log +497 -0
- breaking_0.9_trained/90_most_difficult/params.txt +91 -0
breaking_0.2_trained/20_most_difficult/checkpoints/epoch_5.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:677af8c29c8be40887616015bf5d8d41ab562351cf71f037f2651f6e8b45d77f
|
| 3 |
+
size 1815701601
|
breaking_0.2_trained/20_most_difficult/checkpoints/epoch_latest.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:448f8f84352a9c8f41df8aa124708898de3a777613e59dbc9a594befb5283d3b
|
| 3 |
+
size 1815639289
|
breaking_0.2_trained/20_most_difficult/info.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c5ec2de9dd3837c0bbfd15d07d911b04aa4588bbc2b6e2182df239784caa842
|
| 3 |
+
size 321
|
breaking_0.2_trained/20_most_difficult/out.log
ADDED
|
@@ -0,0 +1,497 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2024-09-07,15:01:55 | INFO | No latest resume checkpoint found in /home/breaking_0.2_trained/20_most_difficult/checkpoints.
|
| 2 |
+
2024-09-07,15:01:57 | INFO | Running in distributed mode with multiple processes. Device: cuda:0.Process (global: 0, local 0), total 2.
|
| 3 |
+
2024-09-07,15:01:57 | INFO | Loaded ViT-B-32 model config.
|
| 4 |
+
2024-09-07,15:01:58 | INFO | Model:
|
| 5 |
+
2024-09-07,15:01:58 | INFO | CLIP(
|
| 6 |
+
(visual): VisionTransformer(
|
| 7 |
+
(patchnorm_pre_ln): Identity()
|
| 8 |
+
(conv1): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32), bias=False)
|
| 9 |
+
(patch_dropout): Identity()
|
| 10 |
+
(ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 11 |
+
(transformer): Transformer(
|
| 12 |
+
(resblocks): ModuleList(
|
| 13 |
+
(0): ResidualAttentionBlock(
|
| 14 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 15 |
+
(attn): MultiheadAttention(
|
| 16 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 17 |
+
)
|
| 18 |
+
(ls_1): Identity()
|
| 19 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 20 |
+
(mlp): Sequential(
|
| 21 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 22 |
+
(gelu): GELU(approximate='none')
|
| 23 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 24 |
+
)
|
| 25 |
+
(ls_2): Identity()
|
| 26 |
+
)
|
| 27 |
+
(1): ResidualAttentionBlock(
|
| 28 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 29 |
+
(attn): MultiheadAttention(
|
| 30 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 31 |
+
)
|
| 32 |
+
(ls_1): Identity()
|
| 33 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 34 |
+
(mlp): Sequential(
|
| 35 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 36 |
+
(gelu): GELU(approximate='none')
|
| 37 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 38 |
+
)
|
| 39 |
+
(ls_2): Identity()
|
| 40 |
+
)
|
| 41 |
+
(2): ResidualAttentionBlock(
|
| 42 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 43 |
+
(attn): MultiheadAttention(
|
| 44 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 45 |
+
)
|
| 46 |
+
(ls_1): Identity()
|
| 47 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 48 |
+
(mlp): Sequential(
|
| 49 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 50 |
+
(gelu): GELU(approximate='none')
|
| 51 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 52 |
+
)
|
| 53 |
+
(ls_2): Identity()
|
| 54 |
+
)
|
| 55 |
+
(3): ResidualAttentionBlock(
|
| 56 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 57 |
+
(attn): MultiheadAttention(
|
| 58 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 59 |
+
)
|
| 60 |
+
(ls_1): Identity()
|
| 61 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 62 |
+
(mlp): Sequential(
|
| 63 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 64 |
+
(gelu): GELU(approximate='none')
|
| 65 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 66 |
+
)
|
| 67 |
+
(ls_2): Identity()
|
| 68 |
+
)
|
| 69 |
+
(4): ResidualAttentionBlock(
|
| 70 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 71 |
+
(attn): MultiheadAttention(
|
| 72 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 73 |
+
)
|
| 74 |
+
(ls_1): Identity()
|
| 75 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 76 |
+
(mlp): Sequential(
|
| 77 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 78 |
+
(gelu): GELU(approximate='none')
|
| 79 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 80 |
+
)
|
| 81 |
+
(ls_2): Identity()
|
| 82 |
+
)
|
| 83 |
+
(5): ResidualAttentionBlock(
|
| 84 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 85 |
+
(attn): MultiheadAttention(
|
| 86 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 87 |
+
)
|
| 88 |
+
(ls_1): Identity()
|
| 89 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 90 |
+
(mlp): Sequential(
|
| 91 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 92 |
+
(gelu): GELU(approximate='none')
|
| 93 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 94 |
+
)
|
| 95 |
+
(ls_2): Identity()
|
| 96 |
+
)
|
| 97 |
+
(6): ResidualAttentionBlock(
|
| 98 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 99 |
+
(attn): MultiheadAttention(
|
| 100 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 101 |
+
)
|
| 102 |
+
(ls_1): Identity()
|
| 103 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 104 |
+
(mlp): Sequential(
|
| 105 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 106 |
+
(gelu): GELU(approximate='none')
|
| 107 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 108 |
+
)
|
| 109 |
+
(ls_2): Identity()
|
| 110 |
+
)
|
| 111 |
+
(7): ResidualAttentionBlock(
|
| 112 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 113 |
+
(attn): MultiheadAttention(
|
| 114 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 115 |
+
)
|
| 116 |
+
(ls_1): Identity()
|
| 117 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 118 |
+
(mlp): Sequential(
|
| 119 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 120 |
+
(gelu): GELU(approximate='none')
|
| 121 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 122 |
+
)
|
| 123 |
+
(ls_2): Identity()
|
| 124 |
+
)
|
| 125 |
+
(8): ResidualAttentionBlock(
|
| 126 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 127 |
+
(attn): MultiheadAttention(
|
| 128 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 129 |
+
)
|
| 130 |
+
(ls_1): Identity()
|
| 131 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 132 |
+
(mlp): Sequential(
|
| 133 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 134 |
+
(gelu): GELU(approximate='none')
|
| 135 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 136 |
+
)
|
| 137 |
+
(ls_2): Identity()
|
| 138 |
+
)
|
| 139 |
+
(9): ResidualAttentionBlock(
|
| 140 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 141 |
+
(attn): MultiheadAttention(
|
| 142 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 143 |
+
)
|
| 144 |
+
(ls_1): Identity()
|
| 145 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 146 |
+
(mlp): Sequential(
|
| 147 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 148 |
+
(gelu): GELU(approximate='none')
|
| 149 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 150 |
+
)
|
| 151 |
+
(ls_2): Identity()
|
| 152 |
+
)
|
| 153 |
+
(10): ResidualAttentionBlock(
|
| 154 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 155 |
+
(attn): MultiheadAttention(
|
| 156 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 157 |
+
)
|
| 158 |
+
(ls_1): Identity()
|
| 159 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 160 |
+
(mlp): Sequential(
|
| 161 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 162 |
+
(gelu): GELU(approximate='none')
|
| 163 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 164 |
+
)
|
| 165 |
+
(ls_2): Identity()
|
| 166 |
+
)
|
| 167 |
+
(11): ResidualAttentionBlock(
|
| 168 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 169 |
+
(attn): MultiheadAttention(
|
| 170 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 171 |
+
)
|
| 172 |
+
(ls_1): Identity()
|
| 173 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 174 |
+
(mlp): Sequential(
|
| 175 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 176 |
+
(gelu): GELU(approximate='none')
|
| 177 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 178 |
+
)
|
| 179 |
+
(ls_2): Identity()
|
| 180 |
+
)
|
| 181 |
+
)
|
| 182 |
+
)
|
| 183 |
+
(ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 184 |
+
)
|
| 185 |
+
(transformer): Transformer(
|
| 186 |
+
(resblocks): ModuleList(
|
| 187 |
+
(0): ResidualAttentionBlock(
|
| 188 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 189 |
+
(attn): MultiheadAttention(
|
| 190 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 191 |
+
)
|
| 192 |
+
(ls_1): Identity()
|
| 193 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 194 |
+
(mlp): Sequential(
|
| 195 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 196 |
+
(gelu): GELU(approximate='none')
|
| 197 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 198 |
+
)
|
| 199 |
+
(ls_2): Identity()
|
| 200 |
+
)
|
| 201 |
+
(1): ResidualAttentionBlock(
|
| 202 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 203 |
+
(attn): MultiheadAttention(
|
| 204 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 205 |
+
)
|
| 206 |
+
(ls_1): Identity()
|
| 207 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 208 |
+
(mlp): Sequential(
|
| 209 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 210 |
+
(gelu): GELU(approximate='none')
|
| 211 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 212 |
+
)
|
| 213 |
+
(ls_2): Identity()
|
| 214 |
+
)
|
| 215 |
+
(2): ResidualAttentionBlock(
|
| 216 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 217 |
+
(attn): MultiheadAttention(
|
| 218 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 219 |
+
)
|
| 220 |
+
(ls_1): Identity()
|
| 221 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 222 |
+
(mlp): Sequential(
|
| 223 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 224 |
+
(gelu): GELU(approximate='none')
|
| 225 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 226 |
+
)
|
| 227 |
+
(ls_2): Identity()
|
| 228 |
+
)
|
| 229 |
+
(3): ResidualAttentionBlock(
|
| 230 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 231 |
+
(attn): MultiheadAttention(
|
| 232 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 233 |
+
)
|
| 234 |
+
(ls_1): Identity()
|
| 235 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 236 |
+
(mlp): Sequential(
|
| 237 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 238 |
+
(gelu): GELU(approximate='none')
|
| 239 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 240 |
+
)
|
| 241 |
+
(ls_2): Identity()
|
| 242 |
+
)
|
| 243 |
+
(4): ResidualAttentionBlock(
|
| 244 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 245 |
+
(attn): MultiheadAttention(
|
| 246 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 247 |
+
)
|
| 248 |
+
(ls_1): Identity()
|
| 249 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 250 |
+
(mlp): Sequential(
|
| 251 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 252 |
+
(gelu): GELU(approximate='none')
|
| 253 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 254 |
+
)
|
| 255 |
+
(ls_2): Identity()
|
| 256 |
+
)
|
| 257 |
+
(5): ResidualAttentionBlock(
|
| 258 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 259 |
+
(attn): MultiheadAttention(
|
| 260 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 261 |
+
)
|
| 262 |
+
(ls_1): Identity()
|
| 263 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 264 |
+
(mlp): Sequential(
|
| 265 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 266 |
+
(gelu): GELU(approximate='none')
|
| 267 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 268 |
+
)
|
| 269 |
+
(ls_2): Identity()
|
| 270 |
+
)
|
| 271 |
+
(6): ResidualAttentionBlock(
|
| 272 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 273 |
+
(attn): MultiheadAttention(
|
| 274 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 275 |
+
)
|
| 276 |
+
(ls_1): Identity()
|
| 277 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 278 |
+
(mlp): Sequential(
|
| 279 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 280 |
+
(gelu): GELU(approximate='none')
|
| 281 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 282 |
+
)
|
| 283 |
+
(ls_2): Identity()
|
| 284 |
+
)
|
| 285 |
+
(7): ResidualAttentionBlock(
|
| 286 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 287 |
+
(attn): MultiheadAttention(
|
| 288 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 289 |
+
)
|
| 290 |
+
(ls_1): Identity()
|
| 291 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 292 |
+
(mlp): Sequential(
|
| 293 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 294 |
+
(gelu): GELU(approximate='none')
|
| 295 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 296 |
+
)
|
| 297 |
+
(ls_2): Identity()
|
| 298 |
+
)
|
| 299 |
+
(8): ResidualAttentionBlock(
|
| 300 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 301 |
+
(attn): MultiheadAttention(
|
| 302 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 303 |
+
)
|
| 304 |
+
(ls_1): Identity()
|
| 305 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 306 |
+
(mlp): Sequential(
|
| 307 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 308 |
+
(gelu): GELU(approximate='none')
|
| 309 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 310 |
+
)
|
| 311 |
+
(ls_2): Identity()
|
| 312 |
+
)
|
| 313 |
+
(9): ResidualAttentionBlock(
|
| 314 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 315 |
+
(attn): MultiheadAttention(
|
| 316 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 317 |
+
)
|
| 318 |
+
(ls_1): Identity()
|
| 319 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 320 |
+
(mlp): Sequential(
|
| 321 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 322 |
+
(gelu): GELU(approximate='none')
|
| 323 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 324 |
+
)
|
| 325 |
+
(ls_2): Identity()
|
| 326 |
+
)
|
| 327 |
+
(10): ResidualAttentionBlock(
|
| 328 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 329 |
+
(attn): MultiheadAttention(
|
| 330 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 331 |
+
)
|
| 332 |
+
(ls_1): Identity()
|
| 333 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 334 |
+
(mlp): Sequential(
|
| 335 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 336 |
+
(gelu): GELU(approximate='none')
|
| 337 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 338 |
+
)
|
| 339 |
+
(ls_2): Identity()
|
| 340 |
+
)
|
| 341 |
+
(11): ResidualAttentionBlock(
|
| 342 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 343 |
+
(attn): MultiheadAttention(
|
| 344 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 345 |
+
)
|
| 346 |
+
(ls_1): Identity()
|
| 347 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 348 |
+
(mlp): Sequential(
|
| 349 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 350 |
+
(gelu): GELU(approximate='none')
|
| 351 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 352 |
+
)
|
| 353 |
+
(ls_2): Identity()
|
| 354 |
+
)
|
| 355 |
+
)
|
| 356 |
+
)
|
| 357 |
+
(token_embedding): Embedding(49408, 512)
|
| 358 |
+
(ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 359 |
+
)
|
| 360 |
+
2024-09-07,15:01:58 | INFO | Params:
|
| 361 |
+
2024-09-07,15:01:58 | INFO | accum_freq: 1
|
| 362 |
+
2024-09-07,15:01:58 | INFO | aug_cfg: {}
|
| 363 |
+
2024-09-07,15:01:58 | INFO | batch_size: 2048
|
| 364 |
+
2024-09-07,15:01:58 | INFO | beta1: 0.9
|
| 365 |
+
2024-09-07,15:01:58 | INFO | beta2: 0.98
|
| 366 |
+
2024-09-07,15:01:58 | INFO | checkpoint_path: /home/breaking_0.2_trained/20_most_difficult/checkpoints
|
| 367 |
+
2024-09-07,15:01:58 | INFO | coca_caption_loss_weight: 2.0
|
| 368 |
+
2024-09-07,15:01:58 | INFO | coca_contrastive_loss_weight: 1.0
|
| 369 |
+
2024-09-07,15:01:58 | INFO | copy_codebase: False
|
| 370 |
+
2024-09-07,15:01:58 | INFO | csv_caption_key: title
|
| 371 |
+
2024-09-07,15:01:58 | INFO | csv_img_key: filepath
|
| 372 |
+
2024-09-07,15:01:58 | INFO | csv_separator:
|
| 373 |
+
2024-09-07,15:01:58 | INFO | dataset_resampled: True
|
| 374 |
+
2024-09-07,15:01:58 | INFO | dataset_type: webdataset
|
| 375 |
+
2024-09-07,15:01:58 | INFO | ddp_static_graph: True
|
| 376 |
+
2024-09-07,15:01:58 | INFO | debug: False
|
| 377 |
+
2024-09-07,15:01:58 | INFO | delete_previous_checkpoint: False
|
| 378 |
+
2024-09-07,15:01:58 | INFO | device: cuda:0
|
| 379 |
+
2024-09-07,15:01:58 | INFO | dist_backend: nccl
|
| 380 |
+
2024-09-07,15:01:58 | INFO | dist_url: env://
|
| 381 |
+
2024-09-07,15:01:58 | INFO | distill: False
|
| 382 |
+
2024-09-07,15:01:58 | INFO | distill_model: None
|
| 383 |
+
2024-09-07,15:01:58 | INFO | distill_pretrained: None
|
| 384 |
+
2024-09-07,15:01:58 | INFO | distributed: True
|
| 385 |
+
2024-09-07,15:01:58 | INFO | epochs: 5
|
| 386 |
+
2024-09-07,15:01:58 | INFO | epochs_cooldown: None
|
| 387 |
+
2024-09-07,15:01:58 | INFO | eps: 1e-06
|
| 388 |
+
2024-09-07,15:01:58 | INFO | force_custom_text: False
|
| 389 |
+
2024-09-07,15:01:58 | INFO | force_image_size: None
|
| 390 |
+
2024-09-07,15:01:58 | INFO | force_patch_dropout: None
|
| 391 |
+
2024-09-07,15:01:58 | INFO | force_quick_gelu: False
|
| 392 |
+
2024-09-07,15:01:58 | INFO | gather_with_grad: True
|
| 393 |
+
2024-09-07,15:01:58 | INFO | grad_checkpointing: True
|
| 394 |
+
2024-09-07,15:01:58 | INFO | grad_clip_norm: None
|
| 395 |
+
2024-09-07,15:01:58 | INFO | horovod: False
|
| 396 |
+
2024-09-07,15:01:58 | INFO | image_mean: None
|
| 397 |
+
2024-09-07,15:01:58 | INFO | image_std: None
|
| 398 |
+
2024-09-07,15:01:58 | INFO | imagenet_v2: None
|
| 399 |
+
2024-09-07,15:01:58 | INFO | imagenet_val: None
|
| 400 |
+
2024-09-07,15:01:58 | INFO | local_loss: True
|
| 401 |
+
2024-09-07,15:01:58 | INFO | local_rank: 0
|
| 402 |
+
2024-09-07,15:01:58 | INFO | lock_image: False
|
| 403 |
+
2024-09-07,15:01:58 | INFO | lock_image_freeze_bn_stats: False
|
| 404 |
+
2024-09-07,15:01:58 | INFO | lock_image_unlocked_groups: 0
|
| 405 |
+
2024-09-07,15:01:58 | INFO | lock_text: False
|
| 406 |
+
2024-09-07,15:01:58 | INFO | lock_text_freeze_layer_norm: False
|
| 407 |
+
2024-09-07,15:01:58 | INFO | lock_text_unlocked_layers: 0
|
| 408 |
+
2024-09-07,15:01:58 | INFO | log_every_n_steps: 100
|
| 409 |
+
2024-09-07,15:01:58 | INFO | log_level: 20
|
| 410 |
+
2024-09-07,15:01:58 | INFO | log_local: False
|
| 411 |
+
2024-09-07,15:01:58 | INFO | log_path: /home/breaking_0.2_trained/20_most_difficult/out.log
|
| 412 |
+
2024-09-07,15:01:58 | INFO | logs: /home/breaking_0.2_trained
|
| 413 |
+
2024-09-07,15:01:58 | INFO | lr: 0.0005
|
| 414 |
+
2024-09-07,15:01:58 | INFO | lr_cooldown_end: 0.0
|
| 415 |
+
2024-09-07,15:01:58 | INFO | lr_cooldown_power: 1.0
|
| 416 |
+
2024-09-07,15:01:58 | INFO | lr_scheduler: cosine
|
| 417 |
+
2024-09-07,15:01:58 | INFO | model: ViT-B-32
|
| 418 |
+
2024-09-07,15:01:58 | INFO | name: 20_most_difficult
|
| 419 |
+
2024-09-07,15:01:58 | INFO | no_set_device_rank: False
|
| 420 |
+
2024-09-07,15:01:58 | INFO | precision: amp
|
| 421 |
+
2024-09-07,15:01:58 | INFO | pretrained:
|
| 422 |
+
2024-09-07,15:01:58 | INFO | pretrained_image: False
|
| 423 |
+
2024-09-07,15:01:58 | INFO | rank: 0
|
| 424 |
+
2024-09-07,15:01:58 | INFO | remote_sync: None
|
| 425 |
+
2024-09-07,15:01:58 | INFO | remote_sync_frequency: 300
|
| 426 |
+
2024-09-07,15:01:58 | INFO | remote_sync_protocol: s3
|
| 427 |
+
2024-09-07,15:01:58 | INFO | report_to: wandb
|
| 428 |
+
2024-09-07,15:01:58 | INFO | resume: None
|
| 429 |
+
2024-09-07,15:01:58 | INFO | save_frequency: 0
|
| 430 |
+
2024-09-07,15:01:58 | INFO | save_most_recent: True
|
| 431 |
+
2024-09-07,15:01:58 | INFO | seed: 0
|
| 432 |
+
2024-09-07,15:01:58 | INFO | skip_scheduler: False
|
| 433 |
+
2024-09-07,15:01:58 | INFO | tensorboard: False
|
| 434 |
+
2024-09-07,15:01:58 | INFO | tensorboard_path:
|
| 435 |
+
2024-09-07,15:01:58 | INFO | torchscript: False
|
| 436 |
+
2024-09-07,15:01:58 | INFO | trace: False
|
| 437 |
+
2024-09-07,15:01:58 | INFO | train_data: /home/breaking_0.2/{00000000..00000255}.tar
|
| 438 |
+
2024-09-07,15:01:58 | INFO | train_data_upsampling_factors: None
|
| 439 |
+
2024-09-07,15:01:58 | INFO | train_num_samples: 2560000
|
| 440 |
+
2024-09-07,15:01:58 | INFO | use_bn_sync: False
|
| 441 |
+
2024-09-07,15:01:58 | INFO | val_data: None
|
| 442 |
+
2024-09-07,15:01:58 | INFO | val_frequency: 1
|
| 443 |
+
2024-09-07,15:01:58 | INFO | val_num_samples: None
|
| 444 |
+
2024-09-07,15:01:58 | INFO | wandb: True
|
| 445 |
+
2024-09-07,15:01:58 | INFO | wandb_notes:
|
| 446 |
+
2024-09-07,15:01:58 | INFO | wandb_project_name: clip_text_hq_clusters
|
| 447 |
+
2024-09-07,15:01:58 | INFO | warmup: 500
|
| 448 |
+
2024-09-07,15:01:58 | INFO | wd: 0.2
|
| 449 |
+
2024-09-07,15:01:58 | INFO | workers: 4
|
| 450 |
+
2024-09-07,15:01:58 | INFO | world_size: 2
|
| 451 |
+
2024-09-07,15:01:58 | INFO | zeroshot_frequency: 2
|
| 452 |
+
2024-09-07,15:02:05 | INFO | Start epoch 0
|
| 453 |
+
2024-09-07,15:02:21 | INFO | Train Epoch: 0 [ 4096/2572288 (0%)] Data (t): 11.718 Batch (t): 16.351, 250.499/s, 125.250/s/gpu LR: 0.000001 Logit Scale: 14.286 Contrastive_loss: 8.3781 (8.3781) Loss: 8.3781 (8.3781)
|
| 454 |
+
2024-09-07,15:02:24 | INFO | Reducer buckets have been rebuilt in this iteration.
|
| 455 |
+
2024-09-07,15:06:41 | INFO | Train Epoch: 0 [ 413696/2572288 (16%)] Data (t): 0.550 Batch (t): 2.603, 1570.18/s, 785.090/s/gpu LR: 0.000101 Logit Scale: 14.268 Contrastive_loss: 8.2138 (8.2960) Loss: 8.2138 (8.2960)
|
| 456 |
+
2024-09-07,15:11:03 | INFO | Train Epoch: 0 [ 823296/2572288 (32%)] Data (t): 0.571 Batch (t): 2.618, 1567.20/s, 783.598/s/gpu LR: 0.000201 Logit Scale: 14.239 Contrastive_loss: 8.0085 (8.2002) Loss: 8.0085 (8.2002)
|
| 457 |
+
2024-09-07,15:15:25 | INFO | Train Epoch: 0 [1232896/2572288 (48%)] Data (t): 0.570 Batch (t): 2.619, 1562.40/s, 781.198/s/gpu LR: 0.000301 Logit Scale: 14.215 Contrastive_loss: 7.9885 (8.1472) Loss: 7.9885 (8.1472)
|
| 458 |
+
2024-09-07,15:19:47 | INFO | Train Epoch: 0 [1642496/2572288 (64%)] Data (t): 0.572 Batch (t): 2.621, 1569.02/s, 784.509/s/gpu LR: 0.000401 Logit Scale: 14.186 Contrastive_loss: 7.8949 (8.0968) Loss: 7.8949 (8.0968)
|
| 459 |
+
2024-09-07,15:24:09 | INFO | Train Epoch: 0 [2052096/2572288 (80%)] Data (t): 0.574 Batch (t): 2.623, 1559.36/s, 779.679/s/gpu LR: 0.000500 Logit Scale: 14.166 Contrastive_loss: 7.7856 (8.0449) Loss: 7.7856 (8.0449)
|
| 460 |
+
2024-09-07,15:28:32 | INFO | Train Epoch: 0 [2461696/2572288 (96%)] Data (t): 0.571 Batch (t): 2.621, 1563.18/s, 781.592/s/gpu LR: 0.000498 Logit Scale: 14.163 Contrastive_loss: 7.7069 (7.9966) Loss: 7.7069 (7.9966)
|
| 461 |
+
2024-09-07,15:29:42 | INFO | Train Epoch: 0 [2572288/2572288 (100%)] Data (t): 0.568 Batch (t): 2.617, 1567.22/s, 783.609/s/gpu LR: 0.000497 Logit Scale: 14.165 Contrastive_loss: 7.6217 (7.9498) Loss: 7.6217 (7.9498)
|
| 462 |
+
2024-09-07,15:29:45 | INFO | Start epoch 1
|
| 463 |
+
2024-09-07,15:29:56 | INFO | Train Epoch: 1 [ 4096/2572288 (0%)] Data (t): 9.731 Batch (t): 11.775, 347.851/s, 173.926/s/gpu LR: 0.000497 Logit Scale: 14.166 Contrastive_loss: 7.7216 (7.7216) Loss: 7.7216 (7.7216)
|
| 464 |
+
2024-09-07,15:34:16 | INFO | Train Epoch: 1 [ 413696/2572288 (16%)] Data (t): 0.536 Batch (t): 2.594, 1568.09/s, 784.043/s/gpu LR: 0.000491 Logit Scale: 14.175 Contrastive_loss: 7.4182 (7.5699) Loss: 7.4182 (7.5699)
|
| 465 |
+
2024-09-07,15:38:37 | INFO | Train Epoch: 1 [ 823296/2572288 (32%)] Data (t): 0.564 Batch (t): 2.615, 1567.48/s, 783.738/s/gpu LR: 0.000481 Logit Scale: 14.196 Contrastive_loss: 7.2808 (7.4736) Loss: 7.2808 (7.4736)
|
| 466 |
+
2024-09-07,15:42:59 | INFO | Train Epoch: 1 [1232896/2572288 (48%)] Data (t): 0.568 Batch (t): 2.619, 1562.06/s, 781.028/s/gpu LR: 0.000468 Logit Scale: 14.261 Contrastive_loss: 7.1869 (7.4019) Loss: 7.1869 (7.4019)
|
| 467 |
+
2024-09-07,15:47:21 | INFO | Train Epoch: 1 [1642496/2572288 (64%)] Data (t): 0.569 Batch (t): 2.620, 1557.55/s, 778.773/s/gpu LR: 0.000452 Logit Scale: 14.318 Contrastive_loss: 7.4643 (7.4144) Loss: 7.4643 (7.4144)
|
| 468 |
+
2024-09-07,15:51:44 | INFO | Train Epoch: 1 [2052096/2572288 (80%)] Data (t): 0.571 Batch (t): 2.623, 1564.56/s, 782.281/s/gpu LR: 0.000433 Logit Scale: 14.381 Contrastive_loss: 7.1883 (7.3767) Loss: 7.1883 (7.3767)
|
| 469 |
+
2024-09-07,15:56:06 | INFO | Train Epoch: 1 [2461696/2572288 (96%)] Data (t): 0.567 Batch (t): 2.619, 1567.35/s, 783.676/s/gpu LR: 0.000412 Logit Scale: 14.487 Contrastive_loss: 6.9582 (7.3169) Loss: 6.9582 (7.3169)
|
| 470 |
+
2024-09-07,15:57:16 | INFO | Train Epoch: 1 [2572288/2572288 (100%)] Data (t): 0.562 Batch (t): 2.613, 1570.30/s, 785.148/s/gpu LR: 0.000406 Logit Scale: 14.515 Contrastive_loss: 7.2587 (7.3096) Loss: 7.2587 (7.3096)
|
| 471 |
+
2024-09-07,15:57:19 | INFO | Start epoch 2
|
| 472 |
+
2024-09-07,15:57:30 | INFO | Train Epoch: 2 [ 4096/2572288 (0%)] Data (t): 9.654 Batch (t): 11.700, 350.096/s, 175.048/s/gpu LR: 0.000405 Logit Scale: 14.516 Contrastive_loss: 6.3539 (6.3539) Loss: 6.3539 (6.3539)
|
| 473 |
+
2024-09-07,16:01:51 | INFO | Train Epoch: 2 [ 413696/2572288 (16%)] Data (t): 0.545 Batch (t): 2.603, 1569.08/s, 784.538/s/gpu LR: 0.000381 Logit Scale: 14.594 Contrastive_loss: 6.7739 (6.5639) Loss: 6.7739 (6.5639)
|
| 474 |
+
2024-09-07,16:06:12 | INFO | Train Epoch: 2 [ 823296/2572288 (32%)] Data (t): 0.562 Batch (t): 2.614, 1562.66/s, 781.332/s/gpu LR: 0.000355 Logit Scale: 14.707 Contrastive_loss: 7.1770 (6.7683) Loss: 7.1770 (6.7683)
|
| 475 |
+
2024-09-07,16:10:34 | INFO | Train Epoch: 2 [1232896/2572288 (48%)] Data (t): 0.564 Batch (t): 2.616, 1568.04/s, 784.021/s/gpu LR: 0.000327 Logit Scale: 14.817 Contrastive_loss: 5.8889 (6.5484) Loss: 5.8889 (6.5484)
|
| 476 |
+
2024-09-07,16:14:55 | INFO | Train Epoch: 2 [1642496/2572288 (64%)] Data (t): 0.565 Batch (t): 2.617, 1552.16/s, 776.082/s/gpu LR: 0.000298 Logit Scale: 14.941 Contrastive_loss: 6.4689 (6.5325) Loss: 6.4689 (6.5325)
|
| 477 |
+
2024-09-07,16:19:17 | INFO | Train Epoch: 2 [2052096/2572288 (80%)] Data (t): 0.568 Batch (t): 2.619, 1567.95/s, 783.973/s/gpu LR: 0.000269 Logit Scale: 15.081 Contrastive_loss: 6.7018 (6.5607) Loss: 6.7018 (6.5607)
|
| 478 |
+
2024-09-07,16:23:39 | INFO | Train Epoch: 2 [2461696/2572288 (96%)] Data (t): 0.567 Batch (t): 2.619, 1570.34/s, 785.171/s/gpu LR: 0.000239 Logit Scale: 15.253 Contrastive_loss: 5.6558 (6.4315) Loss: 5.6558 (6.4315)
|
| 479 |
+
2024-09-07,16:24:50 | INFO | Train Epoch: 2 [2572288/2572288 (100%)] Data (t): 0.569 Batch (t): 2.619, 1577.17/s, 788.586/s/gpu LR: 0.000231 Logit Scale: 15.288 Contrastive_loss: 5.9572 (6.3722) Loss: 5.9572 (6.3722)
|
| 480 |
+
2024-09-07,16:24:53 | INFO | Start epoch 3
|
| 481 |
+
2024-09-07,16:25:04 | INFO | Train Epoch: 3 [ 4096/2572288 (0%)] Data (t): 9.601 Batch (t): 11.647, 351.669/s, 175.834/s/gpu LR: 0.000231 Logit Scale: 15.289 Contrastive_loss: 5.2771 (5.2771) Loss: 5.2771 (5.2771)
|
| 482 |
+
2024-09-07,16:29:25 | INFO | Train Epoch: 3 [ 413696/2572288 (16%)] Data (t): 0.545 Batch (t): 2.604, 1568.38/s, 784.188/s/gpu LR: 0.000202 Logit Scale: 15.412 Contrastive_loss: 5.9988 (5.6380) Loss: 5.9988 (5.6380)
|
| 483 |
+
2024-09-07,16:33:47 | INFO | Train Epoch: 3 [ 823296/2572288 (32%)] Data (t): 0.566 Batch (t): 2.618, 1560.75/s, 780.377/s/gpu LR: 0.000173 Logit Scale: 15.521 Contrastive_loss: 5.4016 (5.5592) Loss: 5.4016 (5.5592)
|
| 484 |
+
2024-09-07,16:38:09 | INFO | Train Epoch: 3 [1232896/2572288 (48%)] Data (t): 0.570 Batch (t): 2.623, 1564.46/s, 782.230/s/gpu LR: 0.000145 Logit Scale: 15.619 Contrastive_loss: 5.8930 (5.6426) Loss: 5.8930 (5.6426)
|
| 485 |
+
2024-09-07,16:42:31 | INFO | Train Epoch: 3 [1642496/2572288 (64%)] Data (t): 0.571 Batch (t): 2.624, 1563.74/s, 781.871/s/gpu LR: 0.000119 Logit Scale: 15.715 Contrastive_loss: 5.6896 (5.6520) Loss: 5.6896 (5.6520)
|
| 486 |
+
2024-09-07,16:46:53 | INFO | Train Epoch: 3 [2052096/2572288 (80%)] Data (t): 0.570 Batch (t): 2.622, 1563.71/s, 781.856/s/gpu LR: 0.000095 Logit Scale: 15.825 Contrastive_loss: 4.6158 (5.4793) Loss: 4.6158 (5.4793)
|
| 487 |
+
2024-09-07,16:51:16 | INFO | Train Epoch: 3 [2461696/2572288 (96%)] Data (t): 0.569 Batch (t): 2.621, 1568.02/s, 784.009/s/gpu LR: 0.000072 Logit Scale: 15.916 Contrastive_loss: 7.0717 (5.7068) Loss: 7.0717 (5.7068)
|
| 488 |
+
2024-09-07,16:52:26 | INFO | Train Epoch: 3 [2572288/2572288 (100%)] Data (t): 0.568 Batch (t): 2.619, 1574.39/s, 787.197/s/gpu LR: 0.000067 Logit Scale: 15.930 Contrastive_loss: 4.7214 (5.5836) Loss: 4.7214 (5.5836)
|
| 489 |
+
2024-09-07,16:52:29 | INFO | Start epoch 4
|
| 490 |
+
2024-09-07,16:52:41 | INFO | Train Epoch: 4 [ 4096/2572288 (0%)] Data (t): 9.741 Batch (t): 11.786, 347.518/s, 173.759/s/gpu LR: 0.000067 Logit Scale: 15.931 Contrastive_loss: 5.0148 (5.0148) Loss: 5.0148 (5.0148)
|
| 491 |
+
2024-09-07,16:57:02 | INFO | Train Epoch: 4 [ 413696/2572288 (16%)] Data (t): 0.547 Batch (t): 2.608, 1560.72/s, 780.360/s/gpu LR: 0.000048 Logit Scale: 16.003 Contrastive_loss: 4.1838 (4.5993) Loss: 4.1838 (4.5993)
|
| 492 |
+
2024-09-07,17:01:23 | INFO | Train Epoch: 4 [ 823296/2572288 (32%)] Data (t): 0.565 Batch (t): 2.618, 1567.23/s, 783.616/s/gpu LR: 0.000032 Logit Scale: 16.047 Contrastive_loss: 6.0231 (5.0739) Loss: 6.0231 (5.0739)
|
| 493 |
+
2024-09-07,17:05:45 | INFO | Train Epoch: 4 [1232896/2572288 (48%)] Data (t): 0.567 Batch (t): 2.619, 1568.47/s, 784.237/s/gpu LR: 0.000019 Logit Scale: 16.070 Contrastive_loss: 3.8941 (4.7790) Loss: 3.8941 (4.7790)
|
| 494 |
+
2024-09-07,17:10:07 | INFO | Train Epoch: 4 [1642496/2572288 (64%)] Data (t): 0.569 Batch (t): 2.622, 1563.62/s, 781.809/s/gpu LR: 0.000009 Logit Scale: 16.084 Contrastive_loss: 4.7862 (4.7804) Loss: 4.7862 (4.7804)
|
| 495 |
+
2024-09-07,17:14:29 | INFO | Train Epoch: 4 [2052096/2572288 (80%)] Data (t): 0.561 Batch (t): 2.613, 1568.88/s, 784.442/s/gpu LR: 0.000003 Logit Scale: 16.091 Contrastive_loss: 4.5866 (4.7481) Loss: 4.5866 (4.7481)
|
| 496 |
+
2024-09-07,17:18:51 | INFO | Train Epoch: 4 [2461696/2572288 (96%)] Data (t): 0.566 Batch (t): 2.618, 1566.74/s, 783.371/s/gpu LR: 0.000000 Logit Scale: 16.092 Contrastive_loss: 4.5607 (4.7213) Loss: 4.5607 (4.7213)
|
| 497 |
+
2024-09-07,17:20:01 | INFO | Train Epoch: 4 [2572288/2572288 (100%)] Data (t): 0.563 Batch (t): 2.615, 1575.18/s, 787.591/s/gpu LR: 0.000000 Logit Scale: 16.092 Contrastive_loss: 4.8199 (4.7336) Loss: 4.8199 (4.7336)
|
breaking_0.2_trained/20_most_difficult/params.txt
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accum_freq: 1
|
| 2 |
+
aug_cfg: {}
|
| 3 |
+
batch_size: 2048
|
| 4 |
+
beta1: 0.9
|
| 5 |
+
beta2: 0.98
|
| 6 |
+
checkpoint_path: /home/breaking_0.2_trained/20_most_difficult/checkpoints
|
| 7 |
+
coca_caption_loss_weight: 2.0
|
| 8 |
+
coca_contrastive_loss_weight: 1.0
|
| 9 |
+
copy_codebase: False
|
| 10 |
+
csv_caption_key: title
|
| 11 |
+
csv_img_key: filepath
|
| 12 |
+
csv_separator:
|
| 13 |
+
dataset_resampled: True
|
| 14 |
+
dataset_type: webdataset
|
| 15 |
+
ddp_static_graph: True
|
| 16 |
+
debug: False
|
| 17 |
+
delete_previous_checkpoint: False
|
| 18 |
+
device: cuda:0
|
| 19 |
+
dist_backend: nccl
|
| 20 |
+
dist_url: env://
|
| 21 |
+
distill: False
|
| 22 |
+
distill_model: None
|
| 23 |
+
distill_pretrained: None
|
| 24 |
+
distributed: True
|
| 25 |
+
epochs: 5
|
| 26 |
+
epochs_cooldown: None
|
| 27 |
+
eps: 1e-06
|
| 28 |
+
force_custom_text: False
|
| 29 |
+
force_image_size: None
|
| 30 |
+
force_patch_dropout: None
|
| 31 |
+
force_quick_gelu: False
|
| 32 |
+
gather_with_grad: True
|
| 33 |
+
grad_checkpointing: True
|
| 34 |
+
grad_clip_norm: None
|
| 35 |
+
horovod: False
|
| 36 |
+
image_mean: None
|
| 37 |
+
image_std: None
|
| 38 |
+
imagenet_v2: None
|
| 39 |
+
imagenet_val: None
|
| 40 |
+
local_loss: True
|
| 41 |
+
local_rank: 0
|
| 42 |
+
lock_image: False
|
| 43 |
+
lock_image_freeze_bn_stats: False
|
| 44 |
+
lock_image_unlocked_groups: 0
|
| 45 |
+
lock_text: False
|
| 46 |
+
lock_text_freeze_layer_norm: False
|
| 47 |
+
lock_text_unlocked_layers: 0
|
| 48 |
+
log_every_n_steps: 100
|
| 49 |
+
log_level: 20
|
| 50 |
+
log_local: False
|
| 51 |
+
log_path: /home/breaking_0.2_trained/20_most_difficult/out.log
|
| 52 |
+
logs: /home/breaking_0.2_trained
|
| 53 |
+
lr: 0.0005
|
| 54 |
+
lr_cooldown_end: 0.0
|
| 55 |
+
lr_cooldown_power: 1.0
|
| 56 |
+
lr_scheduler: cosine
|
| 57 |
+
model: ViT-B-32
|
| 58 |
+
name: 20_most_difficult
|
| 59 |
+
no_set_device_rank: False
|
| 60 |
+
precision: amp
|
| 61 |
+
pretrained:
|
| 62 |
+
pretrained_image: False
|
| 63 |
+
rank: 0
|
| 64 |
+
remote_sync: None
|
| 65 |
+
remote_sync_frequency: 300
|
| 66 |
+
remote_sync_protocol: s3
|
| 67 |
+
report_to: wandb
|
| 68 |
+
resume: None
|
| 69 |
+
save_frequency: 0
|
| 70 |
+
save_most_recent: True
|
| 71 |
+
seed: 0
|
| 72 |
+
skip_scheduler: False
|
| 73 |
+
tensorboard: False
|
| 74 |
+
tensorboard_path:
|
| 75 |
+
torchscript: False
|
| 76 |
+
trace: False
|
| 77 |
+
train_data: /home/breaking_0.2/{00000000..00000255}.tar
|
| 78 |
+
train_data_upsampling_factors: None
|
| 79 |
+
train_num_samples: 2560000
|
| 80 |
+
use_bn_sync: False
|
| 81 |
+
val_data: None
|
| 82 |
+
val_frequency: 1
|
| 83 |
+
val_num_samples: None
|
| 84 |
+
wandb: True
|
| 85 |
+
wandb_notes:
|
| 86 |
+
wandb_project_name: clip_text_hq_clusters
|
| 87 |
+
warmup: 500
|
| 88 |
+
wd: 0.2
|
| 89 |
+
workers: 4
|
| 90 |
+
world_size: 2
|
| 91 |
+
zeroshot_frequency: 2
|
breaking_0.3_trained/30_most_difficult/checkpoints/epoch_5.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3f0e909e344a50a759f490b53fbaf3ab0bb0fb2364fd4cec2500135d0a4c9b3
|
| 3 |
+
size 1815701601
|
breaking_0.3_trained/30_most_difficult/checkpoints/epoch_latest.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34575a2cda748dc2c29bba7ac0443fa8015e858659b59c886e3e1b0646d6a3fb
|
| 3 |
+
size 1815639289
|
breaking_0.3_trained/30_most_difficult/info.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e3ea02871cba186f32fa465005ea43a506242185c4f13a99549cfaec303d622
|
| 3 |
+
size 321
|
breaking_0.3_trained/30_most_difficult/out.log
ADDED
|
@@ -0,0 +1,497 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2024-09-07,07:15:56 | INFO | No latest resume checkpoint found in /home/breaking_0.3_trained/30_most_difficult/checkpoints.
|
| 2 |
+
2024-09-07,07:15:57 | INFO | Running in distributed mode with multiple processes. Device: cuda:0.Process (global: 0, local 0), total 2.
|
| 3 |
+
2024-09-07,07:15:57 | INFO | Loaded ViT-B-32 model config.
|
| 4 |
+
2024-09-07,07:15:58 | INFO | Model:
|
| 5 |
+
2024-09-07,07:15:58 | INFO | CLIP(
|
| 6 |
+
(visual): VisionTransformer(
|
| 7 |
+
(patchnorm_pre_ln): Identity()
|
| 8 |
+
(conv1): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32), bias=False)
|
| 9 |
+
(patch_dropout): Identity()
|
| 10 |
+
(ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 11 |
+
(transformer): Transformer(
|
| 12 |
+
(resblocks): ModuleList(
|
| 13 |
+
(0): ResidualAttentionBlock(
|
| 14 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 15 |
+
(attn): MultiheadAttention(
|
| 16 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 17 |
+
)
|
| 18 |
+
(ls_1): Identity()
|
| 19 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 20 |
+
(mlp): Sequential(
|
| 21 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 22 |
+
(gelu): GELU(approximate='none')
|
| 23 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 24 |
+
)
|
| 25 |
+
(ls_2): Identity()
|
| 26 |
+
)
|
| 27 |
+
(1): ResidualAttentionBlock(
|
| 28 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 29 |
+
(attn): MultiheadAttention(
|
| 30 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 31 |
+
)
|
| 32 |
+
(ls_1): Identity()
|
| 33 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 34 |
+
(mlp): Sequential(
|
| 35 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 36 |
+
(gelu): GELU(approximate='none')
|
| 37 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 38 |
+
)
|
| 39 |
+
(ls_2): Identity()
|
| 40 |
+
)
|
| 41 |
+
(2): ResidualAttentionBlock(
|
| 42 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 43 |
+
(attn): MultiheadAttention(
|
| 44 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 45 |
+
)
|
| 46 |
+
(ls_1): Identity()
|
| 47 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 48 |
+
(mlp): Sequential(
|
| 49 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 50 |
+
(gelu): GELU(approximate='none')
|
| 51 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 52 |
+
)
|
| 53 |
+
(ls_2): Identity()
|
| 54 |
+
)
|
| 55 |
+
(3): ResidualAttentionBlock(
|
| 56 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 57 |
+
(attn): MultiheadAttention(
|
| 58 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 59 |
+
)
|
| 60 |
+
(ls_1): Identity()
|
| 61 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 62 |
+
(mlp): Sequential(
|
| 63 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 64 |
+
(gelu): GELU(approximate='none')
|
| 65 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 66 |
+
)
|
| 67 |
+
(ls_2): Identity()
|
| 68 |
+
)
|
| 69 |
+
(4): ResidualAttentionBlock(
|
| 70 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 71 |
+
(attn): MultiheadAttention(
|
| 72 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 73 |
+
)
|
| 74 |
+
(ls_1): Identity()
|
| 75 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 76 |
+
(mlp): Sequential(
|
| 77 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 78 |
+
(gelu): GELU(approximate='none')
|
| 79 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 80 |
+
)
|
| 81 |
+
(ls_2): Identity()
|
| 82 |
+
)
|
| 83 |
+
(5): ResidualAttentionBlock(
|
| 84 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 85 |
+
(attn): MultiheadAttention(
|
| 86 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 87 |
+
)
|
| 88 |
+
(ls_1): Identity()
|
| 89 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 90 |
+
(mlp): Sequential(
|
| 91 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 92 |
+
(gelu): GELU(approximate='none')
|
| 93 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 94 |
+
)
|
| 95 |
+
(ls_2): Identity()
|
| 96 |
+
)
|
| 97 |
+
(6): ResidualAttentionBlock(
|
| 98 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 99 |
+
(attn): MultiheadAttention(
|
| 100 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 101 |
+
)
|
| 102 |
+
(ls_1): Identity()
|
| 103 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 104 |
+
(mlp): Sequential(
|
| 105 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 106 |
+
(gelu): GELU(approximate='none')
|
| 107 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 108 |
+
)
|
| 109 |
+
(ls_2): Identity()
|
| 110 |
+
)
|
| 111 |
+
(7): ResidualAttentionBlock(
|
| 112 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 113 |
+
(attn): MultiheadAttention(
|
| 114 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 115 |
+
)
|
| 116 |
+
(ls_1): Identity()
|
| 117 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 118 |
+
(mlp): Sequential(
|
| 119 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 120 |
+
(gelu): GELU(approximate='none')
|
| 121 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 122 |
+
)
|
| 123 |
+
(ls_2): Identity()
|
| 124 |
+
)
|
| 125 |
+
(8): ResidualAttentionBlock(
|
| 126 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 127 |
+
(attn): MultiheadAttention(
|
| 128 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 129 |
+
)
|
| 130 |
+
(ls_1): Identity()
|
| 131 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 132 |
+
(mlp): Sequential(
|
| 133 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 134 |
+
(gelu): GELU(approximate='none')
|
| 135 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 136 |
+
)
|
| 137 |
+
(ls_2): Identity()
|
| 138 |
+
)
|
| 139 |
+
(9): ResidualAttentionBlock(
|
| 140 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 141 |
+
(attn): MultiheadAttention(
|
| 142 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 143 |
+
)
|
| 144 |
+
(ls_1): Identity()
|
| 145 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 146 |
+
(mlp): Sequential(
|
| 147 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 148 |
+
(gelu): GELU(approximate='none')
|
| 149 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 150 |
+
)
|
| 151 |
+
(ls_2): Identity()
|
| 152 |
+
)
|
| 153 |
+
(10): ResidualAttentionBlock(
|
| 154 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 155 |
+
(attn): MultiheadAttention(
|
| 156 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 157 |
+
)
|
| 158 |
+
(ls_1): Identity()
|
| 159 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 160 |
+
(mlp): Sequential(
|
| 161 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 162 |
+
(gelu): GELU(approximate='none')
|
| 163 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 164 |
+
)
|
| 165 |
+
(ls_2): Identity()
|
| 166 |
+
)
|
| 167 |
+
(11): ResidualAttentionBlock(
|
| 168 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 169 |
+
(attn): MultiheadAttention(
|
| 170 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 171 |
+
)
|
| 172 |
+
(ls_1): Identity()
|
| 173 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 174 |
+
(mlp): Sequential(
|
| 175 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 176 |
+
(gelu): GELU(approximate='none')
|
| 177 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 178 |
+
)
|
| 179 |
+
(ls_2): Identity()
|
| 180 |
+
)
|
| 181 |
+
)
|
| 182 |
+
)
|
| 183 |
+
(ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 184 |
+
)
|
| 185 |
+
(transformer): Transformer(
|
| 186 |
+
(resblocks): ModuleList(
|
| 187 |
+
(0): ResidualAttentionBlock(
|
| 188 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 189 |
+
(attn): MultiheadAttention(
|
| 190 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 191 |
+
)
|
| 192 |
+
(ls_1): Identity()
|
| 193 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 194 |
+
(mlp): Sequential(
|
| 195 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 196 |
+
(gelu): GELU(approximate='none')
|
| 197 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 198 |
+
)
|
| 199 |
+
(ls_2): Identity()
|
| 200 |
+
)
|
| 201 |
+
(1): ResidualAttentionBlock(
|
| 202 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 203 |
+
(attn): MultiheadAttention(
|
| 204 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 205 |
+
)
|
| 206 |
+
(ls_1): Identity()
|
| 207 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 208 |
+
(mlp): Sequential(
|
| 209 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 210 |
+
(gelu): GELU(approximate='none')
|
| 211 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 212 |
+
)
|
| 213 |
+
(ls_2): Identity()
|
| 214 |
+
)
|
| 215 |
+
(2): ResidualAttentionBlock(
|
| 216 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 217 |
+
(attn): MultiheadAttention(
|
| 218 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 219 |
+
)
|
| 220 |
+
(ls_1): Identity()
|
| 221 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 222 |
+
(mlp): Sequential(
|
| 223 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 224 |
+
(gelu): GELU(approximate='none')
|
| 225 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 226 |
+
)
|
| 227 |
+
(ls_2): Identity()
|
| 228 |
+
)
|
| 229 |
+
(3): ResidualAttentionBlock(
|
| 230 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 231 |
+
(attn): MultiheadAttention(
|
| 232 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 233 |
+
)
|
| 234 |
+
(ls_1): Identity()
|
| 235 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 236 |
+
(mlp): Sequential(
|
| 237 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 238 |
+
(gelu): GELU(approximate='none')
|
| 239 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 240 |
+
)
|
| 241 |
+
(ls_2): Identity()
|
| 242 |
+
)
|
| 243 |
+
(4): ResidualAttentionBlock(
|
| 244 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 245 |
+
(attn): MultiheadAttention(
|
| 246 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 247 |
+
)
|
| 248 |
+
(ls_1): Identity()
|
| 249 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 250 |
+
(mlp): Sequential(
|
| 251 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 252 |
+
(gelu): GELU(approximate='none')
|
| 253 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 254 |
+
)
|
| 255 |
+
(ls_2): Identity()
|
| 256 |
+
)
|
| 257 |
+
(5): ResidualAttentionBlock(
|
| 258 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 259 |
+
(attn): MultiheadAttention(
|
| 260 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 261 |
+
)
|
| 262 |
+
(ls_1): Identity()
|
| 263 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 264 |
+
(mlp): Sequential(
|
| 265 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 266 |
+
(gelu): GELU(approximate='none')
|
| 267 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 268 |
+
)
|
| 269 |
+
(ls_2): Identity()
|
| 270 |
+
)
|
| 271 |
+
(6): ResidualAttentionBlock(
|
| 272 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 273 |
+
(attn): MultiheadAttention(
|
| 274 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 275 |
+
)
|
| 276 |
+
(ls_1): Identity()
|
| 277 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 278 |
+
(mlp): Sequential(
|
| 279 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 280 |
+
(gelu): GELU(approximate='none')
|
| 281 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 282 |
+
)
|
| 283 |
+
(ls_2): Identity()
|
| 284 |
+
)
|
| 285 |
+
(7): ResidualAttentionBlock(
|
| 286 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 287 |
+
(attn): MultiheadAttention(
|
| 288 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 289 |
+
)
|
| 290 |
+
(ls_1): Identity()
|
| 291 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 292 |
+
(mlp): Sequential(
|
| 293 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 294 |
+
(gelu): GELU(approximate='none')
|
| 295 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 296 |
+
)
|
| 297 |
+
(ls_2): Identity()
|
| 298 |
+
)
|
| 299 |
+
(8): ResidualAttentionBlock(
|
| 300 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 301 |
+
(attn): MultiheadAttention(
|
| 302 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 303 |
+
)
|
| 304 |
+
(ls_1): Identity()
|
| 305 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 306 |
+
(mlp): Sequential(
|
| 307 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 308 |
+
(gelu): GELU(approximate='none')
|
| 309 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 310 |
+
)
|
| 311 |
+
(ls_2): Identity()
|
| 312 |
+
)
|
| 313 |
+
(9): ResidualAttentionBlock(
|
| 314 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 315 |
+
(attn): MultiheadAttention(
|
| 316 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 317 |
+
)
|
| 318 |
+
(ls_1): Identity()
|
| 319 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 320 |
+
(mlp): Sequential(
|
| 321 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 322 |
+
(gelu): GELU(approximate='none')
|
| 323 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 324 |
+
)
|
| 325 |
+
(ls_2): Identity()
|
| 326 |
+
)
|
| 327 |
+
(10): ResidualAttentionBlock(
|
| 328 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 329 |
+
(attn): MultiheadAttention(
|
| 330 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 331 |
+
)
|
| 332 |
+
(ls_1): Identity()
|
| 333 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 334 |
+
(mlp): Sequential(
|
| 335 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 336 |
+
(gelu): GELU(approximate='none')
|
| 337 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 338 |
+
)
|
| 339 |
+
(ls_2): Identity()
|
| 340 |
+
)
|
| 341 |
+
(11): ResidualAttentionBlock(
|
| 342 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 343 |
+
(attn): MultiheadAttention(
|
| 344 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 345 |
+
)
|
| 346 |
+
(ls_1): Identity()
|
| 347 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 348 |
+
(mlp): Sequential(
|
| 349 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 350 |
+
(gelu): GELU(approximate='none')
|
| 351 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 352 |
+
)
|
| 353 |
+
(ls_2): Identity()
|
| 354 |
+
)
|
| 355 |
+
)
|
| 356 |
+
)
|
| 357 |
+
(token_embedding): Embedding(49408, 512)
|
| 358 |
+
(ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 359 |
+
)
|
| 360 |
+
2024-09-07,07:15:58 | INFO | Params:
|
| 361 |
+
2024-09-07,07:15:58 | INFO | accum_freq: 1
|
| 362 |
+
2024-09-07,07:15:58 | INFO | aug_cfg: {}
|
| 363 |
+
2024-09-07,07:15:58 | INFO | batch_size: 2048
|
| 364 |
+
2024-09-07,07:15:58 | INFO | beta1: 0.9
|
| 365 |
+
2024-09-07,07:15:58 | INFO | beta2: 0.98
|
| 366 |
+
2024-09-07,07:15:58 | INFO | checkpoint_path: /home/breaking_0.3_trained/30_most_difficult/checkpoints
|
| 367 |
+
2024-09-07,07:15:58 | INFO | coca_caption_loss_weight: 2.0
|
| 368 |
+
2024-09-07,07:15:58 | INFO | coca_contrastive_loss_weight: 1.0
|
| 369 |
+
2024-09-07,07:15:58 | INFO | copy_codebase: False
|
| 370 |
+
2024-09-07,07:15:58 | INFO | csv_caption_key: title
|
| 371 |
+
2024-09-07,07:15:58 | INFO | csv_img_key: filepath
|
| 372 |
+
2024-09-07,07:15:58 | INFO | csv_separator:
|
| 373 |
+
2024-09-07,07:15:58 | INFO | dataset_resampled: True
|
| 374 |
+
2024-09-07,07:15:58 | INFO | dataset_type: webdataset
|
| 375 |
+
2024-09-07,07:15:58 | INFO | ddp_static_graph: True
|
| 376 |
+
2024-09-07,07:15:58 | INFO | debug: False
|
| 377 |
+
2024-09-07,07:15:58 | INFO | delete_previous_checkpoint: False
|
| 378 |
+
2024-09-07,07:15:58 | INFO | device: cuda:0
|
| 379 |
+
2024-09-07,07:15:58 | INFO | dist_backend: nccl
|
| 380 |
+
2024-09-07,07:15:58 | INFO | dist_url: env://
|
| 381 |
+
2024-09-07,07:15:58 | INFO | distill: False
|
| 382 |
+
2024-09-07,07:15:58 | INFO | distill_model: None
|
| 383 |
+
2024-09-07,07:15:58 | INFO | distill_pretrained: None
|
| 384 |
+
2024-09-07,07:15:58 | INFO | distributed: True
|
| 385 |
+
2024-09-07,07:15:58 | INFO | epochs: 5
|
| 386 |
+
2024-09-07,07:15:58 | INFO | epochs_cooldown: None
|
| 387 |
+
2024-09-07,07:15:58 | INFO | eps: 1e-06
|
| 388 |
+
2024-09-07,07:15:58 | INFO | force_custom_text: False
|
| 389 |
+
2024-09-07,07:15:58 | INFO | force_image_size: None
|
| 390 |
+
2024-09-07,07:15:58 | INFO | force_patch_dropout: None
|
| 391 |
+
2024-09-07,07:15:58 | INFO | force_quick_gelu: False
|
| 392 |
+
2024-09-07,07:15:58 | INFO | gather_with_grad: True
|
| 393 |
+
2024-09-07,07:15:58 | INFO | grad_checkpointing: True
|
| 394 |
+
2024-09-07,07:15:58 | INFO | grad_clip_norm: None
|
| 395 |
+
2024-09-07,07:15:58 | INFO | horovod: False
|
| 396 |
+
2024-09-07,07:15:58 | INFO | image_mean: None
|
| 397 |
+
2024-09-07,07:15:58 | INFO | image_std: None
|
| 398 |
+
2024-09-07,07:15:58 | INFO | imagenet_v2: None
|
| 399 |
+
2024-09-07,07:15:58 | INFO | imagenet_val: None
|
| 400 |
+
2024-09-07,07:15:58 | INFO | local_loss: True
|
| 401 |
+
2024-09-07,07:15:58 | INFO | local_rank: 0
|
| 402 |
+
2024-09-07,07:15:58 | INFO | lock_image: False
|
| 403 |
+
2024-09-07,07:15:58 | INFO | lock_image_freeze_bn_stats: False
|
| 404 |
+
2024-09-07,07:15:58 | INFO | lock_image_unlocked_groups: 0
|
| 405 |
+
2024-09-07,07:15:58 | INFO | lock_text: False
|
| 406 |
+
2024-09-07,07:15:58 | INFO | lock_text_freeze_layer_norm: False
|
| 407 |
+
2024-09-07,07:15:58 | INFO | lock_text_unlocked_layers: 0
|
| 408 |
+
2024-09-07,07:15:58 | INFO | log_every_n_steps: 100
|
| 409 |
+
2024-09-07,07:15:58 | INFO | log_level: 20
|
| 410 |
+
2024-09-07,07:15:58 | INFO | log_local: False
|
| 411 |
+
2024-09-07,07:15:58 | INFO | log_path: /home/breaking_0.3_trained/30_most_difficult/out.log
|
| 412 |
+
2024-09-07,07:15:58 | INFO | logs: /home/breaking_0.3_trained
|
| 413 |
+
2024-09-07,07:15:58 | INFO | lr: 0.0005
|
| 414 |
+
2024-09-07,07:15:58 | INFO | lr_cooldown_end: 0.0
|
| 415 |
+
2024-09-07,07:15:58 | INFO | lr_cooldown_power: 1.0
|
| 416 |
+
2024-09-07,07:15:58 | INFO | lr_scheduler: cosine
|
| 417 |
+
2024-09-07,07:15:58 | INFO | model: ViT-B-32
|
| 418 |
+
2024-09-07,07:15:58 | INFO | name: 30_most_difficult
|
| 419 |
+
2024-09-07,07:15:58 | INFO | no_set_device_rank: False
|
| 420 |
+
2024-09-07,07:15:58 | INFO | precision: amp
|
| 421 |
+
2024-09-07,07:15:58 | INFO | pretrained:
|
| 422 |
+
2024-09-07,07:15:58 | INFO | pretrained_image: False
|
| 423 |
+
2024-09-07,07:15:58 | INFO | rank: 0
|
| 424 |
+
2024-09-07,07:15:58 | INFO | remote_sync: None
|
| 425 |
+
2024-09-07,07:15:58 | INFO | remote_sync_frequency: 300
|
| 426 |
+
2024-09-07,07:15:58 | INFO | remote_sync_protocol: s3
|
| 427 |
+
2024-09-07,07:15:58 | INFO | report_to: wandb
|
| 428 |
+
2024-09-07,07:15:58 | INFO | resume: None
|
| 429 |
+
2024-09-07,07:15:58 | INFO | save_frequency: 0
|
| 430 |
+
2024-09-07,07:15:58 | INFO | save_most_recent: True
|
| 431 |
+
2024-09-07,07:15:58 | INFO | seed: 0
|
| 432 |
+
2024-09-07,07:15:58 | INFO | skip_scheduler: False
|
| 433 |
+
2024-09-07,07:15:58 | INFO | tensorboard: False
|
| 434 |
+
2024-09-07,07:15:58 | INFO | tensorboard_path:
|
| 435 |
+
2024-09-07,07:15:58 | INFO | torchscript: False
|
| 436 |
+
2024-09-07,07:15:58 | INFO | trace: False
|
| 437 |
+
2024-09-07,07:15:58 | INFO | train_data: /home/breaking_0.3/{00000000..00000335}.tar
|
| 438 |
+
2024-09-07,07:15:58 | INFO | train_data_upsampling_factors: None
|
| 439 |
+
2024-09-07,07:15:58 | INFO | train_num_samples: 2560000
|
| 440 |
+
2024-09-07,07:15:58 | INFO | use_bn_sync: False
|
| 441 |
+
2024-09-07,07:15:58 | INFO | val_data: None
|
| 442 |
+
2024-09-07,07:15:58 | INFO | val_frequency: 1
|
| 443 |
+
2024-09-07,07:15:58 | INFO | val_num_samples: None
|
| 444 |
+
2024-09-07,07:15:58 | INFO | wandb: True
|
| 445 |
+
2024-09-07,07:15:58 | INFO | wandb_notes:
|
| 446 |
+
2024-09-07,07:15:58 | INFO | wandb_project_name: clip_text_hq_clusters
|
| 447 |
+
2024-09-07,07:15:58 | INFO | warmup: 500
|
| 448 |
+
2024-09-07,07:15:58 | INFO | wd: 0.2
|
| 449 |
+
2024-09-07,07:15:58 | INFO | workers: 4
|
| 450 |
+
2024-09-07,07:15:58 | INFO | world_size: 2
|
| 451 |
+
2024-09-07,07:15:58 | INFO | zeroshot_frequency: 2
|
| 452 |
+
2024-09-07,07:16:05 | INFO | Start epoch 0
|
| 453 |
+
2024-09-07,07:16:21 | INFO | Train Epoch: 0 [ 4096/2572288 (0%)] Data (t): 11.587 Batch (t): 16.138, 253.817/s, 126.909/s/gpu LR: 0.000001 Logit Scale: 14.286 Contrastive_loss: 8.3770 (8.3770) Loss: 8.3770 (8.3770)
|
| 454 |
+
2024-09-07,07:16:24 | INFO | Reducer buckets have been rebuilt in this iteration.
|
| 455 |
+
2024-09-07,07:20:41 | INFO | Train Epoch: 0 [ 413696/2572288 (16%)] Data (t): 0.546 Batch (t): 2.597, 1566.76/s, 783.379/s/gpu LR: 0.000101 Logit Scale: 14.267 Contrastive_loss: 8.2295 (8.3032) Loss: 8.2295 (8.3032)
|
| 456 |
+
2024-09-07,07:25:02 | INFO | Train Epoch: 0 [ 823296/2572288 (32%)] Data (t): 0.564 Batch (t): 2.611, 1568.76/s, 784.381/s/gpu LR: 0.000201 Logit Scale: 14.229 Contrastive_loss: 8.0991 (8.2352) Loss: 8.0991 (8.2352)
|
| 457 |
+
2024-09-07,07:29:23 | INFO | Train Epoch: 0 [1232896/2572288 (48%)] Data (t): 0.567 Batch (t): 2.615, 1561.64/s, 780.819/s/gpu LR: 0.000301 Logit Scale: 14.203 Contrastive_loss: 8.0503 (8.1890) Loss: 8.0503 (8.1890)
|
| 458 |
+
2024-09-07,07:33:45 | INFO | Train Epoch: 0 [1642496/2572288 (64%)] Data (t): 0.570 Batch (t): 2.618, 1564.16/s, 782.079/s/gpu LR: 0.000401 Logit Scale: 14.176 Contrastive_loss: 7.9354 (8.1382) Loss: 7.9354 (8.1382)
|
| 459 |
+
2024-09-07,07:38:07 | INFO | Train Epoch: 0 [2052096/2572288 (80%)] Data (t): 0.568 Batch (t): 2.615, 1569.69/s, 784.843/s/gpu LR: 0.000500 Logit Scale: 14.147 Contrastive_loss: 7.8547 (8.0910) Loss: 7.8547 (8.0910)
|
| 460 |
+
2024-09-07,07:42:28 | INFO | Train Epoch: 0 [2461696/2572288 (96%)] Data (t): 0.567 Batch (t): 2.616, 1570.76/s, 785.381/s/gpu LR: 0.000498 Logit Scale: 14.128 Contrastive_loss: 7.7547 (8.0430) Loss: 7.7547 (8.0430)
|
| 461 |
+
2024-09-07,07:43:39 | INFO | Train Epoch: 0 [2572288/2572288 (100%)] Data (t): 0.565 Batch (t): 2.613, 1578.30/s, 789.150/s/gpu LR: 0.000497 Logit Scale: 14.121 Contrastive_loss: 7.7028 (8.0004) Loss: 7.7028 (8.0004)
|
| 462 |
+
2024-09-07,07:43:41 | INFO | Start epoch 1
|
| 463 |
+
2024-09-07,07:43:53 | INFO | Train Epoch: 1 [ 4096/2572288 (0%)] Data (t): 9.696 Batch (t): 11.740, 348.903/s, 174.452/s/gpu LR: 0.000497 Logit Scale: 14.122 Contrastive_loss: 7.7750 (7.7750) Loss: 7.7750 (7.7750)
|
| 464 |
+
2024-09-07,07:48:12 | INFO | Train Epoch: 1 [ 413696/2572288 (16%)] Data (t): 0.538 Batch (t): 2.594, 1571.17/s, 785.586/s/gpu LR: 0.000491 Logit Scale: 14.130 Contrastive_loss: 7.5770 (7.6760) Loss: 7.5770 (7.6760)
|
| 465 |
+
2024-09-07,07:52:34 | INFO | Train Epoch: 1 [ 823296/2572288 (32%)] Data (t): 0.564 Batch (t): 2.613, 1567.68/s, 783.840/s/gpu LR: 0.000481 Logit Scale: 14.150 Contrastive_loss: 7.4687 (7.6069) Loss: 7.4687 (7.6069)
|
| 466 |
+
2024-09-07,07:56:55 | INFO | Train Epoch: 1 [1232896/2572288 (48%)] Data (t): 0.567 Batch (t): 2.616, 1565.50/s, 782.748/s/gpu LR: 0.000468 Logit Scale: 14.191 Contrastive_loss: 7.3471 (7.5420) Loss: 7.3471 (7.5420)
|
| 467 |
+
2024-09-07,08:01:17 | INFO | Train Epoch: 1 [1642496/2572288 (64%)] Data (t): 0.566 Batch (t): 2.615, 1568.25/s, 784.125/s/gpu LR: 0.000452 Logit Scale: 14.263 Contrastive_loss: 7.2163 (7.4768) Loss: 7.2163 (7.4768)
|
| 468 |
+
2024-09-07,08:05:39 | INFO | Train Epoch: 1 [2052096/2572288 (80%)] Data (t): 0.570 Batch (t): 2.618, 1569.50/s, 784.748/s/gpu LR: 0.000433 Logit Scale: 14.350 Contrastive_loss: 7.1304 (7.4191) Loss: 7.1304 (7.4191)
|
| 469 |
+
2024-09-07,08:10:00 | INFO | Train Epoch: 1 [2461696/2572288 (96%)] Data (t): 0.568 Batch (t): 2.617, 1565.34/s, 782.672/s/gpu LR: 0.000412 Logit Scale: 14.430 Contrastive_loss: 7.1217 (7.3766) Loss: 7.1217 (7.3766)
|
| 470 |
+
2024-09-07,08:11:11 | INFO | Train Epoch: 1 [2572288/2572288 (100%)] Data (t): 0.565 Batch (t): 2.614, 1562.51/s, 781.255/s/gpu LR: 0.000406 Logit Scale: 14.452 Contrastive_loss: 7.2079 (7.3555) Loss: 7.2079 (7.3555)
|
| 471 |
+
2024-09-07,08:11:14 | INFO | Start epoch 2
|
| 472 |
+
2024-09-07,08:11:25 | INFO | Train Epoch: 2 [ 4096/2572288 (0%)] Data (t): 9.554 Batch (t): 11.600, 353.101/s, 176.550/s/gpu LR: 0.000405 Logit Scale: 14.452 Contrastive_loss: 6.6379 (6.6379) Loss: 6.6379 (6.6379)
|
| 473 |
+
2024-09-07,08:15:46 | INFO | Train Epoch: 2 [ 413696/2572288 (16%)] Data (t): 0.555 Batch (t): 2.611, 1572.32/s, 786.161/s/gpu LR: 0.000381 Logit Scale: 14.566 Contrastive_loss: 7.0270 (6.8325) Loss: 7.0270 (6.8325)
|
| 474 |
+
2024-09-07,08:20:08 | INFO | Train Epoch: 2 [ 823296/2572288 (32%)] Data (t): 0.565 Batch (t): 2.615, 1554.74/s, 777.370/s/gpu LR: 0.000355 Logit Scale: 14.681 Contrastive_loss: 7.1672 (6.9440) Loss: 7.1672 (6.9440)
|
| 475 |
+
2024-09-07,08:24:29 | INFO | Train Epoch: 2 [1232896/2572288 (48%)] Data (t): 0.566 Batch (t): 2.615, 1570.16/s, 785.078/s/gpu LR: 0.000327 Logit Scale: 14.790 Contrastive_loss: 6.8110 (6.9108) Loss: 6.8110 (6.9108)
|
| 476 |
+
2024-09-07,08:28:51 | INFO | Train Epoch: 2 [1642496/2572288 (64%)] Data (t): 0.566 Batch (t): 2.614, 1569.48/s, 784.739/s/gpu LR: 0.000298 Logit Scale: 14.901 Contrastive_loss: 6.6025 (6.8491) Loss: 6.6025 (6.8491)
|
| 477 |
+
2024-09-07,08:33:12 | INFO | Train Epoch: 2 [2052096/2572288 (80%)] Data (t): 0.566 Batch (t): 2.615, 1565.61/s, 782.805/s/gpu LR: 0.000269 Logit Scale: 15.018 Contrastive_loss: 6.7172 (6.8271) Loss: 6.7172 (6.8271)
|
| 478 |
+
2024-09-07,08:37:34 | INFO | Train Epoch: 2 [2461696/2572288 (96%)] Data (t): 0.565 Batch (t): 2.615, 1566.20/s, 783.102/s/gpu LR: 0.000239 Logit Scale: 15.130 Contrastive_loss: 6.4575 (6.7743) Loss: 6.4575 (6.7743)
|
| 479 |
+
2024-09-07,08:38:44 | INFO | Train Epoch: 2 [2572288/2572288 (100%)] Data (t): 0.563 Batch (t): 2.612, 1576.61/s, 788.306/s/gpu LR: 0.000231 Logit Scale: 15.173 Contrastive_loss: 6.3482 (6.7211) Loss: 6.3482 (6.7211)
|
| 480 |
+
2024-09-07,08:38:47 | INFO | Start epoch 3
|
| 481 |
+
2024-09-07,08:38:59 | INFO | Train Epoch: 3 [ 4096/2572288 (0%)] Data (t): 9.551 Batch (t): 11.600, 353.118/s, 176.559/s/gpu LR: 0.000231 Logit Scale: 15.175 Contrastive_loss: 6.4882 (6.4882) Loss: 6.4882 (6.4882)
|
| 482 |
+
2024-09-07,08:43:19 | INFO | Train Epoch: 3 [ 413696/2572288 (16%)] Data (t): 0.543 Batch (t): 2.601, 1571.86/s, 785.932/s/gpu LR: 0.000202 Logit Scale: 15.290 Contrastive_loss: 6.4287 (6.4584) Loss: 6.4287 (6.4584)
|
| 483 |
+
2024-09-07,08:47:40 | INFO | Train Epoch: 3 [ 823296/2572288 (32%)] Data (t): 0.562 Batch (t): 2.613, 1568.66/s, 784.328/s/gpu LR: 0.000173 Logit Scale: 15.426 Contrastive_loss: 6.5816 (6.4995) Loss: 6.5816 (6.4995)
|
| 484 |
+
2024-09-07,08:52:02 | INFO | Train Epoch: 3 [1232896/2572288 (48%)] Data (t): 0.567 Batch (t): 2.617, 1558.61/s, 779.304/s/gpu LR: 0.000145 Logit Scale: 15.514 Contrastive_loss: 6.4312 (6.4824) Loss: 6.4312 (6.4824)
|
| 485 |
+
2024-09-07,08:56:24 | INFO | Train Epoch: 3 [1642496/2572288 (64%)] Data (t): 0.566 Batch (t): 2.618, 1567.58/s, 783.790/s/gpu LR: 0.000119 Logit Scale: 15.599 Contrastive_loss: 5.8314 (6.3522) Loss: 5.8314 (6.3522)
|
| 486 |
+
2024-09-07,09:00:46 | INFO | Train Epoch: 3 [2052096/2572288 (80%)] Data (t): 0.570 Batch (t): 2.621, 1558.78/s, 779.390/s/gpu LR: 0.000095 Logit Scale: 15.691 Contrastive_loss: 5.5672 (6.2214) Loss: 5.5672 (6.2214)
|
| 487 |
+
2024-09-07,09:05:08 | INFO | Train Epoch: 3 [2461696/2572288 (96%)] Data (t): 0.566 Batch (t): 2.618, 1566.44/s, 783.219/s/gpu LR: 0.000072 Logit Scale: 15.787 Contrastive_loss: 5.9069 (6.1765) Loss: 5.9069 (6.1765)
|
| 488 |
+
2024-09-07,09:06:18 | INFO | Train Epoch: 3 [2572288/2572288 (100%)] Data (t): 0.562 Batch (t): 2.614, 1575.77/s, 787.886/s/gpu LR: 0.000067 Logit Scale: 15.805 Contrastive_loss: 5.5870 (6.1028) Loss: 5.5870 (6.1028)
|
| 489 |
+
2024-09-07,09:06:21 | INFO | Start epoch 4
|
| 490 |
+
2024-09-07,09:06:33 | INFO | Train Epoch: 4 [ 4096/2572288 (0%)] Data (t): 9.714 Batch (t): 11.763, 348.210/s, 174.105/s/gpu LR: 0.000067 Logit Scale: 15.806 Contrastive_loss: 5.4202 (5.4202) Loss: 5.4202 (5.4202)
|
| 491 |
+
2024-09-07,09:10:53 | INFO | Train Epoch: 4 [ 413696/2572288 (16%)] Data (t): 0.549 Batch (t): 2.607, 1561.80/s, 780.900/s/gpu LR: 0.000048 Logit Scale: 15.859 Contrastive_loss: 6.5509 (5.9855) Loss: 6.5509 (5.9855)
|
| 492 |
+
2024-09-07,09:15:15 | INFO | Train Epoch: 4 [ 823296/2572288 (32%)] Data (t): 0.564 Batch (t): 2.615, 1561.51/s, 780.753/s/gpu LR: 0.000032 Logit Scale: 15.902 Contrastive_loss: 5.7273 (5.8995) Loss: 5.7273 (5.8995)
|
| 493 |
+
2024-09-07,09:19:37 | INFO | Train Epoch: 4 [1232896/2572288 (48%)] Data (t): 0.568 Batch (t): 2.618, 1571.58/s, 785.789/s/gpu LR: 0.000019 Logit Scale: 15.925 Contrastive_loss: 6.0029 (5.9253) Loss: 6.0029 (5.9253)
|
| 494 |
+
2024-09-07,09:23:58 | INFO | Train Epoch: 4 [1642496/2572288 (64%)] Data (t): 0.564 Batch (t): 2.615, 1567.93/s, 783.965/s/gpu LR: 0.000009 Logit Scale: 15.935 Contrastive_loss: 5.0497 (5.7502) Loss: 5.0497 (5.7502)
|
| 495 |
+
2024-09-07,09:28:20 | INFO | Train Epoch: 4 [2052096/2572288 (80%)] Data (t): 0.565 Batch (t): 2.616, 1563.19/s, 781.593/s/gpu LR: 0.000003 Logit Scale: 15.940 Contrastive_loss: 5.5695 (5.7201) Loss: 5.5695 (5.7201)
|
| 496 |
+
2024-09-07,09:32:42 | INFO | Train Epoch: 4 [2461696/2572288 (96%)] Data (t): 0.565 Batch (t): 2.616, 1563.88/s, 781.939/s/gpu LR: 0.000000 Logit Scale: 15.942 Contrastive_loss: 5.5382 (5.6941) Loss: 5.5382 (5.6941)
|
| 497 |
+
2024-09-07,09:33:52 | INFO | Train Epoch: 4 [2572288/2572288 (100%)] Data (t): 0.558 Batch (t): 2.610, 1579.28/s, 789.638/s/gpu LR: 0.000000 Logit Scale: 15.942 Contrastive_loss: 5.7904 (5.7061) Loss: 5.7904 (5.7061)
|
breaking_0.3_trained/30_most_difficult/params.txt
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accum_freq: 1
|
| 2 |
+
aug_cfg: {}
|
| 3 |
+
batch_size: 2048
|
| 4 |
+
beta1: 0.9
|
| 5 |
+
beta2: 0.98
|
| 6 |
+
checkpoint_path: /home/breaking_0.3_trained/30_most_difficult/checkpoints
|
| 7 |
+
coca_caption_loss_weight: 2.0
|
| 8 |
+
coca_contrastive_loss_weight: 1.0
|
| 9 |
+
copy_codebase: False
|
| 10 |
+
csv_caption_key: title
|
| 11 |
+
csv_img_key: filepath
|
| 12 |
+
csv_separator:
|
| 13 |
+
dataset_resampled: True
|
| 14 |
+
dataset_type: webdataset
|
| 15 |
+
ddp_static_graph: True
|
| 16 |
+
debug: False
|
| 17 |
+
delete_previous_checkpoint: False
|
| 18 |
+
device: cuda:0
|
| 19 |
+
dist_backend: nccl
|
| 20 |
+
dist_url: env://
|
| 21 |
+
distill: False
|
| 22 |
+
distill_model: None
|
| 23 |
+
distill_pretrained: None
|
| 24 |
+
distributed: True
|
| 25 |
+
epochs: 5
|
| 26 |
+
epochs_cooldown: None
|
| 27 |
+
eps: 1e-06
|
| 28 |
+
force_custom_text: False
|
| 29 |
+
force_image_size: None
|
| 30 |
+
force_patch_dropout: None
|
| 31 |
+
force_quick_gelu: False
|
| 32 |
+
gather_with_grad: True
|
| 33 |
+
grad_checkpointing: True
|
| 34 |
+
grad_clip_norm: None
|
| 35 |
+
horovod: False
|
| 36 |
+
image_mean: None
|
| 37 |
+
image_std: None
|
| 38 |
+
imagenet_v2: None
|
| 39 |
+
imagenet_val: None
|
| 40 |
+
local_loss: True
|
| 41 |
+
local_rank: 0
|
| 42 |
+
lock_image: False
|
| 43 |
+
lock_image_freeze_bn_stats: False
|
| 44 |
+
lock_image_unlocked_groups: 0
|
| 45 |
+
lock_text: False
|
| 46 |
+
lock_text_freeze_layer_norm: False
|
| 47 |
+
lock_text_unlocked_layers: 0
|
| 48 |
+
log_every_n_steps: 100
|
| 49 |
+
log_level: 20
|
| 50 |
+
log_local: False
|
| 51 |
+
log_path: /home/breaking_0.3_trained/30_most_difficult/out.log
|
| 52 |
+
logs: /home/breaking_0.3_trained
|
| 53 |
+
lr: 0.0005
|
| 54 |
+
lr_cooldown_end: 0.0
|
| 55 |
+
lr_cooldown_power: 1.0
|
| 56 |
+
lr_scheduler: cosine
|
| 57 |
+
model: ViT-B-32
|
| 58 |
+
name: 30_most_difficult
|
| 59 |
+
no_set_device_rank: False
|
| 60 |
+
precision: amp
|
| 61 |
+
pretrained:
|
| 62 |
+
pretrained_image: False
|
| 63 |
+
rank: 0
|
| 64 |
+
remote_sync: None
|
| 65 |
+
remote_sync_frequency: 300
|
| 66 |
+
remote_sync_protocol: s3
|
| 67 |
+
report_to: wandb
|
| 68 |
+
resume: None
|
| 69 |
+
save_frequency: 0
|
| 70 |
+
save_most_recent: True
|
| 71 |
+
seed: 0
|
| 72 |
+
skip_scheduler: False
|
| 73 |
+
tensorboard: False
|
| 74 |
+
tensorboard_path:
|
| 75 |
+
torchscript: False
|
| 76 |
+
trace: False
|
| 77 |
+
train_data: /home/breaking_0.3/{00000000..00000335}.tar
|
| 78 |
+
train_data_upsampling_factors: None
|
| 79 |
+
train_num_samples: 2560000
|
| 80 |
+
use_bn_sync: False
|
| 81 |
+
val_data: None
|
| 82 |
+
val_frequency: 1
|
| 83 |
+
val_num_samples: None
|
| 84 |
+
wandb: True
|
| 85 |
+
wandb_notes:
|
| 86 |
+
wandb_project_name: clip_text_hq_clusters
|
| 87 |
+
warmup: 500
|
| 88 |
+
wd: 0.2
|
| 89 |
+
workers: 4
|
| 90 |
+
world_size: 2
|
| 91 |
+
zeroshot_frequency: 2
|
breaking_0.7_trained/70_most_difficult/checkpoints/epoch_5.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2bb482d574f3d17eb109f38c88aa44c6056e5419572e1314cd9a141eac31ad0d
|
| 3 |
+
size 1815701601
|
breaking_0.7_trained/70_most_difficult/checkpoints/epoch_latest.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b6c9016a6da6cf6eaa7d3877aacabb7bf98f490fa87f0a510e65242347cd4fef
|
| 3 |
+
size 1815639289
|
breaking_0.7_trained/70_most_difficult/info.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c48f712fec688cd15e54a48e3189fb86c429a8f964036f18b0b9edcaa9934fe6
|
| 3 |
+
size 321
|
breaking_0.7_trained/70_most_difficult/out.log
ADDED
|
@@ -0,0 +1,497 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2024-09-07,10:00:53 | INFO | No latest resume checkpoint found in /home/breaking_0.7_trained/70_most_difficult/checkpoints.
|
| 2 |
+
2024-09-07,10:00:55 | INFO | Running in distributed mode with multiple processes. Device: cuda:0.Process (global: 0, local 0), total 2.
|
| 3 |
+
2024-09-07,10:00:55 | INFO | Loaded ViT-B-32 model config.
|
| 4 |
+
2024-09-07,10:00:56 | INFO | Model:
|
| 5 |
+
2024-09-07,10:00:56 | INFO | CLIP(
|
| 6 |
+
(visual): VisionTransformer(
|
| 7 |
+
(patchnorm_pre_ln): Identity()
|
| 8 |
+
(conv1): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32), bias=False)
|
| 9 |
+
(patch_dropout): Identity()
|
| 10 |
+
(ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 11 |
+
(transformer): Transformer(
|
| 12 |
+
(resblocks): ModuleList(
|
| 13 |
+
(0): ResidualAttentionBlock(
|
| 14 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 15 |
+
(attn): MultiheadAttention(
|
| 16 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 17 |
+
)
|
| 18 |
+
(ls_1): Identity()
|
| 19 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 20 |
+
(mlp): Sequential(
|
| 21 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 22 |
+
(gelu): GELU(approximate='none')
|
| 23 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 24 |
+
)
|
| 25 |
+
(ls_2): Identity()
|
| 26 |
+
)
|
| 27 |
+
(1): ResidualAttentionBlock(
|
| 28 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 29 |
+
(attn): MultiheadAttention(
|
| 30 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 31 |
+
)
|
| 32 |
+
(ls_1): Identity()
|
| 33 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 34 |
+
(mlp): Sequential(
|
| 35 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 36 |
+
(gelu): GELU(approximate='none')
|
| 37 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 38 |
+
)
|
| 39 |
+
(ls_2): Identity()
|
| 40 |
+
)
|
| 41 |
+
(2): ResidualAttentionBlock(
|
| 42 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 43 |
+
(attn): MultiheadAttention(
|
| 44 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 45 |
+
)
|
| 46 |
+
(ls_1): Identity()
|
| 47 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 48 |
+
(mlp): Sequential(
|
| 49 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 50 |
+
(gelu): GELU(approximate='none')
|
| 51 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 52 |
+
)
|
| 53 |
+
(ls_2): Identity()
|
| 54 |
+
)
|
| 55 |
+
(3): ResidualAttentionBlock(
|
| 56 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 57 |
+
(attn): MultiheadAttention(
|
| 58 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 59 |
+
)
|
| 60 |
+
(ls_1): Identity()
|
| 61 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 62 |
+
(mlp): Sequential(
|
| 63 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 64 |
+
(gelu): GELU(approximate='none')
|
| 65 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 66 |
+
)
|
| 67 |
+
(ls_2): Identity()
|
| 68 |
+
)
|
| 69 |
+
(4): ResidualAttentionBlock(
|
| 70 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 71 |
+
(attn): MultiheadAttention(
|
| 72 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 73 |
+
)
|
| 74 |
+
(ls_1): Identity()
|
| 75 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 76 |
+
(mlp): Sequential(
|
| 77 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 78 |
+
(gelu): GELU(approximate='none')
|
| 79 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 80 |
+
)
|
| 81 |
+
(ls_2): Identity()
|
| 82 |
+
)
|
| 83 |
+
(5): ResidualAttentionBlock(
|
| 84 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 85 |
+
(attn): MultiheadAttention(
|
| 86 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 87 |
+
)
|
| 88 |
+
(ls_1): Identity()
|
| 89 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 90 |
+
(mlp): Sequential(
|
| 91 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 92 |
+
(gelu): GELU(approximate='none')
|
| 93 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 94 |
+
)
|
| 95 |
+
(ls_2): Identity()
|
| 96 |
+
)
|
| 97 |
+
(6): ResidualAttentionBlock(
|
| 98 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 99 |
+
(attn): MultiheadAttention(
|
| 100 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 101 |
+
)
|
| 102 |
+
(ls_1): Identity()
|
| 103 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 104 |
+
(mlp): Sequential(
|
| 105 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 106 |
+
(gelu): GELU(approximate='none')
|
| 107 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 108 |
+
)
|
| 109 |
+
(ls_2): Identity()
|
| 110 |
+
)
|
| 111 |
+
(7): ResidualAttentionBlock(
|
| 112 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 113 |
+
(attn): MultiheadAttention(
|
| 114 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 115 |
+
)
|
| 116 |
+
(ls_1): Identity()
|
| 117 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 118 |
+
(mlp): Sequential(
|
| 119 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 120 |
+
(gelu): GELU(approximate='none')
|
| 121 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 122 |
+
)
|
| 123 |
+
(ls_2): Identity()
|
| 124 |
+
)
|
| 125 |
+
(8): ResidualAttentionBlock(
|
| 126 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 127 |
+
(attn): MultiheadAttention(
|
| 128 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 129 |
+
)
|
| 130 |
+
(ls_1): Identity()
|
| 131 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 132 |
+
(mlp): Sequential(
|
| 133 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 134 |
+
(gelu): GELU(approximate='none')
|
| 135 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 136 |
+
)
|
| 137 |
+
(ls_2): Identity()
|
| 138 |
+
)
|
| 139 |
+
(9): ResidualAttentionBlock(
|
| 140 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 141 |
+
(attn): MultiheadAttention(
|
| 142 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 143 |
+
)
|
| 144 |
+
(ls_1): Identity()
|
| 145 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 146 |
+
(mlp): Sequential(
|
| 147 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 148 |
+
(gelu): GELU(approximate='none')
|
| 149 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 150 |
+
)
|
| 151 |
+
(ls_2): Identity()
|
| 152 |
+
)
|
| 153 |
+
(10): ResidualAttentionBlock(
|
| 154 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 155 |
+
(attn): MultiheadAttention(
|
| 156 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 157 |
+
)
|
| 158 |
+
(ls_1): Identity()
|
| 159 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 160 |
+
(mlp): Sequential(
|
| 161 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 162 |
+
(gelu): GELU(approximate='none')
|
| 163 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 164 |
+
)
|
| 165 |
+
(ls_2): Identity()
|
| 166 |
+
)
|
| 167 |
+
(11): ResidualAttentionBlock(
|
| 168 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 169 |
+
(attn): MultiheadAttention(
|
| 170 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 171 |
+
)
|
| 172 |
+
(ls_1): Identity()
|
| 173 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 174 |
+
(mlp): Sequential(
|
| 175 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 176 |
+
(gelu): GELU(approximate='none')
|
| 177 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 178 |
+
)
|
| 179 |
+
(ls_2): Identity()
|
| 180 |
+
)
|
| 181 |
+
)
|
| 182 |
+
)
|
| 183 |
+
(ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 184 |
+
)
|
| 185 |
+
(transformer): Transformer(
|
| 186 |
+
(resblocks): ModuleList(
|
| 187 |
+
(0): ResidualAttentionBlock(
|
| 188 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 189 |
+
(attn): MultiheadAttention(
|
| 190 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 191 |
+
)
|
| 192 |
+
(ls_1): Identity()
|
| 193 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 194 |
+
(mlp): Sequential(
|
| 195 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 196 |
+
(gelu): GELU(approximate='none')
|
| 197 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 198 |
+
)
|
| 199 |
+
(ls_2): Identity()
|
| 200 |
+
)
|
| 201 |
+
(1): ResidualAttentionBlock(
|
| 202 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 203 |
+
(attn): MultiheadAttention(
|
| 204 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 205 |
+
)
|
| 206 |
+
(ls_1): Identity()
|
| 207 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 208 |
+
(mlp): Sequential(
|
| 209 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 210 |
+
(gelu): GELU(approximate='none')
|
| 211 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 212 |
+
)
|
| 213 |
+
(ls_2): Identity()
|
| 214 |
+
)
|
| 215 |
+
(2): ResidualAttentionBlock(
|
| 216 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 217 |
+
(attn): MultiheadAttention(
|
| 218 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 219 |
+
)
|
| 220 |
+
(ls_1): Identity()
|
| 221 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 222 |
+
(mlp): Sequential(
|
| 223 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 224 |
+
(gelu): GELU(approximate='none')
|
| 225 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 226 |
+
)
|
| 227 |
+
(ls_2): Identity()
|
| 228 |
+
)
|
| 229 |
+
(3): ResidualAttentionBlock(
|
| 230 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 231 |
+
(attn): MultiheadAttention(
|
| 232 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 233 |
+
)
|
| 234 |
+
(ls_1): Identity()
|
| 235 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 236 |
+
(mlp): Sequential(
|
| 237 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 238 |
+
(gelu): GELU(approximate='none')
|
| 239 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 240 |
+
)
|
| 241 |
+
(ls_2): Identity()
|
| 242 |
+
)
|
| 243 |
+
(4): ResidualAttentionBlock(
|
| 244 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 245 |
+
(attn): MultiheadAttention(
|
| 246 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 247 |
+
)
|
| 248 |
+
(ls_1): Identity()
|
| 249 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 250 |
+
(mlp): Sequential(
|
| 251 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 252 |
+
(gelu): GELU(approximate='none')
|
| 253 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 254 |
+
)
|
| 255 |
+
(ls_2): Identity()
|
| 256 |
+
)
|
| 257 |
+
(5): ResidualAttentionBlock(
|
| 258 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 259 |
+
(attn): MultiheadAttention(
|
| 260 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 261 |
+
)
|
| 262 |
+
(ls_1): Identity()
|
| 263 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 264 |
+
(mlp): Sequential(
|
| 265 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 266 |
+
(gelu): GELU(approximate='none')
|
| 267 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 268 |
+
)
|
| 269 |
+
(ls_2): Identity()
|
| 270 |
+
)
|
| 271 |
+
(6): ResidualAttentionBlock(
|
| 272 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 273 |
+
(attn): MultiheadAttention(
|
| 274 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 275 |
+
)
|
| 276 |
+
(ls_1): Identity()
|
| 277 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 278 |
+
(mlp): Sequential(
|
| 279 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 280 |
+
(gelu): GELU(approximate='none')
|
| 281 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 282 |
+
)
|
| 283 |
+
(ls_2): Identity()
|
| 284 |
+
)
|
| 285 |
+
(7): ResidualAttentionBlock(
|
| 286 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 287 |
+
(attn): MultiheadAttention(
|
| 288 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 289 |
+
)
|
| 290 |
+
(ls_1): Identity()
|
| 291 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 292 |
+
(mlp): Sequential(
|
| 293 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 294 |
+
(gelu): GELU(approximate='none')
|
| 295 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 296 |
+
)
|
| 297 |
+
(ls_2): Identity()
|
| 298 |
+
)
|
| 299 |
+
(8): ResidualAttentionBlock(
|
| 300 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 301 |
+
(attn): MultiheadAttention(
|
| 302 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 303 |
+
)
|
| 304 |
+
(ls_1): Identity()
|
| 305 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 306 |
+
(mlp): Sequential(
|
| 307 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 308 |
+
(gelu): GELU(approximate='none')
|
| 309 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 310 |
+
)
|
| 311 |
+
(ls_2): Identity()
|
| 312 |
+
)
|
| 313 |
+
(9): ResidualAttentionBlock(
|
| 314 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 315 |
+
(attn): MultiheadAttention(
|
| 316 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 317 |
+
)
|
| 318 |
+
(ls_1): Identity()
|
| 319 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 320 |
+
(mlp): Sequential(
|
| 321 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 322 |
+
(gelu): GELU(approximate='none')
|
| 323 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 324 |
+
)
|
| 325 |
+
(ls_2): Identity()
|
| 326 |
+
)
|
| 327 |
+
(10): ResidualAttentionBlock(
|
| 328 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 329 |
+
(attn): MultiheadAttention(
|
| 330 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 331 |
+
)
|
| 332 |
+
(ls_1): Identity()
|
| 333 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 334 |
+
(mlp): Sequential(
|
| 335 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 336 |
+
(gelu): GELU(approximate='none')
|
| 337 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 338 |
+
)
|
| 339 |
+
(ls_2): Identity()
|
| 340 |
+
)
|
| 341 |
+
(11): ResidualAttentionBlock(
|
| 342 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 343 |
+
(attn): MultiheadAttention(
|
| 344 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 345 |
+
)
|
| 346 |
+
(ls_1): Identity()
|
| 347 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 348 |
+
(mlp): Sequential(
|
| 349 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 350 |
+
(gelu): GELU(approximate='none')
|
| 351 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 352 |
+
)
|
| 353 |
+
(ls_2): Identity()
|
| 354 |
+
)
|
| 355 |
+
)
|
| 356 |
+
)
|
| 357 |
+
(token_embedding): Embedding(49408, 512)
|
| 358 |
+
(ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 359 |
+
)
|
| 360 |
+
2024-09-07,10:00:56 | INFO | Params:
|
| 361 |
+
2024-09-07,10:00:56 | INFO | accum_freq: 1
|
| 362 |
+
2024-09-07,10:00:56 | INFO | aug_cfg: {}
|
| 363 |
+
2024-09-07,10:00:56 | INFO | batch_size: 2048
|
| 364 |
+
2024-09-07,10:00:56 | INFO | beta1: 0.9
|
| 365 |
+
2024-09-07,10:00:56 | INFO | beta2: 0.98
|
| 366 |
+
2024-09-07,10:00:56 | INFO | checkpoint_path: /home/breaking_0.7_trained/70_most_difficult/checkpoints
|
| 367 |
+
2024-09-07,10:00:56 | INFO | coca_caption_loss_weight: 2.0
|
| 368 |
+
2024-09-07,10:00:56 | INFO | coca_contrastive_loss_weight: 1.0
|
| 369 |
+
2024-09-07,10:00:56 | INFO | copy_codebase: False
|
| 370 |
+
2024-09-07,10:00:56 | INFO | csv_caption_key: title
|
| 371 |
+
2024-09-07,10:00:56 | INFO | csv_img_key: filepath
|
| 372 |
+
2024-09-07,10:00:56 | INFO | csv_separator:
|
| 373 |
+
2024-09-07,10:00:56 | INFO | dataset_resampled: True
|
| 374 |
+
2024-09-07,10:00:56 | INFO | dataset_type: webdataset
|
| 375 |
+
2024-09-07,10:00:56 | INFO | ddp_static_graph: True
|
| 376 |
+
2024-09-07,10:00:56 | INFO | debug: False
|
| 377 |
+
2024-09-07,10:00:56 | INFO | delete_previous_checkpoint: False
|
| 378 |
+
2024-09-07,10:00:56 | INFO | device: cuda:0
|
| 379 |
+
2024-09-07,10:00:56 | INFO | dist_backend: nccl
|
| 380 |
+
2024-09-07,10:00:56 | INFO | dist_url: env://
|
| 381 |
+
2024-09-07,10:00:56 | INFO | distill: False
|
| 382 |
+
2024-09-07,10:00:56 | INFO | distill_model: None
|
| 383 |
+
2024-09-07,10:00:56 | INFO | distill_pretrained: None
|
| 384 |
+
2024-09-07,10:00:56 | INFO | distributed: True
|
| 385 |
+
2024-09-07,10:00:56 | INFO | epochs: 5
|
| 386 |
+
2024-09-07,10:00:56 | INFO | epochs_cooldown: None
|
| 387 |
+
2024-09-07,10:00:56 | INFO | eps: 1e-06
|
| 388 |
+
2024-09-07,10:00:56 | INFO | force_custom_text: False
|
| 389 |
+
2024-09-07,10:00:56 | INFO | force_image_size: None
|
| 390 |
+
2024-09-07,10:00:56 | INFO | force_patch_dropout: None
|
| 391 |
+
2024-09-07,10:00:56 | INFO | force_quick_gelu: False
|
| 392 |
+
2024-09-07,10:00:56 | INFO | gather_with_grad: True
|
| 393 |
+
2024-09-07,10:00:56 | INFO | grad_checkpointing: True
|
| 394 |
+
2024-09-07,10:00:56 | INFO | grad_clip_norm: None
|
| 395 |
+
2024-09-07,10:00:56 | INFO | horovod: False
|
| 396 |
+
2024-09-07,10:00:56 | INFO | image_mean: None
|
| 397 |
+
2024-09-07,10:00:56 | INFO | image_std: None
|
| 398 |
+
2024-09-07,10:00:56 | INFO | imagenet_v2: None
|
| 399 |
+
2024-09-07,10:00:56 | INFO | imagenet_val: None
|
| 400 |
+
2024-09-07,10:00:56 | INFO | local_loss: True
|
| 401 |
+
2024-09-07,10:00:56 | INFO | local_rank: 0
|
| 402 |
+
2024-09-07,10:00:56 | INFO | lock_image: False
|
| 403 |
+
2024-09-07,10:00:56 | INFO | lock_image_freeze_bn_stats: False
|
| 404 |
+
2024-09-07,10:00:56 | INFO | lock_image_unlocked_groups: 0
|
| 405 |
+
2024-09-07,10:00:56 | INFO | lock_text: False
|
| 406 |
+
2024-09-07,10:00:56 | INFO | lock_text_freeze_layer_norm: False
|
| 407 |
+
2024-09-07,10:00:56 | INFO | lock_text_unlocked_layers: 0
|
| 408 |
+
2024-09-07,10:00:56 | INFO | log_every_n_steps: 100
|
| 409 |
+
2024-09-07,10:00:56 | INFO | log_level: 20
|
| 410 |
+
2024-09-07,10:00:56 | INFO | log_local: False
|
| 411 |
+
2024-09-07,10:00:56 | INFO | log_path: /home/breaking_0.7_trained/70_most_difficult/out.log
|
| 412 |
+
2024-09-07,10:00:56 | INFO | logs: /home/breaking_0.7_trained
|
| 413 |
+
2024-09-07,10:00:56 | INFO | lr: 0.0005
|
| 414 |
+
2024-09-07,10:00:56 | INFO | lr_cooldown_end: 0.0
|
| 415 |
+
2024-09-07,10:00:56 | INFO | lr_cooldown_power: 1.0
|
| 416 |
+
2024-09-07,10:00:56 | INFO | lr_scheduler: cosine
|
| 417 |
+
2024-09-07,10:00:56 | INFO | model: ViT-B-32
|
| 418 |
+
2024-09-07,10:00:56 | INFO | name: 70_most_difficult
|
| 419 |
+
2024-09-07,10:00:56 | INFO | no_set_device_rank: False
|
| 420 |
+
2024-09-07,10:00:56 | INFO | precision: amp
|
| 421 |
+
2024-09-07,10:00:56 | INFO | pretrained:
|
| 422 |
+
2024-09-07,10:00:56 | INFO | pretrained_image: False
|
| 423 |
+
2024-09-07,10:00:56 | INFO | rank: 0
|
| 424 |
+
2024-09-07,10:00:56 | INFO | remote_sync: None
|
| 425 |
+
2024-09-07,10:00:56 | INFO | remote_sync_frequency: 300
|
| 426 |
+
2024-09-07,10:00:56 | INFO | remote_sync_protocol: s3
|
| 427 |
+
2024-09-07,10:00:56 | INFO | report_to: wandb
|
| 428 |
+
2024-09-07,10:00:56 | INFO | resume: None
|
| 429 |
+
2024-09-07,10:00:56 | INFO | save_frequency: 0
|
| 430 |
+
2024-09-07,10:00:56 | INFO | save_most_recent: True
|
| 431 |
+
2024-09-07,10:00:56 | INFO | seed: 0
|
| 432 |
+
2024-09-07,10:00:56 | INFO | skip_scheduler: False
|
| 433 |
+
2024-09-07,10:00:56 | INFO | tensorboard: False
|
| 434 |
+
2024-09-07,10:00:56 | INFO | tensorboard_path:
|
| 435 |
+
2024-09-07,10:00:56 | INFO | torchscript: False
|
| 436 |
+
2024-09-07,10:00:56 | INFO | trace: False
|
| 437 |
+
2024-09-07,10:00:56 | INFO | train_data: /home/breaking_0.7/{00000000..00000763}.tar
|
| 438 |
+
2024-09-07,10:00:56 | INFO | train_data_upsampling_factors: None
|
| 439 |
+
2024-09-07,10:00:56 | INFO | train_num_samples: 2560000
|
| 440 |
+
2024-09-07,10:00:56 | INFO | use_bn_sync: False
|
| 441 |
+
2024-09-07,10:00:56 | INFO | val_data: None
|
| 442 |
+
2024-09-07,10:00:56 | INFO | val_frequency: 1
|
| 443 |
+
2024-09-07,10:00:56 | INFO | val_num_samples: None
|
| 444 |
+
2024-09-07,10:00:56 | INFO | wandb: True
|
| 445 |
+
2024-09-07,10:00:56 | INFO | wandb_notes:
|
| 446 |
+
2024-09-07,10:00:56 | INFO | wandb_project_name: clip_text_hq_clusters
|
| 447 |
+
2024-09-07,10:00:56 | INFO | warmup: 500
|
| 448 |
+
2024-09-07,10:00:56 | INFO | wd: 0.2
|
| 449 |
+
2024-09-07,10:00:56 | INFO | workers: 4
|
| 450 |
+
2024-09-07,10:00:56 | INFO | world_size: 2
|
| 451 |
+
2024-09-07,10:00:56 | INFO | zeroshot_frequency: 2
|
| 452 |
+
2024-09-07,10:01:02 | INFO | Start epoch 0
|
| 453 |
+
2024-09-07,10:01:19 | INFO | Train Epoch: 0 [ 4096/2572288 (0%)] Data (t): 12.092 Batch (t): 16.765, 244.323/s, 122.161/s/gpu LR: 0.000001 Logit Scale: 14.286 Contrastive_loss: 8.3788 (8.3788) Loss: 8.3788 (8.3788)
|
| 454 |
+
2024-09-07,10:01:22 | INFO | Reducer buckets have been rebuilt in this iteration.
|
| 455 |
+
2024-09-07,10:05:39 | INFO | Train Epoch: 0 [ 413696/2572288 (16%)] Data (t): 0.547 Batch (t): 2.602, 1574.29/s, 787.144/s/gpu LR: 0.000101 Logit Scale: 14.266 Contrastive_loss: 8.2004 (8.2896) Loss: 8.2004 (8.2896)
|
| 456 |
+
2024-09-07,10:10:01 | INFO | Train Epoch: 0 [ 823296/2572288 (32%)] Data (t): 0.564 Batch (t): 2.614, 1566.47/s, 783.235/s/gpu LR: 0.000201 Logit Scale: 14.231 Contrastive_loss: 8.0909 (8.2234) Loss: 8.0909 (8.2234)
|
| 457 |
+
2024-09-07,10:14:22 | INFO | Train Epoch: 0 [1232896/2572288 (48%)] Data (t): 0.566 Batch (t): 2.616, 1572.98/s, 786.491/s/gpu LR: 0.000301 Logit Scale: 14.194 Contrastive_loss: 7.9780 (8.1620) Loss: 7.9780 (8.1620)
|
| 458 |
+
2024-09-07,10:18:44 | INFO | Train Epoch: 0 [1642496/2572288 (64%)] Data (t): 0.564 Batch (t): 2.615, 1563.77/s, 781.886/s/gpu LR: 0.000401 Logit Scale: 14.152 Contrastive_loss: 7.8635 (8.1023) Loss: 7.8635 (8.1023)
|
| 459 |
+
2024-09-07,10:23:06 | INFO | Train Epoch: 0 [2052096/2572288 (80%)] Data (t): 0.565 Batch (t): 2.616, 1563.40/s, 781.699/s/gpu LR: 0.000500 Logit Scale: 14.108 Contrastive_loss: 7.8084 (8.0534) Loss: 7.8084 (8.0534)
|
| 460 |
+
2024-09-07,10:27:28 | INFO | Train Epoch: 0 [2461696/2572288 (96%)] Data (t): 0.573 Batch (t): 2.623, 1561.55/s, 780.775/s/gpu LR: 0.000498 Logit Scale: 14.085 Contrastive_loss: 7.7462 (8.0095) Loss: 7.7462 (8.0095)
|
| 461 |
+
2024-09-07,10:28:38 | INFO | Train Epoch: 0 [2572288/2572288 (100%)] Data (t): 0.566 Batch (t): 2.617, 1568.93/s, 784.465/s/gpu LR: 0.000497 Logit Scale: 14.085 Contrastive_loss: 7.6566 (7.9654) Loss: 7.6566 (7.9654)
|
| 462 |
+
2024-09-07,10:28:41 | INFO | Start epoch 1
|
| 463 |
+
2024-09-07,10:28:53 | INFO | Train Epoch: 1 [ 4096/2572288 (0%)] Data (t): 9.646 Batch (t): 11.692, 350.330/s, 175.165/s/gpu LR: 0.000497 Logit Scale: 14.085 Contrastive_loss: 7.6610 (7.6610) Loss: 7.6610 (7.6610)
|
| 464 |
+
2024-09-07,10:33:13 | INFO | Train Epoch: 1 [ 413696/2572288 (16%)] Data (t): 0.548 Batch (t): 2.606, 1563.83/s, 781.915/s/gpu LR: 0.000491 Logit Scale: 14.097 Contrastive_loss: 7.5724 (7.6167) Loss: 7.5724 (7.6167)
|
| 465 |
+
2024-09-07,10:37:35 | INFO | Train Epoch: 1 [ 823296/2572288 (32%)] Data (t): 0.565 Batch (t): 2.615, 1566.09/s, 783.044/s/gpu LR: 0.000481 Logit Scale: 14.127 Contrastive_loss: 7.4356 (7.5563) Loss: 7.4356 (7.5563)
|
| 466 |
+
2024-09-07,10:41:56 | INFO | Train Epoch: 1 [1232896/2572288 (48%)] Data (t): 0.566 Batch (t): 2.616, 1558.96/s, 779.482/s/gpu LR: 0.000468 Logit Scale: 14.170 Contrastive_loss: 7.3573 (7.5066) Loss: 7.3573 (7.5066)
|
| 467 |
+
2024-09-07,10:46:18 | INFO | Train Epoch: 1 [1642496/2572288 (64%)] Data (t): 0.568 Batch (t): 2.620, 1562.54/s, 781.271/s/gpu LR: 0.000452 Logit Scale: 14.245 Contrastive_loss: 7.4000 (7.4853) Loss: 7.4000 (7.4853)
|
| 468 |
+
2024-09-07,10:50:41 | INFO | Train Epoch: 1 [2052096/2572288 (80%)] Data (t): 0.571 Batch (t): 2.622, 1563.27/s, 781.634/s/gpu LR: 0.000433 Logit Scale: 14.335 Contrastive_loss: 7.2466 (7.4455) Loss: 7.2466 (7.4455)
|
| 469 |
+
2024-09-07,10:55:03 | INFO | Train Epoch: 1 [2461696/2572288 (96%)] Data (t): 0.570 Batch (t): 2.620, 1566.36/s, 783.178/s/gpu LR: 0.000412 Logit Scale: 14.443 Contrastive_loss: 7.2259 (7.4141) Loss: 7.2259 (7.4141)
|
| 470 |
+
2024-09-07,10:56:13 | INFO | Train Epoch: 1 [2572288/2572288 (100%)] Data (t): 0.563 Batch (t): 2.614, 1574.96/s, 787.481/s/gpu LR: 0.000406 Logit Scale: 14.478 Contrastive_loss: 7.1533 (7.3815) Loss: 7.1533 (7.3815)
|
| 471 |
+
2024-09-07,10:56:16 | INFO | Start epoch 2
|
| 472 |
+
2024-09-07,10:56:28 | INFO | Train Epoch: 2 [ 4096/2572288 (0%)] Data (t): 9.651 Batch (t): 11.699, 350.112/s, 175.056/s/gpu LR: 0.000405 Logit Scale: 14.480 Contrastive_loss: 6.9992 (6.9992) Loss: 6.9992 (6.9992)
|
| 473 |
+
2024-09-07,11:00:49 | INFO | Train Epoch: 2 [ 413696/2572288 (16%)] Data (t): 0.558 Batch (t): 2.617, 1562.06/s, 781.032/s/gpu LR: 0.000381 Logit Scale: 14.608 Contrastive_loss: 7.1339 (7.0665) Loss: 7.1339 (7.0665)
|
| 474 |
+
2024-09-07,11:05:12 | INFO | Train Epoch: 2 [ 823296/2572288 (32%)] Data (t): 0.569 Batch (t): 2.622, 1557.91/s, 778.953/s/gpu LR: 0.000355 Logit Scale: 14.768 Contrastive_loss: 7.0686 (7.0672) Loss: 7.0686 (7.0672)
|
| 475 |
+
2024-09-07,11:09:34 | INFO | Train Epoch: 2 [1232896/2572288 (48%)] Data (t): 0.570 Batch (t): 2.623, 1565.95/s, 782.973/s/gpu LR: 0.000327 Logit Scale: 14.891 Contrastive_loss: 6.9274 (7.0323) Loss: 6.9274 (7.0323)
|
| 476 |
+
2024-09-07,11:13:56 | INFO | Train Epoch: 2 [1642496/2572288 (64%)] Data (t): 0.569 Batch (t): 2.621, 1563.01/s, 781.503/s/gpu LR: 0.000298 Logit Scale: 15.027 Contrastive_loss: 6.8516 (6.9961) Loss: 6.8516 (6.9961)
|
| 477 |
+
2024-09-07,11:18:18 | INFO | Train Epoch: 2 [2052096/2572288 (80%)] Data (t): 0.568 Batch (t): 2.620, 1559.58/s, 779.792/s/gpu LR: 0.000269 Logit Scale: 15.198 Contrastive_loss: 6.9052 (6.9810) Loss: 6.9052 (6.9810)
|
| 478 |
+
2024-09-07,11:22:40 | INFO | Train Epoch: 2 [2461696/2572288 (96%)] Data (t): 0.568 Batch (t): 2.621, 1561.78/s, 780.890/s/gpu LR: 0.000239 Logit Scale: 15.340 Contrastive_loss: 6.7375 (6.9462) Loss: 6.7375 (6.9462)
|
| 479 |
+
2024-09-07,11:23:51 | INFO | Train Epoch: 2 [2572288/2572288 (100%)] Data (t): 0.567 Batch (t): 2.619, 1575.79/s, 787.897/s/gpu LR: 0.000231 Logit Scale: 15.374 Contrastive_loss: 6.8204 (6.9305) Loss: 6.8204 (6.9305)
|
| 480 |
+
2024-09-07,11:23:54 | INFO | Start epoch 3
|
| 481 |
+
2024-09-07,11:24:05 | INFO | Train Epoch: 3 [ 4096/2572288 (0%)] Data (t): 9.380 Batch (t): 11.428, 358.426/s, 179.213/s/gpu LR: 0.000231 Logit Scale: 15.375 Contrastive_loss: 6.6847 (6.6847) Loss: 6.6847 (6.6847)
|
| 482 |
+
2024-09-07,11:28:26 | INFO | Train Epoch: 3 [ 413696/2572288 (16%)] Data (t): 0.552 Batch (t): 2.613, 1562.80/s, 781.399/s/gpu LR: 0.000202 Logit Scale: 15.524 Contrastive_loss: 6.5905 (6.6376) Loss: 6.5905 (6.6376)
|
| 483 |
+
2024-09-07,11:32:48 | INFO | Train Epoch: 3 [ 823296/2572288 (32%)] Data (t): 0.564 Batch (t): 2.617, 1563.18/s, 781.589/s/gpu LR: 0.000173 Logit Scale: 15.666 Contrastive_loss: 6.5036 (6.5929) Loss: 6.5036 (6.5929)
|
| 484 |
+
2024-09-07,11:37:10 | INFO | Train Epoch: 3 [1232896/2572288 (48%)] Data (t): 0.568 Batch (t): 2.620, 1562.99/s, 781.497/s/gpu LR: 0.000145 Logit Scale: 15.784 Contrastive_loss: 6.2833 (6.5155) Loss: 6.2833 (6.5155)
|
| 485 |
+
2024-09-07,11:41:32 | INFO | Train Epoch: 3 [1642496/2572288 (64%)] Data (t): 0.565 Batch (t): 2.618, 1568.86/s, 784.430/s/gpu LR: 0.000119 Logit Scale: 15.895 Contrastive_loss: 6.2988 (6.4722) Loss: 6.2988 (6.4722)
|
| 486 |
+
2024-09-07,11:45:54 | INFO | Train Epoch: 3 [2052096/2572288 (80%)] Data (t): 0.564 Batch (t): 2.617, 1566.40/s, 783.198/s/gpu LR: 0.000095 Logit Scale: 16.002 Contrastive_loss: 6.3952 (6.4594) Loss: 6.3952 (6.4594)
|
| 487 |
+
2024-09-07,11:50:16 | INFO | Train Epoch: 3 [2461696/2572288 (96%)] Data (t): 0.566 Batch (t): 2.619, 1568.68/s, 784.342/s/gpu LR: 0.000072 Logit Scale: 16.096 Contrastive_loss: 6.1727 (6.4184) Loss: 6.1727 (6.4184)
|
| 488 |
+
2024-09-07,11:51:26 | INFO | Train Epoch: 3 [2572288/2572288 (100%)] Data (t): 0.567 Batch (t): 2.619, 1575.88/s, 787.941/s/gpu LR: 0.000067 Logit Scale: 16.118 Contrastive_loss: 6.3202 (6.4061) Loss: 6.3202 (6.4061)
|
| 489 |
+
2024-09-07,11:51:29 | INFO | Start epoch 4
|
| 490 |
+
2024-09-07,11:51:41 | INFO | Train Epoch: 4 [ 4096/2572288 (0%)] Data (t): 9.530 Batch (t): 11.577, 353.813/s, 176.906/s/gpu LR: 0.000067 Logit Scale: 16.118 Contrastive_loss: 6.2788 (6.2788) Loss: 6.2788 (6.2788)
|
| 491 |
+
2024-09-07,11:56:02 | INFO | Train Epoch: 4 [ 413696/2572288 (16%)] Data (t): 0.554 Batch (t): 2.615, 1562.67/s, 781.334/s/gpu LR: 0.000048 Logit Scale: 16.177 Contrastive_loss: 6.4553 (6.3671) Loss: 6.4553 (6.3671)
|
| 492 |
+
2024-09-07,12:00:24 | INFO | Train Epoch: 4 [ 823296/2572288 (32%)] Data (t): 0.563 Batch (t): 2.616, 1563.44/s, 781.721/s/gpu LR: 0.000032 Logit Scale: 16.220 Contrastive_loss: 6.3225 (6.3522) Loss: 6.3225 (6.3522)
|
| 493 |
+
2024-09-07,12:04:45 | INFO | Train Epoch: 4 [1232896/2572288 (48%)] Data (t): 0.563 Batch (t): 2.615, 1567.49/s, 783.743/s/gpu LR: 0.000019 Logit Scale: 16.246 Contrastive_loss: 6.2155 (6.3180) Loss: 6.2155 (6.3180)
|
| 494 |
+
2024-09-07,12:09:07 | INFO | Train Epoch: 4 [1642496/2572288 (64%)] Data (t): 0.565 Batch (t): 2.619, 1563.51/s, 781.753/s/gpu LR: 0.000009 Logit Scale: 16.261 Contrastive_loss: 6.4286 (6.3401) Loss: 6.4286 (6.3401)
|
| 495 |
+
2024-09-07,12:13:29 | INFO | Train Epoch: 4 [2052096/2572288 (80%)] Data (t): 0.563 Batch (t): 2.616, 1565.01/s, 782.504/s/gpu LR: 0.000003 Logit Scale: 16.267 Contrastive_loss: 5.7948 (6.2493) Loss: 5.7948 (6.2493)
|
| 496 |
+
2024-09-07,12:17:51 | INFO | Train Epoch: 4 [2461696/2572288 (96%)] Data (t): 0.564 Batch (t): 2.618, 1563.70/s, 781.850/s/gpu LR: 0.000000 Logit Scale: 16.268 Contrastive_loss: 6.4187 (6.2735) Loss: 6.4187 (6.2735)
|
| 497 |
+
2024-09-07,12:19:01 | INFO | Train Epoch: 4 [2572288/2572288 (100%)] Data (t): 0.563 Batch (t): 2.615, 1572.01/s, 786.003/s/gpu LR: 0.000000 Logit Scale: 16.268 Contrastive_loss: 6.3365 (6.2813) Loss: 6.3365 (6.2813)
|
breaking_0.7_trained/70_most_difficult/params.txt
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accum_freq: 1
|
| 2 |
+
aug_cfg: {}
|
| 3 |
+
batch_size: 2048
|
| 4 |
+
beta1: 0.9
|
| 5 |
+
beta2: 0.98
|
| 6 |
+
checkpoint_path: /home/breaking_0.7_trained/70_most_difficult/checkpoints
|
| 7 |
+
coca_caption_loss_weight: 2.0
|
| 8 |
+
coca_contrastive_loss_weight: 1.0
|
| 9 |
+
copy_codebase: False
|
| 10 |
+
csv_caption_key: title
|
| 11 |
+
csv_img_key: filepath
|
| 12 |
+
csv_separator:
|
| 13 |
+
dataset_resampled: True
|
| 14 |
+
dataset_type: webdataset
|
| 15 |
+
ddp_static_graph: True
|
| 16 |
+
debug: False
|
| 17 |
+
delete_previous_checkpoint: False
|
| 18 |
+
device: cuda:0
|
| 19 |
+
dist_backend: nccl
|
| 20 |
+
dist_url: env://
|
| 21 |
+
distill: False
|
| 22 |
+
distill_model: None
|
| 23 |
+
distill_pretrained: None
|
| 24 |
+
distributed: True
|
| 25 |
+
epochs: 5
|
| 26 |
+
epochs_cooldown: None
|
| 27 |
+
eps: 1e-06
|
| 28 |
+
force_custom_text: False
|
| 29 |
+
force_image_size: None
|
| 30 |
+
force_patch_dropout: None
|
| 31 |
+
force_quick_gelu: False
|
| 32 |
+
gather_with_grad: True
|
| 33 |
+
grad_checkpointing: True
|
| 34 |
+
grad_clip_norm: None
|
| 35 |
+
horovod: False
|
| 36 |
+
image_mean: None
|
| 37 |
+
image_std: None
|
| 38 |
+
imagenet_v2: None
|
| 39 |
+
imagenet_val: None
|
| 40 |
+
local_loss: True
|
| 41 |
+
local_rank: 0
|
| 42 |
+
lock_image: False
|
| 43 |
+
lock_image_freeze_bn_stats: False
|
| 44 |
+
lock_image_unlocked_groups: 0
|
| 45 |
+
lock_text: False
|
| 46 |
+
lock_text_freeze_layer_norm: False
|
| 47 |
+
lock_text_unlocked_layers: 0
|
| 48 |
+
log_every_n_steps: 100
|
| 49 |
+
log_level: 20
|
| 50 |
+
log_local: False
|
| 51 |
+
log_path: /home/breaking_0.7_trained/70_most_difficult/out.log
|
| 52 |
+
logs: /home/breaking_0.7_trained
|
| 53 |
+
lr: 0.0005
|
| 54 |
+
lr_cooldown_end: 0.0
|
| 55 |
+
lr_cooldown_power: 1.0
|
| 56 |
+
lr_scheduler: cosine
|
| 57 |
+
model: ViT-B-32
|
| 58 |
+
name: 70_most_difficult
|
| 59 |
+
no_set_device_rank: False
|
| 60 |
+
precision: amp
|
| 61 |
+
pretrained:
|
| 62 |
+
pretrained_image: False
|
| 63 |
+
rank: 0
|
| 64 |
+
remote_sync: None
|
| 65 |
+
remote_sync_frequency: 300
|
| 66 |
+
remote_sync_protocol: s3
|
| 67 |
+
report_to: wandb
|
| 68 |
+
resume: None
|
| 69 |
+
save_frequency: 0
|
| 70 |
+
save_most_recent: True
|
| 71 |
+
seed: 0
|
| 72 |
+
skip_scheduler: False
|
| 73 |
+
tensorboard: False
|
| 74 |
+
tensorboard_path:
|
| 75 |
+
torchscript: False
|
| 76 |
+
trace: False
|
| 77 |
+
train_data: /home/breaking_0.7/{00000000..00000763}.tar
|
| 78 |
+
train_data_upsampling_factors: None
|
| 79 |
+
train_num_samples: 2560000
|
| 80 |
+
use_bn_sync: False
|
| 81 |
+
val_data: None
|
| 82 |
+
val_frequency: 1
|
| 83 |
+
val_num_samples: None
|
| 84 |
+
wandb: True
|
| 85 |
+
wandb_notes:
|
| 86 |
+
wandb_project_name: clip_text_hq_clusters
|
| 87 |
+
warmup: 500
|
| 88 |
+
wd: 0.2
|
| 89 |
+
workers: 4
|
| 90 |
+
world_size: 2
|
| 91 |
+
zeroshot_frequency: 2
|
breaking_0.9_trained/90_most_difficult/checkpoints/epoch_5.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:385d8998b34593532134cb9e27569b8fa26041c7883f44a2879ac5dd5fb63831
|
| 3 |
+
size 1815701601
|
breaking_0.9_trained/90_most_difficult/checkpoints/epoch_latest.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf4bcef741c6277c38eda5a8a2b606b7790be2bf823e8e2570387b786a6a4b07
|
| 3 |
+
size 1815639289
|
breaking_0.9_trained/90_most_difficult/info.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:497529ead8ce8bbef29d5773f843ad88a6775e7828bc7bf21a40a5acae703618
|
| 3 |
+
size 321
|
breaking_0.9_trained/90_most_difficult/out.log
ADDED
|
@@ -0,0 +1,497 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2024-09-07,12:35:40 | INFO | No latest resume checkpoint found in /home/breaking_0.9_trained/90_most_difficult/checkpoints.
|
| 2 |
+
2024-09-07,12:35:41 | INFO | Running in distributed mode with multiple processes. Device: cuda:0.Process (global: 0, local 0), total 2.
|
| 3 |
+
2024-09-07,12:35:41 | INFO | Loaded ViT-B-32 model config.
|
| 4 |
+
2024-09-07,12:35:42 | INFO | Model:
|
| 5 |
+
2024-09-07,12:35:42 | INFO | CLIP(
|
| 6 |
+
(visual): VisionTransformer(
|
| 7 |
+
(patchnorm_pre_ln): Identity()
|
| 8 |
+
(conv1): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32), bias=False)
|
| 9 |
+
(patch_dropout): Identity()
|
| 10 |
+
(ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 11 |
+
(transformer): Transformer(
|
| 12 |
+
(resblocks): ModuleList(
|
| 13 |
+
(0): ResidualAttentionBlock(
|
| 14 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 15 |
+
(attn): MultiheadAttention(
|
| 16 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 17 |
+
)
|
| 18 |
+
(ls_1): Identity()
|
| 19 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 20 |
+
(mlp): Sequential(
|
| 21 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 22 |
+
(gelu): GELU(approximate='none')
|
| 23 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 24 |
+
)
|
| 25 |
+
(ls_2): Identity()
|
| 26 |
+
)
|
| 27 |
+
(1): ResidualAttentionBlock(
|
| 28 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 29 |
+
(attn): MultiheadAttention(
|
| 30 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 31 |
+
)
|
| 32 |
+
(ls_1): Identity()
|
| 33 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 34 |
+
(mlp): Sequential(
|
| 35 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 36 |
+
(gelu): GELU(approximate='none')
|
| 37 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 38 |
+
)
|
| 39 |
+
(ls_2): Identity()
|
| 40 |
+
)
|
| 41 |
+
(2): ResidualAttentionBlock(
|
| 42 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 43 |
+
(attn): MultiheadAttention(
|
| 44 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 45 |
+
)
|
| 46 |
+
(ls_1): Identity()
|
| 47 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 48 |
+
(mlp): Sequential(
|
| 49 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 50 |
+
(gelu): GELU(approximate='none')
|
| 51 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 52 |
+
)
|
| 53 |
+
(ls_2): Identity()
|
| 54 |
+
)
|
| 55 |
+
(3): ResidualAttentionBlock(
|
| 56 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 57 |
+
(attn): MultiheadAttention(
|
| 58 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 59 |
+
)
|
| 60 |
+
(ls_1): Identity()
|
| 61 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 62 |
+
(mlp): Sequential(
|
| 63 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 64 |
+
(gelu): GELU(approximate='none')
|
| 65 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 66 |
+
)
|
| 67 |
+
(ls_2): Identity()
|
| 68 |
+
)
|
| 69 |
+
(4): ResidualAttentionBlock(
|
| 70 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 71 |
+
(attn): MultiheadAttention(
|
| 72 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 73 |
+
)
|
| 74 |
+
(ls_1): Identity()
|
| 75 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 76 |
+
(mlp): Sequential(
|
| 77 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 78 |
+
(gelu): GELU(approximate='none')
|
| 79 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 80 |
+
)
|
| 81 |
+
(ls_2): Identity()
|
| 82 |
+
)
|
| 83 |
+
(5): ResidualAttentionBlock(
|
| 84 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 85 |
+
(attn): MultiheadAttention(
|
| 86 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 87 |
+
)
|
| 88 |
+
(ls_1): Identity()
|
| 89 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 90 |
+
(mlp): Sequential(
|
| 91 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 92 |
+
(gelu): GELU(approximate='none')
|
| 93 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 94 |
+
)
|
| 95 |
+
(ls_2): Identity()
|
| 96 |
+
)
|
| 97 |
+
(6): ResidualAttentionBlock(
|
| 98 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 99 |
+
(attn): MultiheadAttention(
|
| 100 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 101 |
+
)
|
| 102 |
+
(ls_1): Identity()
|
| 103 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 104 |
+
(mlp): Sequential(
|
| 105 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 106 |
+
(gelu): GELU(approximate='none')
|
| 107 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 108 |
+
)
|
| 109 |
+
(ls_2): Identity()
|
| 110 |
+
)
|
| 111 |
+
(7): ResidualAttentionBlock(
|
| 112 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 113 |
+
(attn): MultiheadAttention(
|
| 114 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 115 |
+
)
|
| 116 |
+
(ls_1): Identity()
|
| 117 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 118 |
+
(mlp): Sequential(
|
| 119 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 120 |
+
(gelu): GELU(approximate='none')
|
| 121 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 122 |
+
)
|
| 123 |
+
(ls_2): Identity()
|
| 124 |
+
)
|
| 125 |
+
(8): ResidualAttentionBlock(
|
| 126 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 127 |
+
(attn): MultiheadAttention(
|
| 128 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 129 |
+
)
|
| 130 |
+
(ls_1): Identity()
|
| 131 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 132 |
+
(mlp): Sequential(
|
| 133 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 134 |
+
(gelu): GELU(approximate='none')
|
| 135 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 136 |
+
)
|
| 137 |
+
(ls_2): Identity()
|
| 138 |
+
)
|
| 139 |
+
(9): ResidualAttentionBlock(
|
| 140 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 141 |
+
(attn): MultiheadAttention(
|
| 142 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 143 |
+
)
|
| 144 |
+
(ls_1): Identity()
|
| 145 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 146 |
+
(mlp): Sequential(
|
| 147 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 148 |
+
(gelu): GELU(approximate='none')
|
| 149 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 150 |
+
)
|
| 151 |
+
(ls_2): Identity()
|
| 152 |
+
)
|
| 153 |
+
(10): ResidualAttentionBlock(
|
| 154 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 155 |
+
(attn): MultiheadAttention(
|
| 156 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 157 |
+
)
|
| 158 |
+
(ls_1): Identity()
|
| 159 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 160 |
+
(mlp): Sequential(
|
| 161 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 162 |
+
(gelu): GELU(approximate='none')
|
| 163 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 164 |
+
)
|
| 165 |
+
(ls_2): Identity()
|
| 166 |
+
)
|
| 167 |
+
(11): ResidualAttentionBlock(
|
| 168 |
+
(ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 169 |
+
(attn): MultiheadAttention(
|
| 170 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
|
| 171 |
+
)
|
| 172 |
+
(ls_1): Identity()
|
| 173 |
+
(ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 174 |
+
(mlp): Sequential(
|
| 175 |
+
(c_fc): Linear(in_features=768, out_features=3072, bias=True)
|
| 176 |
+
(gelu): GELU(approximate='none')
|
| 177 |
+
(c_proj): Linear(in_features=3072, out_features=768, bias=True)
|
| 178 |
+
)
|
| 179 |
+
(ls_2): Identity()
|
| 180 |
+
)
|
| 181 |
+
)
|
| 182 |
+
)
|
| 183 |
+
(ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
|
| 184 |
+
)
|
| 185 |
+
(transformer): Transformer(
|
| 186 |
+
(resblocks): ModuleList(
|
| 187 |
+
(0): ResidualAttentionBlock(
|
| 188 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 189 |
+
(attn): MultiheadAttention(
|
| 190 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 191 |
+
)
|
| 192 |
+
(ls_1): Identity()
|
| 193 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 194 |
+
(mlp): Sequential(
|
| 195 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 196 |
+
(gelu): GELU(approximate='none')
|
| 197 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 198 |
+
)
|
| 199 |
+
(ls_2): Identity()
|
| 200 |
+
)
|
| 201 |
+
(1): ResidualAttentionBlock(
|
| 202 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 203 |
+
(attn): MultiheadAttention(
|
| 204 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 205 |
+
)
|
| 206 |
+
(ls_1): Identity()
|
| 207 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 208 |
+
(mlp): Sequential(
|
| 209 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 210 |
+
(gelu): GELU(approximate='none')
|
| 211 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 212 |
+
)
|
| 213 |
+
(ls_2): Identity()
|
| 214 |
+
)
|
| 215 |
+
(2): ResidualAttentionBlock(
|
| 216 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 217 |
+
(attn): MultiheadAttention(
|
| 218 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 219 |
+
)
|
| 220 |
+
(ls_1): Identity()
|
| 221 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 222 |
+
(mlp): Sequential(
|
| 223 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 224 |
+
(gelu): GELU(approximate='none')
|
| 225 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 226 |
+
)
|
| 227 |
+
(ls_2): Identity()
|
| 228 |
+
)
|
| 229 |
+
(3): ResidualAttentionBlock(
|
| 230 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 231 |
+
(attn): MultiheadAttention(
|
| 232 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 233 |
+
)
|
| 234 |
+
(ls_1): Identity()
|
| 235 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 236 |
+
(mlp): Sequential(
|
| 237 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 238 |
+
(gelu): GELU(approximate='none')
|
| 239 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 240 |
+
)
|
| 241 |
+
(ls_2): Identity()
|
| 242 |
+
)
|
| 243 |
+
(4): ResidualAttentionBlock(
|
| 244 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 245 |
+
(attn): MultiheadAttention(
|
| 246 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 247 |
+
)
|
| 248 |
+
(ls_1): Identity()
|
| 249 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 250 |
+
(mlp): Sequential(
|
| 251 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 252 |
+
(gelu): GELU(approximate='none')
|
| 253 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 254 |
+
)
|
| 255 |
+
(ls_2): Identity()
|
| 256 |
+
)
|
| 257 |
+
(5): ResidualAttentionBlock(
|
| 258 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 259 |
+
(attn): MultiheadAttention(
|
| 260 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 261 |
+
)
|
| 262 |
+
(ls_1): Identity()
|
| 263 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 264 |
+
(mlp): Sequential(
|
| 265 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 266 |
+
(gelu): GELU(approximate='none')
|
| 267 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 268 |
+
)
|
| 269 |
+
(ls_2): Identity()
|
| 270 |
+
)
|
| 271 |
+
(6): ResidualAttentionBlock(
|
| 272 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 273 |
+
(attn): MultiheadAttention(
|
| 274 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 275 |
+
)
|
| 276 |
+
(ls_1): Identity()
|
| 277 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 278 |
+
(mlp): Sequential(
|
| 279 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 280 |
+
(gelu): GELU(approximate='none')
|
| 281 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 282 |
+
)
|
| 283 |
+
(ls_2): Identity()
|
| 284 |
+
)
|
| 285 |
+
(7): ResidualAttentionBlock(
|
| 286 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 287 |
+
(attn): MultiheadAttention(
|
| 288 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 289 |
+
)
|
| 290 |
+
(ls_1): Identity()
|
| 291 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 292 |
+
(mlp): Sequential(
|
| 293 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 294 |
+
(gelu): GELU(approximate='none')
|
| 295 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 296 |
+
)
|
| 297 |
+
(ls_2): Identity()
|
| 298 |
+
)
|
| 299 |
+
(8): ResidualAttentionBlock(
|
| 300 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 301 |
+
(attn): MultiheadAttention(
|
| 302 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 303 |
+
)
|
| 304 |
+
(ls_1): Identity()
|
| 305 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 306 |
+
(mlp): Sequential(
|
| 307 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 308 |
+
(gelu): GELU(approximate='none')
|
| 309 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 310 |
+
)
|
| 311 |
+
(ls_2): Identity()
|
| 312 |
+
)
|
| 313 |
+
(9): ResidualAttentionBlock(
|
| 314 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 315 |
+
(attn): MultiheadAttention(
|
| 316 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 317 |
+
)
|
| 318 |
+
(ls_1): Identity()
|
| 319 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 320 |
+
(mlp): Sequential(
|
| 321 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 322 |
+
(gelu): GELU(approximate='none')
|
| 323 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 324 |
+
)
|
| 325 |
+
(ls_2): Identity()
|
| 326 |
+
)
|
| 327 |
+
(10): ResidualAttentionBlock(
|
| 328 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 329 |
+
(attn): MultiheadAttention(
|
| 330 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 331 |
+
)
|
| 332 |
+
(ls_1): Identity()
|
| 333 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 334 |
+
(mlp): Sequential(
|
| 335 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 336 |
+
(gelu): GELU(approximate='none')
|
| 337 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 338 |
+
)
|
| 339 |
+
(ls_2): Identity()
|
| 340 |
+
)
|
| 341 |
+
(11): ResidualAttentionBlock(
|
| 342 |
+
(ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 343 |
+
(attn): MultiheadAttention(
|
| 344 |
+
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
|
| 345 |
+
)
|
| 346 |
+
(ls_1): Identity()
|
| 347 |
+
(ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 348 |
+
(mlp): Sequential(
|
| 349 |
+
(c_fc): Linear(in_features=512, out_features=2048, bias=True)
|
| 350 |
+
(gelu): GELU(approximate='none')
|
| 351 |
+
(c_proj): Linear(in_features=2048, out_features=512, bias=True)
|
| 352 |
+
)
|
| 353 |
+
(ls_2): Identity()
|
| 354 |
+
)
|
| 355 |
+
)
|
| 356 |
+
)
|
| 357 |
+
(token_embedding): Embedding(49408, 512)
|
| 358 |
+
(ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
|
| 359 |
+
)
|
| 360 |
+
2024-09-07,12:35:42 | INFO | Params:
|
| 361 |
+
2024-09-07,12:35:42 | INFO | accum_freq: 1
|
| 362 |
+
2024-09-07,12:35:42 | INFO | aug_cfg: {}
|
| 363 |
+
2024-09-07,12:35:42 | INFO | batch_size: 2048
|
| 364 |
+
2024-09-07,12:35:42 | INFO | beta1: 0.9
|
| 365 |
+
2024-09-07,12:35:42 | INFO | beta2: 0.98
|
| 366 |
+
2024-09-07,12:35:42 | INFO | checkpoint_path: /home/breaking_0.9_trained/90_most_difficult/checkpoints
|
| 367 |
+
2024-09-07,12:35:42 | INFO | coca_caption_loss_weight: 2.0
|
| 368 |
+
2024-09-07,12:35:42 | INFO | coca_contrastive_loss_weight: 1.0
|
| 369 |
+
2024-09-07,12:35:42 | INFO | copy_codebase: False
|
| 370 |
+
2024-09-07,12:35:42 | INFO | csv_caption_key: title
|
| 371 |
+
2024-09-07,12:35:42 | INFO | csv_img_key: filepath
|
| 372 |
+
2024-09-07,12:35:42 | INFO | csv_separator:
|
| 373 |
+
2024-09-07,12:35:42 | INFO | dataset_resampled: True
|
| 374 |
+
2024-09-07,12:35:42 | INFO | dataset_type: webdataset
|
| 375 |
+
2024-09-07,12:35:42 | INFO | ddp_static_graph: True
|
| 376 |
+
2024-09-07,12:35:42 | INFO | debug: False
|
| 377 |
+
2024-09-07,12:35:42 | INFO | delete_previous_checkpoint: False
|
| 378 |
+
2024-09-07,12:35:42 | INFO | device: cuda:0
|
| 379 |
+
2024-09-07,12:35:42 | INFO | dist_backend: nccl
|
| 380 |
+
2024-09-07,12:35:42 | INFO | dist_url: env://
|
| 381 |
+
2024-09-07,12:35:42 | INFO | distill: False
|
| 382 |
+
2024-09-07,12:35:42 | INFO | distill_model: None
|
| 383 |
+
2024-09-07,12:35:42 | INFO | distill_pretrained: None
|
| 384 |
+
2024-09-07,12:35:42 | INFO | distributed: True
|
| 385 |
+
2024-09-07,12:35:42 | INFO | epochs: 5
|
| 386 |
+
2024-09-07,12:35:42 | INFO | epochs_cooldown: None
|
| 387 |
+
2024-09-07,12:35:42 | INFO | eps: 1e-06
|
| 388 |
+
2024-09-07,12:35:42 | INFO | force_custom_text: False
|
| 389 |
+
2024-09-07,12:35:42 | INFO | force_image_size: None
|
| 390 |
+
2024-09-07,12:35:42 | INFO | force_patch_dropout: None
|
| 391 |
+
2024-09-07,12:35:42 | INFO | force_quick_gelu: False
|
| 392 |
+
2024-09-07,12:35:42 | INFO | gather_with_grad: True
|
| 393 |
+
2024-09-07,12:35:42 | INFO | grad_checkpointing: True
|
| 394 |
+
2024-09-07,12:35:42 | INFO | grad_clip_norm: None
|
| 395 |
+
2024-09-07,12:35:42 | INFO | horovod: False
|
| 396 |
+
2024-09-07,12:35:42 | INFO | image_mean: None
|
| 397 |
+
2024-09-07,12:35:42 | INFO | image_std: None
|
| 398 |
+
2024-09-07,12:35:42 | INFO | imagenet_v2: None
|
| 399 |
+
2024-09-07,12:35:42 | INFO | imagenet_val: None
|
| 400 |
+
2024-09-07,12:35:42 | INFO | local_loss: True
|
| 401 |
+
2024-09-07,12:35:42 | INFO | local_rank: 0
|
| 402 |
+
2024-09-07,12:35:42 | INFO | lock_image: False
|
| 403 |
+
2024-09-07,12:35:42 | INFO | lock_image_freeze_bn_stats: False
|
| 404 |
+
2024-09-07,12:35:42 | INFO | lock_image_unlocked_groups: 0
|
| 405 |
+
2024-09-07,12:35:42 | INFO | lock_text: False
|
| 406 |
+
2024-09-07,12:35:42 | INFO | lock_text_freeze_layer_norm: False
|
| 407 |
+
2024-09-07,12:35:42 | INFO | lock_text_unlocked_layers: 0
|
| 408 |
+
2024-09-07,12:35:42 | INFO | log_every_n_steps: 100
|
| 409 |
+
2024-09-07,12:35:42 | INFO | log_level: 20
|
| 410 |
+
2024-09-07,12:35:42 | INFO | log_local: False
|
| 411 |
+
2024-09-07,12:35:42 | INFO | log_path: /home/breaking_0.9_trained/90_most_difficult/out.log
|
| 412 |
+
2024-09-07,12:35:42 | INFO | logs: /home/breaking_0.9_trained
|
| 413 |
+
2024-09-07,12:35:42 | INFO | lr: 0.0005
|
| 414 |
+
2024-09-07,12:35:42 | INFO | lr_cooldown_end: 0.0
|
| 415 |
+
2024-09-07,12:35:42 | INFO | lr_cooldown_power: 1.0
|
| 416 |
+
2024-09-07,12:35:42 | INFO | lr_scheduler: cosine
|
| 417 |
+
2024-09-07,12:35:42 | INFO | model: ViT-B-32
|
| 418 |
+
2024-09-07,12:35:42 | INFO | name: 90_most_difficult
|
| 419 |
+
2024-09-07,12:35:42 | INFO | no_set_device_rank: False
|
| 420 |
+
2024-09-07,12:35:42 | INFO | precision: amp
|
| 421 |
+
2024-09-07,12:35:42 | INFO | pretrained:
|
| 422 |
+
2024-09-07,12:35:42 | INFO | pretrained_image: False
|
| 423 |
+
2024-09-07,12:35:42 | INFO | rank: 0
|
| 424 |
+
2024-09-07,12:35:42 | INFO | remote_sync: None
|
| 425 |
+
2024-09-07,12:35:42 | INFO | remote_sync_frequency: 300
|
| 426 |
+
2024-09-07,12:35:42 | INFO | remote_sync_protocol: s3
|
| 427 |
+
2024-09-07,12:35:42 | INFO | report_to: wandb
|
| 428 |
+
2024-09-07,12:35:42 | INFO | resume: None
|
| 429 |
+
2024-09-07,12:35:42 | INFO | save_frequency: 0
|
| 430 |
+
2024-09-07,12:35:42 | INFO | save_most_recent: True
|
| 431 |
+
2024-09-07,12:35:42 | INFO | seed: 0
|
| 432 |
+
2024-09-07,12:35:42 | INFO | skip_scheduler: False
|
| 433 |
+
2024-09-07,12:35:42 | INFO | tensorboard: False
|
| 434 |
+
2024-09-07,12:35:42 | INFO | tensorboard_path:
|
| 435 |
+
2024-09-07,12:35:42 | INFO | torchscript: False
|
| 436 |
+
2024-09-07,12:35:42 | INFO | trace: False
|
| 437 |
+
2024-09-07,12:35:42 | INFO | train_data: /home/breaking_0.9/{00000000..00000962}.tar
|
| 438 |
+
2024-09-07,12:35:42 | INFO | train_data_upsampling_factors: None
|
| 439 |
+
2024-09-07,12:35:42 | INFO | train_num_samples: 2560000
|
| 440 |
+
2024-09-07,12:35:42 | INFO | use_bn_sync: False
|
| 441 |
+
2024-09-07,12:35:42 | INFO | val_data: None
|
| 442 |
+
2024-09-07,12:35:42 | INFO | val_frequency: 1
|
| 443 |
+
2024-09-07,12:35:42 | INFO | val_num_samples: None
|
| 444 |
+
2024-09-07,12:35:42 | INFO | wandb: True
|
| 445 |
+
2024-09-07,12:35:42 | INFO | wandb_notes:
|
| 446 |
+
2024-09-07,12:35:42 | INFO | wandb_project_name: clip_text_hq_clusters
|
| 447 |
+
2024-09-07,12:35:42 | INFO | warmup: 500
|
| 448 |
+
2024-09-07,12:35:42 | INFO | wd: 0.2
|
| 449 |
+
2024-09-07,12:35:42 | INFO | workers: 4
|
| 450 |
+
2024-09-07,12:35:42 | INFO | world_size: 2
|
| 451 |
+
2024-09-07,12:35:42 | INFO | zeroshot_frequency: 2
|
| 452 |
+
2024-09-07,12:35:50 | INFO | Start epoch 0
|
| 453 |
+
2024-09-07,12:36:06 | INFO | Train Epoch: 0 [ 4096/2572288 (0%)] Data (t): 12.100 Batch (t): 16.653, 245.968/s, 122.984/s/gpu LR: 0.000001 Logit Scale: 14.286 Contrastive_loss: 8.3763 (8.3763) Loss: 8.3763 (8.3763)
|
| 454 |
+
2024-09-07,12:36:10 | INFO | Reducer buckets have been rebuilt in this iteration.
|
| 455 |
+
2024-09-07,12:40:27 | INFO | Train Epoch: 0 [ 413696/2572288 (16%)] Data (t): 0.552 Batch (t): 2.604, 1566.14/s, 783.069/s/gpu LR: 0.000101 Logit Scale: 14.266 Contrastive_loss: 8.1784 (8.2774) Loss: 8.1784 (8.2774)
|
| 456 |
+
2024-09-07,12:44:48 | INFO | Train Epoch: 0 [ 823296/2572288 (32%)] Data (t): 0.564 Batch (t): 2.612, 1563.16/s, 781.581/s/gpu LR: 0.000201 Logit Scale: 14.228 Contrastive_loss: 7.9988 (8.1845) Loss: 7.9988 (8.1845)
|
| 457 |
+
2024-09-07,12:49:09 | INFO | Train Epoch: 0 [1232896/2572288 (48%)] Data (t): 0.563 Batch (t): 2.613, 1564.24/s, 782.121/s/gpu LR: 0.000301 Logit Scale: 14.184 Contrastive_loss: 7.9886 (8.1355) Loss: 7.9886 (8.1355)
|
| 458 |
+
2024-09-07,12:53:31 | INFO | Train Epoch: 0 [1642496/2572288 (64%)] Data (t): 0.563 Batch (t): 2.612, 1570.66/s, 785.331/s/gpu LR: 0.000401 Logit Scale: 14.136 Contrastive_loss: 7.8946 (8.0873) Loss: 7.8946 (8.0873)
|
| 459 |
+
2024-09-07,12:57:52 | INFO | Train Epoch: 0 [2052096/2572288 (80%)] Data (t): 0.559 Batch (t): 2.609, 1567.63/s, 783.816/s/gpu LR: 0.000500 Logit Scale: 14.088 Contrastive_loss: 7.8069 (8.0406) Loss: 7.8069 (8.0406)
|
| 460 |
+
2024-09-07,13:02:13 | INFO | Train Epoch: 0 [2461696/2572288 (96%)] Data (t): 0.560 Batch (t): 2.611, 1576.40/s, 788.198/s/gpu LR: 0.000498 Logit Scale: 14.064 Contrastive_loss: 7.7242 (7.9954) Loss: 7.7242 (7.9954)
|
| 461 |
+
2024-09-07,13:03:23 | INFO | Train Epoch: 0 [2572288/2572288 (100%)] Data (t): 0.557 Batch (t): 2.607, 1578.68/s, 789.338/s/gpu LR: 0.000497 Logit Scale: 14.063 Contrastive_loss: 7.6876 (7.9569) Loss: 7.6876 (7.9569)
|
| 462 |
+
2024-09-07,13:03:26 | INFO | Start epoch 1
|
| 463 |
+
2024-09-07,13:03:37 | INFO | Train Epoch: 1 [ 4096/2572288 (0%)] Data (t): 9.636 Batch (t): 11.680, 350.674/s, 175.337/s/gpu LR: 0.000497 Logit Scale: 14.063 Contrastive_loss: 7.6917 (7.6917) Loss: 7.6917 (7.6917)
|
| 464 |
+
2024-09-07,13:07:57 | INFO | Train Epoch: 1 [ 413696/2572288 (16%)] Data (t): 0.538 Batch (t): 2.597, 1573.69/s, 786.847/s/gpu LR: 0.000491 Logit Scale: 14.065 Contrastive_loss: 7.6440 (7.6679) Loss: 7.6440 (7.6679)
|
| 465 |
+
2024-09-07,13:12:18 | INFO | Train Epoch: 1 [ 823296/2572288 (32%)] Data (t): 0.557 Batch (t): 2.609, 1572.22/s, 786.112/s/gpu LR: 0.000481 Logit Scale: 14.094 Contrastive_loss: 7.5110 (7.6156) Loss: 7.5110 (7.6156)
|
| 466 |
+
2024-09-07,13:16:39 | INFO | Train Epoch: 1 [1232896/2572288 (48%)] Data (t): 0.557 Batch (t): 2.609, 1571.16/s, 785.581/s/gpu LR: 0.000468 Logit Scale: 14.146 Contrastive_loss: 7.5073 (7.5885) Loss: 7.5073 (7.5885)
|
| 467 |
+
2024-09-07,13:20:59 | INFO | Train Epoch: 1 [1642496/2572288 (64%)] Data (t): 0.557 Batch (t): 2.607, 1575.14/s, 787.570/s/gpu LR: 0.000452 Logit Scale: 14.215 Contrastive_loss: 7.3952 (7.5499) Loss: 7.3952 (7.5499)
|
| 468 |
+
2024-09-07,13:25:20 | INFO | Train Epoch: 1 [2052096/2572288 (80%)] Data (t): 0.560 Batch (t): 2.610, 1553.88/s, 776.941/s/gpu LR: 0.000433 Logit Scale: 14.321 Contrastive_loss: 7.3651 (7.5191) Loss: 7.3651 (7.5191)
|
| 469 |
+
2024-09-07,13:29:42 | INFO | Train Epoch: 1 [2461696/2572288 (96%)] Data (t): 0.560 Batch (t): 2.611, 1564.99/s, 782.493/s/gpu LR: 0.000412 Logit Scale: 14.443 Contrastive_loss: 7.3117 (7.4894) Loss: 7.3117 (7.4894)
|
| 470 |
+
2024-09-07,13:30:52 | INFO | Train Epoch: 1 [2572288/2572288 (100%)] Data (t): 0.559 Batch (t): 2.607, 1581.65/s, 790.826/s/gpu LR: 0.000406 Logit Scale: 14.479 Contrastive_loss: 7.1874 (7.4517) Loss: 7.1874 (7.4517)
|
| 471 |
+
2024-09-07,13:30:55 | INFO | Start epoch 2
|
| 472 |
+
2024-09-07,13:31:06 | INFO | Train Epoch: 2 [ 4096/2572288 (0%)] Data (t): 9.545 Batch (t): 11.591, 353.370/s, 176.685/s/gpu LR: 0.000405 Logit Scale: 14.481 Contrastive_loss: 7.2456 (7.2456) Loss: 7.2456 (7.2456)
|
| 473 |
+
2024-09-07,13:35:27 | INFO | Train Epoch: 2 [ 413696/2572288 (16%)] Data (t): 0.545 Batch (t): 2.603, 1569.79/s, 784.896/s/gpu LR: 0.000381 Logit Scale: 14.642 Contrastive_loss: 7.2023 (7.2240) Loss: 7.2023 (7.2240)
|
| 474 |
+
2024-09-07,13:39:47 | INFO | Train Epoch: 2 [ 823296/2572288 (32%)] Data (t): 0.559 Batch (t): 2.609, 1571.68/s, 785.841/s/gpu LR: 0.000355 Logit Scale: 14.807 Contrastive_loss: 7.0309 (7.1596) Loss: 7.0309 (7.1596)
|
| 475 |
+
2024-09-07,13:44:09 | INFO | Train Epoch: 2 [1232896/2572288 (48%)] Data (t): 0.562 Batch (t): 2.611, 1565.52/s, 782.758/s/gpu LR: 0.000327 Logit Scale: 14.927 Contrastive_loss: 7.1046 (7.1459) Loss: 7.1046 (7.1459)
|
| 476 |
+
2024-09-07,13:48:30 | INFO | Train Epoch: 2 [1642496/2572288 (64%)] Data (t): 0.563 Batch (t): 2.614, 1569.29/s, 784.644/s/gpu LR: 0.000298 Logit Scale: 15.085 Contrastive_loss: 6.8606 (7.0888) Loss: 6.8606 (7.0888)
|
| 477 |
+
2024-09-07,13:52:51 | INFO | Train Epoch: 2 [2052096/2572288 (80%)] Data (t): 0.562 Batch (t): 2.614, 1568.08/s, 784.039/s/gpu LR: 0.000269 Logit Scale: 15.223 Contrastive_loss: 6.8216 (7.0443) Loss: 6.8216 (7.0443)
|
| 478 |
+
2024-09-07,13:57:13 | INFO | Train Epoch: 2 [2461696/2572288 (96%)] Data (t): 0.563 Batch (t): 2.613, 1567.16/s, 783.578/s/gpu LR: 0.000239 Logit Scale: 15.374 Contrastive_loss: 6.6735 (6.9913) Loss: 6.6735 (6.9913)
|
| 479 |
+
2024-09-07,13:58:23 | INFO | Train Epoch: 2 [2572288/2572288 (100%)] Data (t): 0.561 Batch (t): 2.611, 1576.59/s, 788.293/s/gpu LR: 0.000231 Logit Scale: 15.422 Contrastive_loss: 6.7912 (6.9663) Loss: 6.7912 (6.9663)
|
| 480 |
+
2024-09-07,13:58:26 | INFO | Start epoch 3
|
| 481 |
+
2024-09-07,13:58:37 | INFO | Train Epoch: 3 [ 4096/2572288 (0%)] Data (t): 9.438 Batch (t): 11.480, 356.781/s, 178.391/s/gpu LR: 0.000231 Logit Scale: 15.423 Contrastive_loss: 6.6685 (6.6685) Loss: 6.6685 (6.6685)
|
| 482 |
+
2024-09-07,14:02:58 | INFO | Train Epoch: 3 [ 413696/2572288 (16%)] Data (t): 0.547 Batch (t): 2.606, 1563.69/s, 781.844/s/gpu LR: 0.000202 Logit Scale: 15.573 Contrastive_loss: 6.7789 (6.7237) Loss: 6.7789 (6.7237)
|
| 483 |
+
2024-09-07,14:07:19 | INFO | Train Epoch: 3 [ 823296/2572288 (32%)] Data (t): 0.560 Batch (t): 2.613, 1571.76/s, 785.878/s/gpu LR: 0.000173 Logit Scale: 15.734 Contrastive_loss: 6.6477 (6.6984) Loss: 6.6477 (6.6984)
|
| 484 |
+
2024-09-07,14:11:41 | INFO | Train Epoch: 3 [1232896/2572288 (48%)] Data (t): 0.561 Batch (t): 2.613, 1567.31/s, 783.654/s/gpu LR: 0.000145 Logit Scale: 15.861 Contrastive_loss: 6.5687 (6.6660) Loss: 6.5687 (6.6660)
|
| 485 |
+
2024-09-07,14:16:02 | INFO | Train Epoch: 3 [1642496/2572288 (64%)] Data (t): 0.563 Batch (t): 2.614, 1571.02/s, 785.509/s/gpu LR: 0.000119 Logit Scale: 15.976 Contrastive_loss: 6.6244 (6.6576) Loss: 6.6244 (6.6576)
|
| 486 |
+
2024-09-07,14:20:23 | INFO | Train Epoch: 3 [2052096/2572288 (80%)] Data (t): 0.562 Batch (t): 2.614, 1569.91/s, 784.953/s/gpu LR: 0.000095 Logit Scale: 16.078 Contrastive_loss: 6.3511 (6.6066) Loss: 6.3511 (6.6066)
|
| 487 |
+
2024-09-07,14:24:45 | INFO | Train Epoch: 3 [2461696/2572288 (96%)] Data (t): 0.565 Batch (t): 2.617, 1565.69/s, 782.846/s/gpu LR: 0.000072 Logit Scale: 16.172 Contrastive_loss: 6.3930 (6.5761) Loss: 6.3930 (6.5761)
|
| 488 |
+
2024-09-07,14:25:56 | INFO | Train Epoch: 3 [2572288/2572288 (100%)] Data (t): 0.559 Batch (t): 2.610, 1579.64/s, 789.822/s/gpu LR: 0.000067 Logit Scale: 16.193 Contrastive_loss: 6.6402 (6.5841) Loss: 6.6402 (6.5841)
|
| 489 |
+
2024-09-07,14:25:58 | INFO | Start epoch 4
|
| 490 |
+
2024-09-07,14:26:10 | INFO | Train Epoch: 4 [ 4096/2572288 (0%)] Data (t): 9.504 Batch (t): 11.549, 354.649/s, 177.325/s/gpu LR: 0.000067 Logit Scale: 16.193 Contrastive_loss: 6.5566 (6.5566) Loss: 6.5566 (6.5566)
|
| 491 |
+
2024-09-07,14:30:31 | INFO | Train Epoch: 4 [ 413696/2572288 (16%)] Data (t): 0.546 Batch (t): 2.607, 1566.76/s, 783.382/s/gpu LR: 0.000048 Logit Scale: 16.260 Contrastive_loss: 6.4124 (6.4845) Loss: 6.4124 (6.4845)
|
| 492 |
+
2024-09-07,14:34:52 | INFO | Train Epoch: 4 [ 823296/2572288 (32%)] Data (t): 0.563 Batch (t): 2.615, 1552.31/s, 776.154/s/gpu LR: 0.000032 Logit Scale: 16.300 Contrastive_loss: 6.3687 (6.4459) Loss: 6.3687 (6.4459)
|
| 493 |
+
2024-09-07,14:39:14 | INFO | Train Epoch: 4 [1232896/2572288 (48%)] Data (t): 0.563 Batch (t): 2.617, 1567.87/s, 783.936/s/gpu LR: 0.000019 Logit Scale: 16.329 Contrastive_loss: 6.3193 (6.4142) Loss: 6.3193 (6.4142)
|
| 494 |
+
2024-09-07,14:43:35 | INFO | Train Epoch: 4 [1642496/2572288 (64%)] Data (t): 0.561 Batch (t): 2.613, 1567.16/s, 783.580/s/gpu LR: 0.000009 Logit Scale: 16.343 Contrastive_loss: 6.3362 (6.3986) Loss: 6.3362 (6.3986)
|
| 495 |
+
2024-09-07,14:47:57 | INFO | Train Epoch: 4 [2052096/2572288 (80%)] Data (t): 0.566 Batch (t): 2.618, 1566.95/s, 783.475/s/gpu LR: 0.000003 Logit Scale: 16.350 Contrastive_loss: 6.1241 (6.3529) Loss: 6.1241 (6.3529)
|
| 496 |
+
2024-09-07,14:52:19 | INFO | Train Epoch: 4 [2461696/2572288 (96%)] Data (t): 0.564 Batch (t): 2.618, 1557.86/s, 778.931/s/gpu LR: 0.000000 Logit Scale: 16.352 Contrastive_loss: 6.2534 (6.3387) Loss: 6.2534 (6.3387)
|
| 497 |
+
2024-09-07,14:53:29 | INFO | Train Epoch: 4 [2572288/2572288 (100%)] Data (t): 0.559 Batch (t): 2.611, 1577.81/s, 788.906/s/gpu LR: 0.000000 Logit Scale: 16.352 Contrastive_loss: 6.4079 (6.3473) Loss: 6.4079 (6.3473)
|
breaking_0.9_trained/90_most_difficult/params.txt
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accum_freq: 1
|
| 2 |
+
aug_cfg: {}
|
| 3 |
+
batch_size: 2048
|
| 4 |
+
beta1: 0.9
|
| 5 |
+
beta2: 0.98
|
| 6 |
+
checkpoint_path: /home/breaking_0.9_trained/90_most_difficult/checkpoints
|
| 7 |
+
coca_caption_loss_weight: 2.0
|
| 8 |
+
coca_contrastive_loss_weight: 1.0
|
| 9 |
+
copy_codebase: False
|
| 10 |
+
csv_caption_key: title
|
| 11 |
+
csv_img_key: filepath
|
| 12 |
+
csv_separator:
|
| 13 |
+
dataset_resampled: True
|
| 14 |
+
dataset_type: webdataset
|
| 15 |
+
ddp_static_graph: True
|
| 16 |
+
debug: False
|
| 17 |
+
delete_previous_checkpoint: False
|
| 18 |
+
device: cuda:0
|
| 19 |
+
dist_backend: nccl
|
| 20 |
+
dist_url: env://
|
| 21 |
+
distill: False
|
| 22 |
+
distill_model: None
|
| 23 |
+
distill_pretrained: None
|
| 24 |
+
distributed: True
|
| 25 |
+
epochs: 5
|
| 26 |
+
epochs_cooldown: None
|
| 27 |
+
eps: 1e-06
|
| 28 |
+
force_custom_text: False
|
| 29 |
+
force_image_size: None
|
| 30 |
+
force_patch_dropout: None
|
| 31 |
+
force_quick_gelu: False
|
| 32 |
+
gather_with_grad: True
|
| 33 |
+
grad_checkpointing: True
|
| 34 |
+
grad_clip_norm: None
|
| 35 |
+
horovod: False
|
| 36 |
+
image_mean: None
|
| 37 |
+
image_std: None
|
| 38 |
+
imagenet_v2: None
|
| 39 |
+
imagenet_val: None
|
| 40 |
+
local_loss: True
|
| 41 |
+
local_rank: 0
|
| 42 |
+
lock_image: False
|
| 43 |
+
lock_image_freeze_bn_stats: False
|
| 44 |
+
lock_image_unlocked_groups: 0
|
| 45 |
+
lock_text: False
|
| 46 |
+
lock_text_freeze_layer_norm: False
|
| 47 |
+
lock_text_unlocked_layers: 0
|
| 48 |
+
log_every_n_steps: 100
|
| 49 |
+
log_level: 20
|
| 50 |
+
log_local: False
|
| 51 |
+
log_path: /home/breaking_0.9_trained/90_most_difficult/out.log
|
| 52 |
+
logs: /home/breaking_0.9_trained
|
| 53 |
+
lr: 0.0005
|
| 54 |
+
lr_cooldown_end: 0.0
|
| 55 |
+
lr_cooldown_power: 1.0
|
| 56 |
+
lr_scheduler: cosine
|
| 57 |
+
model: ViT-B-32
|
| 58 |
+
name: 90_most_difficult
|
| 59 |
+
no_set_device_rank: False
|
| 60 |
+
precision: amp
|
| 61 |
+
pretrained:
|
| 62 |
+
pretrained_image: False
|
| 63 |
+
rank: 0
|
| 64 |
+
remote_sync: None
|
| 65 |
+
remote_sync_frequency: 300
|
| 66 |
+
remote_sync_protocol: s3
|
| 67 |
+
report_to: wandb
|
| 68 |
+
resume: None
|
| 69 |
+
save_frequency: 0
|
| 70 |
+
save_most_recent: True
|
| 71 |
+
seed: 0
|
| 72 |
+
skip_scheduler: False
|
| 73 |
+
tensorboard: False
|
| 74 |
+
tensorboard_path:
|
| 75 |
+
torchscript: False
|
| 76 |
+
trace: False
|
| 77 |
+
train_data: /home/breaking_0.9/{00000000..00000962}.tar
|
| 78 |
+
train_data_upsampling_factors: None
|
| 79 |
+
train_num_samples: 2560000
|
| 80 |
+
use_bn_sync: False
|
| 81 |
+
val_data: None
|
| 82 |
+
val_frequency: 1
|
| 83 |
+
val_num_samples: None
|
| 84 |
+
wandb: True
|
| 85 |
+
wandb_notes:
|
| 86 |
+
wandb_project_name: clip_text_hq_clusters
|
| 87 |
+
warmup: 500
|
| 88 |
+
wd: 0.2
|
| 89 |
+
workers: 4
|
| 90 |
+
world_size: 2
|
| 91 |
+
zeroshot_frequency: 2
|