LamaDiab committed on
Commit
4bf38a7
·
verified ·
1 Parent(s): 9e3d963

Training in progress, epoch 1, checkpoint

Browse files
checkpoint-2071/README.md CHANGED
@@ -49,7 +49,7 @@ model-index:
49
  type: unknown
50
  metrics:
51
  - type: cosine_accuracy
52
- value: 0.9583552479743958
53
  name: Cosine Accuracy
54
  ---
55
 
@@ -114,9 +114,9 @@ print(embeddings.shape)
114
  # Get the similarity scores for the embeddings
115
  similarities = model.similarity(embeddings, embeddings)
116
  print(similarities)
117
- # tensor([[1.0000, 0.9723, 0.2011],
118
- # [0.9723, 1.0000, 0.1684],
119
- # [0.2011, 0.1684, 1.0000]])
120
  ```
121
 
122
  <!--
@@ -153,7 +153,7 @@ You can finetune this model on your own dataset.
153
 
154
  | Metric | Value |
155
  |:--------------------|:-----------|
156
- | **cosine_accuracy** | **0.9584** |
157
 
158
  <!--
159
  ## Bias, Risks and Limitations
@@ -227,9 +227,10 @@ You can finetune this model on your own dataset.
227
  - `eval_strategy`: steps
228
  - `per_device_train_batch_size`: 256
229
  - `per_device_eval_batch_size`: 256
 
230
  - `weight_decay`: 0.001
231
- - `num_train_epochs`: 6
232
- - `warmup_ratio`: 0.1
233
  - `fp16`: True
234
  - `dataloader_num_workers`: 1
235
  - `dataloader_prefetch_factor`: 2
@@ -252,17 +253,17 @@ You can finetune this model on your own dataset.
252
  - `gradient_accumulation_steps`: 1
253
  - `eval_accumulation_steps`: None
254
  - `torch_empty_cache_steps`: None
255
- - `learning_rate`: 5e-05
256
  - `weight_decay`: 0.001
257
  - `adam_beta1`: 0.9
258
  - `adam_beta2`: 0.999
259
  - `adam_epsilon`: 1e-08
260
  - `max_grad_norm`: 1.0
261
- - `num_train_epochs`: 6
262
  - `max_steps`: -1
263
  - `lr_scheduler_type`: linear
264
  - `lr_scheduler_kwargs`: {}
265
- - `warmup_ratio`: 0.1
266
  - `warmup_steps`: 0
267
  - `log_level`: passive
268
  - `log_level_replica`: warning
@@ -365,8 +366,8 @@ You can finetune this model on your own dataset.
365
  | Epoch | Step | Training Loss | Validation Loss | cosine_accuracy |
366
  |:------:|:----:|:-------------:|:---------------:|:---------------:|
367
  | 0.0005 | 1 | 4.1585 | - | - |
368
- | 0.4829 | 1000 | 2.6877 | 0.5079 | 0.9513 |
369
- | 0.9657 | 2000 | 1.3275 | 0.4753 | 0.9584 |
370
 
371
 
372
  ### Framework Versions
 
49
  type: unknown
50
  metrics:
51
  - type: cosine_accuracy
52
+ value: 0.9479440450668335
53
  name: Cosine Accuracy
54
  ---
55
 
 
114
  # Get the similarity scores for the embeddings
115
  similarities = model.similarity(embeddings, embeddings)
116
  print(similarities)
117
+ # tensor([[1.0000, 0.9667, 0.2278],
118
+ # [0.9667, 1.0000, 0.2161],
119
+ # [0.2278, 0.2161, 1.0000]])
120
  ```
121
 
122
  <!--
 
153
 
154
  | Metric | Value |
155
  |:--------------------|:-----------|
156
+ | **cosine_accuracy** | **0.9479** |
157
 
158
  <!--
159
  ## Bias, Risks and Limitations
 
227
  - `eval_strategy`: steps
228
  - `per_device_train_batch_size`: 256
229
  - `per_device_eval_batch_size`: 256
230
+ - `learning_rate`: 2e-05
231
  - `weight_decay`: 0.001
232
+ - `num_train_epochs`: 8
233
+ - `warmup_ratio`: 0.2
234
  - `fp16`: True
235
  - `dataloader_num_workers`: 1
236
  - `dataloader_prefetch_factor`: 2
 
253
  - `gradient_accumulation_steps`: 1
254
  - `eval_accumulation_steps`: None
255
  - `torch_empty_cache_steps`: None
256
+ - `learning_rate`: 2e-05
257
  - `weight_decay`: 0.001
258
  - `adam_beta1`: 0.9
259
  - `adam_beta2`: 0.999
260
  - `adam_epsilon`: 1e-08
261
  - `max_grad_norm`: 1.0
262
+ - `num_train_epochs`: 8
263
  - `max_steps`: -1
264
  - `lr_scheduler_type`: linear
265
  - `lr_scheduler_kwargs`: {}
266
+ - `warmup_ratio`: 0.2
267
  - `warmup_steps`: 0
268
  - `log_level`: passive
269
  - `log_level_replica`: warning
 
366
  | Epoch | Step | Training Loss | Validation Loss | cosine_accuracy |
367
  |:------:|:----:|:-------------:|:---------------:|:---------------:|
368
  | 0.0005 | 1 | 4.1585 | - | - |
369
+ | 0.4829 | 1000 | 3.2055 | 0.5676 | 0.9401 |
370
+ | 0.9657 | 2000 | 2.0069 | 0.5089 | 0.9479 |
371
 
372
 
373
  ### Framework Versions
checkpoint-2071/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3de5dca97f8efca7db244e5ecf375694116c51d56eacdf645845e5ff842a967d
3
  size 90864192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb8c69769dbd05c7072f34ea2f05925262d6c023d92e9f985b4dc449c8405505
3
  size 90864192
checkpoint-2071/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75dbae26dbbb59787e87d3df2b4327fa5b4cdd3a12193e4f6a22a276171741c9
3
  size 180607738
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e5da75eb39176d4a290a09f3f7bf1add65552f6dc6d63eaf67c1b5cb81f0edd
3
  size 180607738
checkpoint-2071/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57bbbe1983a8a9a6b14440da65bfa44f6e62bc74ad4d459b2d737491017f0d91
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:091e83d21e287330b10fb74cc1330244dba58d03818725e35e0230804e5f3346
3
  size 1064
checkpoint-2071/trainer_state.json CHANGED
@@ -18,41 +18,41 @@
18
  },
19
  {
20
  "epoch": 0.48285852245292127,
21
- "grad_norm": 6.439189910888672,
22
- "learning_rate": 4.0185036202735324e-05,
23
- "loss": 2.6877,
24
  "step": 1000
25
  },
26
  {
27
  "epoch": 0.48285852245292127,
28
- "eval_cosine_accuracy": 0.9513092637062073,
29
- "eval_loss": 0.5078648924827576,
30
- "eval_runtime": 35.5752,
31
- "eval_samples_per_second": 267.293,
32
- "eval_steps_per_second": 1.068,
33
  "step": 1000
34
  },
35
  {
36
  "epoch": 0.9657170449058425,
37
- "grad_norm": 7.472978591918945,
38
- "learning_rate": 4.661986944469284e-05,
39
- "loss": 1.3275,
40
  "step": 2000
41
  },
42
  {
43
  "epoch": 0.9657170449058425,
44
- "eval_cosine_accuracy": 0.9583552479743958,
45
- "eval_loss": 0.4752802550792694,
46
- "eval_runtime": 34.5575,
47
- "eval_samples_per_second": 275.164,
48
- "eval_steps_per_second": 1.1,
49
  "step": 2000
50
  }
51
  ],
52
  "logging_steps": 1000,
53
- "max_steps": 12426,
54
  "num_input_tokens_seen": 0,
55
- "num_train_epochs": 6,
56
  "save_steps": 500,
57
  "stateful_callbacks": {
58
  "TrainerControl": {
 
18
  },
19
  {
20
  "epoch": 0.48285852245292127,
21
+ "grad_norm": 6.372687339782715,
22
+ "learning_rate": 6.028968014484008e-06,
23
+ "loss": 3.2055,
24
  "step": 1000
25
  },
26
  {
27
  "epoch": 0.48285852245292127,
28
+ "eval_cosine_accuracy": 0.9400568008422852,
29
+ "eval_loss": 0.5675864219665527,
30
+ "eval_runtime": 35.3676,
31
+ "eval_samples_per_second": 268.862,
32
+ "eval_steps_per_second": 1.074,
33
  "step": 1000
34
  },
35
  {
36
  "epoch": 0.9657170449058425,
37
+ "grad_norm": 8.225760459899902,
38
+ "learning_rate": 1.2063971031985518e-05,
39
+ "loss": 2.0069,
40
  "step": 2000
41
  },
42
  {
43
  "epoch": 0.9657170449058425,
44
+ "eval_cosine_accuracy": 0.9479440450668335,
45
+ "eval_loss": 0.5088897347450256,
46
+ "eval_runtime": 35.33,
47
+ "eval_samples_per_second": 269.148,
48
+ "eval_steps_per_second": 1.076,
49
  "step": 2000
50
  }
51
  ],
52
  "logging_steps": 1000,
53
+ "max_steps": 16568,
54
  "num_input_tokens_seen": 0,
55
+ "num_train_epochs": 8,
56
  "save_steps": 500,
57
  "stateful_callbacks": {
58
  "TrainerControl": {
checkpoint-2071/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cce4819fc7434b230bbe88b6e1443de74fe11baa80cd2b8ebf29cee376218c0d
3
  size 5752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21cb488b39046dd5929796463136d527fa7f4b248e28c84eb80348f28dc5da8a
3
  size 5752