LamaDiab committed on
Commit
442a166
·
verified ·
1 Parent(s): dcf399f

Training in progress, epoch 6, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -50,7 +50,7 @@ model-index:
50
  type: unknown
51
  metrics:
52
  - type: cosine_accuracy
53
- value: 0.9435034394264221
54
  name: Cosine Accuracy
55
  ---
56
 
@@ -115,9 +115,9 @@ print(embeddings.shape)
115
  # Get the similarity scores for the embeddings
116
  similarities = model.similarity(embeddings, embeddings)
117
  print(similarities)
118
- # tensor([[1.0000, 0.6910, 0.3222],
119
- # [0.6910, 1.0000, 0.3099],
120
- # [0.3222, 0.3099, 1.0000]])
121
  ```
122
 
123
  <!--
@@ -154,7 +154,7 @@ You can finetune this model on your own dataset.
154
 
155
  | Metric | Value |
156
  |:--------------------|:-----------|
157
- | **cosine_accuracy** | **0.9435** |
158
 
159
  <!--
160
  ## Bias, Risks and Limitations
@@ -368,6 +368,7 @@ You can finetune this model on your own dataset.
368
  |:-----:|:-----:|:-------------:|:---------------:|:---------------:|
369
  | 4.0 | 9112 | 1.4316 | 0.7736 | 0.9375 |
370
  | 5.0 | 11390 | 1.3415 | 0.7541 | 0.9435 |
 
371
 
372
 
373
  ### Framework Versions
 
50
  type: unknown
51
  metrics:
52
  - type: cosine_accuracy
53
+ value: 0.945607602596283
54
  name: Cosine Accuracy
55
  ---
56
 
 
115
  # Get the similarity scores for the embeddings
116
  similarities = model.similarity(embeddings, embeddings)
117
  print(similarities)
118
+ # tensor([[1.0000, 0.7013, 0.2786],
119
+ # [0.7013, 1.0000, 0.2947],
120
+ # [0.2786, 0.2947, 1.0000]])
121
  ```
122
 
123
  <!--
 
154
 
155
  | Metric | Value |
156
  |:--------------------|:-----------|
157
+ | **cosine_accuracy** | **0.9456** |
158
 
159
  <!--
160
  ## Bias, Risks and Limitations
 
368
  |:-----:|:-----:|:-------------:|:---------------:|:---------------:|
369
  | 4.0 | 9112 | 1.4316 | 0.7736 | 0.9375 |
370
  | 5.0 | 11390 | 1.3415 | 0.7541 | 0.9435 |
371
+ | 6.0 | 13668 | 1.2848 | 0.7366 | 0.9456 |
372
 
373
 
374
  ### Framework Versions
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9f3402611e3b3c3f41f95210941eb470fba6ed05452b3849a27bd4dd433f91e
3
  size 90864192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:320486c0d72ec95abd1659c8bd328af000fee16acca976234080b60c5808f503
3
  size 90864192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76893f6607e3ebbfe6341fa0102ea836a998daa1b3f6024dda6df1ae75e07865
3
  size 180607738
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7755d08694b0b98e9f637c1325a98df63baa0531c0b16f44df4b52258ac0dfb8
3
  size 180607738
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37aac35e3c58c9053e71545ace22af7302bb0c360af070fa4f0ab6abc890f773
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5715fd5dd811d8d77c0931f527aac0675f97eb5e3128e2ac71138c0423321a1c
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fdd86fc34d6a0f3e0667119a9de9c78b3da8a344f393071c4b8da7ec715e0886
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c84fca30e11e9e228eeed287bc390c8efa222d906e82edaf51b0d6ee34d8323c
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bb1ca70040fe2933c9ba4528086551bb0337a289b19dabf99f736ffc96fdbab
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0988d979225dbf496efcd05bde3d2241f939432abad17b4ce650c14d14df89c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 5.0,
6
  "eval_steps": 5000,
7
- "global_step": 11390,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -63,6 +63,22 @@
63
  "eval_samples_per_second": 298.028,
64
  "eval_steps_per_second": 2.352,
65
  "step": 11390
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  }
67
  ],
68
  "logging_steps": 5000,
@@ -77,7 +93,7 @@
77
  "should_evaluate": false,
78
  "should_log": false,
79
  "should_save": true,
80
- "should_training_stop": false
81
  },
82
  "attributes": {}
83
  }
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 6.0,
6
  "eval_steps": 5000,
7
+ "global_step": 13668,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
63
  "eval_samples_per_second": 298.028,
64
  "eval_steps_per_second": 2.352,
65
  "step": 11390
66
+ },
67
+ {
68
+ "epoch": 6.0,
69
+ "grad_norm": 10.055571556091309,
70
+ "learning_rate": 2.2862368541380888e-08,
71
+ "loss": 1.2848,
72
+ "step": 13668
73
+ },
74
+ {
75
+ "epoch": 6.0,
76
+ "eval_cosine_accuracy": 0.945607602596283,
77
+ "eval_loss": 0.7366093397140503,
78
+ "eval_runtime": 32.7761,
79
+ "eval_samples_per_second": 289.998,
80
+ "eval_steps_per_second": 2.288,
81
+ "step": 13668
82
  }
83
  ],
84
  "logging_steps": 5000,
 
93
  "should_evaluate": false,
94
  "should_log": false,
95
  "should_save": true,
96
+ "should_training_stop": true
97
  },
98
  "attributes": {}
99
  }