thivy committed on
Commit
75e0866
·
verified ·
1 Parent(s): 520a0b4

Training in progress, step 10423, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -184,9 +184,9 @@ print(embeddings.shape)
184
  # Get the similarity scores for the embeddings
185
  similarities = model.similarity(embeddings, embeddings)
186
  print(similarities)
187
- # tensor([[26.0330, 15.2864, 1.5172],
188
- # [15.2864, 18.1790, 1.4252],
189
- # [ 1.5172, 1.4252, 10.7558]])
190
  ```
191
 
192
  <!--
@@ -664,6 +664,14 @@ You can finetune this model on your own dataset.
664
  | 0.9498 | 9900 | 0.0949 | - | - |
665
  | 0.9546 | 9950 | 0.0821 | - | - |
666
  | 0.9594 | 10000 | 0.0703 | 0.4128 | 0.1965 |
 
 
 
 
 
 
 
 
667
 
668
  </details>
669
 
 
184
  # Get the similarity scores for the embeddings
185
  similarities = model.similarity(embeddings, embeddings)
186
  print(similarities)
187
+ # tensor([[24.4025, 13.9460, 1.4325],
188
+ # [13.9460, 16.7882, 1.3582],
189
+ # [ 1.4325, 1.3582, 10.2211]])
190
  ```
191
 
192
  <!--
 
664
  | 0.9498 | 9900 | 0.0949 | - | - |
665
  | 0.9546 | 9950 | 0.0821 | - | - |
666
  | 0.9594 | 10000 | 0.0703 | 0.4128 | 0.1965 |
667
+ | 0.9642 | 10050 | 0.1004 | - | - |
668
+ | 0.9690 | 10100 | 0.0985 | - | - |
669
+ | 0.9738 | 10150 | 0.0906 | - | - |
670
+ | 0.9786 | 10200 | 0.0991 | - | - |
671
+ | 0.9834 | 10250 | 0.0811 | - | - |
672
+ | 0.9882 | 10300 | 0.1002 | - | - |
673
+ | 0.9930 | 10350 | 0.0872 | - | - |
674
+ | 0.9978 | 10400 | 0.1106 | - | - |
675
 
676
  </details>
677
 
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6e51ef566acf288b0d358a37576bde59dc6b167b6a53ed1857a9b3c7d17aafc
3
  size 728561776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14ac37cabac3c2a855ab2457343f660268bc81e8d0e56283536af5e9b8ee54aa
3
  size 728561776
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2a5fe0e654c9c6bf3638901cf2845a4adb158933e39f5eda410cb3cc0bd44c2
3
  size 1457369077
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16845e30cdd6a1aa5713c4ac7c985582bc729ad8423f628eab97f2533acd7c69
3
  size 1457369077
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57d9d7558c709b6a972ce8ad22ae68adf5da98342c109a0c8ae20210d62a4e5a
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75eaade8cd62d6b12db3659471111ec66106cdaf49d1adb4716aa9b8893ced8e
3
  size 14917
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2df9de5796b1324aede75beded346eab735a6ced51772973f92463378a41bb3e
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:523043000b1d832228380e68f0d4b68b296b7aacace12192adec8a11d32d0346
3
  size 14917
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e355c146c3d8245bc7493652056f88cd1816cc428f732f82ec17ea73202220c
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d8a685c45c8f559dab2fc37e9c9ee050f23f22c48d6988da111853aad98abdb
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 1500,
3
  "best_metric": 0.27095313455750514,
4
  "best_model_checkpoint": "models/splade-norbert4-base-retrieval-only/checkpoint-1500",
5
- "epoch": 0.9594166746618056,
6
  "eval_steps": 500,
7
- "global_step": 10000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3028,6 +3028,86 @@
3028
  "eval_samples_per_second": 39.94,
3029
  "eval_steps_per_second": 0.624,
3030
  "step": 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3031
  }
3032
  ],
3033
  "logging_steps": 50,
@@ -3042,7 +3122,7 @@
3042
  "should_evaluate": false,
3043
  "should_log": false,
3044
  "should_save": true,
3045
- "should_training_stop": false
3046
  },
3047
  "attributes": {}
3048
  }
 
2
  "best_global_step": 1500,
3
  "best_metric": 0.27095313455750514,
4
  "best_model_checkpoint": "models/splade-norbert4-base-retrieval-only/checkpoint-1500",
5
+ "epoch": 1.0,
6
  "eval_steps": 500,
7
+ "global_step": 10423,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3028
  "eval_samples_per_second": 39.94,
3029
  "eval_steps_per_second": 0.624,
3030
  "step": 10000
3031
+ },
3032
+ {
3033
+ "base_loss": 0.0908,
3034
+ "document_regularizer_loss": 0.0073,
3035
+ "epoch": 0.9642137580351147,
3036
+ "grad_norm": 0.605567991733551,
3037
+ "learning_rate": 7.974413646055437e-07,
3038
+ "loss": 0.1004,
3039
+ "query_regularizer_loss": 0.0022,
3040
+ "step": 10050
3041
+ },
3042
+ {
3043
+ "base_loss": 0.0893,
3044
+ "document_regularizer_loss": 0.007,
3045
+ "epoch": 0.9690108414084236,
3046
+ "grad_norm": 0.786066472530365,
3047
+ "learning_rate": 6.908315565031984e-07,
3048
+ "loss": 0.0985,
3049
+ "query_regularizer_loss": 0.0021,
3050
+ "step": 10100
3051
+ },
3052
+ {
3053
+ "base_loss": 0.0814,
3054
+ "document_regularizer_loss": 0.0072,
3055
+ "epoch": 0.9738079247817327,
3056
+ "grad_norm": 3.1570448875427246,
3057
+ "learning_rate": 5.842217484008529e-07,
3058
+ "loss": 0.0906,
3059
+ "query_regularizer_loss": 0.0021,
3060
+ "step": 10150
3061
+ },
3062
+ {
3063
+ "base_loss": 0.0897,
3064
+ "document_regularizer_loss": 0.0072,
3065
+ "epoch": 0.9786050081550417,
3066
+ "grad_norm": 9.441202163696289,
3067
+ "learning_rate": 4.776119402985075e-07,
3068
+ "loss": 0.0991,
3069
+ "query_regularizer_loss": 0.0022,
3070
+ "step": 10200
3071
+ },
3072
+ {
3073
+ "base_loss": 0.0715,
3074
+ "document_regularizer_loss": 0.0074,
3075
+ "epoch": 0.9834020915283508,
3076
+ "grad_norm": 0.39448684453964233,
3077
+ "learning_rate": 3.710021321961621e-07,
3078
+ "loss": 0.0811,
3079
+ "query_regularizer_loss": 0.0022,
3080
+ "step": 10250
3081
+ },
3082
+ {
3083
+ "base_loss": 0.0909,
3084
+ "document_regularizer_loss": 0.0072,
3085
+ "epoch": 0.9881991749016598,
3086
+ "grad_norm": 8.17419147491455,
3087
+ "learning_rate": 2.6439232409381664e-07,
3088
+ "loss": 0.1002,
3089
+ "query_regularizer_loss": 0.0021,
3090
+ "step": 10300
3091
+ },
3092
+ {
3093
+ "base_loss": 0.0779,
3094
+ "document_regularizer_loss": 0.0072,
3095
+ "epoch": 0.9929962582749688,
3096
+ "grad_norm": 2.363359212875366,
3097
+ "learning_rate": 1.5778251599147122e-07,
3098
+ "loss": 0.0872,
3099
+ "query_regularizer_loss": 0.0021,
3100
+ "step": 10350
3101
+ },
3102
+ {
3103
+ "base_loss": 0.1014,
3104
+ "document_regularizer_loss": 0.0071,
3105
+ "epoch": 0.9977933416482778,
3106
+ "grad_norm": 2.649932861328125,
3107
+ "learning_rate": 5.1172707889125806e-08,
3108
+ "loss": 0.1106,
3109
+ "query_regularizer_loss": 0.002,
3110
+ "step": 10400
3111
  }
3112
  ],
3113
  "logging_steps": 50,
 
3122
  "should_evaluate": false,
3123
  "should_log": false,
3124
  "should_save": true,
3125
+ "should_training_stop": true
3126
  },
3127
  "attributes": {}
3128
  }