beamaia committed on
Commit
b21e827
·
verified ·
1 Parent(s): b318cbd

Upload folder using huggingface_hub

Browse files
checkpoint-100/adapter_config.json CHANGED
@@ -21,8 +21,8 @@
21
  "target_modules": [
22
  "o_proj",
23
  "q_proj",
24
- "k_proj",
25
- "v_proj"
26
  ],
27
  "task_type": "CAUSAL_LM",
28
  "use_rslora": false
 
21
  "target_modules": [
22
  "o_proj",
23
  "q_proj",
24
+ "v_proj",
25
+ "k_proj"
26
  ],
27
  "task_type": "CAUSAL_LM",
28
  "use_rslora": false
checkpoint-100/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54fc4264d53b7817584e2bece3faa84e29259146c91e0d9dabff36297282b16a
3
  size 54560368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b3be29181cd0bc6297f4a2807e05a7b0465de3300470bff9c3702a18ad742a8
3
  size 54560368
checkpoint-100/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd976350789b28f06eccbcaaea9c89d73939a81fa429dd93a04b342df73878df
3
  size 109204730
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8415338139558c07ee0d2124c173c2b83f22304609bfc8e8f609bdd84eda6e4
3
  size 109204730
checkpoint-100/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56a5cfc0d180ca9515a82326e20e2dca1a0034b75f34ac0f024d267881ee3cf9
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59d8a37ef428d5411dd48b6d9ebacb82b8bc47ecb2fe0a62c0798494709644e5
3
  size 14512
checkpoint-100/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ff09f2de824a65539d4401b01781ac984c601cef0eefa6ee4f0a583e701bdd9
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d55e6ac2913c509fb7eb01dfd22e053c47bbb7a22a053f2d4c3ced416fed21f
3
  size 14512
checkpoint-100/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.4931565821170807,
3
  "best_model_checkpoint": "./mistral/07-03-24-Weni-pipeline_test3_Zeroshot-2_max_steps-3224_batch_64_2024-03-07_ppid_7/checkpoint-100",
4
  "epoch": 0.24798512089274644,
5
  "eval_steps": 100,
@@ -10,44 +10,44 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.05,
13
- "grad_norm": 1.9672261476516724,
14
- "learning_rate": 1.1801242236024846e-05,
15
- "loss": 1.8876,
16
  "step": 20
17
  },
18
  {
19
  "epoch": 0.1,
20
- "grad_norm": 3.650391101837158,
21
  "learning_rate": 2.3602484472049692e-05,
22
- "loss": 1.6653,
23
  "step": 40
24
  },
25
  {
26
  "epoch": 0.15,
27
- "grad_norm": 1.0555133819580078,
28
  "learning_rate": 3.60248447204969e-05,
29
- "loss": 1.3654,
30
  "step": 60
31
  },
32
  {
33
  "epoch": 0.2,
34
- "grad_norm": 0.6545969247817993,
35
  "learning_rate": 4.8447204968944106e-05,
36
- "loss": 0.9152,
37
  "step": 80
38
  },
39
  {
40
  "epoch": 0.25,
41
- "grad_norm": 0.24923652410507202,
42
  "learning_rate": 6.086956521739131e-05,
43
- "loss": 0.5565,
44
  "step": 100
45
  },
46
  {
47
  "epoch": 0.25,
48
- "eval_loss": 0.4931565821170807,
49
- "eval_runtime": 180.4575,
50
- "eval_samples_per_second": 15.887,
51
  "eval_steps_per_second": 0.997,
52
  "step": 100
53
  }
@@ -57,7 +57,7 @@
57
  "num_input_tokens_seen": 0,
58
  "num_train_epochs": 8,
59
  "save_steps": 100,
60
- "total_flos": 2.060663117494354e+17,
61
  "train_batch_size": 8,
62
  "trial_name": null,
63
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.49974650144577026,
3
  "best_model_checkpoint": "./mistral/07-03-24-Weni-pipeline_test3_Zeroshot-2_max_steps-3224_batch_64_2024-03-07_ppid_7/checkpoint-100",
4
  "epoch": 0.24798512089274644,
5
  "eval_steps": 100,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.05,
13
+ "grad_norm": 1.8224879503250122,
14
+ "learning_rate": 1.1180124223602485e-05,
15
+ "loss": 1.913,
16
  "step": 20
17
  },
18
  {
19
  "epoch": 0.1,
20
+ "grad_norm": 2.1416420936584473,
21
  "learning_rate": 2.3602484472049692e-05,
22
+ "loss": 1.6555,
23
  "step": 40
24
  },
25
  {
26
  "epoch": 0.15,
27
+ "grad_norm": 1.7435169219970703,
28
  "learning_rate": 3.60248447204969e-05,
29
+ "loss": 1.3503,
30
  "step": 60
31
  },
32
  {
33
  "epoch": 0.2,
34
+ "grad_norm": 0.6260000467300415,
35
  "learning_rate": 4.8447204968944106e-05,
36
+ "loss": 0.9403,
37
  "step": 80
38
  },
39
  {
40
  "epoch": 0.25,
41
+ "grad_norm": 0.6695926189422607,
42
  "learning_rate": 6.086956521739131e-05,
43
+ "loss": 0.5642,
44
  "step": 100
45
  },
46
  {
47
  "epoch": 0.25,
48
+ "eval_loss": 0.49974650144577026,
49
+ "eval_runtime": 180.5121,
50
+ "eval_samples_per_second": 15.883,
51
  "eval_steps_per_second": 0.997,
52
  "step": 100
53
  }
 
57
  "num_input_tokens_seen": 0,
58
  "num_train_epochs": 8,
59
  "save_steps": 100,
60
+ "total_flos": 2.0437256133148672e+17,
61
  "train_batch_size": 8,
62
  "trial_name": null,
63
  "trial_params": null
checkpoint-100/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:062109216a0084aea3dc340c0b85f9dfab30a0afc265e7ea132f1e1b1a292fe1
3
  size 5112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31dd07d3bcf35f257ee096471f2d6906b6529d45189b00701808fc5b82a560cf
3
  size 5112