KrafterDen committed on
Commit
e10defd
·
verified ·
1 Parent(s): cef30ad

Training in progress, step 100, checkpoint

Browse files
checkpoint-100/README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  library_name: peft
3
- base_model: exontidev/SISUS_SIKERS
4
  ---
5
 
6
  # Model Card for Model ID
@@ -201,5 +201,6 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
201
 
202
  ### Framework versions
203
 
 
204
  - PEFT 0.8.2
205
  - PEFT 0.7.1
 
1
  ---
2
  library_name: peft
3
+ base_model: IlyaGusev/rugpt_large_turbo_instructed
4
  ---
5
 
6
  # Model Card for Model ID
 
201
 
202
  ### Framework versions
203
 
204
+ - PEFT 0.9.0
205
  - PEFT 0.8.2
206
  - PEFT 0.7.1
checkpoint-100/adapter_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
- "base_model_name_or_path": "exontidev/SISUS_SIKERS",
5
  "bias": "none",
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
@@ -22,5 +22,6 @@
22
  "c_attn"
23
  ],
24
  "task_type": "CAUSAL_LM",
 
25
  "use_rslora": false
26
  }
 
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
+ "base_model_name_or_path": "IlyaGusev/rugpt_large_turbo_instructed",
5
  "bias": "none",
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
 
22
  "c_attn"
23
  ],
24
  "task_type": "CAUSAL_LM",
25
+ "use_dora": false,
26
  "use_rslora": false
27
  }
checkpoint-100/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83297727419ff3fdfcb22d67abebaec4169a2026b2157194b8c18c8d0d3fc7b0
3
  size 9443384
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:723d245e9ca9bf2cf103f4c7cbcc64b245ae77c89fe73a88a7d0665207cbc743
3
  size 9443384
checkpoint-100/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6bc1c2471ae085ae777895956c9c2d09bbe11bfa7f5423fadb03417a66d6b5e
3
  size 18914450
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24e0b23d4bf5faab5873c91c421fc565e50df58dc82d2f05d6243453c1a2f3ee
3
  size 18914450
checkpoint-100/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:095b555d6adde095e244513c780c9707a33481faa3b1e2781321113c02b6edbd
3
  size 14168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11c0f1c9884efbd52a2ccba350f60152761f753ca8e4d0fe74b04f5dbf78a9a4
3
  size 14168
checkpoint-100/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.24613491269902316,
5
  "eval_steps": 500,
6
  "global_step": 100,
7
  "is_hyper_param_search": false,
@@ -9,72 +9,82 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.02,
 
13
  "learning_rate": 2.9999999999999997e-05,
14
- "loss": 3.623,
15
  "step": 10
16
  },
17
  {
18
- "epoch": 0.05,
 
19
  "learning_rate": 5.9999999999999995e-05,
20
- "loss": 3.605,
21
  "step": 20
22
  },
23
  {
24
- "epoch": 0.07,
 
25
  "learning_rate": 8.999999999999999e-05,
26
- "loss": 3.5168,
27
  "step": 30
28
  },
29
  {
30
- "epoch": 0.1,
 
31
  "learning_rate": 0.00011999999999999999,
32
- "loss": 3.3511,
33
  "step": 40
34
  },
35
  {
36
- "epoch": 0.12,
 
37
  "learning_rate": 0.00015,
38
- "loss": 3.1952,
39
  "step": 50
40
  },
41
  {
42
- "epoch": 0.15,
 
43
  "learning_rate": 0.00017999999999999998,
44
- "loss": 3.0575,
45
  "step": 60
46
  },
47
  {
48
- "epoch": 0.17,
 
49
  "learning_rate": 0.00020999999999999998,
50
- "loss": 2.8575,
51
  "step": 70
52
  },
53
  {
54
- "epoch": 0.2,
 
55
  "learning_rate": 0.00023999999999999998,
56
- "loss": 2.7029,
57
  "step": 80
58
  },
59
  {
60
- "epoch": 0.22,
 
61
  "learning_rate": 0.00027,
62
- "loss": 2.6392,
63
  "step": 90
64
  },
65
  {
66
- "epoch": 0.25,
 
67
  "learning_rate": 0.0003,
68
- "loss": 2.5679,
69
  "step": 100
70
  }
71
  ],
72
  "logging_steps": 10,
73
- "max_steps": 200,
74
  "num_input_tokens_seen": 0,
75
- "num_train_epochs": 1,
76
  "save_steps": 100,
77
- "total_flos": 1.3643358381416448e+16,
78
  "train_batch_size": 4,
79
  "trial_name": null,
80
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.177545691906005,
5
  "eval_steps": 500,
6
  "global_step": 100,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.42,
13
+ "grad_norm": 0.18151910603046417,
14
  "learning_rate": 2.9999999999999997e-05,
15
+ "loss": 3.8655,
16
  "step": 10
17
  },
18
  {
19
+ "epoch": 0.84,
20
+ "grad_norm": 0.23640306293964386,
21
  "learning_rate": 5.9999999999999995e-05,
22
+ "loss": 3.8223,
23
  "step": 20
24
  },
25
  {
26
+ "epoch": 1.25,
27
+ "grad_norm": 0.3272729814052582,
28
  "learning_rate": 8.999999999999999e-05,
29
+ "loss": 3.7256,
30
  "step": 30
31
  },
32
  {
33
+ "epoch": 1.67,
34
+ "grad_norm": 0.3942272365093231,
35
  "learning_rate": 0.00011999999999999999,
36
+ "loss": 3.4944,
37
  "step": 40
38
  },
39
  {
40
+ "epoch": 2.09,
41
+ "grad_norm": 0.5312587022781372,
42
  "learning_rate": 0.00015,
43
+ "loss": 3.0931,
44
  "step": 50
45
  },
46
  {
47
+ "epoch": 2.51,
48
+ "grad_norm": 0.6111555695533752,
49
  "learning_rate": 0.00017999999999999998,
50
+ "loss": 2.5215,
51
  "step": 60
52
  },
53
  {
54
+ "epoch": 2.92,
55
+ "grad_norm": 0.4672750234603882,
56
  "learning_rate": 0.00020999999999999998,
57
+ "loss": 1.8905,
58
  "step": 70
59
  },
60
  {
61
+ "epoch": 3.34,
62
+ "grad_norm": 0.16217181086540222,
63
  "learning_rate": 0.00023999999999999998,
64
+ "loss": 1.5992,
65
  "step": 80
66
  },
67
  {
68
+ "epoch": 3.76,
69
+ "grad_norm": 0.11098425090312958,
70
  "learning_rate": 0.00027,
71
+ "loss": 1.4717,
72
  "step": 90
73
  },
74
  {
75
+ "epoch": 4.18,
76
+ "grad_norm": 0.08228754252195358,
77
  "learning_rate": 0.0003,
78
+ "loss": 1.4336,
79
  "step": 100
80
  }
81
  ],
82
  "logging_steps": 10,
83
+ "max_steps": 300,
84
  "num_input_tokens_seen": 0,
85
+ "num_train_epochs": 14,
86
  "save_steps": 100,
87
+ "total_flos": 1.863006384782131e+16,
88
  "train_batch_size": 4,
89
  "trial_name": null,
90
  "trial_params": null
checkpoint-100/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d7fe1522993e9d0465d19bb981aba89a2d40513d15ab46cef263ce1ebaa4eb5
3
- size 4768
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8b7fad922cb586b3f4c9d21e8952021ac611c19938a5281fabb27589d026b64
3
+ size 4960