KrafterDen committed on
Commit
9f3778a
·
verified ·
1 Parent(s): 8ca7a04

Training in progress, step 100, checkpoint

Browse files
checkpoint-100/README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  library_name: peft
3
- base_model: IlyaGusev/rugpt_large_turbo_instructed
4
  ---
5
 
6
  # Model Card for Model ID
@@ -201,5 +201,7 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
201
 
202
  ### Framework versions
203
 
 
204
  - PEFT 0.9.0
 
205
  - PEFT 0.7.1
 
1
  ---
2
  library_name: peft
3
+ base_model: exontidev/SISUS_SIKERS
4
  ---
5
 
6
  # Model Card for Model ID
 
201
 
202
  ### Framework versions
203
 
204
+ - PEFT 0.10.0
205
  - PEFT 0.9.0
206
+ - PEFT 0.8.2
207
  - PEFT 0.7.1
checkpoint-100/adapter_config.json CHANGED
@@ -1,11 +1,12 @@
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
- "base_model_name_or_path": "IlyaGusev/rugpt_large_turbo_instructed",
5
  "bias": "none",
6
- "fan_in_fan_out": false,
7
  "inference_mode": true,
8
  "init_lora_weights": true,
 
9
  "layers_pattern": null,
10
  "layers_to_transform": null,
11
  "loftq_config": {},
 
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
+ "base_model_name_or_path": "exontidev/SISUS_SIKERS",
5
  "bias": "none",
6
+ "fan_in_fan_out": true,
7
  "inference_mode": true,
8
  "init_lora_weights": true,
9
+ "layer_replication": null,
10
  "layers_pattern": null,
11
  "layers_to_transform": null,
12
  "loftq_config": {},
checkpoint-100/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1a1118bbcbdab8294afd7683e4ef0e3a54d94d1b86e8087686f7e2822a75195
3
  size 9443384
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2b5285810c20bd09e835779ffe6024527b320ac033491cd8183484c601bf233
3
  size 9443384
checkpoint-100/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5445f987e06033ca8180dcacac7106c163198139c090c3fb6a5c5a123d3e3751
3
  size 18914450
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99d0b39339284219fa33e62f42f124120b19e564a33b374195bdae3896bb1592
3
  size 18914450
checkpoint-100/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11c0f1c9884efbd52a2ccba350f60152761f753ca8e4d0fe74b04f5dbf78a9a4
3
  size 14168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bab3f8144c2aa46aa41f9ab21aa5ec24e807b1a93fb2c1875d977349cf29cc5
3
  size 14168
checkpoint-100/trainer_state.json CHANGED
@@ -9,82 +9,82 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.42,
13
- "grad_norm": 0.18415939807891846,
14
  "learning_rate": 2.9999999999999997e-05,
15
- "loss": 3.8654,
16
  "step": 10
17
  },
18
  {
19
- "epoch": 0.84,
20
- "grad_norm": 0.23708771169185638,
21
  "learning_rate": 5.9999999999999995e-05,
22
- "loss": 3.8218,
23
  "step": 20
24
  },
25
  {
26
- "epoch": 1.25,
27
- "grad_norm": 0.3239809572696686,
28
  "learning_rate": 8.999999999999999e-05,
29
- "loss": 3.725,
30
  "step": 30
31
  },
32
  {
33
- "epoch": 1.67,
34
- "grad_norm": 0.38910844922065735,
35
  "learning_rate": 0.00011999999999999999,
36
- "loss": 3.4953,
37
  "step": 40
38
  },
39
  {
40
- "epoch": 2.09,
41
- "grad_norm": 0.520912230014801,
42
  "learning_rate": 0.00015,
43
- "loss": 3.0984,
44
  "step": 50
45
  },
46
  {
47
- "epoch": 2.51,
48
- "grad_norm": 0.6063631772994995,
49
  "learning_rate": 0.00017999999999999998,
50
- "loss": 2.5316,
51
  "step": 60
52
  },
53
  {
54
- "epoch": 2.92,
55
- "grad_norm": 0.4615532457828522,
56
  "learning_rate": 0.00020999999999999998,
57
- "loss": 1.8987,
58
  "step": 70
59
  },
60
  {
61
- "epoch": 3.34,
62
- "grad_norm": 0.16907210648059845,
63
  "learning_rate": 0.00023999999999999998,
64
- "loss": 1.6041,
65
  "step": 80
66
  },
67
  {
68
- "epoch": 3.76,
69
- "grad_norm": 0.11257671564817429,
70
  "learning_rate": 0.00027,
71
- "loss": 1.4732,
72
  "step": 90
73
  },
74
  {
75
- "epoch": 4.18,
76
- "grad_norm": 0.08190377801656723,
77
  "learning_rate": 0.0003,
78
- "loss": 1.4341,
79
  "step": 100
80
  }
81
  ],
82
  "logging_steps": 10,
83
- "max_steps": 175,
84
  "num_input_tokens_seen": 0,
85
- "num_train_epochs": 8,
86
  "save_steps": 100,
87
- "total_flos": 1.863006384782131e+16,
88
  "train_batch_size": 4,
89
  "trial_name": null,
90
  "trial_params": null
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.4177545691906005,
13
+ "grad_norm": 0.28227752447128296,
14
  "learning_rate": 2.9999999999999997e-05,
15
+ "loss": 4.1508,
16
  "step": 10
17
  },
18
  {
19
+ "epoch": 0.835509138381201,
20
+ "grad_norm": 0.31433430314064026,
21
  "learning_rate": 5.9999999999999995e-05,
22
+ "loss": 4.1593,
23
  "step": 20
24
  },
25
  {
26
+ "epoch": 1.2532637075718016,
27
+ "grad_norm": 0.3350953161716461,
28
  "learning_rate": 8.999999999999999e-05,
29
+ "loss": 4.0414,
30
  "step": 30
31
  },
32
  {
33
+ "epoch": 1.671018276762402,
34
+ "grad_norm": 0.2885706126689911,
35
  "learning_rate": 0.00011999999999999999,
36
+ "loss": 3.8411,
37
  "step": 40
38
  },
39
  {
40
+ "epoch": 2.0887728459530024,
41
+ "grad_norm": 0.23711609840393066,
42
  "learning_rate": 0.00015,
43
+ "loss": 3.6434,
44
  "step": 50
45
  },
46
  {
47
+ "epoch": 2.506527415143603,
48
+ "grad_norm": 0.21583135426044464,
49
  "learning_rate": 0.00017999999999999998,
50
+ "loss": 3.4636,
51
  "step": 60
52
  },
53
  {
54
+ "epoch": 2.9242819843342036,
55
+ "grad_norm": 0.18754692375659943,
56
  "learning_rate": 0.00020999999999999998,
57
+ "loss": 3.3154,
58
  "step": 70
59
  },
60
  {
61
+ "epoch": 3.342036553524804,
62
+ "grad_norm": 0.15951760113239288,
63
  "learning_rate": 0.00023999999999999998,
64
+ "loss": 3.2195,
65
  "step": 80
66
  },
67
  {
68
+ "epoch": 3.759791122715405,
69
+ "grad_norm": 0.14639759063720703,
70
  "learning_rate": 0.00027,
71
+ "loss": 3.122,
72
  "step": 90
73
  },
74
  {
75
+ "epoch": 4.177545691906005,
76
+ "grad_norm": 0.1860765665769577,
77
  "learning_rate": 0.0003,
78
+ "loss": 3.0677,
79
  "step": 100
80
  }
81
  ],
82
  "logging_steps": 10,
83
+ "max_steps": 300,
84
  "num_input_tokens_seen": 0,
85
+ "num_train_epochs": 14,
86
  "save_steps": 100,
87
+ "total_flos": 1.6201284405755904e+16,
88
  "train_batch_size": 4,
89
  "trial_name": null,
90
  "trial_params": null
checkpoint-100/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79f806349f0bb61a00c2d540daba3c569079d7140776f830f475a964ef346330
3
- size 4960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42329f5345a3c120af37c6fdbce453b0541524f81257e209baeb9a0b15e22c94
3
+ size 5024