baby-dev committed on
Commit
8c114a3
·
verified ·
1 Parent(s): 888c931

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -20,12 +20,12 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "q_proj",
24
- "o_proj",
25
- "k_proj",
26
- "up_proj",
27
  "v_proj",
28
  "down_proj",
 
 
 
 
29
  "gate_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
 
 
23
  "v_proj",
24
  "down_proj",
25
+ "up_proj",
26
+ "k_proj",
27
+ "q_proj",
28
+ "o_proj",
29
  "gate_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d253b566d126f3aba50addeec7b7eb765dcd73cb4b88bcfa0193f7cdfb91c3f5
3
  size 48552
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dfe9076ca68332c8800b76be309707308999cea6fde6c924587a91918a82301
3
  size 48552
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3d7ca924fd1345c00abf75278e6b67d1da97a8585f9d4fe25064f0dcd089c7f
3
  size 107046
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd2f3dd40831dcb9a61550b30ceb021fd76a499faf698e24e0372cfb55613941
3
  size 107046
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:adef08fda30a046b79221275438406fd995dd5a4dadb0a71a27682a7dcae8f3b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cc6143655f9449cd8116f34973ede555de22e2b5f4efa316d0986434dbbbc1d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63ac0c876d99cedd1c7e14354171b8bdaaea7163f00b90c59253d9bf8b43e991
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c429fe7554e31e133d2b97a95081dbbed81a5b94f15c6efd193ee9c92fb8a1b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,99 +1,40 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.0020964360587,
5
  "eval_steps": 500,
6
- "global_step": 358,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.4192872117400419,
13
- "grad_norm": 0.057723674923181534,
14
- "learning_rate": 0.00021813031161473087,
15
- "loss": 11.9272,
16
  "step": 50
17
  },
18
  {
19
  "epoch": 0.8385744234800838,
20
- "grad_norm": 0.04375343769788742,
21
- "learning_rate": 0.00018271954674220964,
22
- "loss": 11.9208,
23
  "step": 100
24
  },
25
  {
26
  "epoch": 0.9979035639412998,
27
- "eval_loss": 11.919021606445312,
28
- "eval_runtime": 0.4164,
29
- "eval_samples_per_second": 242.533,
30
- "eval_steps_per_second": 62.434,
31
  "step": 119
32
- },
33
- {
34
- "epoch": 1.2578616352201257,
35
- "grad_norm": 0.028496425598859787,
36
- "learning_rate": 0.00014730878186968837,
37
- "loss": 12.1057,
38
- "step": 150
39
- },
40
- {
41
- "epoch": 1.6771488469601676,
42
- "grad_norm": 0.027717996388673782,
43
- "learning_rate": 0.00011189801699716715,
44
- "loss": 11.9206,
45
- "step": 200
46
- },
47
- {
48
- "epoch": 1.9958071278825997,
49
- "eval_loss": 11.916767120361328,
50
- "eval_runtime": 0.4141,
51
- "eval_samples_per_second": 243.91,
52
- "eval_steps_per_second": 62.789,
53
- "step": 238
54
- },
55
- {
56
- "epoch": 2.0964360587002098,
57
- "grad_norm": 0.03638681024312973,
58
- "learning_rate": 7.64872521246459e-05,
59
- "loss": 12.0807,
60
- "step": 250
61
- },
62
- {
63
- "epoch": 2.5157232704402515,
64
- "grad_norm": 0.029723290354013443,
65
- "learning_rate": 4.107648725212465e-05,
66
- "loss": 11.927,
67
- "step": 300
68
- },
69
- {
70
- "epoch": 2.9350104821802936,
71
- "grad_norm": 0.08462727814912796,
72
- "learning_rate": 5.6657223796034e-06,
73
- "loss": 11.916,
74
- "step": 350
75
- },
76
- {
77
- "epoch": 2.9937106918238996,
78
- "eval_loss": 11.916586875915527,
79
- "eval_runtime": 0.4094,
80
- "eval_samples_per_second": 246.686,
81
- "eval_steps_per_second": 63.503,
82
- "step": 357
83
- },
84
- {
85
- "epoch": 3.0020964360587,
86
- "eval_loss": 11.916298866271973,
87
- "eval_runtime": 0.4304,
88
- "eval_samples_per_second": 234.654,
89
- "eval_steps_per_second": 60.406,
90
- "step": 358
91
  }
92
  ],
93
  "logging_steps": 50,
94
- "max_steps": 358,
95
  "num_input_tokens_seen": 0,
96
- "num_train_epochs": 4,
97
  "save_steps": 500,
98
  "stateful_callbacks": {
99
  "TrainerControl": {
@@ -102,12 +43,12 @@
102
  "should_evaluate": false,
103
  "should_log": false,
104
  "should_save": true,
105
- "should_training_stop": true
106
  },
107
  "attributes": {}
108
  }
109
  },
110
- "total_flos": 402704130048.0,
111
  "train_batch_size": 4,
112
  "trial_name": null,
113
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9979035639412998,
5
  "eval_steps": 500,
6
+ "global_step": 119,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.4192872117400419,
13
+ "grad_norm": 0.06033749505877495,
14
+ "learning_rate": 0.00019230769230769233,
15
+ "loss": 11.9275,
16
  "step": 50
17
  },
18
  {
19
  "epoch": 0.8385744234800838,
20
+ "grad_norm": 0.050237834453582764,
21
+ "learning_rate": 0.0001282051282051282,
22
+ "loss": 11.9214,
23
  "step": 100
24
  },
25
  {
26
  "epoch": 0.9979035639412998,
27
+ "eval_loss": 11.919645309448242,
28
+ "eval_runtime": 0.4167,
29
+ "eval_samples_per_second": 242.397,
30
+ "eval_steps_per_second": 62.399,
31
  "step": 119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  }
33
  ],
34
  "logging_steps": 50,
35
+ "max_steps": 200,
36
  "num_input_tokens_seen": 0,
37
+ "num_train_epochs": 2,
38
  "save_steps": 500,
39
  "stateful_callbacks": {
40
  "TrainerControl": {
 
43
  "should_evaluate": false,
44
  "should_log": false,
45
  "should_save": true,
46
+ "should_training_stop": false
47
  },
48
  "attributes": {}
49
  }
50
  },
51
+ "total_flos": 134180413440.0,
52
  "train_batch_size": 4,
53
  "trial_name": null,
54
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b72ad6f8496316ca67ff95e24c67b4d45a7065ba4f49d3236072eb77840500f
3
  size 6776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe1138b5d37c58217051b42dbc89cbbffcf2cf2a49a47b4580082cd863c6a61f
3
  size 6776