ccore committed on
Commit
4005b60
·
verified ·
1 Parent(s): b25e993

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/config.json CHANGED
@@ -1,32 +1,31 @@
1
  {
2
- "_name_or_path": "ccore/getcode-350m",
 
 
 
3
  "architectures": [
4
- "LlamaForCausalLM"
5
  ],
6
- "attention_bias": false,
7
  "attention_dropout": 0.0,
8
- "bos_token_id": 0,
9
- "eos_token_id": 0,
10
- "head_dim": 64,
11
- "hidden_act": "silu",
12
- "hidden_size": 960,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 2560,
15
- "is_llama_config": true,
16
- "max_position_embeddings": 8192,
17
- "mlp_bias": false,
18
- "model_type": "llama",
19
- "num_attention_heads": 15,
20
- "num_hidden_layers": 32,
21
- "num_key_value_heads": 5,
22
- "pretraining_tp": 1,
23
- "rms_norm_eps": 1e-05,
24
- "rope_interleaved": false,
25
- "rope_scaling": null,
26
- "rope_theta": 100000,
27
- "tie_word_embeddings": true,
28
  "torch_dtype": "float32",
29
  "transformers_version": "4.47.0",
30
  "use_cache": true,
31
- "vocab_size": 49152
 
32
  }
 
1
  {
2
+ "_name_or_path": "facebook/opt-350m",
3
+ "_remove_final_layer_norm": false,
4
+ "activation_dropout": 0.0,
5
+ "activation_function": "relu",
6
  "architectures": [
7
+ "OPTForCausalLM"
8
  ],
 
9
  "attention_dropout": 0.0,
10
+ "bos_token_id": 2,
11
+ "do_layer_norm_before": false,
12
+ "dropout": 0.1,
13
+ "enable_bias": true,
14
+ "eos_token_id": 2,
15
+ "ffn_dim": 4096,
16
+ "hidden_size": 1024,
17
+ "init_std": 0.02,
18
+ "layer_norm_elementwise_affine": true,
19
+ "layerdrop": 0.0,
20
+ "max_position_embeddings": 2048,
21
+ "model_type": "opt",
22
+ "num_attention_heads": 16,
23
+ "num_hidden_layers": 24,
24
+ "pad_token_id": 1,
25
+ "prefix": "</s>",
 
 
 
 
26
  "torch_dtype": "float32",
27
  "transformers_version": "4.47.0",
28
  "use_cache": true,
29
+ "vocab_size": 50272,
30
+ "word_embed_proj_dim": 512
31
  }
last-checkpoint/generation_config.json CHANGED
@@ -1,6 +1,7 @@
1
  {
2
  "_from_model_config": true,
3
- "bos_token_id": 0,
4
- "eos_token_id": 0,
 
5
  "transformers_version": "4.47.0"
6
  }
 
1
  {
2
  "_from_model_config": true,
3
+ "bos_token_id": 2,
4
+ "eos_token_id": 2,
5
+ "pad_token_id": 1,
6
  "transformers_version": "4.47.0"
7
  }
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ab237cb7a4a12b5913039477bbfb178aac5c8809b3264af22d9311f26c092d4
3
- size 1447317080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bf1d2a952d87403172e62da7597cdc8a96002c708f0a5e79d7de04f4584bf8e
3
+ size 1324830880
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7587ea383570bccad6d8c4d94834efb8793ba6d29c3ce414358d378cac00fe4
3
- size 2894813242
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b2edf4f04bf9b4035f407e950f30565e1809557f5a012ef19f0215deee1a206
3
+ size 2649896030
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d245e05e72192c132e0f2edb6fdcae0c578c890f0fe912f17ec7b0bba2d38cc3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df28232a1de30d1b227c0efb54de7a0d430617f62ba514c9422d3e8b85d3ced8
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c785df7642646aef2a39d5240ed589e73b085e3b34051de846243fbbdb4deb6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dbf8f944ea194273642caaf2a78a4ac235bd542ada6976e2803f1735daa8e77
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,112 +1,24 @@
1
  {
2
- "best_metric": 0.344450980424881,
3
- "best_model_checkpoint": "./opt_trained2/checkpoint-803",
4
- "epoch": 5.0,
5
  "eval_steps": 500,
6
- "global_step": 4015,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
- {
12
- "epoch": 0.6226650062266501,
13
- "grad_norm": 1.3505125045776367,
14
- "learning_rate": 0.000387546699875467,
15
- "loss": 2.7763,
16
- "step": 500
17
- },
18
  {
19
  "epoch": 1.0,
20
- "eval_loss": 0.344450980424881,
21
- "eval_runtime": 273.127,
22
- "eval_samples_per_second": 10.457,
23
- "eval_steps_per_second": 2.614,
24
- "step": 803
25
- },
26
- {
27
- "epoch": 1.2453300124533002,
28
- "grad_norm": 1.1673122644424438,
29
- "learning_rate": 0.000375093399750934,
30
- "loss": 2.6965,
31
- "step": 1000
32
- },
33
- {
34
- "epoch": 1.86799501867995,
35
- "grad_norm": 0.7170485258102417,
36
- "learning_rate": 0.000362640099626401,
37
- "loss": 2.5469,
38
- "step": 1500
39
- },
40
- {
41
- "epoch": 2.0,
42
- "eval_loss": 0.3459263741970062,
43
- "eval_runtime": 272.8846,
44
- "eval_samples_per_second": 10.466,
45
- "eval_steps_per_second": 2.616,
46
- "step": 1606
47
- },
48
- {
49
- "epoch": 2.4906600249066004,
50
- "grad_norm": 1.186563491821289,
51
- "learning_rate": 0.000350186799501868,
52
- "loss": 2.2782,
53
- "step": 2000
54
- },
55
- {
56
- "epoch": 3.0,
57
- "eval_loss": 0.366046667098999,
58
- "eval_runtime": 272.9944,
59
- "eval_samples_per_second": 10.462,
60
- "eval_steps_per_second": 2.615,
61
- "step": 2409
62
- },
63
- {
64
- "epoch": 3.1133250311332503,
65
- "grad_norm": 1.2075772285461426,
66
- "learning_rate": 0.000337733499377335,
67
- "loss": 2.1429,
68
- "step": 2500
69
- },
70
- {
71
- "epoch": 3.7359900373599,
72
- "grad_norm": 1.2370275259017944,
73
- "learning_rate": 0.00032528019925280203,
74
- "loss": 1.7732,
75
- "step": 3000
76
- },
77
- {
78
- "epoch": 4.0,
79
- "eval_loss": 0.41189202666282654,
80
- "eval_runtime": 272.6721,
81
- "eval_samples_per_second": 10.474,
82
- "eval_steps_per_second": 2.619,
83
- "step": 3212
84
- },
85
- {
86
- "epoch": 4.35865504358655,
87
- "grad_norm": 1.3971993923187256,
88
- "learning_rate": 0.000312826899128269,
89
- "loss": 1.5169,
90
- "step": 3500
91
- },
92
- {
93
- "epoch": 4.981320049813201,
94
- "grad_norm": 1.8089447021484375,
95
- "learning_rate": 0.000300373599003736,
96
- "loss": 1.3553,
97
- "step": 4000
98
- },
99
- {
100
- "epoch": 5.0,
101
- "eval_loss": 0.5129567384719849,
102
- "eval_runtime": 272.4812,
103
- "eval_samples_per_second": 10.481,
104
- "eval_steps_per_second": 2.62,
105
- "step": 4015
106
  }
107
  ],
108
  "logging_steps": 500,
109
- "max_steps": 16060,
110
  "num_input_tokens_seen": 0,
111
  "num_train_epochs": 20,
112
  "save_steps": 500,
@@ -122,8 +34,8 @@
122
  "attributes": {}
123
  }
124
  },
125
- "total_flos": 1.564360708284e+17,
126
- "train_batch_size": 4,
127
  "trial_name": null,
128
  "trial_params": null
129
  }
 
1
  {
2
+ "best_metric": 2.4022321701049805,
3
+ "best_model_checkpoint": "./opt_trained1/checkpoint-268",
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 268,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
 
 
 
 
 
 
 
11
  {
12
  "epoch": 1.0,
13
+ "eval_loss": 2.4022321701049805,
14
+ "eval_runtime": 207.1452,
15
+ "eval_samples_per_second": 13.787,
16
+ "eval_steps_per_second": 1.723,
17
+ "step": 268
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  }
19
  ],
20
  "logging_steps": 500,
21
+ "max_steps": 5340,
22
  "num_input_tokens_seen": 0,
23
  "num_train_epochs": 20,
24
  "save_steps": 500,
 
34
  "attributes": {}
35
  }
36
  },
37
+ "total_flos": 1.809948514295808e+16,
38
+ "train_batch_size": 12,
39
  "trial_name": null,
40
  "trial_params": null
41
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83ebdc8e79af6a5c9fa4062f9f21aa548e32853f463a515333ea4cf561b7abfc
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5db2222136e05949276be8a807dc409239b47ab761a0c7e53aa25bc62897fc5
3
  size 5368