jnjj committed on
Commit
cdfe97d
·
verified ·
1 Parent(s): 98ecf5e

End of training

Browse files
Files changed (5) hide show
  1. README.md +2 -2
  2. config.json +3 -3
  3. model.safetensors +2 -2
  4. tokenizer.json +2 -2
  5. training_args.bin +1 -1
README.md CHANGED
@@ -34,11 +34,11 @@ More information needed
34
 
35
  The following hyperparameters were used during training:
36
  - learning_rate: 0.00015
37
- - train_batch_size: 4
38
  - eval_batch_size: 8
39
  - seed: 42
40
  - gradient_accumulation_steps: 2
41
- - total_train_batch_size: 8
42
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
43
  - lr_scheduler_type: cosine
44
  - num_epochs: 2
 
34
 
35
  The following hyperparameters were used during training:
36
  - learning_rate: 0.00015
37
+ - train_batch_size: 1
38
  - eval_batch_size: 8
39
  - seed: 42
40
  - gradient_accumulation_steps: 2
41
+ - total_train_batch_size: 2
42
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
43
  - lr_scheduler_type: cosine
44
  - num_epochs: 2
config.json CHANGED
@@ -15,9 +15,9 @@
15
  "max_position_embeddings": 256,
16
  "mlp_bias": false,
17
  "model_type": "llama",
18
- "num_attention_heads": 4,
19
- "num_hidden_layers": 16,
20
- "num_key_value_heads": 4,
21
  "pad_token_id": 128009,
22
  "pretraining_tp": 1,
23
  "rms_norm_eps": 1e-05,
 
15
  "max_position_embeddings": 256,
16
  "mlp_bias": false,
17
  "model_type": "llama",
18
+ "num_attention_heads": 1,
19
+ "num_hidden_layers": 1,
20
+ "num_key_value_heads": 1,
21
  "pad_token_id": 128009,
22
  "pretraining_tp": 1,
23
  "rms_norm_eps": 1e-05,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba19e7df94308ef945d40998187159d3fdd4f54975797d693642fc91b16d6ce1
3
- size 595395792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4851b4dbe8abaa173084fcabde68d36cc2663e327dfe0db8abde976d3c334933
3
+ size 404233304
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
3
- size 17209920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65ff5472d095ccd9332d9e723153d7bc7226cb6be9c1bffda738b5ba2e71bf26
3
+ size 17210084
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07ce2eedabd6ff5a715d1500ec555599f6649aafe0e1fd4410881b8523e8c1d2
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b912cab2de08013eb943323e08c88da607712fa3e9865005308152fecab7b93e
3
  size 5368