SystemAdmin123 committed on
Commit
2616958
·
verified ·
1 Parent(s): 609a60c

Training in progress, step 200

Browse files
Files changed (3) hide show
  1. axolotl_config.yaml +9 -7
  2. model.safetensors +1 -1
  3. training_args.bin +1 -1
axolotl_config.yaml CHANGED
@@ -1,5 +1,5 @@
1
  base_model: unsloth/SmolLM-360M-Instruct
2
- batch_size: 32
3
  bf16: true
4
  chat_template: tokenizer_default_fallback_alpaca
5
  datasets:
@@ -17,27 +17,29 @@ device_map: auto
17
  eval_sample_packing: false
18
  eval_steps: 200
19
  flash_attention: true
20
- gpu_memory_limit: 80GiB
21
  group_by_length: true
22
  hub_model_id: SystemAdmin123/SmolLM-360M-Instruct
23
  hub_strategy: checkpoint
24
  learning_rate: 0.0002
25
  logging_steps: 10
26
  lr_scheduler: cosine
27
- max_steps: 2500
28
- micro_batch_size: 4
29
  model_type: AutoModelForCausalLM
30
  num_epochs: 100
31
  optimizer: adamw_bnb_8bit
32
- output_dir: /root/.sn56/axolotl/outputs/SmolLM-360M-Instruct
33
  pad_to_sequence_len: true
34
  resize_token_embeddings_to_32x: false
35
- sample_packing: false
36
- save_steps: 400
37
  save_total_limit: 1
38
  sequence_len: 2048
39
  tokenizer_type: GPT2TokenizerFast
40
  torch_dtype: bf16
 
 
41
  trust_remote_code: true
42
  val_set_size: 0.1
43
  wandb_entity: ''
 
1
  base_model: unsloth/SmolLM-360M-Instruct
2
+ batch_size: 92
3
  bf16: true
4
  chat_template: tokenizer_default_fallback_alpaca
5
  datasets:
 
17
  eval_sample_packing: false
18
  eval_steps: 200
19
  flash_attention: true
20
+ gradient_checkpointing: true
21
  group_by_length: true
22
  hub_model_id: SystemAdmin123/SmolLM-360M-Instruct
23
  hub_strategy: checkpoint
24
  learning_rate: 0.0002
25
  logging_steps: 10
26
  lr_scheduler: cosine
27
+ max_steps: 10000
28
+ micro_batch_size: 23
29
  model_type: AutoModelForCausalLM
30
  num_epochs: 100
31
  optimizer: adamw_bnb_8bit
32
+ output_dir: /root/.sn56/axolotl/tmp/SmolLM-360M-Instruct
33
  pad_to_sequence_len: true
34
  resize_token_embeddings_to_32x: false
35
+ sample_packing: true
36
+ save_steps: 200
37
  save_total_limit: 1
38
  sequence_len: 2048
39
  tokenizer_type: GPT2TokenizerFast
40
  torch_dtype: bf16
41
+ training_args_kwargs:
42
+ hub_private_repo: true
43
  trust_remote_code: true
44
  val_set_size: 0.1
45
  wandb_entity: ''
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4dc1cc1cc9b54bfe6d9ce46c6c48d5e71549cfb18e37c573da51b96a6b7c6fc
3
  size 723674912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:089ab77c575af96a66c3eebde29eb20b4802b1ca35d7fcd6a82088b5dfc73176
3
  size 723674912
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c005ee5b7b542fe5c31ab145aa5cb83513af587ae4027e5fb31ac777061c2a2
3
  size 6840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dcec2cd7965a72fb6ad084d768d0c5ed5e5db711cd1f0987f9e8de610b55004
3
  size 6840