besimray committed on
Commit
32289a2
·
verified ·
1 Parent(s): 91ebd82

End of training

Browse files
Files changed (3) hide show
  1. README.md +21 -12
  2. adapter_model.bin +1 -1
  3. adapter_model.safetensors +1 -1
README.md CHANGED
@@ -24,8 +24,16 @@ bf16: auto
24
  chat_template: llama3
25
  dataset_prepared_path: null
26
  datasets:
27
- - path: mhenrichsen/alpaca_2k_test
28
- type: alpaca
 
 
 
 
 
 
 
 
29
  debug: null
30
  deepspeed: null
31
  early_stopping_patience: 3
@@ -57,7 +65,7 @@ lora_target_linear: true
57
  lr_scheduler: cosine
58
  max_steps: 150
59
  micro_batch_size: 10
60
- mlflow_experiment_name: mhenrichsen/alpaca_2k_test
61
  model_type: LlamaForCausalLM
62
  num_epochs: 5
63
  optimizer: adamw_bnb_8bit
@@ -78,7 +86,7 @@ wandb_entity: besimray24-rayon
78
  wandb_mode: online
79
  wandb_project: Public_TuningSN
80
  wandb_run: miner_id_24
81
- wandb_runid: 383a850e-bb15-45a2-8f4b-fc96eb001a74
82
  warmup_steps: 10
83
  weight_decay: 0.01
84
  xformers_attention: null
@@ -91,7 +99,7 @@ xformers_attention: null
91
 
92
  This model is a fine-tuned version of [unsloth/Llama-3.2-1B-Instruct](https://huggingface.co/unsloth/Llama-3.2-1B-Instruct) on the None dataset.
93
  It achieves the following results on the evaluation set:
94
- - Loss: 1.1679
95
 
96
  ## Model description
97
 
@@ -125,13 +133,14 @@ The following hyperparameters were used during training:
125
 
126
  | Training Loss | Epoch | Step | Validation Loss |
127
  |:-------------:|:------:|:----:|:---------------:|
128
- | 1.3028 | 0.0211 | 1 | 1.2579 |
129
- | 1.3521 | 0.4211 | 20 | 1.1702 |
130
- | 1.1977 | 0.8421 | 40 | 1.1533 |
131
- | 1.099 | 1.2632 | 60 | 1.1519 |
132
- | 1.0658 | 1.6842 | 80 | 1.1523 |
133
- | 1.0091 | 2.1053 | 100 | 1.1575 |
134
- | 1.1045 | 2.5263 | 120 | 1.1679 |
 
135
 
136
 
137
  ### Framework versions
 
24
  chat_template: llama3
25
  dataset_prepared_path: null
26
  datasets:
27
+ - data_files:
28
+ - MATH-Hard_train_data.json
29
+ ds_type: json
30
+ path: /workspace/input_data/MATH-Hard_train_data.json
31
+ type:
32
+ field_input: problem
33
+ field_instruction: type
34
+ field_output: solution
35
+ system_format: '{system}'
36
+ system_prompt: ''
37
  debug: null
38
  deepspeed: null
39
  early_stopping_patience: 3
 
65
  lr_scheduler: cosine
66
  max_steps: 150
67
  micro_batch_size: 10
68
+ mlflow_experiment_name: /tmp/MATH-Hard_train_data.json
69
  model_type: LlamaForCausalLM
70
  num_epochs: 5
71
  optimizer: adamw_bnb_8bit
 
86
  wandb_mode: online
87
  wandb_project: Public_TuningSN
88
  wandb_run: miner_id_24
89
+ wandb_runid: efa31d17-18f5-4448-b1b4-f65721354910
90
  warmup_steps: 10
91
  weight_decay: 0.01
92
  xformers_attention: null
 
99
 
100
  This model is a fine-tuned version of [unsloth/Llama-3.2-1B-Instruct](https://huggingface.co/unsloth/Llama-3.2-1B-Instruct) on the None dataset.
101
  It achieves the following results on the evaluation set:
102
+ - Loss: 0.7629
103
 
104
  ## Model description
105
 
 
133
 
134
  | Training Loss | Epoch | Step | Validation Loss |
135
  |:-------------:|:------:|:----:|:---------------:|
136
+ | 0.9595 | 0.0129 | 1 | 0.9746 |
137
+ | 0.9359 | 0.2572 | 20 | 0.8169 |
138
+ | 0.7895 | 0.5145 | 40 | 0.7905 |
139
+ | 0.7524 | 0.7717 | 60 | 0.7785 |
140
+ | 0.8127 | 1.0289 | 80 | 0.7698 |
141
+ | 0.7258 | 1.2862 | 100 | 0.7665 |
142
+ | 0.8212 | 1.5434 | 120 | 0.7636 |
143
+ | 0.6807 | 1.8006 | 140 | 0.7629 |
144
 
145
 
146
  ### Framework versions
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:06f5758c8cb21c14363a1d12df2489b3350d89721ddf69a0a83c34d8d1b99ba6
3
  size 45169354
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b27b15e9ee1833ae31432908afae3349855d2b7e578bcca141463bb3dcf1c209
3
  size 45169354
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9ac5f431e68bc0be3e94d65a484f12f766b83dc919f1a2a7192b4fa03404081
3
  size 45118424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8f124971f6e45e066092c45814571663abeeb1c7c37d9b8a60eedfc0a06e172
3
  size 45118424