iproskurina committed on
Commit
e78d092
·
verified ·
1 Parent(s): fd4c7a1

Model save

Browse files
README.md CHANGED
@@ -4,6 +4,8 @@ license: llama3.2
4
  base_model: meta-llama/Llama-3.2-1B
5
  tags:
6
  - generated_from_trainer
 
 
7
  model-index:
8
  - name: test
9
  results: []
@@ -15,6 +17,9 @@ should probably proofread and complete it, then remove this comment. -->
15
  # test
16
 
17
  This model is a fine-tuned version of [meta-llama/Llama-3.2-1B](https://huggingface.co/meta-llama/Llama-3.2-1B) on an unknown dataset.
 
 
 
18
 
19
  ## Model description
20
 
 
4
  base_model: meta-llama/Llama-3.2-1B
5
  tags:
6
  - generated_from_trainer
7
+ metrics:
8
+ - accuracy
9
  model-index:
10
  - name: test
11
  results: []
 
17
  # test
18
 
19
  This model is a fine-tuned version of [meta-llama/Llama-3.2-1B](https://huggingface.co/meta-llama/Llama-3.2-1B) on an unknown dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 2.6568
22
+ - Accuracy: 0.5230
23
 
24
  ## Model description
25
 
all_results.json CHANGED
@@ -1,16 +1,15 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_accuracy": 0.5619722863264766,
4
- "eval_loss": 2.4340596199035645,
5
- "eval_runtime": 13.228,
6
- "eval_samples": 1138,
7
- "eval_samples_per_second": 86.03,
8
- "eval_steps_per_second": 10.81,
9
- "perplexity": 11.405088550670825,
10
- "total_flos": 1.3626163133939712e+16,
11
- "train_loss": 2.4788927044784814,
12
- "train_runtime": 196.8793,
13
- "train_samples": 4558,
14
- "train_samples_per_second": 23.151,
15
- "train_steps_per_second": 5.79
16
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_accuracy": 0.5230333635625984,
4
+ "eval_loss": 2.656813859939575,
5
+ "eval_runtime": 59.8639,
6
+ "eval_samples_per_second": 85.611,
7
+ "eval_steps_per_second": 10.708,
8
+ "perplexity": 14.250811606566634,
9
+ "total_flos": 1695049253388288.0,
10
+ "train_loss": 2.6765631017550615,
11
+ "train_runtime": 106.623,
12
+ "train_samples": 567,
13
+ "train_samples_per_second": 5.318,
14
+ "train_steps_per_second": 1.332
 
15
  }
eval_epoch_1_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_accuracy": 0.5619722863264766,
4
- "eval_loss": 2.4340596199035645,
5
- "eval_runtime": 13.1565,
6
- "eval_samples_per_second": 86.497,
7
- "eval_steps_per_second": 10.869,
8
- "perplexity": 11.405088550670825
9
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_accuracy": 0.5230333635625984,
4
+ "eval_loss": 2.656813859939575,
5
+ "eval_runtime": 59.8639,
6
+ "eval_samples_per_second": 85.611,
7
+ "eval_steps_per_second": 10.708,
8
+ "perplexity": 14.250811606566634
9
  }
special_tokens_map.json CHANGED
@@ -12,5 +12,6 @@
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
- }
 
16
  }
 
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
+ },
16
+ "pad_token": "<|end_of_text|>"
17
  }
tokenizer_config.json CHANGED
@@ -2058,5 +2058,6 @@
2058
  "attention_mask"
2059
  ],
2060
  "model_max_length": 131072,
 
2061
  "tokenizer_class": "PreTrainedTokenizerFast"
2062
  }
 
2058
  "attention_mask"
2059
  ],
2060
  "model_max_length": 131072,
2061
+ "pad_token": "<|end_of_text|>",
2062
  "tokenizer_class": "PreTrainedTokenizerFast"
2063
  }
train_epoch_1_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
- "total_flos": 1.3626163133939712e+16,
4
- "train_loss": 2.4788927044784814,
5
- "train_runtime": 196.8793,
6
- "train_samples": 4558,
7
- "train_samples_per_second": 23.151,
8
- "train_steps_per_second": 5.79
9
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "total_flos": 1695049253388288.0,
4
+ "train_loss": 2.6765631017550615,
5
+ "train_runtime": 106.623,
6
+ "train_samples": 567,
7
+ "train_samples_per_second": 5.318,
8
+ "train_steps_per_second": 1.332
9
  }
trainer_state.json CHANGED
@@ -4,37 +4,32 @@
4
  "best_model_checkpoint": null,
5
  "epoch": 1.0,
6
  "eval_steps": 500,
7
- "global_step": 1140,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "epoch": 0.43859649122807015,
14
- "grad_norm": 17.125,
15
- "learning_rate": 5.622807017543859e-07,
16
- "loss": 2.5341,
17
- "step": 500
18
- },
19
- {
20
- "epoch": 0.8771929824561403,
21
- "grad_norm": 17.75,
22
- "learning_rate": 1.2368421052631579e-07,
23
- "loss": 2.4372,
24
- "step": 1000
25
  },
26
  {
27
  "epoch": 1.0,
28
- "eval_accuracy": 0.5619722863264766,
29
- "eval_loss": 2.4340596199035645,
30
- "eval_runtime": 13.1971,
31
- "eval_samples_per_second": 86.231,
32
- "eval_steps_per_second": 10.836,
33
- "step": 1140
34
  }
35
  ],
36
  "logging_steps": 500,
37
- "max_steps": 1140,
38
  "num_input_tokens_seen": 0,
39
  "num_train_epochs": 1,
40
  "save_steps": 500,
@@ -50,7 +45,7 @@
50
  "attributes": {}
51
  }
52
  },
53
- "total_flos": 1.3626163133939712e+16,
54
  "train_batch_size": 4,
55
  "trial_name": null,
56
  "trial_params": null
 
4
  "best_model_checkpoint": null,
5
  "epoch": 1.0,
6
  "eval_steps": 500,
7
+ "global_step": 142,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "epoch": 1.0,
14
+ "eval_accuracy": 0.5230333635625984,
15
+ "eval_loss": 2.656813859939575,
16
+ "eval_runtime": 59.8708,
17
+ "eval_samples_per_second": 85.601,
18
+ "eval_steps_per_second": 10.706,
19
+ "step": 142
 
 
 
 
 
20
  },
21
  {
22
  "epoch": 1.0,
23
+ "step": 142,
24
+ "total_flos": 1695049253388288.0,
25
+ "train_loss": 2.6765631017550615,
26
+ "train_runtime": 106.623,
27
+ "train_samples_per_second": 5.318,
28
+ "train_steps_per_second": 1.332
29
  }
30
  ],
31
  "logging_steps": 500,
32
+ "max_steps": 142,
33
  "num_input_tokens_seen": 0,
34
  "num_train_epochs": 1,
35
  "save_steps": 500,
 
45
  "attributes": {}
46
  }
47
  },
48
+ "total_flos": 1695049253388288.0,
49
  "train_batch_size": 4,
50
  "trial_name": null,
51
  "trial_params": null