izzcw committed on
Commit
74bc8e0
·
verified ·
1 Parent(s): 608ccbd

End of training

Browse files
README.md CHANGED
@@ -16,9 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # llama3_70b_lora_sft_cooking
18
 
19
- This model is a fine-tuned version of [meta-llama/Meta-Llama-3-70B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct) on the None dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.3289
22
 
23
  ## Model description
24
 
 
16
 
17
  # llama3_70b_lora_sft_cooking
18
 
19
+ This model is a fine-tuned version of [meta-llama/Meta-Llama-3-70B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct) on the identity and the filtered_cooking_train_data datasets.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.3297
22
 
23
  ## Model description
24
 
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 0.9552238805970149,
3
- "eval_loss": 0.8879265189170837,
4
- "eval_runtime": 4.3461,
5
- "eval_samples_per_second": 5.062,
6
- "eval_steps_per_second": 0.69,
7
- "total_flos": 706712843583488.0,
8
- "train_loss": 0.7195637822151184,
9
- "train_runtime": 712.756,
10
- "train_samples_per_second": 1.5,
11
- "train_steps_per_second": 0.011
12
  }
 
1
  {
2
+ "epoch": 0.9959839357429718,
3
+ "eval_loss": 0.32974663376808167,
4
+ "eval_runtime": 16.1177,
5
+ "eval_samples_per_second": 5.088,
6
+ "eval_steps_per_second": 0.682,
7
+ "total_flos": 2886883717677056.0,
8
+ "train_loss": 0.46779590124084103,
9
+ "train_runtime": 2530.0595,
10
+ "train_samples_per_second": 1.575,
11
+ "train_steps_per_second": 0.012
12
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 0.9552238805970149,
3
- "eval_loss": 0.8879265189170837,
4
- "eval_runtime": 4.3461,
5
- "eval_samples_per_second": 5.062,
6
- "eval_steps_per_second": 0.69
7
  }
 
1
  {
2
+ "epoch": 0.9959839357429718,
3
+ "eval_loss": 0.32974663376808167,
4
+ "eval_runtime": 16.1177,
5
+ "eval_samples_per_second": 5.088,
6
+ "eval_steps_per_second": 0.682
7
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 0.9552238805970149,
3
- "total_flos": 706712843583488.0,
4
- "train_loss": 0.7195637822151184,
5
- "train_runtime": 712.756,
6
- "train_samples_per_second": 1.5,
7
- "train_steps_per_second": 0.011
8
  }
 
1
  {
2
+ "epoch": 0.9959839357429718,
3
+ "total_flos": 2886883717677056.0,
4
+ "train_loss": 0.46779590124084103,
5
+ "train_runtime": 2530.0595,
6
+ "train_samples_per_second": 1.575,
7
+ "train_steps_per_second": 0.012
8
  }
trainer_state.json CHANGED
@@ -1,25 +1,54 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9552238805970149,
5
  "eval_steps": 30,
6
- "global_step": 8,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.9552238805970149,
13
- "step": 8,
14
- "total_flos": 706712843583488.0,
15
- "train_loss": 0.7195637822151184,
16
- "train_runtime": 712.756,
17
- "train_samples_per_second": 1.5,
18
- "train_steps_per_second": 0.011
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
20
  ],
21
  "logging_steps": 10,
22
- "max_steps": 8,
23
  "num_input_tokens_seen": 0,
24
  "num_train_epochs": 1,
25
  "save_steps": 500,
@@ -35,7 +64,7 @@
35
  "attributes": {}
36
  }
37
  },
38
- "total_flos": 706712843583488.0,
39
  "train_batch_size": 1,
40
  "trial_name": null,
41
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9959839357429718,
5
  "eval_steps": 30,
6
+ "global_step": 31,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.321285140562249,
13
+ "grad_norm": 0.5042463327687016,
14
+ "learning_rate": 8.83022221559489e-05,
15
+ "loss": 0.668,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.642570281124498,
20
+ "grad_norm": 0.4073430870409904,
21
+ "learning_rate": 3.5659838364445505e-05,
22
+ "loss": 0.4073,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.963855421686747,
27
+ "grad_norm": 0.3439649236092943,
28
+ "learning_rate": 3.380821129028489e-07,
29
+ "loss": 0.3531,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.963855421686747,
34
+ "eval_loss": 0.32887399196624756,
35
+ "eval_runtime": 16.2654,
36
+ "eval_samples_per_second": 5.041,
37
+ "eval_steps_per_second": 0.676,
38
+ "step": 30
39
+ },
40
+ {
41
+ "epoch": 0.9959839357429718,
42
+ "step": 31,
43
+ "total_flos": 2886883717677056.0,
44
+ "train_loss": 0.46779590124084103,
45
+ "train_runtime": 2530.0595,
46
+ "train_samples_per_second": 1.575,
47
+ "train_steps_per_second": 0.012
48
  }
49
  ],
50
  "logging_steps": 10,
51
+ "max_steps": 31,
52
  "num_input_tokens_seen": 0,
53
  "num_train_epochs": 1,
54
  "save_steps": 500,
 
64
  "attributes": {}
65
  }
66
  },
67
+ "total_flos": 2886883717677056.0,
68
  "train_batch_size": 1,
69
  "trial_name": null,
70
  "trial_params": null
training_eval_loss.png ADDED
training_loss.png ADDED