izzcw committed on
Commit
b69fd0f
·
verified ·
1 Parent(s): 166c92b

End of training

Browse files
README.md CHANGED
@@ -4,6 +4,7 @@ license: llama3
4
  base_model: meta-llama/Meta-Llama-3-70B-Instruct
5
  tags:
6
  - llama-factory
 
7
  - generated_from_trainer
8
  model-index:
9
  - name: filtered_crafting_train_data
@@ -15,7 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # filtered_crafting_train_data
17
 
18
- This model is a fine-tuned version of [meta-llama/Meta-Llama-3-70B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct) on the None dataset.
 
 
19
 
20
  ## Model description
21
 
 
4
  base_model: meta-llama/Meta-Llama-3-70B-Instruct
5
  tags:
6
  - llama-factory
7
+ - lora
8
  - generated_from_trainer
9
  model-index:
10
  - name: filtered_crafting_train_data
 
16
 
17
  # filtered_crafting_train_data
18
 
19
+ This model is a fine-tuned version of [meta-llama/Meta-Llama-3-70B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct) on the identity and the filtered_crafting_train_data datasets.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.3454
22
 
23
  ## Model description
24
 
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_loss": 0.3438876271247864,
4
- "eval_runtime": 15.1284,
5
  "eval_samples_per_second": 4.891,
6
  "eval_steps_per_second": 0.661,
7
  "total_flos": 2776730917928960.0,
8
- "train_loss": 0.6446950520787921,
9
- "train_runtime": 2311.769,
10
- "train_samples_per_second": 1.549,
11
  "train_steps_per_second": 0.012
12
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_loss": 0.3453544080257416,
4
+ "eval_runtime": 15.13,
5
  "eval_samples_per_second": 4.891,
6
  "eval_steps_per_second": 0.661,
7
  "total_flos": 2776730917928960.0,
8
+ "train_loss": 0.6437431148120335,
9
+ "train_runtime": 2315.5194,
10
+ "train_samples_per_second": 1.547,
11
  "train_steps_per_second": 0.012
12
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_loss": 0.3438876271247864,
4
- "eval_runtime": 15.1284,
5
  "eval_samples_per_second": 4.891,
6
  "eval_steps_per_second": 0.661
7
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_loss": 0.3453544080257416,
4
+ "eval_runtime": 15.13,
5
  "eval_samples_per_second": 4.891,
6
  "eval_steps_per_second": 0.661
7
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 2776730917928960.0,
4
- "train_loss": 0.6446950520787921,
5
- "train_runtime": 2311.769,
6
- "train_samples_per_second": 1.549,
7
  "train_steps_per_second": 0.012
8
  }
 
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 2776730917928960.0,
4
+ "train_loss": 0.6437431148120335,
5
+ "train_runtime": 2315.5194,
6
+ "train_samples_per_second": 1.547,
7
  "train_steps_per_second": 0.012
8
  }
trainer_state.json CHANGED
@@ -10,25 +10,25 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.35714285714285715,
13
- "grad_norm": 0.8919971977009766,
14
  "learning_rate": 8.18711994874345e-05,
15
- "loss": 0.8992,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.7142857142857143,
20
- "grad_norm": 0.5649379015998761,
21
  "learning_rate": 2.3208660251050158e-05,
22
- "loss": 0.549,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 1.0,
27
  "step": 28,
28
  "total_flos": 2776730917928960.0,
29
- "train_loss": 0.6446950520787921,
30
- "train_runtime": 2311.769,
31
- "train_samples_per_second": 1.549,
32
  "train_steps_per_second": 0.012
33
  }
34
  ],
 
10
  "log_history": [
11
  {
12
  "epoch": 0.35714285714285715,
13
+ "grad_norm": 0.4399825973383531,
14
  "learning_rate": 8.18711994874345e-05,
15
+ "loss": 0.9002,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.7142857142857143,
20
+ "grad_norm": 0.4417428627206288,
21
  "learning_rate": 2.3208660251050158e-05,
22
+ "loss": 0.5484,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 1.0,
27
  "step": 28,
28
  "total_flos": 2776730917928960.0,
29
+ "train_loss": 0.6437431148120335,
30
+ "train_runtime": 2315.5194,
31
+ "train_samples_per_second": 1.547,
32
  "train_steps_per_second": 0.012
33
  }
34
  ],
training_loss.png CHANGED