advy committed on
Commit
7026c86
·
verified ·
1 Parent(s): a989beb

Finetune on MentalChat16K - eval_loss: 0.6693

Browse files
Files changed (2) hide show
  1. README.md +1 -1
  2. training_metrics.json +46 -0
README.md CHANGED
@@ -19,7 +19,7 @@ should probably proofread and complete it, then remove this comment. -->
19
 
20
  This model is a fine-tuned version of [meta-llama/Llama-3.1-70B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct) on the None dataset.
21
  It achieves the following results on the evaluation set:
22
- - Loss: 0.7052
23
 
24
  ## Model description
25
 
 
19
 
20
  This model is a fine-tuned version of [meta-llama/Llama-3.1-70B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct) on the None dataset.
21
  It achieves the following results on the evaluation set:
22
+ - Loss: 0.6542
23
 
24
  ## Model description
25
 
training_metrics.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "llama71b-mental-health",
3
+ "base_model": "meta-llama/Llama-3.1-70B-Instruct",
4
+ "dataset": "ShenLab/MentalChat16K",
5
+ "lora_config": {
6
+ "rank": 64,
7
+ "alpha": 128,
8
+ "target_modules": [
9
+ "q_proj",
10
+ "k_proj",
11
+ "v_proj",
12
+ "o_proj",
13
+ "gate_proj",
14
+ "up_proj",
15
+ "down_proj"
16
+ ],
17
+ "dropout": 0.1
18
+ },
19
+ "training": {
20
+ "final_train_loss": 0.5772495049900479,
21
+ "total_steps": 1800,
22
+ "epochs": 3,
23
+ "learning_rate": 8e-05,
24
+ "per_device_batch_size": 1,
25
+ "gradient_accumulation": 8
26
+ },
27
+ "evaluation": {
28
+ "eval_loss": 0.6692664623260498,
29
+ "eval_runtime": 354.8904,
30
+ "eval_samples_per_second": 1.33,
31
+ "eval_steps_per_second": 1.33,
32
+ "epoch": 2.691228726388629
33
+ },
34
+ "test_eval": {
35
+ "eval_loss": 0.6542104482650757,
36
+ "eval_runtime": 355.8479,
37
+ "eval_samples_per_second": 1.326,
38
+ "eval_steps_per_second": 1.326,
39
+ "epoch": 2.691228726388629
40
+ },
41
+ "dataset_stats": {
42
+ "train_size": 5347,
43
+ "val_size": 472,
44
+ "test_size": 472
45
+ }
46
+ }