shnl committed on
Commit
6e44fea
·
verified ·
1 Parent(s): e80f0da

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. model.safetensors +1 -1
  2. trainer_state.json +39 -6
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:beef18c1806ad7e3fcce32919e58436d6e4efcab7dd824d59efb93cf28eb5660
3
  size 988097824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1080eca2e2b2303df23691e909bc44bf473c367036322fbcadfdec34865aa63
3
  size 988097824
trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 2000,
3
- "best_metric": 0.15754735469818115,
4
- "best_model_checkpoint": "check-point-qwen2.5-bags/checkpoint-2000",
5
- "epoch": 0.479415113561455,
6
  "eval_steps": 2000,
7
- "global_step": 2000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -19,6 +19,39 @@
19
  "eval_samples_per_second": 86.784,
20
  "eval_steps_per_second": 21.696,
21
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  }
23
  ],
24
  "logging_steps": 8344,
@@ -38,7 +71,7 @@
38
  "attributes": {}
39
  }
40
  },
41
- "total_flos": 7.0365632004096e+16,
42
  "train_batch_size": 4,
43
  "trial_name": null,
44
  "trial_params": null
 
1
  {
2
+ "best_global_step": 8000,
3
+ "best_metric": 0.13777850568294525,
4
+ "best_model_checkpoint": "check-point-qwen2.5-bags/checkpoint-8000",
5
+ "epoch": 1.9176005273566248,
6
  "eval_steps": 2000,
7
+ "global_step": 8000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
19
  "eval_samples_per_second": 86.784,
20
  "eval_steps_per_second": 21.696,
21
  "step": 2000
22
+ },
23
+ {
24
+ "epoch": 0.95883022712291,
25
+ "eval_entropy": 0.1454699612403101,
26
+ "eval_loss": 0.14876286685466766,
27
+ "eval_mean_token_accuracy": 0.954681836819464,
28
+ "eval_num_tokens": 65536000.0,
29
+ "eval_runtime": 11.8969,
30
+ "eval_samples_per_second": 86.745,
31
+ "eval_steps_per_second": 21.686,
32
+ "step": 4000
33
+ },
34
+ {
35
+ "epoch": 1.43818541379517,
36
+ "eval_entropy": 0.1354526253633721,
37
+ "eval_loss": 0.14411340653896332,
38
+ "eval_mean_token_accuracy": 0.9560297101505043,
39
+ "eval_num_tokens": 98297856.0,
40
+ "eval_runtime": 11.9057,
41
+ "eval_samples_per_second": 86.682,
42
+ "eval_steps_per_second": 21.67,
43
+ "step": 6000
44
+ },
45
+ {
46
+ "epoch": 1.9176005273566248,
47
+ "eval_entropy": 0.13540720385174418,
48
+ "eval_loss": 0.13777850568294525,
49
+ "eval_mean_token_accuracy": 0.9574325225611989,
50
+ "eval_num_tokens": 131065856.0,
51
+ "eval_runtime": 11.8969,
52
+ "eval_samples_per_second": 86.745,
53
+ "eval_steps_per_second": 21.686,
54
+ "step": 8000
55
  }
56
  ],
57
  "logging_steps": 8344,
 
71
  "attributes": {}
72
  }
73
  },
74
+ "total_flos": 2.8144933446038323e+17,
75
  "train_batch_size": 4,
76
  "trial_name": null,
77
  "trial_params": null