hrezaei committed on
Commit
af6ef19
·
verified ·
1 Parent(s): cc0dc87

End of training

Browse files
Files changed (5) hide show
  1. README.md +16 -3
  2. all_results.json +12 -12
  3. eval_results.json +7 -7
  4. train_results.json +6 -6
  5. trainer_state.json +11 -11
README.md CHANGED
@@ -2,11 +2,24 @@
2
  library_name: transformers
3
  tags:
4
  - generated_from_trainer
 
 
5
  metrics:
6
  - accuracy
7
  model-index:
8
  - name: T5LA
9
- results: []
 
 
 
 
 
 
 
 
 
 
 
10
  ---
11
 
12
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -15,10 +28,10 @@ should probably proofread and complete it, then remove this comment. -->
15
  [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/uoy/llm_training/runs/pzcq293g)
16
  # T5LA
17
 
18
- This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Accuracy: 0.0322
21
  - Loss: 5.5470
 
22
 
23
  ## Model description
24
 
 
2
  library_name: transformers
3
  tags:
4
  - generated_from_trainer
5
+ datasets:
6
+ - HuggingFaceFW/fineweb
7
  metrics:
8
  - accuracy
9
  model-index:
10
  - name: T5LA
11
+ results:
12
+ - task:
13
+ name: Causal Language Modeling
14
+ type: text-generation
15
+ dataset:
16
+ name: HuggingFaceFW/fineweb sample-10BT
17
+ type: HuggingFaceFW/fineweb
18
+ args: sample-10BT
19
+ metrics:
20
+ - name: Accuracy
21
+ type: accuracy
22
+ value: 0.0322300343763811
23
  ---
24
 
25
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
28
  [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/uoy/llm_training/runs/pzcq293g)
29
  # T5LA
30
 
31
+ This model is a fine-tuned version of [](https://huggingface.co/) on the HuggingFaceFW/fineweb sample-10BT dataset.
32
  It achieves the following results on the evaluation set:
 
33
  - Loss: 5.5470
34
+ - Accuracy: 0.0322
35
 
36
  ## Model description
37
 
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 1.00001,
3
- "eval_accuracy": 0.03222989830774154,
4
- "eval_loss": 5.5469770431518555,
5
- "eval_runtime": 110.5546,
6
  "eval_samples": 10000,
7
- "eval_samples_per_second": 32.491,
8
- "eval_steps_per_second": 2.035,
9
- "perplexity": 256.46111204397334,
10
- "total_flos": 9.182126159167488e+17,
11
- "train_loss": 5.625401986489168e-05,
12
- "train_runtime": 26.8473,
13
  "train_samples": 1000000,
14
- "train_samples_per_second": 59596.412,
15
- "train_steps_per_second": 3724.776
16
  }
 
1
  {
2
+ "epoch": 2.1069,
3
+ "eval_accuracy": 0.0322300343763811,
4
+ "eval_loss": 5.546974182128906,
5
+ "eval_runtime": 116.7315,
6
  "eval_samples": 10000,
7
+ "eval_samples_per_second": 30.771,
8
+ "eval_steps_per_second": 1.927,
9
+ "perplexity": 256.4603783038958,
10
+ "total_flos": 9.182034338135409e+17,
11
+ "train_loss": 0.0,
12
+ "train_runtime": 866.5464,
13
  "train_samples": 1000000,
14
+ "train_samples_per_second": 3692.82,
15
+ "train_steps_per_second": 230.801
16
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 1.00001,
3
- "eval_accuracy": 0.03222989830774154,
4
- "eval_loss": 5.5469770431518555,
5
- "eval_runtime": 110.5546,
6
  "eval_samples": 10000,
7
- "eval_samples_per_second": 32.491,
8
- "eval_steps_per_second": 2.035,
9
- "perplexity": 256.46111204397334
10
  }
 
1
  {
2
+ "epoch": 2.1069,
3
+ "eval_accuracy": 0.0322300343763811,
4
+ "eval_loss": 5.546974182128906,
5
+ "eval_runtime": 116.7315,
6
  "eval_samples": 10000,
7
+ "eval_samples_per_second": 30.771,
8
+ "eval_steps_per_second": 1.927,
9
+ "perplexity": 256.4603783038958
10
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.00001,
3
- "total_flos": 9.182126159167488e+17,
4
- "train_loss": 5.625401986489168e-05,
5
- "train_runtime": 26.8473,
6
  "train_samples": 1000000,
7
- "train_samples_per_second": 59596.412,
8
- "train_steps_per_second": 3724.776
9
  }
 
1
  {
2
+ "epoch": 2.1069,
3
+ "total_flos": 9.182034338135409e+17,
4
+ "train_loss": 0.0,
5
+ "train_runtime": 866.5464,
6
  "train_samples": 1000000,
7
+ "train_samples_per_second": 3692.82,
8
+ "train_steps_per_second": 230.801
9
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 5.546974182128906,
3
  "best_model_checkpoint": "/users/hr1171/scratch/T5LA/checkpoint-100000",
4
- "epoch": 1.00001,
5
  "eval_steps": 1000,
6
- "global_step": 100001,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2309,17 +2309,17 @@
2309
  "step": 100000
2310
  },
2311
  {
2312
- "epoch": 1.00001,
2313
- "step": 100001,
2314
- "total_flos": 9.182126159167488e+17,
2315
- "train_loss": 5.625401986489168e-05,
2316
- "train_runtime": 26.8473,
2317
- "train_samples_per_second": 59596.412,
2318
- "train_steps_per_second": 3724.776
2319
  }
2320
  ],
2321
  "logging_steps": 500,
2322
- "max_steps": 100000,
2323
  "num_input_tokens_seen": 0,
2324
  "num_train_epochs": 9223372036854775807,
2325
  "save_steps": 500,
@@ -2335,7 +2335,7 @@
2335
  "attributes": {}
2336
  }
2337
  },
2338
- "total_flos": 9.182126159167488e+17,
2339
  "train_batch_size": 8,
2340
  "trial_name": null,
2341
  "trial_params": null
 
1
  {
2
  "best_metric": 5.546974182128906,
3
  "best_model_checkpoint": "/users/hr1171/scratch/T5LA/checkpoint-100000",
4
+ "epoch": 2.1069,
5
  "eval_steps": 1000,
6
+ "global_step": 100000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2309
  "step": 100000
2310
  },
2311
  {
2312
+ "epoch": 2.1069,
2313
+ "step": 100000,
2314
+ "total_flos": 9.182034338135409e+17,
2315
+ "train_loss": 0.0,
2316
+ "train_runtime": 866.5464,
2317
+ "train_samples_per_second": 3692.82,
2318
+ "train_steps_per_second": 230.801
2319
  }
2320
  ],
2321
  "logging_steps": 500,
2322
+ "max_steps": 200000,
2323
  "num_input_tokens_seen": 0,
2324
  "num_train_epochs": 9223372036854775807,
2325
  "save_steps": 500,
 
2335
  "attributes": {}
2336
  }
2337
  },
2338
+ "total_flos": 9.182034338135409e+17,
2339
  "train_batch_size": 8,
2340
  "trial_name": null,
2341
  "trial_params": null