hrezaei committed on
Commit
df5b00c
·
verified ·
1 Parent(s): e4ec7bc

End of training

Browse files
README.md CHANGED
@@ -2,11 +2,24 @@
2
  library_name: transformers
3
  tags:
4
  - generated_from_trainer
 
 
5
  metrics:
6
  - accuracy
7
  model-index:
8
  - name: T5LA
9
- results: []
 
 
 
 
 
 
 
 
 
 
 
10
  ---
11
 
12
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -14,10 +27,10 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  # T5LA
16
 
17
- This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
 
19
  - Accuracy: 0.0322
20
- - Loss: 5.5470
21
 
22
  ## Model description
23
 
 
2
  library_name: transformers
3
  tags:
4
  - generated_from_trainer
5
+ datasets:
6
+ - HuggingFaceFW/fineweb
7
  metrics:
8
  - accuracy
9
  model-index:
10
  - name: T5LA
11
+ results:
12
+ - task:
13
+ name: Causal Language Modeling
14
+ type: text-generation
15
+ dataset:
16
+ name: HuggingFaceFW/fineweb sample-10BT
17
+ type: HuggingFaceFW/fineweb
18
+ args: sample-10BT
19
+ metrics:
20
+ - name: Accuracy
21
+ type: accuracy
22
+ value: 0.032223235792499715
23
  ---
24
 
25
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
27
 
28
  # T5LA
29
 
30
+ This model is a fine-tuned version of [an unspecified base model](https://huggingface.co/) on the HuggingFaceFW/fineweb sample-10BT dataset.
31
  It achieves the following results on the evaluation set:
32
+ - Loss: 5.5467
33
  - Accuracy: 0.0322
 
34
 
35
  ## Model description
36
 
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
  "epoch": 2.1069,
3
- "eval_accuracy": 0.03222160319607959,
4
  "eval_loss": 5.5467119216918945,
5
- "eval_runtime": 174.2027,
6
  "eval_samples": 10000,
7
- "eval_samples_per_second": 20.625,
8
- "eval_steps_per_second": 1.292,
9
  "perplexity": 256.3931277119759,
10
  "total_flos": 9.182034338135409e+17,
11
  "train_loss": 0.0,
12
- "train_runtime": 908.1395,
13
  "train_samples": 1000000,
14
- "train_samples_per_second": 3523.688,
15
- "train_steps_per_second": 220.23
16
  }
 
1
  {
2
  "epoch": 2.1069,
3
+ "eval_accuracy": 0.032223235792499715,
4
  "eval_loss": 5.5467119216918945,
5
+ "eval_runtime": 132.3636,
6
  "eval_samples": 10000,
7
+ "eval_samples_per_second": 27.145,
8
+ "eval_steps_per_second": 1.7,
9
  "perplexity": 256.3931277119759,
10
  "total_flos": 9.182034338135409e+17,
11
  "train_loss": 0.0,
12
+ "train_runtime": 1033.1844,
13
  "train_samples": 1000000,
14
+ "train_samples_per_second": 3097.221,
15
+ "train_steps_per_second": 193.576
16
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "epoch": 2.1069,
3
- "eval_accuracy": 0.03222160319607959,
4
  "eval_loss": 5.5467119216918945,
5
- "eval_runtime": 174.2027,
6
  "eval_samples": 10000,
7
- "eval_samples_per_second": 20.625,
8
- "eval_steps_per_second": 1.292,
9
  "perplexity": 256.3931277119759
10
  }
 
1
  {
2
  "epoch": 2.1069,
3
+ "eval_accuracy": 0.032223235792499715,
4
  "eval_loss": 5.5467119216918945,
5
+ "eval_runtime": 132.3636,
6
  "eval_samples": 10000,
7
+ "eval_samples_per_second": 27.145,
8
+ "eval_steps_per_second": 1.7,
9
  "perplexity": 256.3931277119759
10
  }
train_results.json CHANGED
@@ -2,8 +2,8 @@
2
  "epoch": 2.1069,
3
  "total_flos": 9.182034338135409e+17,
4
  "train_loss": 0.0,
5
- "train_runtime": 908.1395,
6
  "train_samples": 1000000,
7
- "train_samples_per_second": 3523.688,
8
- "train_steps_per_second": 220.23
9
  }
 
2
  "epoch": 2.1069,
3
  "total_flos": 9.182034338135409e+17,
4
  "train_loss": 0.0,
5
+ "train_runtime": 1033.1844,
6
  "train_samples": 1000000,
7
+ "train_samples_per_second": 3097.221,
8
+ "train_steps_per_second": 193.576
9
  }
trainer_state.json CHANGED
@@ -2313,9 +2313,9 @@
2313
  "step": 100000,
2314
  "total_flos": 9.182034338135409e+17,
2315
  "train_loss": 0.0,
2316
- "train_runtime": 908.1395,
2317
- "train_samples_per_second": 3523.688,
2318
- "train_steps_per_second": 220.23
2319
  }
2320
  ],
2321
  "logging_steps": 500,
 
2313
  "step": 100000,
2314
  "total_flos": 9.182034338135409e+17,
2315
  "train_loss": 0.0,
2316
+ "train_runtime": 1033.1844,
2317
+ "train_samples_per_second": 3097.221,
2318
+ "train_steps_per_second": 193.576
2319
  }
2320
  ],
2321
  "logging_steps": 500,