craa committed on
Commit
6dfa431
·
verified ·
1 Parent(s): 8c62e8b

End of training

Browse files
README.md CHANGED
@@ -17,8 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 3.5398
21
- - Accuracy: 0.3757
22
 
23
  ## Model description
24
 
 
17
 
18
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 3.5557
21
+ - Accuracy: 0.3699
22
 
23
  ## Model description
24
 
all_results.json CHANGED
@@ -2,15 +2,15 @@
2
  "epoch": 33.76065440149045,
3
  "eval_accuracy": 0.36993376513648857,
4
  "eval_loss": 3.555708646774292,
5
- "eval_runtime": 179.6573,
6
  "eval_samples": 16651,
7
- "eval_samples_per_second": 92.682,
8
- "eval_steps_per_second": 5.794,
9
  "perplexity": 35.012622760624836,
10
  "total_flos": 2.424181507817472e+18,
11
- "train_loss": 0.976803156227901,
12
- "train_runtime": 71873.7484,
13
  "train_samples": 274807,
14
- "train_samples_per_second": 191.173,
15
- "train_steps_per_second": 2.39
16
  }
 
2
  "epoch": 33.76065440149045,
3
  "eval_accuracy": 0.36993376513648857,
4
  "eval_loss": 3.555708646774292,
5
+ "eval_runtime": 178.7494,
6
  "eval_samples": 16651,
7
+ "eval_samples_per_second": 93.153,
8
+ "eval_steps_per_second": 5.824,
9
  "perplexity": 35.012622760624836,
10
  "total_flos": 2.424181507817472e+18,
11
+ "train_loss": 0.97679699680723,
12
+ "train_runtime": 71541.8287,
13
  "train_samples": 274807,
14
+ "train_samples_per_second": 192.06,
15
+ "train_steps_per_second": 2.401
16
  }
eval_results.json CHANGED
@@ -2,9 +2,9 @@
2
  "epoch": 33.76065440149045,
3
  "eval_accuracy": 0.36993376513648857,
4
  "eval_loss": 3.555708646774292,
5
- "eval_runtime": 179.6573,
6
  "eval_samples": 16651,
7
- "eval_samples_per_second": 92.682,
8
- "eval_steps_per_second": 5.794,
9
  "perplexity": 35.012622760624836
10
  }
 
2
  "epoch": 33.76065440149045,
3
  "eval_accuracy": 0.36993376513648857,
4
  "eval_loss": 3.555708646774292,
5
+ "eval_runtime": 178.7494,
6
  "eval_samples": 16651,
7
+ "eval_samples_per_second": 93.153,
8
+ "eval_steps_per_second": 5.824,
9
  "perplexity": 35.012622760624836
10
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 33.76065440149045,
3
  "total_flos": 2.424181507817472e+18,
4
- "train_loss": 0.976803156227901,
5
- "train_runtime": 71873.7484,
6
  "train_samples": 274807,
7
- "train_samples_per_second": 191.173,
8
- "train_steps_per_second": 2.39
9
  }
 
1
  {
2
  "epoch": 33.76065440149045,
3
  "total_flos": 2.424181507817472e+18,
4
+ "train_loss": 0.97679699680723,
5
+ "train_runtime": 71541.8287,
6
  "train_samples": 274807,
7
+ "train_samples_per_second": 192.06,
8
+ "train_steps_per_second": 2.401
9
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff