sgugger committed on
Commit
105fecd
·
1 Parent(s): 0ce343f
README.md CHANGED
@@ -12,7 +12,7 @@ model_index:
12
  dataset:
13
  name: oscar
14
  type: oscar
15
- args: unshuffled_deduplicated_eo
16
  ---
17
 
18
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -39,7 +39,7 @@ More information needed
39
  ### Training hyperparameters
40
 
41
  The following hyperparameters were used during training:
42
- - learning_rate: 2e-05
43
  - train_batch_size: 64
44
  - eval_batch_size: 8
45
  - seed: 42
@@ -54,7 +54,7 @@ The following hyperparameters were used during training:
54
 
55
  ### Framework versions
56
 
57
- - Transformers 4.9.0.dev0
58
  - Pytorch 1.8.1+cu111
59
- - Datasets 1.10.0
60
  - Tokenizers 0.10.3
 
12
  dataset:
13
  name: oscar
14
  type: oscar
15
+ args: unshuffled_original_eo
16
  ---
17
 
18
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
39
  ### Training hyperparameters
40
 
41
  The following hyperparameters were used during training:
42
+ - learning_rate: 5e-05
43
  - train_batch_size: 64
44
  - eval_batch_size: 8
45
  - seed: 42
 
54
 
55
  ### Framework versions
56
 
57
+ - Transformers 4.10.0.dev0
58
  - Pytorch 1.8.1+cu111
59
+ - Datasets 1.10.3.dev0
60
  - Tokenizers 0.10.3
config.json CHANGED
@@ -4,6 +4,7 @@
4
  ],
5
  "attention_probs_dropout_prob": 0.1,
6
  "bos_token_id": 0,
 
7
  "eos_token_id": 2,
8
  "gradient_checkpointing": false,
9
  "hidden_act": "gelu",
@@ -19,8 +20,8 @@
19
  "pad_token_id": 1,
20
  "position_embedding_type": "absolute",
21
  "torch_dtype": "float32",
22
- "transformers_version": "4.9.0.dev0",
23
  "type_vocab_size": 1,
24
  "use_cache": true,
25
- "vocab_size": 52000
26
  }
 
4
  ],
5
  "attention_probs_dropout_prob": 0.1,
6
  "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
  "eos_token_id": 2,
9
  "gradient_checkpointing": false,
10
  "hidden_act": "gelu",
 
20
  "pad_token_id": 1,
21
  "position_embedding_type": "absolute",
22
  "torch_dtype": "float32",
23
+ "transformers_version": "4.10.0.dev0",
24
  "type_vocab_size": 1,
25
  "use_cache": true,
26
+ "vocab_size": 12000
27
  }
merges.txt CHANGED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3c807f81363ac4ff9d837b962d2cfce5996b9dda1954ba3c1cff64af0b84916
3
- size 334060082
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e6f078adc7b33e532883c54dc53ee965651b6682f8f5254f4394eeadef79b32
3
+ size 211020082
runs/Jul26_11-51-33_brahms/1627314988.6503177/events.out.tfevents.1627314988.brahms.3059668.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87005082cfb2701aaccb59c963fa802025acb40a37090c216bb14f814c1d155a
3
+ size 4183
runs/Jul26_11-51-33_brahms/events.out.tfevents.1627314988.brahms.3059668.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ca719d2d5bdfa3ad093120ae68ea26b3bdaf38eb2c6f17f8c3be9cb228479d0
3
+ size 7961
runs/Jul26_12-33-57_brahms/1627317243.9934251/events.out.tfevents.1627317243.brahms.3059668.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb40655d9d048f84ed463ba554f312c8d9d9b00f98e07b868320104ecd2316fa
3
+ size 4183
runs/Jul26_12-33-57_brahms/events.out.tfevents.1627317243.brahms.3059668.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47e54733e6aeabef6f8e4e303e2aa227eb2b48470c210eba1454b66d8a2b3f3d
3
+ size 8042
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58a4eb8baab968d5ba22542bc7671f464ad303343a2e96119bb7274e7f422553
3
  size 2607
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a1b5f0af503374b81916d8aeaf9a450dbe6c2cd02ca8c9489eac1d920265b94
3
  size 2607
vocab.json CHANGED
The diff for this file is too large to render. See raw diff