bart1259 committed on
Commit
4e321bc
·
verified ·
1 Parent(s): f5055c3

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +2 -2
  2. hyperparameters.json +1 -0
  3. tokenizer.json +28 -2
README.md CHANGED
@@ -7,12 +7,12 @@ library_name: transformers
7
  Chain of Thought (CoT) transformer model trained to do multi-step integer arithmetic.
8
 
9
  Model details:
10
- - **Vocabulary Size**: 35 (Character Tokenization)
11
  - **Layer Count**: 8
12
  - **Attention Head Count**: 4
13
  - **Residual Stream Size**: 256
14
  - **Context Length**: 256
15
- - **Tokens Trained on**: 419,649,024
16
 
17
  Training Score During Training
18
 
 
7
  Chain of Thought (CoT) transformer model trained to do multi-step integer arithmetic.
8
 
9
  Model details:
10
+ - **Vocabulary Size**: 40 (Character Tokenization)
11
  - **Layer Count**: 8
12
  - **Attention Head Count**: 4
13
  - **Residual Stream Size**: 256
14
  - **Context Length**: 256
15
+ - **Tokens Trained on**: 419,612,160
16
 
17
  Training Score During Training
18
 
hyperparameters.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"MIN_DIFFICULTY": 2, "MAX_DIFFICULTY": 4, "TRAINING_SAMPLES": 3000000, "CONTEXT_LENGTH": 256, "RESIDUAL_EMBEDDING_SIZE": 256, "MLP_EMBEDDING_SIZE": 1024, "NUM_ATTENTION_HEADS": 4, "NUM_LAYERS": 8, "VOCAB_SIZE": 40, "TOTAL_TOKENS": 419612160}
tokenizer.json CHANGED
@@ -112,8 +112,34 @@
112
  "t": 36,
113
  "u": 37,
114
  "Ċ": 38,
115
- "Ġ": 39
 
 
 
 
 
116
  },
117
- "merges": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  }
119
  }
 
112
  "t": 36,
113
  "u": 37,
114
  "Ċ": 38,
115
+ "Ġ": 39,
116
+ "Ġ-": 40,
117
+ "(-": 41,
118
+ "Ġ1": 42,
119
+ "St": 43,
120
+ "ep": 44
121
  },
122
+ "merges": [
123
+ [
124
+ "Ġ",
125
+ "-"
126
+ ],
127
+ [
128
+ "(",
129
+ "-"
130
+ ],
131
+ [
132
+ "Ġ",
133
+ "1"
134
+ ],
135
+ [
136
+ "S",
137
+ "t"
138
+ ],
139
+ [
140
+ "e",
141
+ "p"
142
+ ]
143
+ ]
144
  }
145
  }