File size: 5,382 Bytes
a88f8a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
{
  "version": "0.3.8",
  "rust_available": true,
  "cpu_count": 128,
  "samples": 10000,
  "total_bytes": 4353919,
  "thread_counts": [
    1,
    2,
    4,
    8
  ],
  "runs": 3,
  "results": {
    "SARFTokenizer (HF)": {
      "name": "SARFTokenizer (HF)",
      "type": "sarf",
      "vocab_size": 64641,
      "benchmarks": [
        {
          "threads": 1,
          "time_sec": 1.2484327759593725,
          "texts_per_sec": 8010.04282534587,
          "bytes_per_sec": 3487507.7648087065,
          "mb_per_sec": 3.325946583565432
        },
        {
          "threads": 2,
          "time_sec": 0.24223483664294085,
          "texts_per_sec": 41282.25377731365,
          "bytes_per_sec": 17973958.908386767,
          "mb_per_sec": 17.141302975069777,
          "speedup": 5.1538118681070975
        },
        {
          "threads": 4,
          "time_sec": 0.25055884880324203,
          "texts_per_sec": 39910.78362533811,
          "bytes_per_sec": 17376831.91312485,
          "mb_per_sec": 16.57183829605565,
          "speedup": 4.982593039209473
        },
        {
          "threads": 8,
          "time_sec": 0.23175926196078458,
          "texts_per_sec": 43148.22163047825,
          "bytes_per_sec": 18786386.197315022,
          "mb_per_sec": 17.916094014468214,
          "speedup": 5.38676541078482
        }
      ],
      "memory": {
        "current_mb": 6.103515625e-05,
        "peak_mb": 0.2074756622314453,
        "texts_processed": 10000
      }
    },
    "SARFTokenizer (Local)": {
      "name": "SARFTokenizer (Local)",
      "type": "sarf",
      "vocab_size": 64641,
      "benchmarks": [
        {
          "threads": 1,
          "time_sec": 1.2962954323738813,
          "texts_per_sec": 7714.2908555090635,
          "bytes_per_sec": 3358739.7527327165,
          "mb_per_sec": 3.2031438376738706
        },
        {
          "threads": 2,
          "time_sec": 0.24298642265299955,
          "texts_per_sec": 41154.562838602105,
          "bytes_per_sec": 17918363.307968363,
          "mb_per_sec": 17.08828287884556,
          "speedup": 5.3348471829023785
        },
        {
          "threads": 4,
          "time_sec": 0.23486537175873914,
          "texts_per_sec": 42577.58359658189,
          "bytes_per_sec": 18537935.019524623,
          "mb_per_sec": 17.679152507328627,
          "speedup": 5.51931271377662
        },
        {
          "threads": 8,
          "time_sec": 0.24369121653338274,
          "texts_per_sec": 41035.53727645379,
          "bytes_per_sec": 17866540.542316042,
          "mb_per_sec": 17.038860838237802,
          "speedup": 5.319417953647519
        }
      ],
      "memory": {
        "current_mb": 6.103515625e-05,
        "peak_mb": 0.2074756622314453,
        "texts_processed": 10000
      }
    },
    "tiktoken (o200k)": {
      "name": "tiktoken (o200k)",
      "type": "tiktoken",
      "vocab_size": 200019,
      "benchmarks": [
        {
          "threads": 1,
          "time_sec": 0.46731498713294667,
          "texts_per_sec": 21398.842911826185,
          "bytes_per_sec": 9316882.873181535,
          "mb_per_sec": 8.885271905118499
        },
        {
          "threads": 2,
          "time_sec": 0.52791434712708,
          "texts_per_sec": 18942.466811936807,
          "bytes_per_sec": 8247396.61593611,
          "mb_per_sec": 7.865330329834089,
          "speedup": 0.8852098634486518
        },
        {
          "threads": 4,
          "time_sec": 0.48348489341636497,
          "texts_per_sec": 20683.169497476425,
          "bytes_per_sec": 9005284.465528306,
          "mb_per_sec": 8.58810850670653,
          "speedup": 0.966555508758175
        },
        {
          "threads": 8,
          "time_sec": 0.8265109478185574,
          "texts_per_sec": 12099.053287065815,
          "bytes_per_sec": 5267829.798856831,
          "mb_per_sec": 5.023793982369262,
          "speedup": 0.5654068931165998
        }
      ],
      "memory": {
        "current_mb": 0.0,
        "peak_mb": 0.21294403076171875,
        "texts_processed": 10000
      }
    },
    "tiktoken (cl100k)": {
      "name": "tiktoken (cl100k)",
      "type": "tiktoken",
      "vocab_size": 100277,
      "benchmarks": [
        {
          "threads": 1,
          "time_sec": 0.3374793001761039,
          "texts_per_sec": 29631.447009584845,
          "bytes_per_sec": 12901292.013252463,
          "mb_per_sec": 12.303630841495956
        },
        {
          "threads": 2,
          "time_sec": 0.5219240722556909,
          "texts_per_sec": 19159.875030828225,
          "bytes_per_sec": 8342054.393434861,
          "mb_per_sec": 7.955603021082745,
          "speedup": 0.6466061216865516
        },
        {
          "threads": 4,
          "time_sec": 0.514385993902882,
          "texts_per_sec": 19440.653747442506,
          "bytes_per_sec": 8464303.172341112,
          "mb_per_sec": 8.072188541737663,
          "speedup": 0.6560818221652849
        },
        {
          "threads": 8,
          "time_sec": 0.8330062093834082,
          "texts_per_sec": 12004.71243473924,
          "bytes_per_sec": 5226754.555914744,
          "mb_per_sec": 4.984621578135246,
          "speedup": 0.4051341951291171
        }
      ],
      "memory": {
        "current_mb": 0.0,
        "peak_mb": 0.4065704345703125,
        "texts_processed": 10000
      }
    }
  }
}