Add tokenized data: data/tokenized_2100h
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- data/tokenized_2100h/metadata.json +16 -0
- data/tokenized_2100h/pipeline.log +0 -0
- data/tokenized_2100h/shard_00000.parquet +3 -0
- data/tokenized_2100h/shard_00001.parquet +3 -0
- data/tokenized_2100h/shard_00002.parquet +3 -0
- data/tokenized_2100h/shard_00003.parquet +3 -0
- data/tokenized_2100h/shard_00004.parquet +3 -0
- data/tokenized_2100h/shard_00005.parquet +3 -0
- data/tokenized_2100h/shard_00006.parquet +3 -0
- data/tokenized_2100h/shard_00007.parquet +3 -0
- data/tokenized_2100h/shard_00008.parquet +3 -0
- data/tokenized_2100h/shard_00009.parquet +3 -0
- data/tokenized_2100h/shard_00010.parquet +3 -0
- data/tokenized_2100h/shard_00011.parquet +3 -0
- data/tokenized_2100h/shard_00012.parquet +3 -0
- data/tokenized_2100h/shard_00013.parquet +3 -0
- data/tokenized_2100h/shard_00014.parquet +3 -0
- data/tokenized_2100h/shard_00015.parquet +3 -0
- data/tokenized_2100h/shard_00016.parquet +3 -0
- data/tokenized_2100h/shard_00017.parquet +3 -0
- data/tokenized_2100h/shard_00018.parquet +3 -0
- data/tokenized_2100h/shard_00019.parquet +3 -0
- data/tokenized_2100h/shard_00020.parquet +3 -0
- data/tokenized_2100h/shard_00021.parquet +3 -0
- data/tokenized_2100h/shard_00022.parquet +3 -0
- data/tokenized_2100h/shard_00023.parquet +3 -0
- data/tokenized_2100h/shard_00024.parquet +3 -0
- data/tokenized_2100h/shard_00025.parquet +3 -0
- data/tokenized_2100h/shard_00026.parquet +3 -0
- data/tokenized_2100h/shard_00027.parquet +3 -0
- data/tokenized_2100h/shard_00028.parquet +3 -0
- data/tokenized_2100h/shard_00029.parquet +3 -0
- data/tokenized_2100h/shard_00030.parquet +3 -0
- data/tokenized_2100h/shard_00031.parquet +3 -0
- data/tokenized_2100h/shard_00032.parquet +3 -0
- data/tokenized_2100h/shard_00033.parquet +3 -0
- data/tokenized_2100h/shard_00034.parquet +3 -0
- data/tokenized_2100h/shard_00035.parquet +3 -0
- data/tokenized_2100h/shard_00036.parquet +3 -0
- data/tokenized_2100h/shard_00037.parquet +3 -0
- data/tokenized_2100h/shard_00038.parquet +3 -0
- data/tokenized_2100h/shard_00039.parquet +3 -0
- data/tokenized_2100h/shard_00040.parquet +3 -0
- data/tokenized_2100h/shard_00041.parquet +3 -0
- data/tokenized_2100h/shard_00042.parquet +3 -0
- data/tokenized_2100h/shard_00043.parquet +3 -0
- data/tokenized_2100h/shard_00044.parquet +3 -0
- data/tokenized_2100h/shard_00045.parquet +3 -0
- data/tokenized_2100h/shard_00046.parquet +3 -0
- data/tokenized_2100h/shard_00047.parquet +3 -0
data/tokenized_2100h/metadata.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"total_samples": 188109,
|
| 3 |
+
"total_hours": 579.18,
|
| 4 |
+
"num_shards": 489,
|
| 5 |
+
"shard_size": 1000,
|
| 6 |
+
"codec": "mimi",
|
| 7 |
+
"num_codebooks": 8,
|
| 8 |
+
"frame_rate": 12.5,
|
| 9 |
+
"sample_rate": 24000,
|
| 10 |
+
"datasets": [
|
| 11 |
+
"mls",
|
| 12 |
+
"voxpopuli"
|
| 13 |
+
],
|
| 14 |
+
"format": "parquet",
|
| 15 |
+
"compression": "zstd"
|
| 16 |
+
}
|
data/tokenized_2100h/pipeline.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/tokenized_2100h/shard_00000.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4648d72191cbe98356f0f11f5df6c5d84a260d48b2270a6252d8f3aa1cf5b7c7
|
| 3 |
+
size 779253
|
data/tokenized_2100h/shard_00001.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3dd535189d6e36912917ca581ffced976b5a234fdfb7697928b8e41f39f30d88
|
| 3 |
+
size 539244
|
data/tokenized_2100h/shard_00002.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f819cec8a2253308ee8882234b0fba28f448875ca5038df3a7060a600f0fbfe0
|
| 3 |
+
size 488202
|
data/tokenized_2100h/shard_00003.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:023843ffa8ccc5bbac45adbb39e005b272ef99b0cdb015b815f0bb936f263218
|
| 3 |
+
size 464539
|
data/tokenized_2100h/shard_00004.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ffca34a393f7538eb751cf9db77569222ff40073552c4bdfb9a36694a69d053b
|
| 3 |
+
size 465680
|
data/tokenized_2100h/shard_00005.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1180f4d190b3f439853192f59dc2e8157d9a9c9662cd5cfc4b22a13e51bed6a0
|
| 3 |
+
size 517010
|
data/tokenized_2100h/shard_00006.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3184055357d396573aa491bcf4af85146da65267f47c818d4c2322adadc8106
|
| 3 |
+
size 613251
|
data/tokenized_2100h/shard_00007.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8502bb71d9149fe816045160e4f063c285e230751b58969b1502056df0be780
|
| 3 |
+
size 726880
|
data/tokenized_2100h/shard_00008.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:879ff71056c3eb4e038ac4b194fd2f90a0efe417abf97f9225c35ae5f8139198
|
| 3 |
+
size 751006
|
data/tokenized_2100h/shard_00009.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b7be099b175dc1590f0e531947d32d0c0d9cd5d14c652241685f1579eab2f79
|
| 3 |
+
size 762950
|
data/tokenized_2100h/shard_00010.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:32d2f73ace839c67f2d781d1438d9c01e18ff0665c58ee72585f306980e145b0
|
| 3 |
+
size 732673
|
data/tokenized_2100h/shard_00011.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:372dd68d61f9fddda5a45238820df40fee6b6e02013830123f9d826935a667e8
|
| 3 |
+
size 719833
|
data/tokenized_2100h/shard_00012.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:292ebfeca5ce9009cadf57762884d9e527dea96319286c2ff10d8e7f7f358f84
|
| 3 |
+
size 712575
|
data/tokenized_2100h/shard_00013.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b9d67e69c4d61606f568b6cb7e24cad58c5988c7737598b3c430463fd6a0f193
|
| 3 |
+
size 742862
|
data/tokenized_2100h/shard_00014.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:823837ffdf40d1a48c3948d83bb123a3021d3660cade222cdccc97deb0961f03
|
| 3 |
+
size 729079
|
data/tokenized_2100h/shard_00015.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fab31f304c69fb9c83ac21589e13a6bec5dc1051d6644bd6187e6d074467dcf6
|
| 3 |
+
size 777490
|
data/tokenized_2100h/shard_00016.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5031553da35b9c67680084b15882352267186ea3d00ab16bea344e188847069
|
| 3 |
+
size 745667
|
data/tokenized_2100h/shard_00017.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a7e7a710e1a44eedeb9341ac36e01710fdb26e04b3a0f53266114b4ca3d6b448
|
| 3 |
+
size 803832
|
data/tokenized_2100h/shard_00018.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:241a18d7cd473f88e897d43ef8b41876ede51017eed0e6a65676e520e8763baf
|
| 3 |
+
size 815545
|
data/tokenized_2100h/shard_00019.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:369081440af59ba58de155cdf004f36a03c1ca4e7b782705df6fcc950070baf3
|
| 3 |
+
size 809762
|
data/tokenized_2100h/shard_00020.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0343c083162f1331f97272c1e60bf46e1acc509a196e5cf053db6600d40ffadf
|
| 3 |
+
size 841079
|
data/tokenized_2100h/shard_00021.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:273f7224c1a8eb6d1fccdbb169145bc3300a49e3d6b45f00211a93455668c71f
|
| 3 |
+
size 799656
|
data/tokenized_2100h/shard_00022.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59a4ec1e31eda4220422e56e438758a186799454a033924c2e3cc7e01932ef8b
|
| 3 |
+
size 841062
|
data/tokenized_2100h/shard_00023.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae9766e7011a5208818371d78e1b4c60633bb342f604d3ee702f023b76d1f546
|
| 3 |
+
size 843471
|
data/tokenized_2100h/shard_00024.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92cf4cf383aa14a4dada5f6d43254a51e1fbaf987ba37ce2b29daded9b7a2404
|
| 3 |
+
size 855138
|
data/tokenized_2100h/shard_00025.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:74cd3986d1d7fcace939210d306c0d96a52a7ad4bea363ce2ac136a3d9d888d1
|
| 3 |
+
size 804751
|
data/tokenized_2100h/shard_00026.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd736d7b29542003c6ecb5f141bf0e1baf3b0df5c00932bd00f144763a4e53d6
|
| 3 |
+
size 802440
|
data/tokenized_2100h/shard_00027.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:079478950ae3634eec210baf9eb92444f954e0287f73d96a7d066522025f5e46
|
| 3 |
+
size 811086
|
data/tokenized_2100h/shard_00028.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc8b9eca8623ef2bc9514456461716534078b63bfcb91d927106cd04c08ec378
|
| 3 |
+
size 815204
|
data/tokenized_2100h/shard_00029.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b59f80f907753c5ab5c2fe24d1b2223a73c8e23eced316297c0f04167b77e84
|
| 3 |
+
size 831389
|
data/tokenized_2100h/shard_00030.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1cb3375f9504fa61f4ebbfd50b19c3bd4e4940f4a4bc40a64c3e5180497cb7fe
|
| 3 |
+
size 652966
|
data/tokenized_2100h/shard_00031.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e8fdd250d5284234f0613e3def1064a28de92e53b3be6455fd7313706ab29ac
|
| 3 |
+
size 656303
|
data/tokenized_2100h/shard_00032.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4302ab1bf2c178582f2bdb8580ff0695ce2a34b869c35bcd322aa2bd0d6497ed
|
| 3 |
+
size 741607
|
data/tokenized_2100h/shard_00033.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:204f028b7a808a475ad94339af7cee383522c780925b129b17f5a65cafa55850
|
| 3 |
+
size 740055
|
data/tokenized_2100h/shard_00034.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d9a44d13dc22a0a9671e2c0e40eee7de083849ebd20e6c29628399455284f580
|
| 3 |
+
size 756259
|
data/tokenized_2100h/shard_00035.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf5542e9e1a717f6f1f94d2f74923c2cf1891059eac89460bd7b24783a013f32
|
| 3 |
+
size 726128
|
data/tokenized_2100h/shard_00036.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:96547d955027e1f45df491e96c53888e5d091cc0270d1d869f57a8cdfcca76bd
|
| 3 |
+
size 728778
|
data/tokenized_2100h/shard_00037.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a492abb25ef7bb940ae45a5c62917e3d095780036d55c76ed67480413d43be16
|
| 3 |
+
size 790481
|
data/tokenized_2100h/shard_00038.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:52c7debcf7c84802b6dccaca5aca6715ad9121f937a2ea16f2f3c0c1744267a1
|
| 3 |
+
size 797248
|
data/tokenized_2100h/shard_00039.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e1d30527908b823711d2c722da375e30f545908d2a58f1d67b3fcd972864ebe
|
| 3 |
+
size 758165
|
data/tokenized_2100h/shard_00040.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b64d4f96f30d28e8713243f3d14fa6773fb6dfa35428f7cc779b758658ea4bba
|
| 3 |
+
size 792460
|
data/tokenized_2100h/shard_00041.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:820332762d3e550fcc259d2b2a9ada5b336acbd7c917154bd739927835cab3e3
|
| 3 |
+
size 849331
|
data/tokenized_2100h/shard_00042.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:677e2ac54e247d09690d277312bf302c8fe6b4be1329e4652e2663b6ed3500d5
|
| 3 |
+
size 819823
|
data/tokenized_2100h/shard_00043.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ceda40279a6d7135e70e999b23c1ac95bc453a4aec34d1fdbbdfef5483e69657
|
| 3 |
+
size 963096
|
data/tokenized_2100h/shard_00044.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:73c4b04b86181c9e7b81abec68ffa4e778b9030531561963a7a1bd21099a050f
|
| 3 |
+
size 850508
|
data/tokenized_2100h/shard_00045.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d902f14bfad62e5813859aa275c630c21b1003b659b7fd567b93c7e18a155213
|
| 3 |
+
size 864305
|
data/tokenized_2100h/shard_00046.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bdbcabb96c4d4538c5024f36c6ca0da294675f41417b2460f42e3622a7e23c97
|
| 3 |
+
size 887148
|
data/tokenized_2100h/shard_00047.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3383d3dda288ee2577818fad2324b7467d5784f545584934356b983c703fe5d2
|
| 3 |
+
size 884675
|