Rcarvalo committed on
Commit
a4db66a
·
verified ·
1 Parent(s): a60db63

Add tokenized data: data/tokenized_2100h

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. data/tokenized_2100h/metadata.json +16 -0
  2. data/tokenized_2100h/pipeline.log +0 -0
  3. data/tokenized_2100h/shard_00000.parquet +3 -0
  4. data/tokenized_2100h/shard_00001.parquet +3 -0
  5. data/tokenized_2100h/shard_00002.parquet +3 -0
  6. data/tokenized_2100h/shard_00003.parquet +3 -0
  7. data/tokenized_2100h/shard_00004.parquet +3 -0
  8. data/tokenized_2100h/shard_00005.parquet +3 -0
  9. data/tokenized_2100h/shard_00006.parquet +3 -0
  10. data/tokenized_2100h/shard_00007.parquet +3 -0
  11. data/tokenized_2100h/shard_00008.parquet +3 -0
  12. data/tokenized_2100h/shard_00009.parquet +3 -0
  13. data/tokenized_2100h/shard_00010.parquet +3 -0
  14. data/tokenized_2100h/shard_00011.parquet +3 -0
  15. data/tokenized_2100h/shard_00012.parquet +3 -0
  16. data/tokenized_2100h/shard_00013.parquet +3 -0
  17. data/tokenized_2100h/shard_00014.parquet +3 -0
  18. data/tokenized_2100h/shard_00015.parquet +3 -0
  19. data/tokenized_2100h/shard_00016.parquet +3 -0
  20. data/tokenized_2100h/shard_00017.parquet +3 -0
  21. data/tokenized_2100h/shard_00018.parquet +3 -0
  22. data/tokenized_2100h/shard_00019.parquet +3 -0
  23. data/tokenized_2100h/shard_00020.parquet +3 -0
  24. data/tokenized_2100h/shard_00021.parquet +3 -0
  25. data/tokenized_2100h/shard_00022.parquet +3 -0
  26. data/tokenized_2100h/shard_00023.parquet +3 -0
  27. data/tokenized_2100h/shard_00024.parquet +3 -0
  28. data/tokenized_2100h/shard_00025.parquet +3 -0
  29. data/tokenized_2100h/shard_00026.parquet +3 -0
  30. data/tokenized_2100h/shard_00027.parquet +3 -0
  31. data/tokenized_2100h/shard_00028.parquet +3 -0
  32. data/tokenized_2100h/shard_00029.parquet +3 -0
  33. data/tokenized_2100h/shard_00030.parquet +3 -0
  34. data/tokenized_2100h/shard_00031.parquet +3 -0
  35. data/tokenized_2100h/shard_00032.parquet +3 -0
  36. data/tokenized_2100h/shard_00033.parquet +3 -0
  37. data/tokenized_2100h/shard_00034.parquet +3 -0
  38. data/tokenized_2100h/shard_00035.parquet +3 -0
  39. data/tokenized_2100h/shard_00036.parquet +3 -0
  40. data/tokenized_2100h/shard_00037.parquet +3 -0
  41. data/tokenized_2100h/shard_00038.parquet +3 -0
  42. data/tokenized_2100h/shard_00039.parquet +3 -0
  43. data/tokenized_2100h/shard_00040.parquet +3 -0
  44. data/tokenized_2100h/shard_00041.parquet +3 -0
  45. data/tokenized_2100h/shard_00042.parquet +3 -0
  46. data/tokenized_2100h/shard_00043.parquet +3 -0
  47. data/tokenized_2100h/shard_00044.parquet +3 -0
  48. data/tokenized_2100h/shard_00045.parquet +3 -0
  49. data/tokenized_2100h/shard_00046.parquet +3 -0
  50. data/tokenized_2100h/shard_00047.parquet +3 -0
data/tokenized_2100h/metadata.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "total_samples": 188109,
3
+ "total_hours": 579.18,
4
+ "num_shards": 489,
5
+ "shard_size": 1000,
6
+ "codec": "mimi",
7
+ "num_codebooks": 8,
8
+ "frame_rate": 12.5,
9
+ "sample_rate": 24000,
10
+ "datasets": [
11
+ "mls",
12
+ "voxpopuli"
13
+ ],
14
+ "format": "parquet",
15
+ "compression": "zstd"
16
+ }
data/tokenized_2100h/pipeline.log ADDED
The diff for this file is too large to render. See raw diff
 
data/tokenized_2100h/shard_00000.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4648d72191cbe98356f0f11f5df6c5d84a260d48b2270a6252d8f3aa1cf5b7c7
3
+ size 779253
data/tokenized_2100h/shard_00001.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dd535189d6e36912917ca581ffced976b5a234fdfb7697928b8e41f39f30d88
3
+ size 539244
data/tokenized_2100h/shard_00002.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f819cec8a2253308ee8882234b0fba28f448875ca5038df3a7060a600f0fbfe0
3
+ size 488202
data/tokenized_2100h/shard_00003.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:023843ffa8ccc5bbac45adbb39e005b272ef99b0cdb015b815f0bb936f263218
3
+ size 464539
data/tokenized_2100h/shard_00004.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffca34a393f7538eb751cf9db77569222ff40073552c4bdfb9a36694a69d053b
3
+ size 465680
data/tokenized_2100h/shard_00005.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1180f4d190b3f439853192f59dc2e8157d9a9c9662cd5cfc4b22a13e51bed6a0
3
+ size 517010
data/tokenized_2100h/shard_00006.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3184055357d396573aa491bcf4af85146da65267f47c818d4c2322adadc8106
3
+ size 613251
data/tokenized_2100h/shard_00007.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8502bb71d9149fe816045160e4f063c285e230751b58969b1502056df0be780
3
+ size 726880
data/tokenized_2100h/shard_00008.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:879ff71056c3eb4e038ac4b194fd2f90a0efe417abf97f9225c35ae5f8139198
3
+ size 751006
data/tokenized_2100h/shard_00009.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b7be099b175dc1590f0e531947d32d0c0d9cd5d14c652241685f1579eab2f79
3
+ size 762950
data/tokenized_2100h/shard_00010.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32d2f73ace839c67f2d781d1438d9c01e18ff0665c58ee72585f306980e145b0
3
+ size 732673
data/tokenized_2100h/shard_00011.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:372dd68d61f9fddda5a45238820df40fee6b6e02013830123f9d826935a667e8
3
+ size 719833
data/tokenized_2100h/shard_00012.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:292ebfeca5ce9009cadf57762884d9e527dea96319286c2ff10d8e7f7f358f84
3
+ size 712575
data/tokenized_2100h/shard_00013.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9d67e69c4d61606f568b6cb7e24cad58c5988c7737598b3c430463fd6a0f193
3
+ size 742862
data/tokenized_2100h/shard_00014.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:823837ffdf40d1a48c3948d83bb123a3021d3660cade222cdccc97deb0961f03
3
+ size 729079
data/tokenized_2100h/shard_00015.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fab31f304c69fb9c83ac21589e13a6bec5dc1051d6644bd6187e6d074467dcf6
3
+ size 777490
data/tokenized_2100h/shard_00016.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5031553da35b9c67680084b15882352267186ea3d00ab16bea344e188847069
3
+ size 745667
data/tokenized_2100h/shard_00017.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7e7a710e1a44eedeb9341ac36e01710fdb26e04b3a0f53266114b4ca3d6b448
3
+ size 803832
data/tokenized_2100h/shard_00018.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:241a18d7cd473f88e897d43ef8b41876ede51017eed0e6a65676e520e8763baf
3
+ size 815545
data/tokenized_2100h/shard_00019.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:369081440af59ba58de155cdf004f36a03c1ca4e7b782705df6fcc950070baf3
3
+ size 809762
data/tokenized_2100h/shard_00020.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0343c083162f1331f97272c1e60bf46e1acc509a196e5cf053db6600d40ffadf
3
+ size 841079
data/tokenized_2100h/shard_00021.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:273f7224c1a8eb6d1fccdbb169145bc3300a49e3d6b45f00211a93455668c71f
3
+ size 799656
data/tokenized_2100h/shard_00022.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59a4ec1e31eda4220422e56e438758a186799454a033924c2e3cc7e01932ef8b
3
+ size 841062
data/tokenized_2100h/shard_00023.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae9766e7011a5208818371d78e1b4c60633bb342f604d3ee702f023b76d1f546
3
+ size 843471
data/tokenized_2100h/shard_00024.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92cf4cf383aa14a4dada5f6d43254a51e1fbaf987ba37ce2b29daded9b7a2404
3
+ size 855138
data/tokenized_2100h/shard_00025.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74cd3986d1d7fcace939210d306c0d96a52a7ad4bea363ce2ac136a3d9d888d1
3
+ size 804751
data/tokenized_2100h/shard_00026.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd736d7b29542003c6ecb5f141bf0e1baf3b0df5c00932bd00f144763a4e53d6
3
+ size 802440
data/tokenized_2100h/shard_00027.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:079478950ae3634eec210baf9eb92444f954e0287f73d96a7d066522025f5e46
3
+ size 811086
data/tokenized_2100h/shard_00028.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc8b9eca8623ef2bc9514456461716534078b63bfcb91d927106cd04c08ec378
3
+ size 815204
data/tokenized_2100h/shard_00029.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b59f80f907753c5ab5c2fe24d1b2223a73c8e23eced316297c0f04167b77e84
3
+ size 831389
data/tokenized_2100h/shard_00030.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cb3375f9504fa61f4ebbfd50b19c3bd4e4940f4a4bc40a64c3e5180497cb7fe
3
+ size 652966
data/tokenized_2100h/shard_00031.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e8fdd250d5284234f0613e3def1064a28de92e53b3be6455fd7313706ab29ac
3
+ size 656303
data/tokenized_2100h/shard_00032.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4302ab1bf2c178582f2bdb8580ff0695ce2a34b869c35bcd322aa2bd0d6497ed
3
+ size 741607
data/tokenized_2100h/shard_00033.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:204f028b7a808a475ad94339af7cee383522c780925b129b17f5a65cafa55850
3
+ size 740055
data/tokenized_2100h/shard_00034.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9a44d13dc22a0a9671e2c0e40eee7de083849ebd20e6c29628399455284f580
3
+ size 756259
data/tokenized_2100h/shard_00035.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf5542e9e1a717f6f1f94d2f74923c2cf1891059eac89460bd7b24783a013f32
3
+ size 726128
data/tokenized_2100h/shard_00036.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96547d955027e1f45df491e96c53888e5d091cc0270d1d869f57a8cdfcca76bd
3
+ size 728778
data/tokenized_2100h/shard_00037.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a492abb25ef7bb940ae45a5c62917e3d095780036d55c76ed67480413d43be16
3
+ size 790481
data/tokenized_2100h/shard_00038.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52c7debcf7c84802b6dccaca5aca6715ad9121f937a2ea16f2f3c0c1744267a1
3
+ size 797248
data/tokenized_2100h/shard_00039.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e1d30527908b823711d2c722da375e30f545908d2a58f1d67b3fcd972864ebe
3
+ size 758165
data/tokenized_2100h/shard_00040.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b64d4f96f30d28e8713243f3d14fa6773fb6dfa35428f7cc779b758658ea4bba
3
+ size 792460
data/tokenized_2100h/shard_00041.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:820332762d3e550fcc259d2b2a9ada5b336acbd7c917154bd739927835cab3e3
3
+ size 849331
data/tokenized_2100h/shard_00042.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:677e2ac54e247d09690d277312bf302c8fe6b4be1329e4652e2663b6ed3500d5
3
+ size 819823
data/tokenized_2100h/shard_00043.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ceda40279a6d7135e70e999b23c1ac95bc453a4aec34d1fdbbdfef5483e69657
3
+ size 963096
data/tokenized_2100h/shard_00044.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73c4b04b86181c9e7b81abec68ffa4e778b9030531561963a7a1bd21099a050f
3
+ size 850508
data/tokenized_2100h/shard_00045.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d902f14bfad62e5813859aa275c630c21b1003b659b7fd567b93c7e18a155213
3
+ size 864305
data/tokenized_2100h/shard_00046.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdbcabb96c4d4538c5024f36c6ca0da294675f41417b2460f42e3622a7e23c97
3
+ size 887148
data/tokenized_2100h/shard_00047.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3383d3dda288ee2577818fad2324b7467d5784f545584934356b983c703fe5d2
3
+ size 884675