| { | |
| "builder_name": "common_voice_11_0", | |
| "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", | |
| "config_name": "sv-SE", | |
| "dataset_name": "common_voice_11_0", | |
| "dataset_size": 13956341, | |
| "description": "Common Voice is Mozilla's initiative to help teach machines how real people speak. The dataset currently consists of 16413 validated hours of speech in 100 languages, but more voices and languages are always added.", | |
| "download_checksums": { | |
| "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/n_shards.json": { | |
| "num_bytes": 12179, | |
| "checksum": null | |
| }, | |
| "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/audio/sv-SE/train/sv-SE_train_0.tar": { | |
| "num_bytes": 196986880, | |
| "checksum": null | |
| }, | |
| "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/audio/sv-SE/dev/sv-SE_dev_0.tar": { | |
| "num_bytes": 139397120, | |
| "checksum": null | |
| }, | |
| "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/audio/sv-SE/test/sv-SE_test_0.tar": { | |
| "num_bytes": 152350720, | |
| "checksum": null | |
| }, | |
| "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/audio/sv-SE/other/sv-SE_other_0.tar": { | |
| "num_bytes": 153456640, | |
| "checksum": null | |
| }, | |
| "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/audio/sv-SE/invalidated/sv-SE_invalidated_0.tar": { | |
| "num_bytes": 42608640, | |
| "checksum": null | |
| }, | |
| "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/transcript/sv-SE/train.tsv": { | |
| "num_bytes": 1705559, | |
| "checksum": null | |
| }, | |
| "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/transcript/sv-SE/dev.tsv": { | |
| "num_bytes": 1136891, | |
| "checksum": null | |
| }, | |
| "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/transcript/sv-SE/test.tsv": { | |
| "num_bytes": 1130915, | |
| "checksum": null | |
| }, | |
| "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/transcript/sv-SE/other.tsv": { | |
| "num_bytes": 1336562, | |
| "checksum": null | |
| }, | |
| "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/transcript/sv-SE/invalidated.tsv": { | |
| "num_bytes": 312302, | |
| "checksum": null | |
| } | |
| }, | |
| "download_size": 690434408, | |
| "features": { | |
| "input_features": { | |
| "feature": { | |
| "feature": { | |
| "dtype": "float32", | |
| "_type": "Value" | |
| }, | |
| "_type": "Sequence" | |
| }, | |
| "_type": "Sequence" | |
| }, | |
| "labels": { | |
| "feature": { | |
| "dtype": "int64", | |
| "_type": "Value" | |
| }, | |
| "_type": "Sequence" | |
| } | |
| }, | |
| "homepage": "https://commonvoice.mozilla.org/en/datasets", | |
| "license": "https://creativecommons.org/publicdomain/zero/1.0/", | |
| "size_in_bytes": 704390749, | |
| "splits": { | |
| "train": { | |
| "name": "train", | |
| "num_bytes": 4198423, | |
| "num_examples": 7308, | |
| "dataset_name": "common_voice_11_0" | |
| }, | |
| "validation": { | |
| "name": "validation", | |
| "num_bytes": 2839969, | |
| "num_examples": 5052, | |
| "dataset_name": "common_voice_11_0" | |
| }, | |
| "test": { | |
| "name": "test", | |
| "num_bytes": 2849862, | |
| "num_examples": 5069, | |
| "dataset_name": "common_voice_11_0" | |
| }, | |
| "other": { | |
| "name": "other", | |
| "num_bytes": 3280556, | |
| "num_examples": 5699, | |
| "dataset_name": "common_voice_11_0" | |
| }, | |
| "invalidated": { | |
| "name": "invalidated", | |
| "num_bytes": 787531, | |
| "num_examples": 1346, | |
| "dataset_name": "common_voice_11_0" | |
| } | |
| }, | |
| "version": { | |
| "version_str": "11.0.0", | |
| "major": 11, | |
| "minor": 0, | |
| "patch": 0 | |
| } | |
| } |