File size: 4,199 Bytes
2aad0b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
{
  "builder_name": "common_voice_11_0",
  "citation": "@inproceedings{commonvoice:2020,\n  author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n  title = {Common Voice: A Massively-Multilingual Speech Corpus},\n  booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n  pages = {4211--4215},\n  year = 2020\n}\n",
  "config_name": "sv-SE",
  "dataset_name": "common_voice_11_0",
  "dataset_size": 13956341,
  "description": "Common Voice is Mozilla's initiative to help teach machines how real people speak. The dataset currently consists of 16413 validated hours of speech  in 100 languages, but more voices and languages are always added.",
  "download_checksums": {
    "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/n_shards.json": {
      "num_bytes": 12179,
      "checksum": null
    },
    "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/audio/sv-SE/train/sv-SE_train_0.tar": {
      "num_bytes": 196986880,
      "checksum": null
    },
    "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/audio/sv-SE/dev/sv-SE_dev_0.tar": {
      "num_bytes": 139397120,
      "checksum": null
    },
    "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/audio/sv-SE/test/sv-SE_test_0.tar": {
      "num_bytes": 152350720,
      "checksum": null
    },
    "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/audio/sv-SE/other/sv-SE_other_0.tar": {
      "num_bytes": 153456640,
      "checksum": null
    },
    "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/audio/sv-SE/invalidated/sv-SE_invalidated_0.tar": {
      "num_bytes": 42608640,
      "checksum": null
    },
    "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/transcript/sv-SE/train.tsv": {
      "num_bytes": 1705559,
      "checksum": null
    },
    "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/transcript/sv-SE/dev.tsv": {
      "num_bytes": 1136891,
      "checksum": null
    },
    "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/transcript/sv-SE/test.tsv": {
      "num_bytes": 1130915,
      "checksum": null
    },
    "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/transcript/sv-SE/other.tsv": {
      "num_bytes": 1336562,
      "checksum": null
    },
    "https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/main/transcript/sv-SE/invalidated.tsv": {
      "num_bytes": 312302,
      "checksum": null
    }
  },
  "download_size": 690434408,
  "features": {
    "input_features": {
      "feature": {
        "feature": {
          "dtype": "float32",
          "_type": "Value"
        },
        "_type": "Sequence"
      },
      "_type": "Sequence"
    },
    "labels": {
      "feature": {
        "dtype": "int64",
        "_type": "Value"
      },
      "_type": "Sequence"
    }
  },
  "homepage": "https://commonvoice.mozilla.org/en/datasets",
  "license": "https://creativecommons.org/publicdomain/zero/1.0/",
  "size_in_bytes": 704390749,
  "splits": {
    "train": {
      "name": "train",
      "num_bytes": 4198423,
      "num_examples": 7308,
      "dataset_name": "common_voice_11_0"
    },
    "validation": {
      "name": "validation",
      "num_bytes": 2839969,
      "num_examples": 5052,
      "dataset_name": "common_voice_11_0"
    },
    "test": {
      "name": "test",
      "num_bytes": 2849862,
      "num_examples": 5069,
      "dataset_name": "common_voice_11_0"
    },
    "other": {
      "name": "other",
      "num_bytes": 3280556,
      "num_examples": 5699,
      "dataset_name": "common_voice_11_0"
    },
    "invalidated": {
      "name": "invalidated",
      "num_bytes": 787531,
      "num_examples": 1346,
      "dataset_name": "common_voice_11_0"
    }
  },
  "version": {
    "version_str": "11.0.0",
    "major": 11,
    "minor": 0,
    "patch": 0
  }
}