| license: cc | |
| size_categories: | |
| - 100K<n<1M | |
| task_categories: | |
| - audio-to-audio | |
| - audio-classification | |
| pretty_name: Free Music Archive - Full | |
| dataset_info: | |
| features: | |
| - name: audio | |
| dtype: audio | |
| - name: title | |
| dtype: string | |
| - name: url | |
| dtype: string | |
| - name: artist | |
| dtype: string | |
| - name: composer | |
| dtype: string | |
| - name: lyricist | |
| dtype: string | |
| - name: publisher | |
| dtype: string | |
| - name: genres | |
| sequence: | |
| class_label: | |
| names: | |
| '0': 20th Century Classical | |
| '1': Abstract Hip-Hop | |
| '2': African | |
| '3': Afrobeat | |
| '4': Alternative Hip-Hop | |
| '5': Ambient | |
| '6': Ambient Electronic | |
| '7': Americana | |
| '8': Asia-Far East | |
| '9': Audio Collage | |
| '10': Avant-Garde | |
| '11': Balkan | |
| '12': Banter | |
| '13': Be-Bop | |
| '14': Big Band/Swing | |
| '15': Bigbeat | |
| '16': Black-Metal | |
| '17': Bluegrass | |
| '18': Blues | |
| '19': Bollywood | |
| '20': Brazilian | |
| '21': Breakbeat | |
| '22': Breakcore - Hard | |
| '23': British Folk | |
| '24': Celtic | |
| '25': Chamber Music | |
| '26': Chill-out | |
| '27': Chip Music | |
| '28': Chiptune | |
| '29': Choral Music | |
| '30': Christmas | |
| '31': Classical | |
| '32': Comedy | |
| '33': Compilation | |
| '34': Composed Music | |
| '35': Contemporary Classical | |
| '36': Country | |
| '37': Country & Western | |
| '38': Cumbia | |
| '39': Dance | |
| '40': Death-Metal | |
| '41': Deep Funk | |
| '42': Disco | |
| '43': Downtempo | |
| '44': Drone | |
| '45': Drum & Bass | |
| '46': Dubstep | |
| '47': Easy Listening | |
| '48': 'Easy Listening: Vocal' | |
| '49': Electro-Punk | |
| '50': Electroacoustic | |
| '51': Electronic | |
| '52': Europe | |
| '53': Experimental | |
| '54': Experimental Pop | |
| '55': Fado | |
| '56': Field Recordings | |
| '57': Flamenco | |
| '58': Folk | |
| '59': Freak-Folk | |
| '60': Free-Folk | |
| '61': Free-Jazz | |
| '62': French | |
| '63': Funk | |
| '64': Garage | |
| '65': Glitch | |
| '66': Gospel | |
| '67': Goth | |
| '68': Grindcore | |
| '69': Hardcore | |
| '70': Hip-Hop | |
| '71': Hip-Hop Beats | |
| '72': Holiday | |
| '73': House | |
| '74': IDM | |
| '75': Improv | |
| '76': Indian | |
| '77': Indie-Rock | |
| '78': Industrial | |
| '79': Instrumental | |
| '80': International | |
| '81': Interview | |
| '82': Jazz | |
| '83': 'Jazz: Out' | |
| '84': 'Jazz: Vocal' | |
| '85': Jungle | |
| '86': Kid-Friendly | |
| '87': Klezmer | |
| '88': Krautrock | |
| '89': Latin | |
| '90': Latin America | |
| '91': Lo-Fi | |
| '92': Loud-Rock | |
| '93': Lounge | |
| '94': Metal | |
| '95': Middle East | |
| '96': Minimal Electronic | |
| '97': Minimalism | |
| '98': Modern Jazz | |
| '99': Musical Theater | |
| '100': Musique Concrete | |
| '101': N. Indian Traditional | |
| '102': Nerdcore | |
| '103': New Age | |
| '104': New Wave | |
| '105': No Wave | |
| '106': Noise | |
| '107': Noise-Rock | |
| '108': North African | |
| '109': Novelty | |
| '110': Nu-Jazz | |
| '111': Old-Time / Historic | |
| '112': Opera | |
| '113': Pacific | |
| '114': Poetry | |
| '115': Polka | |
| '116': Pop | |
| '117': Post-Punk | |
| '118': Post-Rock | |
| '119': Power-Pop | |
| '120': Progressive | |
| '121': Psych-Folk | |
| '122': Psych-Rock | |
| '123': Punk | |
| '124': Radio | |
| '125': Radio Art | |
| '126': Radio Theater | |
| '127': Rap | |
| '128': Reggae - Dancehall | |
| '129': Reggae - Dub | |
| '130': Rock | |
| '131': Rock Opera | |
| '132': Rockabilly | |
| '133': Romany (Gypsy) | |
| '134': Salsa | |
| '135': Shoegaze | |
| '136': Singer-Songwriter | |
| '137': Skweee | |
| '138': Sludge | |
| '139': Soul-RnB | |
| '140': Sound Art | |
| '141': Sound Collage | |
| '142': Sound Effects | |
| '143': Sound Poetry | |
| '144': Soundtrack | |
| '145': South Indian Traditional | |
| '146': Space-Rock | |
| '147': Spanish | |
| '148': Spoken | |
| '149': Spoken Weird | |
| '150': Spoken Word | |
| '151': Surf | |
| '152': Symphony | |
| '153': Synth Pop | |
| '154': Talk Radio | |
| '155': Tango | |
| '156': Techno | |
| '157': Thrash | |
| '158': Trip-Hop | |
| '159': Turkish | |
| '160': Unclassifiable | |
| '161': Western Swing | |
| '162': Wonky | |
| '163': hiphop | |
| - name: tags | |
| sequence: string | |
| - name: released | |
| dtype: timestamp[s] | |
| - name: language | |
| dtype: string | |
| - name: listens | |
| dtype: uint64 | |
| - name: artist_url | |
| dtype: string | |
| - name: artist_website | |
| dtype: string | |
| - name: album_title | |
| dtype: string | |
| - name: album_url | |
| dtype: string | |
| - name: license | |
| dtype: | |
| class_label: | |
| names: | |
| '0': CC-BY 1.0 | |
| '1': CC-BY 2.0 | |
| '2': CC-BY 2.5 | |
| '3': CC-BY 3.0 | |
| '4': CC-BY 4.0 | |
| '5': CC-BY-NC 2.0 | |
| '6': CC-BY-NC 2.1 | |
| '7': CC-BY-NC 2.5 | |
| '8': CC-BY-NC 3.0 | |
| '9': CC-BY-NC 4.0 | |
| '10': CC-BY-NC-ND 2.0 | |
| '11': CC-BY-NC-ND 2.1 | |
| '12': CC-BY-NC-ND 2.5 | |
| '13': CC-BY-NC-ND 3.0 | |
| '14': CC-BY-NC-ND 4.0 | |
| '15': CC-BY-NC-SA 2.0 | |
| '16': CC-BY-NC-SA 2.1 | |
| '17': CC-BY-NC-SA 2.5 | |
| '18': CC-BY-NC-SA 3.0 | |
| '19': CC-BY-NC-SA 4.0 | |
| '20': CC-BY-ND 2.0 | |
| '21': CC-BY-ND 2.5 | |
| '22': CC-BY-ND 3.0 | |
| '23': CC-BY-ND 4.0 | |
| '24': CC-BY-SA 2.0 | |
| '25': CC-BY-SA 2.5 | |
| '26': CC-BY-SA 3.0 | |
| '27': CC-BY-SA 4.0 | |
| '28': CC-NC-Sampling+ 1.0 | |
| '29': CC-Sampling+ 1.0 | |
| '30': CC0 1.0 | |
| '31': FMA Sound Recording Common Law | |
| '32': Free Art License | |
| '33': Free Music Philosophy (FMP) | |
| '34': Public Domain Mark 1.0 | |
| - name: copyright | |
| dtype: string | |
| - name: explicit | |
| dtype: | |
| class_label: | |
| names: | |
| '0': 'No' | |
| '1': 'Yes' | |
| - name: instrumental | |
| dtype: | |
| class_label: | |
| names: | |
| '0': 'No' | |
| '1': 'Yes' | |
| - name: allow_commercial_use | |
| dtype: | |
| class_label: | |
| names: | |
| '0': 'No' | |
| '1': 'Yes' | |
| - name: allow_derivatives | |
| dtype: | |
| class_label: | |
| names: | |
| '0': 'No' | |
| '1': 'Yes' | |
| - name: require_attribution | |
| dtype: | |
| class_label: | |
| names: | |
| '0': 'No' | |
| '1': 'Yes' | |
| - name: require_share_alike | |
| dtype: | |
| class_label: | |
| names: | |
| '0': 'No' | |
| '1': 'Yes' | |
| splits: | |
| - name: train | |
| num_bytes: 485817096595.736 | |
| num_examples: 106198 | |
| download_size: 523813114501 | |
| dataset_size: 485817096595.736 | |
| configs: | |
| - config_name: default | |
| data_files: | |
| - split: train | |
| path: data/train-* | |
| tags: | |
| - fma | |
| - free-music-archive | |
| # FMA: A Dataset for Music Analysis | |
| [Michaël Defferrard](https://deff.ch/), [Kirell Benzi](https://kirellbenzi.com/), [Pierre Vandergheynst](https://people.epfl.ch/pierre.vandergheynst), [Xavier Bresson](https://www.ntu.edu.sg/home/xbresson). | |
| **International Society for Music Information Retrieval Conference (ISMIR), 2017.** | |
| > We introduce the Free Music Archive (FMA), an open and easily accessible dataset suitable for evaluating several tasks in MIR, a field concerned with browsing, searching, and organizing large music collections. The community's growing interest in feature and end-to-end learning is however restrained by the limited availability of large audio datasets. The FMA aims to overcome this hurdle by providing 917 GiB and 343 days of Creative Commons-licensed audio from 106,574 tracks from 16,341 artists and 14,854 albums, arranged in a hierarchical taxonomy of 161 genres. It provides full-length and high-quality audio, pre-computed features, together with track- and user-level metadata, tags, and free-form text such as biographies. We here describe the dataset and how it was created, propose a train/validation/test split and three subsets, discuss some suitable MIR tasks, and evaluate some baselines for genre recognition. Code, data, and usage examples are available at https://github.com/mdeff/fma. | |
| Paper: [arXiv:1612.01840](https://arxiv.org/abs/1612.01840) - [latex and reviews](https://github.com/mdeff/paper-fma-ismir2017) | |
| Slides: [doi:10.5281/zenodo.1066119](https://doi.org/10.5281/zenodo.1066119) | |
| Poster: [doi:10.5281/zenodo.1035847](https://doi.org/10.5281/zenodo.1035847) | |
| # This Pack | |
| This is the **full** dataset, comprising a total of **106,199** clips of **untrimmed length** over **164** *unbalanced* genres, totaling **8,104 hours** of audio. | |
| Packed as Parquet files, this dataset is 593 GB in size, roughly a 34% size saving over the original ZIP file. | |
| ## Repack Notes | |
| - 173 files were unreadable by `libsndfile` / `libmpg123`; these were removed. | |
| - 202 files had licenses that were unclear on whether or not they permitted redistribution, or the full license text was unavailable. These were removed. | |
| - Many of the remaining files had mixed or inconsistent encoding. To homogenize the dataset, all audio was re-encoded using `libmpg123`. | |
| # License | |
| - The [FMA codebase](https://github.com/mdeff/fma) is released under [The MIT License](https://github.com/mdeff/fma/blob/master/LICENSE.txt). | |
| - The FMA metadata is released under [CC-BY 4.0](https://creativecommons.org/licenses/by/4.0). | |
| - The individual files are released under various Creative Commons family licenses, plus a small number of additional licenses. **Each file has its license attached and important details of the license enumerated.** To make the dataset easy for developers and trainers to use, a configuration is available that restricts it to commercially usable data only. | |
| Please refer to any of the following URLs for additional details. | |
| | Class Label | License Name | URL | | |
| | ----------- | ------------ | --- | | |
| | 0 | CC-BY 1.0 | https://creativecommons.org/licenses/by/1.0/ | | |
| | 1 | CC-BY 2.0 | https://creativecommons.org/licenses/by/2.0/ | | |
| | 2 | CC-BY 2.5 | https://creativecommons.org/licenses/by/2.5/ | | |
| | 3 | CC-BY 3.0 | https://creativecommons.org/licenses/by/3.0/ | | |
| | 4 | CC-BY 4.0 | https://creativecommons.org/licenses/by/4.0/ | | |
| | 5 | CC-BY-NC 2.0 | https://creativecommons.org/licenses/by-nc/2.0/ | | |
| | 6 | CC-BY-NC 2.1 | https://creativecommons.org/licenses/by-nc/2.1/ | | |
| | 7 | CC-BY-NC 2.5 | https://creativecommons.org/licenses/by-nc/2.5/ | | |
| | 8 | CC-BY-NC 3.0 | https://creativecommons.org/licenses/by-nc/3.0/ | | |
| | 9 | CC-BY-NC 4.0 | https://creativecommons.org/licenses/by-nc/4.0/ | | |
| | 10 | CC-BY-NC-ND 2.0 | https://creativecommons.org/licenses/by-nc-nd/2.0/ | | |
| | 11 | CC-BY-NC-ND 2.1 | https://creativecommons.org/licenses/by-nc-nd/2.1/ | | |
| | 12 | CC-BY-NC-ND 2.5 | https://creativecommons.org/licenses/by-nc-nd/2.5/ | | |
| | 13 | CC-BY-NC-ND 3.0 | https://creativecommons.org/licenses/by-nc-nd/3.0/ | | |
| | 14 | CC-BY-NC-ND 4.0 | https://creativecommons.org/licenses/by-nc-nd/4.0/ | | |
| | 15 | CC-BY-NC-SA 2.0 | https://creativecommons.org/licenses/by-nc-sa/2.0/ | | |
| | 16 | CC-BY-NC-SA 2.1 | https://creativecommons.org/licenses/by-nc-sa/2.1/ | | |
| | 17 | CC-BY-NC-SA 2.5 | https://creativecommons.org/licenses/by-nc-sa/2.5/ | | |
| | 18 | CC-BY-NC-SA 3.0 | https://creativecommons.org/licenses/by-nc-sa/3.0/ | | |
| | 19 | CC-BY-NC-SA 4.0 | https://creativecommons.org/licenses/by-nc-sa/4.0/ | | |
| | 20 | CC-BY-ND 2.0 | https://creativecommons.org/licenses/by-nd/2.0/ | | |
| | 21 | CC-BY-ND 2.5 | https://creativecommons.org/licenses/by-nd/2.5/ | | |
| | 22 | CC-BY-ND 3.0 | https://creativecommons.org/licenses/by-nd/3.0/ | | |
| | 23 | CC-BY-ND 4.0 | https://creativecommons.org/licenses/by-nd/4.0/ | | |
| | 24 | CC-BY-SA 2.0 | https://creativecommons.org/licenses/by-sa/2.0/ | | |
| | 25 | CC-BY-SA 2.5 | https://creativecommons.org/licenses/by-sa/2.5/ | | |
| | 26 | CC-BY-SA 3.0 | https://creativecommons.org/licenses/by-sa/3.0/ | | |
| | 27 | CC-BY-SA 4.0 | https://creativecommons.org/licenses/by-sa/4.0/ | | |
| | 28 | CC-NC-Sampling+ 1.0 | https://creativecommons.org/licenses/nc-sampling+/1.0/ | | |
| | 29 | CC-Sampling+ 1.0 | https://creativecommons.org/licenses/sampling+/1.0/ | | |
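| | 30 | CC0 1.0 | https://creativecommons.org/publicdomain/zero/1.0/ | | |
| | 31 | FMA Sound Recording Common Law | https://freemusicarchive.org/Sound_Recording_Common_Law | | |
| | 32 | Free Art License | https://artlibre.org/licence/lal/en | | |
| | 33 | Free Music Philosophy (FMP) | https://irdial.com/free_and_easy.htm | | |
| | 34 | Public Domain Mark 1.0 | https://creativecommons.org/publicdomain/mark/1.0/ | | |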
| ## Total Duration by License | |
| | License | Total Duration (Percentage) | | |
| | ------- | --------------------------- | | |
| | CC-BY-NC-SA 3.0 | 2768.3 hours (34.16%) | | |
| | CC-BY-NC-ND 3.0 | 2296.4 hours (28.34%) | | |
| | CC-BY-NC-ND 4.0 | 1018.4 hours (12.57%) | | |
| | CC-BY-NC-SA 4.0 | 533.2 hours (6.58%) | | |
| | CC-BY 4.0 | 377.0 hours (4.65%) | | |
| | CC-BY-NC 3.0 | 288.9 hours (3.56%) | | |
| | CC-BY-NC 4.0 | 232.6 hours (2.87%) | | |
| | CC-BY 3.0 | 106.9 hours (1.32%) | | |
| | CC-BY-SA 4.0 | 99.4 hours (1.23%) | | |
| | CC-BY-SA 3.0 | 79.7 hours (0.98%) | | |
| | CC-BY-NC-SA 2.0 | 65.1 hours (0.80%) | | |
| | CC-BY-NC-ND 2.0 | 56.2 hours (0.69%) | | |
| | CC-BY-ND 3.0 | 36.8 hours (0.45%) | | |
| | CC-BY-ND 4.0 | 25.0 hours (0.31%) | | |
| | CC-BY-NC-ND 2.5 | 24.2 hours (0.30%) | | |
| | FMA Sound Recording Common Law | 19.9 hours (0.25%) | | |
| | CC-BY-NC-SA 2.5 | 18.0 hours (0.22%) | | |
| | CC-BY-NC 2.5 | 13.3 hours (0.16%) | | |
| | CC0 1.0 | 10.5 hours (0.13%) | | |
| | CC-BY 1.0 | 10.4 hours (0.13%) | | |
| | Free Music Philosophy (FMP) | 4.4 hours (0.05%) | | |
| | Free Art License | 2.7 hours (0.03%) | | |
| | CC-BY 2.0 | 2.5 hours (0.03%) | | |
| | CC-BY-NC 2.1 | 2.4 hours (0.03%) | | |
| | CC-BY-NC-SA 2.1 | 2.3 hours (0.03%) | | |
| | CC-BY-SA 2.0 | 1.9 hours (0.02%) | | |
| | CC-BY-NC 2.0 | 1.6 hours (0.02%) | | |
| | CC-BY-ND 2.5 | 1.6 hours (0.02%) | | |
| | CC-NC-Sampling+ 1.0 | 1.4 hours (0.02%) | | |
| | CC-BY-NC-ND 2.1 | 65.0 minutes (0.01%) | | |
| | CC-Sampling+ 1.0 | 53.9 minutes (0.01%) | | |
| | CC-BY-SA 2.5 | 31.8 minutes (0.01%) | | |
| | CC-BY-ND 2.0 | 29.7 minutes (0.01%) | | |
| | CC-BY 2.5 | 11.2 minutes (0.00%) | | |
| # Citations | |
| ``` | |
| @inproceedings{fma_dataset, | |
| title = {{FMA}: A Dataset for Music Analysis}, | |
| author = {Defferrard, Micha\"el and Benzi, Kirell and Vandergheynst, Pierre and Bresson, Xavier}, | |
| booktitle = {18th International Society for Music Information Retrieval Conference (ISMIR)}, | |
| year = {2017}, | |
| archiveprefix = {arXiv}, | |
| eprint = {1612.01840}, | |
| url = {https://arxiv.org/abs/1612.01840}, | |
| } | |
| ``` | |
| ``` | |
| @inproceedings{fma_challenge, | |
| title = {Learning to Recognize Musical Genre from Audio}, | |
| subtitle = {Challenge Overview}, | |
| author = {Defferrard, Micha\"el and Mohanty, Sharada P. and Carroll, Sean F. and Salath\'e, Marcel}, | |
| booktitle = {The 2018 Web Conference Companion}, | |
| year = {2018}, | |
| publisher = {ACM Press}, | |
| isbn = {9781450356404}, | |
| doi = {10.1145/3184558.3192310}, | |
| archiveprefix = {arXiv}, | |
| eprint = {1803.05337}, | |
| url = {https://arxiv.org/abs/1803.05337}, | |
| } | |
| ``` |
Xet Storage Details
- Size:
- 15.2 kB
- Xet hash:
- a0917141e83d9ee367ab41767e979a629957cdeb692679f3cd36b79589b9e608
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.