Buckets:

rtrm's picture
download
raw
29.4 kB
/**
 * Minified page chunk for the "Create an audio dataset" documentation page.
 *
 * NOTE(review): this looks like compiler-generated output (the `ms` component below is given
 * docs/source/audio_dataset.mdx as its `source`), so text fixes presumably belong in that
 * file rather than here — confirm before hand-editing. Two candidates spotted in the strings:
 * `Dt` says "create on column" (likely "one column"), and `xt` talks about "computer vision
 * tasks" on an audio page.
 *
 * `ee(jt)` is the fragment factory. It declares one local per node or spacer, instantiates
 * the child components (`se`, `yt` with title/local/headingTag props, `h` with
 * code/highlighted/wrap props, and `te` with a `source` URL) and returns an object with:
 *   c()     builds every node with `p(tag)` / `l()`, calls `d(...)` on each child
 *           component's `$$.fragment`, assigns the static `textContent` / `innerHTML`
 *           strings, then calls `this.h()`;
 *   l(s)    walks the same node list through `i(...)` / `n(s)` / `u(...)`; a static string
 *           is assigned only when `o(node)` differs from the expected "svelte-…" value;
 *           then calls `this.h()`;
 *   h()     sets name="hf:doc:metadata" and content=`ae` on the meta node, and the
 *           "tip" / "tip" / "warning" classes on the three blockquotes;
 *   m(s,t)  passes `document.head` and the meta node to `Kt`, passes every node to
 *           `a(s, node, t)` and every child component to `r(component, s, t)`, then sets
 *           the `Mt` flag;
 *   p       is `Lt` from the scheduler chunk;
 *   i(s)    when `Mt` is not set, calls `c(fragment, s)` for each child component and sets `Mt`;
 *   o(s)    calls `m(fragment, s)` for each child component and clears `Mt`;
 *   d(s)    when `s` is truthy passes every node to `e(...)`; always passes the meta node
 *           to `e` and every child component to `f(component, s)`.
 *
 * The single-letter helpers are imported from ../chunks/index…; they are presumably the
 * framework's element/space/claim/insert/detach/transition helpers under minified aliases —
 * verify against that chunk. The `code` props look like base64 of percent-encoded text
 * (the first one starts with "from%20datasets") and the `highlighted` props are
 * pre-rendered highlight.js HTML — TODO confirm how CodeBlock consumes them.
 *
 * `ae` (its literal starts at the end of this span) is a JSON string holding the page title
 * and its section list; `h()` above stores it in the meta tag's `content` attribute.
 */
import{s as zt,n as Lt,o as Et}from"../chunks/scheduler.d75c11ed.js";import{S as St,i as Pt,e as p,s as l,c as d,h as Ot,a as i,d as e,b as n,f as Qt,g as u,j as o,k as Ms,l as Kt,m as a,n as r,t as c,o as m,p as f}from"../chunks/index.4ec9dfe9.js";import{C as se,H as yt,E as te}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.5462c66d.js";import{C as h}from"../chunks/CodeBlock.8e9b95bc.js";function ee(jt){let M,ys,fs,js,J,Ts,w,Js,b,Tt="You can share a dataset with your team or with anyone in the community by creating a dataset repository on the Hugging Face Hub:",ws,Z,bs,$,Jt="There are several methods for creating and sharing an audio dataset:",Zs,g,wt='<li><p>Create an audio dataset from local files in python with <a href="/docs/datasets/pr_8128/en/package_reference/main_classes#datasets.Dataset.push_to_hub">Dataset.push_to_hub()</a>. This is an easy way that requires only a few steps in python.</p></li> <li><p>Create an audio dataset repository with the <code>AudioFolder</code> builder. This is a no-code solution for quickly creating an audio dataset with several thousand audio files.</p></li>',$s,y,bt='<p>You can control access to your dataset by requiring users to share their contact information first. Check out the <a href="https://huggingface.co/docs/hub/datasets-gated" rel="nofollow">Gated datasets</a> guide for more information about how to enable this feature on the Hub.</p>',gs,U,Us,_,Zt='You can load your own dataset using the paths to your audio files. 
Use the <a href="/docs/datasets/pr_8128/en/package_reference/main_classes#datasets.Dataset.cast_column">cast_column()</a> function to take a column of audio file paths, and cast it to the <a href="/docs/datasets/pr_8128/en/package_reference/main_classes#datasets.Audio">Audio</a> feature:',_s,k,ks,I,$t='Then upload the dataset to the Hugging Face Hub using <a href="/docs/datasets/pr_8128/en/package_reference/main_classes#datasets.Dataset.push_to_hub">Dataset.push_to_hub()</a>:',Is,v,vs,G,gt="This will create a dataset repository containing your audio dataset:",Gs,q,qs,x,xs,C,Ut="The <code>AudioFolder</code> is a dataset builder designed to quickly load an audio dataset with several thousand audio files without requiring you to write any code.",Cs,j,_t='<p>💡 Take a look at the <a href="repository_structure#split-pattern-hierarchy">Split pattern hierarchy</a> to learn more about how <code>AudioFolder</code> creates dataset splits based on your dataset repository structure.</p>',Rs,R,kt="<code>AudioFolder</code> automatically infers the class labels of your dataset based on the directory name. Store your dataset in a directory structure like:",Xs,X,Ws,W,It='If the dataset follows the <code>AudioFolder</code> structure, then you can load it directly with <a href="/docs/datasets/pr_8128/en/package_reference/loading_methods#datasets.load_dataset">load_dataset()</a>:',Fs,F,Ys,Y,vt='This is equivalent to passing <code>audiofolder</code> manually in <a href="/docs/datasets/pr_8128/en/package_reference/loading_methods#datasets.load_dataset">load_dataset()</a> and the directory in <code>data_dir</code>:',Vs,V,Bs,B,Gt="You can also use <code>audiofolder</code> to load datasets involving multiple splits. To do so, your dataset directory should have the following structure:",Ns,N,Ds,T,qt="<p>If all audio files are contained in a single directory or if they are not on the same level of directory structure, <code>label</code> column won’t be added automatically. 
If you need it, set <code>drop_labels=False</code> explicitly.</p>",As,D,xt="If there is additional information you’d like to include about your dataset, like text captions or bounding boxes, add it as a <code>metadata.csv</code> file in your folder. This lets you quickly create datasets for different computer vision tasks like text captioning or object detection. You can also use a JSONL file <code>metadata.jsonl</code> or a Parquet file <code>metadata.parquet</code>.",Hs,A,Qs,H,Ct="You can also zip your audio files, and in this case each zip should contain both the audio files and the metadata",zs,Q,Ls,z,Rt="Your <code>metadata.csv</code> file must have a <code>file_name</code> or <code>*_file_name</code> field which links audio files with their metadata:",Es,L,Ss,E,Xt="or using <code>metadata.jsonl</code>:",Ps,S,Os,P,Wt="Here the <code>file_name</code> must be the name of the audio file next to the metadata file. More generally, it must be the relative path from the directory containing the metadata to the audio file.",Ks,O,Ft="It’s possible to point to more than one audio in each row in your dataset, for example if both your input and output are audio files:",st,K,tt,ss,Yt="You can also define lists of audio files. In that case you need to name the field <code>file_names</code> or <code>*_file_names</code>. Here is an example:",et,ts,at,es,lt,as,Vt=`The <a href="https://github.com/webdataset/webdataset" rel="nofollow">WebDataset</a> format is based on TAR archives and is suitable for big audio datasets.
Indeed you can group your audio files in TAR archives (e.g. 1GB of audio files per TAR archive) and have thousands of TAR archives:`,nt,ls,pt,ns,Bt="In the archives, each example is made of files sharing the same prefix:",it,ps,ot,is,Nt="You can put your audio files labels/captions/bounding boxes using JSON or text files for example.",dt,os,Dt="Load your WebDataset and it will create on column per file suffix (here “mp3” and “json”):",ut,ds,rt,us,At="It’s also possible to have several audio files per example like this:",ct,rs,mt,cs,Ht='For more details on the WebDataset format and the python library, please check the <a href="https://webdataset.github.io/webdataset" rel="nofollow">WebDataset documentation</a>.',ft,ms,ht,hs,Mt;return J=new se({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),w=new yt({props:{title:"Create an audio dataset",local:"create-an-audio-dataset",headingTag:"h1"}}),Z=new h({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjIlM0N1c2VybmFtZSUzRSUyRm15X2RhdGFzZXQlMjIp",highlighted:`<span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
dataset = load_dataset(<span class="hljs-string">&quot;&lt;username&gt;/my_dataset&quot;</span>)`,wrap:!1}}),U=new yt({props:{title:"Local files",local:"local-files",headingTag:"h2"}}),k=new h({props:{code:"YXVkaW9fZGF0YXNldCUyMCUzRCUyMERhdGFzZXQuZnJvbV9kaWN0KCU3QiUyMmF1ZGlvJTIyJTNBJTIwJTVCJTIycGF0aCUyRnRvJTJGYXVkaW9fMSUyMiUyQyUyMCUyMnBhdGglMkZ0byUyRmF1ZGlvXzIlMjIlMkMlMjAuLi4lMkMlMjAlMjJwYXRoJTJGdG8lMkZhdWRpb19uJTIyJTVEJTdEKS5jYXN0X2NvbHVtbiglMjJhdWRpbyUyMiUyQyUyMEF1ZGlvKCkpJTBBYXVkaW9fZGF0YXNldCU1QjAlNUQlNUIlMjJhdWRpbyUyMiU1RA==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>audio_dataset = Dataset.from_dict({<span class="hljs-string">&quot;audio&quot;</span>: [<span class="hljs-string">&quot;path/to/audio_1&quot;</span>, <span class="hljs-string">&quot;path/to/audio_2&quot;</span>, ..., <span class="hljs-string">&quot;path/to/audio_n&quot;</span>]}).cast_column(<span class="hljs-string">&quot;audio&quot;</span>, Audio())
<span class="hljs-meta">&gt;&gt;&gt; </span>audio_dataset[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;audio&quot;</span>]
&lt;datasets.features._torchcodec.AudioDecoder <span class="hljs-built_in">object</span> at <span class="hljs-number">0x11642b6a0</span>&gt;`,wrap:!1}}),v=new h({props:{code:"YXVkaW9fZGF0YXNldC5wdXNoX3RvX2h1YiglMjIlM0N1c2VybmFtZSUzRSUyRm15X2RhdGFzZXQlMjIp",highlighted:'audio_dataset.push_to_hub(<span class="hljs-string">&quot;&lt;username&gt;/my_dataset&quot;</span>)',wrap:!1}}),q=new h({props:{code:"bXlfZGF0YXNldCUyRiUwQSVFMiU5NCU5QyVFMiU5NCU4MCVFMiU5NCU4MCUyMFJFQURNRS5tZCUwQSVFMiU5NCU5NCVFMiU5NCU4MCVFMiU5NCU4MCUyMGRhdGElMkYlMEElMjAlMjAlMjAlMjAlRTIlOTQlOTQlRTIlOTQlODAlRTIlOTQlODAlMjB0cmFpbi0wMDAwMC1vZi0wMDAwMS5wYXJxdWV0",highlighted:`<span class="hljs-title">my_dataset</span>/
├── <span class="hljs-type">README</span>.md
└── <span class="hljs-class"><span class="hljs-keyword">data</span>/</span>
└── train-<span class="hljs-number">00000</span>-<span class="hljs-keyword">of</span>-<span class="hljs-number">00001</span>.parquet`,wrap:!1}}),x=new yt({props:{title:"AudioFolder",local:"audiofolder",headingTag:"h2"}}),X=new h({props:{code:"Zm9sZGVyJTJGdHJhaW4lMkZkb2clMkZnb2xkZW5fcmV0cmlldmVyLm1wMyUwQWZvbGRlciUyRnRyYWluJTJGZG9nJTJGZ2VybWFuX3NoZXBoZXJkLm1wMyUwQWZvbGRlciUyRnRyYWluJTJGZG9nJTJGY2hpaHVhaHVhLm1wMyUwQSUwQWZvbGRlciUyRnRyYWluJTJGY2F0JTJGbWFpbmVfY29vbi5tcDMlMEFmb2xkZXIlMkZ0cmFpbiUyRmNhdCUyRmJlbmdhbC5tcDMlMEFmb2xkZXIlMkZ0cmFpbiUyRmNhdCUyRmJpcm1hbi5tcDM=",highlighted:`folder<span class="hljs-regexp">/train/</span>dog/golden_retriever.mp3
folder<span class="hljs-regexp">/train/</span>dog/german_shepherd.mp3
folder<span class="hljs-regexp">/train/</span>dog/chihuahua.mp3
folder<span class="hljs-regexp">/train/</span>cat/maine_coon.mp3
folder<span class="hljs-regexp">/train/</span>cat/bengal.mp3
folder<span class="hljs-regexp">/train/</span>cat/birman.mp3`,wrap:!1}}),F=new h({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJ1c2VybmFtZSUyRmRhdGFzZXRfbmFtZSUyMik=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = load_dataset(<span class="hljs-string">&quot;username/dataset_name&quot;</span>)`,wrap:!1}}),V=new h({props:{code:"ZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJhdWRpb2ZvbGRlciUyMiUyQyUyMGRhdGFfZGlyJTNEJTIyJTJGcGF0aCUyRnRvJTJGZm9sZGVyJTIyKQ==",highlighted:'<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = load_dataset(<span class="hljs-string">&quot;audiofolder&quot;</span>, data_dir=<span class="hljs-string">&quot;/path/to/folder&quot;</span>)',wrap:!1}}),N=new h({props:{code:"Zm9sZGVyJTJGdHJhaW4lMkZkb2clMkZnb2xkZW5fcmV0cmlldmVyLm1wMyUwQWZvbGRlciUyRnRyYWluJTJGY2F0JTJGbWFpbmVfY29vbi5tcDMlMEFmb2xkZXIlMkZ0ZXN0JTJGZG9nJTJGZ2VybWFuX3NoZXBoZXJkLm1wMyUwQWZvbGRlciUyRnRlc3QlMkZjYXQlMkZiZW5nYWwubXAz",highlighted:`folder<span class="hljs-regexp">/train/</span>dog/golden_retriever.mp3
folder<span class="hljs-regexp">/train/</span>cat/maine_coon.mp3
folder<span class="hljs-regexp">/test/</span>dog/german_shepherd.mp3
folder<span class="hljs-regexp">/test/</span>cat/bengal.mp3`,wrap:!1}}),A=new h({props:{code:"Zm9sZGVyJTJGdHJhaW4lMkZtZXRhZGF0YS5jc3YlMEFmb2xkZXIlMkZ0cmFpbiUyRjAwMDEubXAzJTBBZm9sZGVyJTJGdHJhaW4lMkYwMDAyLm1wMyUwQWZvbGRlciUyRnRyYWluJTJGMDAwMy5tcDM=",highlighted:`folder<span class="hljs-regexp">/train/m</span>etadata.csv
folder<span class="hljs-regexp">/train/</span><span class="hljs-number">0001</span>.mp3
folder<span class="hljs-regexp">/train/</span><span class="hljs-number">0002</span>.mp3
folder<span class="hljs-regexp">/train/</span><span class="hljs-number">0003</span>.mp3`,wrap:!1}}),Q=new h({props:{code:"Zm9sZGVyJTJGdHJhaW4uemlwJTBBZm9sZGVyJTJGdGVzdC56aXAlMEFmb2xkZXIlMkZ2YWxpZGF0aW9uLnppcA==",highlighted:`folder/train.<span class="hljs-keyword">zip</span>
folder/<span class="hljs-keyword">test</span>.<span class="hljs-keyword">zip</span>
folder/validation.<span class="hljs-keyword">zip</span>`,wrap:!1}}),L=new h({props:{code:"ZmlsZV9uYW1lJTJDYWRkaXRpb25hbF9mZWF0dXJlJTBBMDAwMS5tcDMlMkNUaGlzJTIwaXMlMjBhJTIwZmlyc3QlMjB2YWx1ZSUyMG9mJTIwYSUyMHRleHQlMjBmZWF0dXJlJTIweW91JTIwYWRkZWQlMjB0byUyMHlvdXIlMjBhdWRpbyUyMGZpbGVzJTBBMDAwMi5tcDMlMkNUaGlzJTIwaXMlMjBhJTIwc2Vjb25kJTIwdmFsdWUlMjBvZiUyMGElMjB0ZXh0JTIwZmVhdHVyZSUyMHlvdSUyMGFkZGVkJTIwdG8lMjB5b3VyJTIwYXVkaW8lMjBmaWxlcyUwQTAwMDMubXAzJTJDVGhpcyUyMGlzJTIwYSUyMHRoaXJkJTIwdmFsdWUlMjBvZiUyMGElMjB0ZXh0JTIwZmVhdHVyZSUyMHlvdSUyMGFkZGVkJTIwdG8lMjB5b3VyJTIwYXVkaW8lMjBmaWxlcw==",highlighted:`file_name,additional_feature
<span class="hljs-number">0001.</span>mp3,This is <span class="hljs-keyword">a</span> <span class="hljs-keyword">first</span> <span class="hljs-built_in">value</span> <span class="hljs-keyword">of</span> <span class="hljs-keyword">a</span> <span class="hljs-keyword">text</span> feature you added <span class="hljs-built_in">to</span> your audio <span class="hljs-built_in">files</span>
<span class="hljs-number">0002.</span>mp3,This is <span class="hljs-keyword">a</span> <span class="hljs-keyword">second</span> <span class="hljs-built_in">value</span> <span class="hljs-keyword">of</span> <span class="hljs-keyword">a</span> <span class="hljs-keyword">text</span> feature you added <span class="hljs-built_in">to</span> your audio <span class="hljs-built_in">files</span>
<span class="hljs-number">0003.</span>mp3,This is <span class="hljs-keyword">a</span> <span class="hljs-keyword">third</span> <span class="hljs-built_in">value</span> <span class="hljs-keyword">of</span> <span class="hljs-keyword">a</span> <span class="hljs-keyword">text</span> feature you added <span class="hljs-built_in">to</span> your audio <span class="hljs-built_in">files</span>`,wrap:!1}}),S=new h({props:{code:"JTdCJTIyZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMS5tcDMlMjIlMkMlMjAlMjJhZGRpdGlvbmFsX2ZlYXR1cmUlMjIlM0ElMjAlMjJUaGlzJTIwaXMlMjBhJTIwZmlyc3QlMjB2YWx1ZSUyMG9mJTIwYSUyMHRleHQlMjBmZWF0dXJlJTIweW91JTIwYWRkZWQlMjB0byUyMHlvdXIlMjBhdWRpbyUyMGZpbGVzJTIyJTdEJTBBJTdCJTIyZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMi5tcDMlMjIlMkMlMjAlMjJhZGRpdGlvbmFsX2ZlYXR1cmUlMjIlM0ElMjAlMjJUaGlzJTIwaXMlMjBhJTIwc2Vjb25kJTIwdmFsdWUlMjBvZiUyMGElMjB0ZXh0JTIwZmVhdHVyZSUyMHlvdSUyMGFkZGVkJTIwdG8lMjB5b3VyJTIwYXVkaW8lMjBmaWxlcyUyMiU3RCUwQSU3QiUyMmZpbGVfbmFtZSUyMiUzQSUyMCUyMjAwMDMubXAzJTIyJTJDJTIwJTIyYWRkaXRpb25hbF9mZWF0dXJlJTIyJTNBJTIwJTIyVGhpcyUyMGlzJTIwYSUyMHRoaXJkJTIwdmFsdWUlMjBvZiUyMGElMjB0ZXh0JTIwZmVhdHVyZSUyMHlvdSUyMGFkZGVkJTIwdG8lMjB5b3VyJTIwYXVkaW8lMjBmaWxlcyUyMiU3RA==",highlighted:`{<span class="hljs-comment">&quot;file_name&quot;</span>: <span class="hljs-comment">&quot;0001.mp3&quot;</span>, <span class="hljs-comment">&quot;additional_feature&quot;</span>: <span class="hljs-comment">&quot;This is a first value of a text feature you added to your audio files&quot;</span>}
{<span class="hljs-comment">&quot;file_name&quot;</span>: <span class="hljs-comment">&quot;0002.mp3&quot;</span>, <span class="hljs-comment">&quot;additional_feature&quot;</span>: <span class="hljs-comment">&quot;This is a second value of a text feature you added to your audio files&quot;</span>}
{<span class="hljs-comment">&quot;file_name&quot;</span>: <span class="hljs-comment">&quot;0003.mp3&quot;</span>, <span class="hljs-comment">&quot;additional_feature&quot;</span>: <span class="hljs-comment">&quot;This is a third value of a text feature you added to your audio files&quot;</span>}`,wrap:!1}}),K=new h({props:{code:"JTdCJTIyaW5wdXRfZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMS5tcDMlMjIlMkMlMjAlMjJvdXRwdXRfZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMV9vdXRwdXQubXAzJTIyJTdEJTBBJTdCJTIyaW5wdXRfZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMi5tcDMlMjIlMkMlMjAlMjJvdXRwdXRfZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMl9vdXRwdXQubXAzJTIyJTdEJTBBJTdCJTIyaW5wdXRfZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMy5tcDMlMjIlMkMlMjAlMjJvdXRwdXRfZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwM19vdXRwdXQubXAzJTIyJTdE",highlighted:`<span class="hljs-punctuation">{</span><span class="hljs-attr">&quot;input_file_name&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;0001.mp3&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-attr">&quot;output_file_name&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;0001_output.mp3&quot;</span><span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">{</span><span class="hljs-attr">&quot;input_file_name&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;0002.mp3&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-attr">&quot;output_file_name&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;0002_output.mp3&quot;</span><span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">{</span><span class="hljs-attr">&quot;input_file_name&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;0003.mp3&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-attr">&quot;output_file_name&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;0003_output.mp3&quot;</span><span class="hljs-punctuation">}</span>`,wrap:!1}}),ts=new h({props:{code:"JTdCJTIycmVjb3JkaW5nc19maWxlX25hbWVzJTIyJTNBJTIwJTVCJTIyMDAwMV9yMC5tcDMlMjIlMkMlMjAlMjIwMDAxX3IxLm1wMyUyMiU1RCUyQyUyMGxhYmVsJTNBJTIwJTIyc2FtZV9wZXJzb24lMjIlN0QlMEElN0IlMjJyZWNvcmRpbmdzX2ZpbGVfbmFtZXMlMjIlM0ElMjAlNUIlMjIwMDAyX3IwLm1wMyUyMiUyQyUyMCUyMjAwMDJfcjEubXAzJTIyJTVEJTJDJTIwbGFiZWwlM0ElMjAlMjJzYW1lX3BlcnNvbiUyMiU3RCUwQSU3QiUyMnJlY29yZGluZ3NfZmlsZV9uYW1lcyUyMiUzQSUyMCU1QiUyMjAwMDNfcjAubXAzJTIyJTJDJTIwJTIyMDAwM19yMS5tcDMlMjIlNUQlMkMlMjBsYWJlbCUzQSUyMCUyMmRpZmZlcmVudF9wZXJzb24lMjIlN0Q=",highlighted:`{<span class="hljs-string">&quot;recordings_file_names&quot;</span>: <span class="hljs-selector-attr">[<span class="hljs-string">&quot;0001_r0.mp3&quot;</span>, <span class="hljs-string">&quot;0001_r1.mp3&quot;</span>]</span>, <span class="hljs-selector-tag">label</span>: <span class="hljs-string">&quot;same_person&quot;</span>}
{<span class="hljs-string">&quot;recordings_file_names&quot;</span>: <span class="hljs-selector-attr">[<span class="hljs-string">&quot;0002_r0.mp3&quot;</span>, <span class="hljs-string">&quot;0002_r1.mp3&quot;</span>]</span>, <span class="hljs-selector-tag">label</span>: <span class="hljs-string">&quot;same_person&quot;</span>}
{<span class="hljs-string">&quot;recordings_file_names&quot;</span>: <span class="hljs-selector-attr">[<span class="hljs-string">&quot;0003_r0.mp3&quot;</span>, <span class="hljs-string">&quot;0003_r1.mp3&quot;</span>]</span>, <span class="hljs-selector-tag">label</span>: <span class="hljs-string">&quot;different_person&quot;</span>}`,wrap:!1}}),es=new yt({props:{title:"WebDataset",local:"webdataset",headingTag:"h2"}}),ls=new h({props:{code:"Zm9sZGVyJTJGdHJhaW4lMkYwMDAwMC50YXIlMEFmb2xkZXIlMkZ0cmFpbiUyRjAwMDAxLnRhciUwQWZvbGRlciUyRnRyYWluJTJGMDAwMDIudGFyJTBBLi4u",highlighted:`folder<span class="hljs-regexp">/train/</span><span class="hljs-number">00000</span>.tar
folder<span class="hljs-regexp">/train/</span><span class="hljs-number">00001</span>.tar
folder<span class="hljs-regexp">/train/</span><span class="hljs-number">00002</span>.tar
...`,wrap:!1}}),ps=new h({props:{code:"ZTM5ODcxZmQ5ZmQ3NGY1NS5tcDMlMEFlMzk4NzFmZDlmZDc0ZjU1Lmpzb24lMEFmMThiOTE1ODVjNGQzZjNlLm1wMyUwQWYxOGI5MTU4NWM0ZDNmM2UuanNvbiUwQWVkZTZlNjZiMmZiNTlhYWIubXAzJTBBZWRlNmU2NmIyZmI1OWFhYi5qc29uJTBBZWQ2MDBkNTdmY2VlNGY5NC5tcDMlMEFlZDYwMGQ1N2ZjZWU0Zjk0Lmpzb24lMEEuLi4=",highlighted:`e39871fd9fd74f55<span class="hljs-selector-class">.mp3</span>
e39871fd9fd74f55<span class="hljs-selector-class">.json</span>
f18b91585c4d3f3e<span class="hljs-selector-class">.mp3</span>
f18b91585c4d3f3e<span class="hljs-selector-class">.json</span>
ede6e66b2fb59aab<span class="hljs-selector-class">.mp3</span>
ede6e66b2fb59aab<span class="hljs-selector-class">.json</span>
ed600d57fcee4f94<span class="hljs-selector-class">.mp3</span>
ed600d57fcee4f94<span class="hljs-selector-class">.json</span>
...`,wrap:!1}}),ds=new h({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJ3ZWJkYXRhc2V0JTIyJTJDJTIwZGF0YV9kaXIlM0QlMjIlMkZwYXRoJTJGdG8lMkZmb2xkZXIlMjIlMkMlMjBzcGxpdCUzRCUyMnRyYWluJTIyKSUwQWRhdGFzZXQlNUIwJTVEJTVCJTIyanNvbiUyMiU1RA==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = load_dataset(<span class="hljs-string">&quot;webdataset&quot;</span>, data_dir=<span class="hljs-string">&quot;/path/to/folder&quot;</span>, split=<span class="hljs-string">&quot;train&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;json&quot;</span>]
{<span class="hljs-string">&quot;transcript&quot;</span>: <span class="hljs-string">&quot;Hello there !&quot;</span>, <span class="hljs-string">&quot;speaker&quot;</span>: <span class="hljs-string">&quot;Obi-Wan Kenobi&quot;</span>}`,wrap:!1}}),rs=new h({props:{code:"ZTM5ODcxZmQ5ZmQ3NGY1NS5pbnB1dC5tcDMlMEFlMzk4NzFmZDlmZDc0ZjU1Lm91dHB1dC5tcDMlMEFlMzk4NzFmZDlmZDc0ZjU1Lmpzb24lMEFmMThiOTE1ODVjNGQzZjNlLmlucHV0Lm1wMyUwQWYxOGI5MTU4NWM0ZDNmM2Uub3V0cHV0Lm1wMyUwQWYxOGI5MTU4NWM0ZDNmM2UuanNvbiUwQS4uLg==",highlighted:`e39871fd9fd74f55<span class="hljs-selector-class">.input</span><span class="hljs-selector-class">.mp3</span>
e39871fd9fd74f55<span class="hljs-selector-class">.output</span><span class="hljs-selector-class">.mp3</span>
e39871fd9fd74f55<span class="hljs-selector-class">.json</span>
f18b91585c4d3f3e<span class="hljs-selector-class">.input</span><span class="hljs-selector-class">.mp3</span>
f18b91585c4d3f3e<span class="hljs-selector-class">.output</span><span class="hljs-selector-class">.mp3</span>
f18b91585c4d3f3e<span class="hljs-selector-class">.json</span>
...`,wrap:!1}}),ms=new te({props:{source:"https://github.com/huggingface/datasets/blob/main/docs/source/audio_dataset.mdx"}}),{c(){M=p("meta"),ys=l(),fs=p("p"),js=l(),d(J.$$.fragment),Ts=l(),d(w.$$.fragment),Js=l(),b=p("p"),b.textContent=Tt,ws=l(),d(Z.$$.fragment),bs=l(),$=p("p"),$.textContent=Jt,Zs=l(),g=p("ul"),g.innerHTML=wt,$s=l(),y=p("blockquote"),y.innerHTML=bt,gs=l(),d(U.$$.fragment),Us=l(),_=p("p"),_.innerHTML=Zt,_s=l(),d(k.$$.fragment),ks=l(),I=p("p"),I.innerHTML=$t,Is=l(),d(v.$$.fragment),vs=l(),G=p("p"),G.textContent=gt,Gs=l(),d(q.$$.fragment),qs=l(),d(x.$$.fragment),xs=l(),C=p("p"),C.innerHTML=Ut,Cs=l(),j=p("blockquote"),j.innerHTML=_t,Rs=l(),R=p("p"),R.innerHTML=kt,Xs=l(),d(X.$$.fragment),Ws=l(),W=p("p"),W.innerHTML=It,Fs=l(),d(F.$$.fragment),Ys=l(),Y=p("p"),Y.innerHTML=vt,Vs=l(),d(V.$$.fragment),Bs=l(),B=p("p"),B.innerHTML=Gt,Ns=l(),d(N.$$.fragment),Ds=l(),T=p("blockquote"),T.innerHTML=qt,As=l(),D=p("p"),D.innerHTML=xt,Hs=l(),d(A.$$.fragment),Qs=l(),H=p("p"),H.textContent=Ct,zs=l(),d(Q.$$.fragment),Ls=l(),z=p("p"),z.innerHTML=Rt,Es=l(),d(L.$$.fragment),Ss=l(),E=p("p"),E.innerHTML=Xt,Ps=l(),d(S.$$.fragment),Os=l(),P=p("p"),P.innerHTML=Wt,Ks=l(),O=p("p"),O.textContent=Ft,st=l(),d(K.$$.fragment),tt=l(),ss=p("p"),ss.innerHTML=Yt,et=l(),d(ts.$$.fragment),at=l(),d(es.$$.fragment),lt=l(),as=p("p"),as.innerHTML=Vt,nt=l(),d(ls.$$.fragment),pt=l(),ns=p("p"),ns.textContent=Bt,it=l(),d(ps.$$.fragment),ot=l(),is=p("p"),is.textContent=Nt,dt=l(),os=p("p"),os.textContent=Dt,ut=l(),d(ds.$$.fragment),rt=l(),us=p("p"),us.textContent=At,ct=l(),d(rs.$$.fragment),mt=l(),cs=p("p"),cs.innerHTML=Ht,ft=l(),d(ms.$$.fragment),ht=l(),hs=p("p"),this.h()},l(s){const 
t=Ot("svelte-u9bgzb",document.head);M=i(t,"META",{name:!0,content:!0}),t.forEach(e),ys=n(s),fs=i(s,"P",{}),Qt(fs).forEach(e),js=n(s),u(J.$$.fragment,s),Ts=n(s),u(w.$$.fragment,s),Js=n(s),b=i(s,"P",{"data-svelte-h":!0}),o(b)!=="svelte-12uzys7"&&(b.textContent=Tt),ws=n(s),u(Z.$$.fragment,s),bs=n(s),$=i(s,"P",{"data-svelte-h":!0}),o($)!=="svelte-37xnq4"&&($.textContent=Jt),Zs=n(s),g=i(s,"UL",{"data-svelte-h":!0}),o(g)!=="svelte-m6sv6w"&&(g.innerHTML=wt),$s=n(s),y=i(s,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),o(y)!=="svelte-diqosn"&&(y.innerHTML=bt),gs=n(s),u(U.$$.fragment,s),Us=n(s),_=i(s,"P",{"data-svelte-h":!0}),o(_)!=="svelte-p1mbdi"&&(_.innerHTML=Zt),_s=n(s),u(k.$$.fragment,s),ks=n(s),I=i(s,"P",{"data-svelte-h":!0}),o(I)!=="svelte-1p4qot7"&&(I.innerHTML=$t),Is=n(s),u(v.$$.fragment,s),vs=n(s),G=i(s,"P",{"data-svelte-h":!0}),o(G)!=="svelte-132x6mi"&&(G.textContent=gt),Gs=n(s),u(q.$$.fragment,s),qs=n(s),u(x.$$.fragment,s),xs=n(s),C=i(s,"P",{"data-svelte-h":!0}),o(C)!=="svelte-qqcvaq"&&(C.innerHTML=Ut),Cs=n(s),j=i(s,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),o(j)!=="svelte-jddkcu"&&(j.innerHTML=_t),Rs=n(s),R=i(s,"P",{"data-svelte-h":!0}),o(R)!=="svelte-1nwvss9"&&(R.innerHTML=kt),Xs=n(s),u(X.$$.fragment,s),Ws=n(s),W=i(s,"P",{"data-svelte-h":!0}),o(W)!=="svelte-onr5ga"&&(W.innerHTML=It),Fs=n(s),u(F.$$.fragment,s),Ys=n(s),Y=i(s,"P",{"data-svelte-h":!0}),o(Y)!=="svelte-1b3va04"&&(Y.innerHTML=vt),Vs=n(s),u(V.$$.fragment,s),Bs=n(s),B=i(s,"P",{"data-svelte-h":!0}),o(B)!=="svelte-1uavpsj"&&(B.innerHTML=Gt),Ns=n(s),u(N.$$.fragment,s),Ds=n(s),T=i(s,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),o(T)!=="svelte-ku3rpn"&&(T.innerHTML=qt),As=n(s),D=i(s,"P",{"data-svelte-h":!0}),o(D)!=="svelte-69vkpo"&&(D.innerHTML=xt),Hs=n(s),u(A.$$.fragment,s),Qs=n(s),H=i(s,"P",{"data-svelte-h":!0}),o(H)!=="svelte-5ri7au"&&(H.textContent=Ct),zs=n(s),u(Q.$$.fragment,s),Ls=n(s),z=i(s,"P",{"data-svelte-h":!0}),o(z)!=="svelte-eo3moj"&&(z.innerHTML=Rt),Es=n(s),u(L.$$.fragment,s),Ss=n(s),E=i(s,"P",{"data
-svelte-h":!0}),o(E)!=="svelte-16ywdcf"&&(E.innerHTML=Xt),Ps=n(s),u(S.$$.fragment,s),Os=n(s),P=i(s,"P",{"data-svelte-h":!0}),o(P)!=="svelte-1y828rv"&&(P.innerHTML=Wt),Ks=n(s),O=i(s,"P",{"data-svelte-h":!0}),o(O)!=="svelte-1xyi0pz"&&(O.textContent=Ft),st=n(s),u(K.$$.fragment,s),tt=n(s),ss=i(s,"P",{"data-svelte-h":!0}),o(ss)!=="svelte-yw4s2r"&&(ss.innerHTML=Yt),et=n(s),u(ts.$$.fragment,s),at=n(s),u(es.$$.fragment,s),lt=n(s),as=i(s,"P",{"data-svelte-h":!0}),o(as)!=="svelte-1tcrvka"&&(as.innerHTML=Vt),nt=n(s),u(ls.$$.fragment,s),pt=n(s),ns=i(s,"P",{"data-svelte-h":!0}),o(ns)!=="svelte-69nbou"&&(ns.textContent=Bt),it=n(s),u(ps.$$.fragment,s),ot=n(s),is=i(s,"P",{"data-svelte-h":!0}),o(is)!=="svelte-1a0am8x"&&(is.textContent=Nt),dt=n(s),os=i(s,"P",{"data-svelte-h":!0}),o(os)!=="svelte-c725xy"&&(os.textContent=Dt),ut=n(s),u(ds.$$.fragment,s),rt=n(s),us=i(s,"P",{"data-svelte-h":!0}),o(us)!=="svelte-8vri1z"&&(us.textContent=At),ct=n(s),u(rs.$$.fragment,s),mt=n(s),cs=i(s,"P",{"data-svelte-h":!0}),o(cs)!=="svelte-12q6x7f"&&(cs.innerHTML=Ht),ft=n(s),u(ms.$$.fragment,s),ht=n(s),hs=i(s,"P",{}),Qt(hs).forEach(e),this.h()},h(){Ms(M,"name","hf:doc:metadata"),Ms(M,"content",ae),Ms(y,"class","tip"),Ms(j,"class","tip"),Ms(T,"class","warning")},m(s,t){Kt(document.head,M),a(s,ys,t),a(s,fs,t),a(s,js,t),r(J,s,t),a(s,Ts,t),r(w,s,t),a(s,Js,t),a(s,b,t),a(s,ws,t),r(Z,s,t),a(s,bs,t),a(s,$,t),a(s,Zs,t),a(s,g,t),a(s,$s,t),a(s,y,t),a(s,gs,t),r(U,s,t),a(s,Us,t),a(s,_,t),a(s,_s,t),r(k,s,t),a(s,ks,t),a(s,I,t),a(s,Is,t),r(v,s,t),a(s,vs,t),a(s,G,t),a(s,Gs,t),r(q,s,t),a(s,qs,t),r(x,s,t),a(s,xs,t),a(s,C,t),a(s,Cs,t),a(s,j,t),a(s,Rs,t),a(s,R,t),a(s,Xs,t),r(X,s,t),a(s,Ws,t),a(s,W,t),a(s,Fs,t),r(F,s,t),a(s,Ys,t),a(s,Y,t),a(s,Vs,t),r(V,s,t),a(s,Bs,t),a(s,B,t),a(s,Ns,t),r(N,s,t),a(s,Ds,t),a(s,T,t),a(s,As,t),a(s,D,t),a(s,Hs,t),r(A,s,t),a(s,Qs,t),a(s,H,t),a(s,zs,t),r(Q,s,t),a(s,Ls,t),a(s,z,t),a(s,Es,t),r(L,s,t),a(s,Ss,t),a(s,E,t),a(s,Ps,t),r(S,s,t),a(s,Os,t),a(s,P,t),a(s,Ks,t),a(s,O,t),a(s,st,t),r(K,s,t),a(s,tt,
t),a(s,ss,t),a(s,et,t),r(ts,s,t),a(s,at,t),r(es,s,t),a(s,lt,t),a(s,as,t),a(s,nt,t),r(ls,s,t),a(s,pt,t),a(s,ns,t),a(s,it,t),r(ps,s,t),a(s,ot,t),a(s,is,t),a(s,dt,t),a(s,os,t),a(s,ut,t),r(ds,s,t),a(s,rt,t),a(s,us,t),a(s,ct,t),r(rs,s,t),a(s,mt,t),a(s,cs,t),a(s,ft,t),r(ms,s,t),a(s,ht,t),a(s,hs,t),Mt=!0},p:Lt,i(s){Mt||(c(J.$$.fragment,s),c(w.$$.fragment,s),c(Z.$$.fragment,s),c(U.$$.fragment,s),c(k.$$.fragment,s),c(v.$$.fragment,s),c(q.$$.fragment,s),c(x.$$.fragment,s),c(X.$$.fragment,s),c(F.$$.fragment,s),c(V.$$.fragment,s),c(N.$$.fragment,s),c(A.$$.fragment,s),c(Q.$$.fragment,s),c(L.$$.fragment,s),c(S.$$.fragment,s),c(K.$$.fragment,s),c(ts.$$.fragment,s),c(es.$$.fragment,s),c(ls.$$.fragment,s),c(ps.$$.fragment,s),c(ds.$$.fragment,s),c(rs.$$.fragment,s),c(ms.$$.fragment,s),Mt=!0)},o(s){m(J.$$.fragment,s),m(w.$$.fragment,s),m(Z.$$.fragment,s),m(U.$$.fragment,s),m(k.$$.fragment,s),m(v.$$.fragment,s),m(q.$$.fragment,s),m(x.$$.fragment,s),m(X.$$.fragment,s),m(F.$$.fragment,s),m(V.$$.fragment,s),m(N.$$.fragment,s),m(A.$$.fragment,s),m(Q.$$.fragment,s),m(L.$$.fragment,s),m(S.$$.fragment,s),m(K.$$.fragment,s),m(ts.$$.fragment,s),m(es.$$.fragment,s),m(ls.$$.fragment,s),m(ps.$$.fragment,s),m(ds.$$.fragment,s),m(rs.$$.fragment,s),m(ms.$$.fragment,s),Mt=!1},d(s){s&&(e(ys),e(fs),e(js),e(Ts),e(Js),e(b),e(ws),e(bs),e($),e(Zs),e(g),e($s),e(y),e(gs),e(Us),e(_),e(_s),e(ks),e(I),e(Is),e(vs),e(G),e(Gs),e(qs),e(xs),e(C),e(Cs),e(j),e(Rs),e(R),e(Xs),e(Ws),e(W),e(Fs),e(Ys),e(Y),e(Vs),e(Bs),e(B),e(Ns),e(Ds),e(T),e(As),e(D),e(Hs),e(Qs),e(H),e(zs),e(Ls),e(z),e(Es),e(Ss),e(E),e(Ps),e(Os),e(P),e(Ks),e(O),e(st),e(tt),e(ss),e(et),e(at),e(lt),e(as),e(nt),e(pt),e(ns),e(it),e(ot),e(is),e(dt),e(os),e(ut),e(rt),e(us),e(ct),e(mt),e(cs),e(ft),e(ht),e(hs)),e(M),f(J,s),f(w,s),f(Z,s),f(U,s),f(k,s),f(v,s),f(q,s),f(x,s),f(X,s),f(F,s),f(V,s),f(N,s),f(A,s),f(Q,s),f(L,s),f(S,s),f(K,s),f(ts,s),f(es,s),f(ls,s),f(ps,s),f(ds,s),f(rs,s),f(ms,s)}}}const ae='{"title":"Create an audio 
dataset","local":"create-an-audio-dataset","sections":[{"title":"Local files","local":"local-files","sections":[],"depth":2},{"title":"AudioFolder","local":"audiofolder","sections":[],"depth":2},{"title":"WebDataset","local":"webdataset","sections":[],"depth":2}],"depth":1}';function le(jt){return Et(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class de extends St{constructor(M){super(),Pt(this,M,le,ee,zt,{})}}export{de as component};

Xet Storage Details

Size:
29.4 kB
·
Xet hash:
4e3c1a45d7037bf0987806bfd5e1da3db7206042a407b6c7167d428647da1f24

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.