Buckets:
| import{s as et,o as lt,n as Ia}from"../chunks/scheduler.bdbef820.js";import{S as nt,i as pt,g as r,s as l,r as h,A as rt,h as c,f as t,c as n,j as at,u as i,x as m,k as tt,y as ct,a as e,v as d,d as o,t as u,w as j}from"../chunks/index.c0aea24a.js";import{T as Ua}from"../chunks/Tip.31005f7d.js";import{C as J}from"../chunks/CodeBlock.6ccca92e.js";import{H as b,E as mt}from"../chunks/EditOnGithub.725ee0c1.js";/* ht(w): fragment factory passed as the default slot of the Tip instance T ($$slots:{default:[ht]}). Returns an object with c/l/m/p/d: c() assigns p=r("p") and sets p.innerHTML to the static note y (Dataset wraps an Arrow table); l(g) assigns p=c(g,"P",{"data-svelte-h":!0}) and rewrites innerHTML only when m(p) is not the literal "svelte-1bbq9ig"; m(g,M) calls e(g,p,M); p is the imported Ia; d(g) calls t(p) only when g is truthy. The argument w is never read. NOTE(review): r/c/m/e/t/Ia are minified aliases imported from ../chunks - presumably Svelte's element/claim/marker-read/insert/detach/noop helpers; confirm against those chunks. */function ht(w){let p,y='A <a href="/docs/datasets/main/en/package_reference/main_classes#datasets.Dataset">Dataset</a> object is a wrapper of an Arrow table, which allows fast zero-copy reads from arrays in the dataset to PyTorch tensors.';return{c(){p=r("p"),p.innerHTML=y},l(g){p=c(g,"P",{"data-svelte-h":!0}),m(p)!=="svelte-1bbq9ig"&&(p.innerHTML=y)},m(g,M){e(g,p,M)},p:Ia,d(g){g&&t(p)}}}/* it(w): fragment factory passed as the default slot of the Tip instance f ($$slots:{default:[it]}). Same c/l/m/p/d object shape as ht; its markup y is a template literal that continues onto the next physical line (the line break is part of the string) and tells the reader to install the vision extra for the Image feature type. l(g) compares m(p) against the literal "svelte-1go8nao". The argument w is never read. */function it(w){let p,y=`To use the <a href="/docs/datasets/main/en/package_reference/main_classes#datasets.Image">Image</a> feature type, you’ll need to install the <code>vision</code> extra as | |
| <code>pip install datasets[vision]</code>.`;return{c(){p=r("p"),p.innerHTML=y},l(g){p=c(g,"P",{"data-svelte-h":!0}),m(p)!=="svelte-1go8nao"&&(p.innerHTML=y)},m(g,M){e(g,p,M)},p:Ia,d(g){g&&t(p)}}}/* dt(w): fragment factory passed as the default slot of the Tip instance U ($$slots:{default:[dt]}). Returns an object with c/l/m/p/d: c() assigns p=r("p") and sets p.innerHTML to the markup y; l(g) assigns p=c(g,"P",{"data-svelte-h":!0}) and rewrites innerHTML only when m(p) is not the literal "svelte-118qika"; m(g,M) calls e(g,p,M); p is the imported Ia; d(g) calls t(p) only when g is truthy. y is a template literal that continues onto the next physical line (the line break is part of the string) and tells the reader to install the audio extra for the Audio feature type. The argument w is never read. NOTE(review): r/c/m/e/t/Ia are minified aliases imported from ../chunks - presumably Svelte runtime helpers; confirm against those chunks. */function dt(w){let p,y=`To use the <a href="/docs/datasets/main/en/package_reference/main_classes#datasets.Audio">Audio</a> feature type, you’ll need to install the <code>audio</code> extra as | |
| <code>pip install datasets[audio]</code>.`;return{c(){p=r("p"),p.innerHTML=y},l(g){p=c(g,"P",{"data-svelte-h":!0}),m(p)!=="svelte-118qika"&&(p.innerHTML=y)},m(g,M){e(g,p,M)},p:Ia,d(g){g&&t(p)}}}function ot(w){let p,y,g,M,I,fs,$,$a=`This document is a quick introduction to using <code>datasets</code> with PyTorch, with a particular focus on how to get | |
| <code>torch.Tensor</code> objects out of our datasets, and how to use a PyTorch <code>DataLoader</code> and a Hugging Face <code>Dataset</code> | |
| with the best performance.`,Us,R,Is,k,Ra="By default, datasets return regular python objects: integers, floats, strings, lists, etc.",$s,C,ka='To get PyTorch tensors instead, you can set the format of the dataset to <code>pytorch</code> using <a href="/docs/datasets/main/en/package_reference/main_classes#datasets.Dataset.with_format">Dataset.with_format()</a>:',Rs,G,ks,T,Cs,_,Ca="To load the data as tensors on a GPU, specify the <code>device</code> argument:",Gs,Z,_s,X,Zs,v,Ga="If your dataset consists of N-dimensional arrays, you will see that by default they are considered as the same tensor if the shape is fixed:",Xs,Q,vs,Y,Qs,F,_a=`However this logic often requires slow shape comparisons and data copies. | |
| To avoid this, you must explicitly use the <code>Array</code> feature type and specify the shape of your tensors:`,Ys,z,Fs,x,zs,N,Za='<a href="/docs/datasets/main/en/package_reference/main_classes#datasets.ClassLabel">ClassLabel</a> data are properly converted to tensors:',xs,V,Ns,W,Xa="String and binary objects are unchanged, since PyTorch only supports numbers.",Vs,q,va='The <a href="/docs/datasets/main/en/package_reference/main_classes#datasets.Image">Image</a> and <a href="/docs/datasets/main/en/package_reference/main_classes#datasets.Audio">Audio</a> feature types are also supported.',Ws,f,qs,E,Es,U,Bs,B,Ds,D,Hs,H,Qa='Like <code>torch.utils.data.Dataset</code> objects, a <a href="/docs/datasets/main/en/package_reference/main_classes#datasets.Dataset">Dataset</a> can be passed directly to a PyTorch <code>DataLoader</code>:',Ss,S,Ls,L,As,A,Ya=`There are several ways you can increase the speed your data is loaded which can save you time, especially if you are working with large datasets. | |
| PyTorch offers parallelized data loading, retrieving batches of indices instead of individually, and streaming to iterate over the dataset without downloading it on disk.`,Ps,P,Ks,K,Fa="You can parallelize data loading with the <code>num_workers</code> argument of a PyTorch <code>DataLoader</code> and get a higher throughput.",Os,O,za=`Under the hood, the <code>DataLoader</code> starts <code>num_workers</code> processes. | |
| Each process reloads the dataset passed to the <code>DataLoader</code> and is used to query examples. | |
| Reloading the dataset inside a worker doesn’t fill up your RAM, since it simply memory-maps the dataset again from your disk.`,sa,ss,aa,as,ta,ts,xa=`Stream a dataset by loading it as an <a href="/docs/datasets/main/en/package_reference/main_classes#datasets.IterableDataset">IterableDataset</a>. This allows you to progressively iterate over a remote dataset without downloading it on disk and or over local data files. | |
| Learn more about which type of dataset is best for your use case in the <a href="./about_mapstyle_vs_iterable">choosing between a regular dataset or an iterable dataset</a> guide.`,ea,es,Na="An iterable dataset from <code>datasets</code> inherits from <code>torch.utils.data.IterableDataset</code> so you can pass it to a <code>torch.utils.data.DataLoader</code>:",la,ls,na,ns,Va="If the dataset is split in several shards (i.e. if the dataset consists of multiple data files), then you can stream in parallel using <code>num_workers</code>:",pa,ps,ra,rs,Wa="In this case each worker is given a subset of the list of shards to stream from.",ca,cs,ma,ms,qa='If you need a DataLoader that you can checkpoint and resume in the middle of training, you can use the <code>StatefulDataLoader</code> from <a href="https://github.com/pytorch/data" rel="nofollow">torchdata</a>:',ha,hs,ia,is,Ea='This is possible thanks to <a href="/docs/datasets/main/en/package_reference/main_classes#datasets.IterableDataset.state_dict">IterableDataset.state_dict()</a> and <a href="/docs/datasets/main/en/package_reference/main_classes#datasets.IterableDataset.load_state_dict">IterableDataset.load_state_dict()</a>.',da,ds,oa,os,Ba='To split your dataset across your training nodes, you can use <a href="/docs/datasets/main/en/package_reference/main_classes#datasets.distributed.split_dataset_by_node">datasets.distributed.split_dataset_by_node()</a>:',ua,us,ja,js,Da=`This works for both map-style datasets and iterable datasets. | |
| The dataset is split for the node at rank <code>rank</code> in a pool of nodes of size <code>world_size</code>.`,ga,gs,Ha="For map-style datasets:",Ja,Js,Sa="Each node is assigned a chunk of data, e.g. rank 0 is given the first chunk of the dataset.",ya,ys,La="For iterable datasets:",Ma,Ms,Aa=`If the dataset has a number of shards that is a factor of <code>world_size</code> (i.e. if <code>dataset.n_shards % world_size == 0</code>), | |
| then the shards are evenly assigned across the nodes, which is the most optimized. | |
| Otherwise, each node keeps 1 example out of <code>world_size</code>, skipping the other examples.`,ba,bs,Pa="This can also be combined with a <code>torch.utils.data.DataLoader</code> if you want each node to use multiple workers to load the data.",wa,ws,Ta,Ts,fa;return I=new b({props:{title:"Use with PyTorch",local:"use-with-pytorch",headingTag:"h1"}}),R=new b({props:{title:"Dataset format",local:"dataset-format",headingTag:"h2"}}),G=new J({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwRGF0YXNldCUwQWRhdGElMjAlM0QlMjAlNUIlNUIxJTJDJTIwMiU1RCUyQyU1QjMlMkMlMjA0JTVEJTVEJTBBZHMlMjAlM0QlMjBEYXRhc2V0LmZyb21fZGljdCglN0IlMjJkYXRhJTIyJTNBJTIwZGF0YSU3RCklMEFkcyUyMCUzRCUyMGRzLndpdGhfZm9ybWF0KCUyMnRvcmNoJTIyKSUwQWRzJTVCMCU1RCUwQWRzJTVCJTNBMiU1RA==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| <span class="hljs-meta">>>> </span>data = [[<span class="hljs-number">1</span>, <span class="hljs-number">2</span>],[<span class="hljs-number">3</span>, <span class="hljs-number">4</span>]] | |
| <span class="hljs-meta">>>> </span>ds = Dataset.from_dict({<span class="hljs-string">"data"</span>: data}) | |
| <span class="hljs-meta">>>> </span>ds = ds.with_format(<span class="hljs-string">"torch"</span>) | |
| <span class="hljs-meta">>>> </span>ds[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'data'</span>: tensor([<span class="hljs-number">1</span>, <span class="hljs-number">2</span>])} | |
| <span class="hljs-meta">>>> </span>ds[:<span class="hljs-number">2</span>] | |
| {<span class="hljs-string">'data'</span>: tensor([[<span class="hljs-number">1</span>, <span class="hljs-number">2</span>], | |
| [<span class="hljs-number">3</span>, <span class="hljs-number">4</span>]])}`,wrap:!1}}),T=new Ua({props:{$$slots:{default:[ht]},$$scope:{ctx:w}}}),Z=new J({props:{code:"aW1wb3J0JTIwdG9yY2glMEFkZXZpY2UlMjAlM0QlMjB0b3JjaC5kZXZpY2UoJTIyY3VkYSUyMiUyMGlmJTIwdG9yY2guY3VkYS5pc19hdmFpbGFibGUoKSUyMGVsc2UlMjAlMjJjcHUlMjIpJTBBZHMlMjAlM0QlMjBkcy53aXRoX2Zvcm1hdCglMjJ0b3JjaCUyMiUyQyUyMGRldmljZSUzRGRldmljZSklMEFkcyU1QjAlNUQ=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span>device = torch.device(<span class="hljs-string">"cuda"</span> <span class="hljs-keyword">if</span> torch.cuda.is_available() <span class="hljs-keyword">else</span> <span class="hljs-string">"cpu"</span>) | |
| <span class="hljs-meta">>>> </span>ds = ds.with_format(<span class="hljs-string">"torch"</span>, device=device) | |
| <span class="hljs-meta">>>> </span>ds[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'data'</span>: tensor([<span class="hljs-number">1</span>, <span class="hljs-number">2</span>], device=<span class="hljs-string">'cuda:0'</span>)}`,wrap:!1}}),X=new b({props:{title:"N-dimensional arrays",local:"n-dimensional-arrays",headingTag:"h3"}}),Q=new J({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwRGF0YXNldCUwQWRhdGElMjAlM0QlMjAlNUIlNUIlNUIxJTJDJTIwMiU1RCUyQyU1QjMlMkMlMjA0JTVEJTVEJTJDJTVCJTVCNSUyQyUyMDYlNUQlMkMlNUI3JTJDJTIwOCU1RCU1RCU1RCUyMCUyMCUyMyUyMGZpeGVkJTIwc2hhcGUlMEFkcyUyMCUzRCUyMERhdGFzZXQuZnJvbV9kaWN0KCU3QiUyMmRhdGElMjIlM0ElMjBkYXRhJTdEKSUwQWRzJTIwJTNEJTIwZHMud2l0aF9mb3JtYXQoJTIydG9yY2glMjIpJTBBZHMlNUIwJTVE",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| <span class="hljs-meta">>>> </span>data = [[[<span class="hljs-number">1</span>, <span class="hljs-number">2</span>],[<span class="hljs-number">3</span>, <span class="hljs-number">4</span>]],[[<span class="hljs-number">5</span>, <span class="hljs-number">6</span>],[<span class="hljs-number">7</span>, <span class="hljs-number">8</span>]]] <span class="hljs-comment"># fixed shape</span> | |
| <span class="hljs-meta">>>> </span>ds = Dataset.from_dict({<span class="hljs-string">"data"</span>: data}) | |
| <span class="hljs-meta">>>> </span>ds = ds.with_format(<span class="hljs-string">"torch"</span>) | |
| <span class="hljs-meta">>>> </span>ds[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'data'</span>: tensor([[<span class="hljs-number">1</span>, <span class="hljs-number">2</span>], | |
| [<span class="hljs-number">3</span>, <span class="hljs-number">4</span>]])}`,wrap:!1}}),Y=new J({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwRGF0YXNldCUwQWRhdGElMjAlM0QlMjAlNUIlNUIlNUIxJTJDJTIwMiU1RCUyQyU1QjMlNUQlNUQlMkMlNUIlNUI0JTJDJTIwNSUyQyUyMDYlNUQlMkMlNUI3JTJDJTIwOCU1RCU1RCU1RCUyMCUyMCUyMyUyMHZhcnlpbmclMjBzaGFwZSUwQWRzJTIwJTNEJTIwRGF0YXNldC5mcm9tX2RpY3QoJTdCJTIyZGF0YSUyMiUzQSUyMGRhdGElN0QpJTBBZHMlMjAlM0QlMjBkcy53aXRoX2Zvcm1hdCglMjJ0b3JjaCUyMiklMEFkcyU1QjAlNUQ=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| <span class="hljs-meta">>>> </span>data = [[[<span class="hljs-number">1</span>, <span class="hljs-number">2</span>],[<span class="hljs-number">3</span>]],[[<span class="hljs-number">4</span>, <span class="hljs-number">5</span>, <span class="hljs-number">6</span>],[<span class="hljs-number">7</span>, <span class="hljs-number">8</span>]]] <span class="hljs-comment"># varying shape</span> | |
| <span class="hljs-meta">>>> </span>ds = Dataset.from_dict({<span class="hljs-string">"data"</span>: data}) | |
| <span class="hljs-meta">>>> </span>ds = ds.with_format(<span class="hljs-string">"torch"</span>) | |
| <span class="hljs-meta">>>> </span>ds[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'data'</span>: [tensor([<span class="hljs-number">1</span>, <span class="hljs-number">2</span>]), tensor([<span class="hljs-number">3</span>])]}`,wrap:!1}}),z=new J({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwRGF0YXNldCUyQyUyMEZlYXR1cmVzJTJDJTIwQXJyYXkyRCUwQWRhdGElMjAlM0QlMjAlNUIlNUIlNUIxJTJDJTIwMiU1RCUyQyU1QjMlMkMlMjA0JTVEJTVEJTJDJTVCJTVCNSUyQyUyMDYlNUQlMkMlNUI3JTJDJTIwOCU1RCU1RCU1RCUwQWZlYXR1cmVzJTIwJTNEJTIwRmVhdHVyZXMoJTdCJTIyZGF0YSUyMiUzQSUyMEFycmF5MkQoc2hhcGUlM0QoMiUyQyUyMDIpJTJDJTIwZHR5cGUlM0QnaW50MzInKSU3RCklMEFkcyUyMCUzRCUyMERhdGFzZXQuZnJvbV9kaWN0KCU3QiUyMmRhdGElMjIlM0ElMjBkYXRhJTdEJTJDJTIwZmVhdHVyZXMlM0RmZWF0dXJlcyklMEFkcyUyMCUzRCUyMGRzLndpdGhfZm9ybWF0KCUyMnRvcmNoJTIyKSUwQWRzJTVCMCU1RCUwQWRzJTVCJTNBMiU1RA==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset, Features, Array2D | |
| <span class="hljs-meta">>>> </span>data = [[[<span class="hljs-number">1</span>, <span class="hljs-number">2</span>],[<span class="hljs-number">3</span>, <span class="hljs-number">4</span>]],[[<span class="hljs-number">5</span>, <span class="hljs-number">6</span>],[<span class="hljs-number">7</span>, <span class="hljs-number">8</span>]]] | |
| <span class="hljs-meta">>>> </span>features = Features({<span class="hljs-string">"data"</span>: Array2D(shape=(<span class="hljs-number">2</span>, <span class="hljs-number">2</span>), dtype=<span class="hljs-string">'int32'</span>)}) | |
| <span class="hljs-meta">>>> </span>ds = Dataset.from_dict({<span class="hljs-string">"data"</span>: data}, features=features) | |
| <span class="hljs-meta">>>> </span>ds = ds.with_format(<span class="hljs-string">"torch"</span>) | |
| <span class="hljs-meta">>>> </span>ds[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'data'</span>: tensor([[<span class="hljs-number">1</span>, <span class="hljs-number">2</span>], | |
| [<span class="hljs-number">3</span>, <span class="hljs-number">4</span>]])} | |
| <span class="hljs-meta">>>> </span>ds[:<span class="hljs-number">2</span>] | |
| {<span class="hljs-string">'data'</span>: tensor([[[<span class="hljs-number">1</span>, <span class="hljs-number">2</span>], | |
| [<span class="hljs-number">3</span>, <span class="hljs-number">4</span>]], | |
| [[<span class="hljs-number">5</span>, <span class="hljs-number">6</span>], | |
| [<span class="hljs-number">7</span>, <span class="hljs-number">8</span>]]])}`,wrap:!1}}),x=new b({props:{title:"Other feature types",local:"other-feature-types",headingTag:"h3"}}),V=new J({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwRGF0YXNldCUyQyUyMEZlYXR1cmVzJTJDJTIwQ2xhc3NMYWJlbCUwQWxhYmVscyUyMCUzRCUyMCU1QjAlMkMlMjAwJTJDJTIwMSU1RCUwQWZlYXR1cmVzJTIwJTNEJTIwRmVhdHVyZXMoJTdCJTIybGFiZWwlMjIlM0ElMjBDbGFzc0xhYmVsKG5hbWVzJTNEJTVCJTIybmVnYXRpdmUlMjIlMkMlMjAlMjJwb3NpdGl2ZSUyMiU1RCklN0QpJTBBZHMlMjAlM0QlMjBEYXRhc2V0LmZyb21fZGljdCglN0IlMjJsYWJlbCUyMiUzQSUyMGxhYmVscyU3RCUyQyUyMGZlYXR1cmVzJTNEZmVhdHVyZXMpJTIwJTBBZHMlMjAlM0QlMjBkcy53aXRoX2Zvcm1hdCglMjJ0b3JjaCUyMiklMjAlMjAlMEFkcyU1QiUzQTMlNUQ=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset, Features, ClassLabel | |
| <span class="hljs-meta">>>> </span>labels = [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">1</span>] | |
| <span class="hljs-meta">>>> </span>features = Features({<span class="hljs-string">"label"</span>: ClassLabel(names=[<span class="hljs-string">"negative"</span>, <span class="hljs-string">"positive"</span>])}) | |
| <span class="hljs-meta">>>> </span>ds = Dataset.from_dict({<span class="hljs-string">"label"</span>: labels}, features=features) | |
| <span class="hljs-meta">>>> </span>ds = ds.with_format(<span class="hljs-string">"torch"</span>) | |
| <span class="hljs-meta">>>> </span>ds[:<span class="hljs-number">3</span>] | |
| {<span class="hljs-string">'label'</span>: tensor([<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">1</span>])}`,wrap:!1}}),f=new Ua({props:{$$slots:{default:[it]},$$scope:{ctx:w}}}),E=new J({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwRGF0YXNldCUyQyUyMEZlYXR1cmVzJTJDJTIwQXVkaW8lMkMlMjBJbWFnZSUwQWltYWdlcyUyMCUzRCUyMCU1QiUyMnBhdGglMkZ0byUyRmltYWdlLnBuZyUyMiU1RCUyMColMjAxMCUwQWZlYXR1cmVzJTIwJTNEJTIwRmVhdHVyZXMoJTdCJTIyaW1hZ2UlMjIlM0ElMjBJbWFnZSgpJTdEKSUwQWRzJTIwJTNEJTIwRGF0YXNldC5mcm9tX2RpY3QoJTdCJTIyaW1hZ2UlMjIlM0ElMjBpbWFnZXMlN0QlMkMlMjBmZWF0dXJlcyUzRGZlYXR1cmVzKSUyMCUwQWRzJTIwJTNEJTIwZHMud2l0aF9mb3JtYXQoJTIydG9yY2glMjIpJTBBZHMlNUIwJTVEJTVCJTIyaW1hZ2UlMjIlNUQuc2hhcGUlMEFkcyU1QjAlNUQlMEFkcyU1QiUzQTIlNUQlNUIlMjJpbWFnZSUyMiU1RC5zaGFwZSUwQWRzJTVCJTNBMiU1RA==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset, Features, Audio, Image | |
| <span class="hljs-meta">>>> </span>images = [<span class="hljs-string">"path/to/image.png"</span>] * <span class="hljs-number">10</span> | |
| <span class="hljs-meta">>>> </span>features = Features({<span class="hljs-string">"image"</span>: Image()}) | |
| <span class="hljs-meta">>>> </span>ds = Dataset.from_dict({<span class="hljs-string">"image"</span>: images}, features=features) | |
| <span class="hljs-meta">>>> </span>ds = ds.with_format(<span class="hljs-string">"torch"</span>) | |
| <span class="hljs-meta">>>> </span>ds[<span class="hljs-number">0</span>][<span class="hljs-string">"image"</span>].shape | |
| torch.Size([<span class="hljs-number">512</span>, <span class="hljs-number">512</span>, <span class="hljs-number">4</span>]) | |
| <span class="hljs-meta">>>> </span>ds[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'image'</span>: tensor([[[<span class="hljs-number">255</span>, <span class="hljs-number">215</span>, <span class="hljs-number">106</span>, <span class="hljs-number">255</span>], | |
| [<span class="hljs-number">255</span>, <span class="hljs-number">215</span>, <span class="hljs-number">106</span>, <span class="hljs-number">255</span>], | |
| ..., | |
| [<span class="hljs-number">255</span>, <span class="hljs-number">255</span>, <span class="hljs-number">255</span>, <span class="hljs-number">255</span>], | |
| [<span class="hljs-number">255</span>, <span class="hljs-number">255</span>, <span class="hljs-number">255</span>, <span class="hljs-number">255</span>]]], dtype=torch.uint8)} | |
| <span class="hljs-meta">>>> </span>ds[:<span class="hljs-number">2</span>][<span class="hljs-string">"image"</span>].shape | |
| torch.Size([<span class="hljs-number">2</span>, <span class="hljs-number">512</span>, <span class="hljs-number">512</span>, <span class="hljs-number">4</span>]) | |
| <span class="hljs-meta">>>> </span>ds[:<span class="hljs-number">2</span>] | |
| {<span class="hljs-string">'image'</span>: tensor([[[[<span class="hljs-number">255</span>, <span class="hljs-number">215</span>, <span class="hljs-number">106</span>, <span class="hljs-number">255</span>], | |
| [<span class="hljs-number">255</span>, <span class="hljs-number">215</span>, <span class="hljs-number">106</span>, <span class="hljs-number">255</span>], | |
| ..., | |
| [<span class="hljs-number">255</span>, <span class="hljs-number">255</span>, <span class="hljs-number">255</span>, <span class="hljs-number">255</span>], | |
| [<span class="hljs-number">255</span>, <span class="hljs-number">255</span>, <span class="hljs-number">255</span>, <span class="hljs-number">255</span>]]]], dtype=torch.uint8)}`,wrap:!1}}),U=new Ua({props:{$$slots:{default:[dt]},$$scope:{ctx:w}}}),B=new J({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwRGF0YXNldCUyQyUyMEZlYXR1cmVzJTJDJTIwQXVkaW8lMkMlMjBJbWFnZSUwQWF1ZGlvJTIwJTNEJTIwJTVCJTIycGF0aCUyRnRvJTJGYXVkaW8ud2F2JTIyJTVEJTIwKiUyMDEwJTBBZmVhdHVyZXMlMjAlM0QlMjBGZWF0dXJlcyglN0IlMjJhdWRpbyUyMiUzQSUyMEF1ZGlvKCklN0QpJTBBZHMlMjAlM0QlMjBEYXRhc2V0LmZyb21fZGljdCglN0IlMjJhdWRpbyUyMiUzQSUyMGF1ZGlvJTdEJTJDJTIwZmVhdHVyZXMlM0RmZWF0dXJlcyklMjAlMEFkcyUyMCUzRCUyMGRzLndpdGhfZm9ybWF0KCUyMnRvcmNoJTIyKSUyMCUyMCUwQWRzJTVCMCU1RCU1QiUyMmF1ZGlvJTIyJTVEJTVCJTIyYXJyYXklMjIlNUQlMEFkcyU1QjAlNUQlNUIlMjJhdWRpbyUyMiU1RCU1QiUyMnNhbXBsaW5nX3JhdGUlMjIlNUQ=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset, Features, Audio, Image | |
| <span class="hljs-meta">>>> </span>audio = [<span class="hljs-string">"path/to/audio.wav"</span>] * <span class="hljs-number">10</span> | |
| <span class="hljs-meta">>>> </span>features = Features({<span class="hljs-string">"audio"</span>: Audio()}) | |
| <span class="hljs-meta">>>> </span>ds = Dataset.from_dict({<span class="hljs-string">"audio"</span>: audio}, features=features) | |
| <span class="hljs-meta">>>> </span>ds = ds.with_format(<span class="hljs-string">"torch"</span>) | |
| <span class="hljs-meta">>>> </span>ds[<span class="hljs-number">0</span>][<span class="hljs-string">"audio"</span>][<span class="hljs-string">"array"</span>] | |
| tensor([ <span class="hljs-number">6.1035e-05</span>, <span class="hljs-number">1.5259e-05</span>, <span class="hljs-number">1.6785e-04</span>, ..., -<span class="hljs-number">1.5259e-05</span>, | |
| -<span class="hljs-number">1.5259e-05</span>, <span class="hljs-number">1.5259e-05</span>]) | |
| <span class="hljs-meta">>>> </span>ds[<span class="hljs-number">0</span>][<span class="hljs-string">"audio"</span>][<span class="hljs-string">"sampling_rate"</span>] | |
| tensor(<span class="hljs-number">44100</span>)`,wrap:!1}}),D=new b({props:{title:"Data loading",local:"data-loading",headingTag:"h2"}}),S=new J({props:{code:"aW1wb3J0JTIwbnVtcHklMjBhcyUyMG5wJTBBZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwRGF0YXNldCUyMCUwQWZyb20lMjB0b3JjaC51dGlscy5kYXRhJTIwaW1wb3J0JTIwRGF0YUxvYWRlciUwQWRhdGElMjAlM0QlMjBucC5yYW5kb20ucmFuZCgxNiklMEFsYWJlbCUyMCUzRCUyMG5wLnJhbmRvbS5yYW5kaW50KDAlMkMlMjAyJTJDJTIwc2l6ZSUzRDE2KSUwQWRzJTIwJTNEJTIwRGF0YXNldC5mcm9tX2RpY3QoJTdCJTIyZGF0YSUyMiUzQSUyMGRhdGElMkMlMjAlMjJsYWJlbCUyMiUzQSUyMGxhYmVsJTdEKS53aXRoX2Zvcm1hdCglMjJ0b3JjaCUyMiklMEFkYXRhbG9hZGVyJTIwJTNEJTIwRGF0YUxvYWRlcihkcyUyQyUyMGJhdGNoX3NpemUlM0Q0KSUwQWZvciUyMGJhdGNoJTIwaW4lMjBkYXRhbG9hZGVyJTNBJTBBJTIwJTIwJTIwJTIwcHJpbnQoYmF0Y2gpJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIw",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> torch.utils.data <span class="hljs-keyword">import</span> DataLoader | |
| <span class="hljs-meta">>>> </span>data = np.random.rand(<span class="hljs-number">16</span>) | |
| <span class="hljs-meta">>>> </span>label = np.random.randint(<span class="hljs-number">0</span>, <span class="hljs-number">2</span>, size=<span class="hljs-number">16</span>) | |
| <span class="hljs-meta">>>> </span>ds = Dataset.from_dict({<span class="hljs-string">"data"</span>: data, <span class="hljs-string">"label"</span>: label}).with_format(<span class="hljs-string">"torch"</span>) | |
| <span class="hljs-meta">>>> </span>dataloader = DataLoader(ds, batch_size=<span class="hljs-number">4</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">for</span> batch <span class="hljs-keyword">in</span> dataloader: | |
| <span class="hljs-meta">... </span> <span class="hljs-built_in">print</span>(batch) | |
| {<span class="hljs-string">'data'</span>: tensor([<span class="hljs-number">0.0047</span>, <span class="hljs-number">0.4979</span>, <span class="hljs-number">0.6726</span>, <span class="hljs-number">0.8105</span>]), <span class="hljs-string">'label'</span>: tensor([<span class="hljs-number">0</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">1</span>])} | |
| {<span class="hljs-string">'data'</span>: tensor([<span class="hljs-number">0.4832</span>, <span class="hljs-number">0.2723</span>, <span class="hljs-number">0.4259</span>, <span class="hljs-number">0.2224</span>]), <span class="hljs-string">'label'</span>: tensor([<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>])} | |
| {<span class="hljs-string">'data'</span>: tensor([<span class="hljs-number">0.5837</span>, <span class="hljs-number">0.3444</span>, <span class="hljs-number">0.4658</span>, <span class="hljs-number">0.6417</span>]), <span class="hljs-string">'label'</span>: tensor([<span class="hljs-number">0</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>])} | |
| {<span class="hljs-string">'data'</span>: tensor([<span class="hljs-number">0.7022</span>, <span class="hljs-number">0.1225</span>, <span class="hljs-number">0.7228</span>, <span class="hljs-number">0.8259</span>]), <span class="hljs-string">'label'</span>: tensor([<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>])}`,wrap:!1}}),L=new b({props:{title:"Optimize data loading",local:"optimize-data-loading",headingTag:"h3"}}),P=new b({props:{title:"Use multiple Workers",local:"use-multiple-workers",headingTag:"h4"}}),ss=new J({props:{code:"aW1wb3J0JTIwbnVtcHklMjBhcyUyMG5wJTBBZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwRGF0YXNldCUyQyUyMGxvYWRfZnJvbV9kaXNrJTBBZnJvbSUyMHRvcmNoLnV0aWxzLmRhdGElMjBpbXBvcnQlMjBEYXRhTG9hZGVyJTBBZGF0YSUyMCUzRCUyMG5wLnJhbmRvbS5yYW5kKDEwXzAwMCklMEFEYXRhc2V0LmZyb21fZGljdCglN0IlMjJkYXRhJTIyJTNBJTIwZGF0YSU3RCkuc2F2ZV90b19kaXNrKCUyMm15X2RhdGFzZXQlMjIpJTBBZHMlMjAlM0QlMjBsb2FkX2Zyb21fZGlzayglMjJteV9kYXRhc2V0JTIyKS53aXRoX2Zvcm1hdCglMjJ0b3JjaCUyMiklMEFkYXRhbG9hZGVyJTIwJTNEJTIwRGF0YUxvYWRlcihkcyUyQyUyMGJhdGNoX3NpemUlM0QzMiUyQyUyMG51bV93b3JrZXJzJTNENCk=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset, load_from_disk | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> torch.utils.data <span class="hljs-keyword">import</span> DataLoader | |
| <span class="hljs-meta">>>> </span>data = np.random.rand(<span class="hljs-number">10_000</span>) | |
| <span class="hljs-meta">>>> </span>Dataset.from_dict({<span class="hljs-string">"data"</span>: data}).save_to_disk(<span class="hljs-string">"my_dataset"</span>) | |
| <span class="hljs-meta">>>> </span>ds = load_from_disk(<span class="hljs-string">"my_dataset"</span>).with_format(<span class="hljs-string">"torch"</span>) | |
| <span class="hljs-meta">>>> </span>dataloader = DataLoader(ds, batch_size=<span class="hljs-number">32</span>, num_workers=<span class="hljs-number">4</span>)`,wrap:!1}}),as=new b({props:{title:"Stream data",local:"stream-data",headingTag:"h3"}}),ls=new J({props:{code:"aW1wb3J0JTIwbnVtcHklMjBhcyUyMG5wJTBBZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwRGF0YXNldCUyQyUyMGxvYWRfZGF0YXNldCUwQWZyb20lMjB0b3JjaC51dGlscy5kYXRhJTIwaW1wb3J0JTIwRGF0YUxvYWRlciUwQWRhdGElMjAlM0QlMjBucC5yYW5kb20ucmFuZCgxMF8wMDApJTBBRGF0YXNldC5mcm9tX2RpY3QoJTdCJTIyZGF0YSUyMiUzQSUyMGRhdGElN0QpLnB1c2hfdG9faHViKCUyMiUzQ3VzZXJuYW1lJTNFJTJGbXlfZGF0YXNldCUyMiklMjAlMjAlMjMlMjBVcGxvYWQlMjB0byUyMHRoZSUyMEh1Z2dpbmclMjBGYWNlJTIwSHViJTBBbXlfaXRlcmFibGVfZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjIlM0N1c2VybmFtZSUzRSUyRm15X2RhdGFzZXQlMjIlMkMlMjBzdHJlYW1pbmclM0RUcnVlJTJDJTIwc3BsaXQlM0QlMjJ0cmFpbiUyMiklMEFkYXRhbG9hZGVyJTIwJTNEJTIwRGF0YUxvYWRlcihteV9pdGVyYWJsZV9kYXRhc2V0JTJDJTIwYmF0Y2hfc2l6ZSUzRDMyKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset, load_dataset | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> torch.utils.data <span class="hljs-keyword">import</span> DataLoader | |
| <span class="hljs-meta">>>> </span>data = np.random.rand(<span class="hljs-number">10_000</span>) | |
| <span class="hljs-meta">>>> </span>Dataset.from_dict({<span class="hljs-string">"data"</span>: data}).push_to_hub(<span class="hljs-string">"<username>/my_dataset"</span>) <span class="hljs-comment"># Upload to the Hugging Face Hub</span> | |
| <span class="hljs-meta">>>> </span>my_iterable_dataset = load_dataset(<span class="hljs-string">"<username>/my_dataset"</span>, streaming=<span class="hljs-literal">True</span>, split=<span class="hljs-string">"train"</span>) | |
| <span class="hljs-meta">>>> </span>dataloader = DataLoader(my_iterable_dataset, batch_size=<span class="hljs-number">32</span>)`,wrap:!1}}),ps=new J({props:{code:"bXlfaXRlcmFibGVfZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJkZWVwbWluZCUyRmNvZGVfY29udGVzdHMlMjIlMkMlMjBzdHJlYW1pbmclM0RUcnVlJTJDJTIwc3BsaXQlM0QlMjJ0cmFpbiUyMiklMEFteV9pdGVyYWJsZV9kYXRhc2V0Lm5fc2hhcmRzJTBBZGF0YWxvYWRlciUyMCUzRCUyMERhdGFMb2FkZXIobXlfaXRlcmFibGVfZGF0YXNldCUyQyUyMGJhdGNoX3NpemUlM0QzMiUyQyUyMG51bV93b3JrZXJzJTNENCk=",highlighted:`<span class="hljs-meta">>>> </span>my_iterable_dataset = load_dataset(<span class="hljs-string">"deepmind/code_contests"</span>, streaming=<span class="hljs-literal">True</span>, split=<span class="hljs-string">"train"</span>) | |
| <span class="hljs-meta">>>> </span>my_iterable_dataset.n_shards | |
| <span class="hljs-number">39</span> | |
| <span class="hljs-meta">>>> </span>dataloader = DataLoader(my_iterable_dataset, batch_size=<span class="hljs-number">32</span>, num_workers=<span class="hljs-number">4</span>)`,wrap:!1}}),cs=new b({props:{title:"Checkpoint and resume",local:"checkpoint-and-resume",headingTag:"h3"}}),hs=new J({props:{code:"ZnJvbSUyMHRvcmNoZGF0YS5zdGF0ZWZ1bF9kYXRhbG9hZGVyJTIwaW1wb3J0JTIwU3RhdGVmdWxEYXRhTG9hZGVyJTBBbXlfaXRlcmFibGVfZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJkZWVwbWluZCUyRmNvZGVfY29udGVzdHMlMjIlMkMlMjBzdHJlYW1pbmclM0RUcnVlJTJDJTIwc3BsaXQlM0QlMjJ0cmFpbiUyMiklMEFkYXRhbG9hZGVyJTIwJTNEJTIwU3RhdGVmdWxEYXRhTG9hZGVyKG15X2l0ZXJhYmxlX2RhdGFzZXQlMkMlMjBiYXRjaF9zaXplJTNEMzIlMkMlMjBudW1fd29ya2VycyUzRDQpJTBBJTIzJTIwc2F2ZSUyMGluJTIwdGhlJTIwbWlkZGxlJTIwb2YlMjB0cmFpbmluZyUwQXN0YXRlX2RpY3QlMjAlM0QlMjBkYXRhbG9hZGVyLnN0YXRlX2RpY3QoKSUwQSUyMyUyMGFuZCUyMHJlc3VtZSUyMGxhdGVyJTBBZGF0YWxvYWRlci5sb2FkX3N0YXRlX2RpY3Qoc3RhdGVfZGljdCk=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> torchdata.stateful_dataloader <span class="hljs-keyword">import</span> StatefulDataLoader | |
| <span class="hljs-meta">>>> </span>my_iterable_dataset = load_dataset(<span class="hljs-string">"deepmind/code_contests"</span>, streaming=<span class="hljs-literal">True</span>, split=<span class="hljs-string">"train"</span>) | |
| <span class="hljs-meta">>>> </span>dataloader = StatefulDataLoader(my_iterable_dataset, batch_size=<span class="hljs-number">32</span>, num_workers=<span class="hljs-number">4</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># save in the middle of training</span> | |
| <span class="hljs-meta">>>> </span>state_dict = dataloader.state_dict() | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># and resume later</span> | |
| <span class="hljs-meta">>>> </span>dataloader.load_state_dict(state_dict)`,wrap:!1}}),ds=new b({props:{title:"Distributed",local:"distributed",headingTag:"h3"}}),us=new J({props:{code:"aW1wb3J0JTIwb3MlMEFmcm9tJTIwZGF0YXNldHMuZGlzdHJpYnV0ZWQlMjBpbXBvcnQlMjBzcGxpdF9kYXRhc2V0X2J5X25vZGUlMEElMEFkcyUyMCUzRCUyMHNwbGl0X2RhdGFzZXRfYnlfbm9kZShkcyUyQyUyMHJhbmslM0RpbnQob3MuZW52aXJvbiU1QiUyMlJBTkslMjIlNUQpJTJDJTIwd29ybGRfc2l6ZSUzRGludChvcy5lbnZpcm9uJTVCJTIyV09STERfU0laRSUyMiU1RCkp",highlighted:`<span class="hljs-keyword">import</span> os | |
| <span class="hljs-keyword">from</span> datasets.distributed <span class="hljs-keyword">import</span> split_dataset_by_node | |
| ds = split_dataset_by_node(ds, rank=<span class="hljs-built_in">int</span>(os.environ[<span class="hljs-string">"RANK"</span>]), world_size=<span class="hljs-built_in">int</span>(os.environ[<span class="hljs-string">"WORLD_SIZE"</span>]))`,wrap:!1}}),ws=new mt({props:{source:"https://github.com/huggingface/datasets/blob/main/docs/source/use_with_pytorch.mdx"}}),{c(){p=r("meta"),y=l(),g=r("p"),M=l(),h(I.$$.fragment),fs=l(),$=r("p"),$.innerHTML=$a,Us=l(),h(R.$$.fragment),Is=l(),k=r("p"),k.textContent=Ra,$s=l(),C=r("p"),C.innerHTML=ka,Rs=l(),h(G.$$.fragment),ks=l(),h(T.$$.fragment),Cs=l(),_=r("p"),_.innerHTML=Ca,Gs=l(),h(Z.$$.fragment),_s=l(),h(X.$$.fragment),Zs=l(),v=r("p"),v.textContent=Ga,Xs=l(),h(Q.$$.fragment),vs=l(),h(Y.$$.fragment),Qs=l(),F=r("p"),F.innerHTML=_a,Ys=l(),h(z.$$.fragment),Fs=l(),h(x.$$.fragment),zs=l(),N=r("p"),N.innerHTML=Za,xs=l(),h(V.$$.fragment),Ns=l(),W=r("p"),W.textContent=Xa,Vs=l(),q=r("p"),q.innerHTML=va,Ws=l(),h(f.$$.fragment),qs=l(),h(E.$$.fragment),Es=l(),h(U.$$.fragment),Bs=l(),h(B.$$.fragment),Ds=l(),h(D.$$.fragment),Hs=l(),H=r("p"),H.innerHTML=Qa,Ss=l(),h(S.$$.fragment),Ls=l(),h(L.$$.fragment),As=l(),A=r("p"),A.textContent=Ya,Ps=l(),h(P.$$.fragment),Ks=l(),K=r("p"),K.innerHTML=Fa,Os=l(),O=r("p"),O.innerHTML=za,sa=l(),h(ss.$$.fragment),aa=l(),h(as.$$.fragment),ta=l(),ts=r("p"),ts.innerHTML=xa,ea=l(),es=r("p"),es.innerHTML=Na,la=l(),h(ls.$$.fragment),na=l(),ns=r("p"),ns.innerHTML=Va,pa=l(),h(ps.$$.fragment),ra=l(),rs=r("p"),rs.textContent=Wa,ca=l(),h(cs.$$.fragment),ma=l(),ms=r("p"),ms.innerHTML=qa,ha=l(),h(hs.$$.fragment),ia=l(),is=r("p"),is.innerHTML=Ea,da=l(),h(ds.$$.fragment),oa=l(),os=r("p"),os.innerHTML=Ba,ua=l(),h(us.$$.fragment),ja=l(),js=r("p"),js.innerHTML=Da,ga=l(),gs=r("p"),gs.textContent=Ha,Ja=l(),Js=r("p"),Js.textContent=Sa,ya=l(),ys=r("p"),ys.textContent=La,Ma=l(),Ms=r("p"),Ms.innerHTML=Aa,ba=l(),bs=r("p"),bs.innerHTML=Pa,wa=l(),h(ws.$$.fragment),Ta=l(),Ts=r("p"),this.h()},l(s){const 
a=rt("svelte-u9bgzb",document.head);p=c(a,"META",{name:!0,content:!0}),a.forEach(t),y=n(s),g=c(s,"P",{}),at(g).forEach(t),M=n(s),i(I.$$.fragment,s),fs=n(s),$=c(s,"P",{"data-svelte-h":!0}),m($)!=="svelte-1116k3w"&&($.innerHTML=$a),Us=n(s),i(R.$$.fragment,s),Is=n(s),k=c(s,"P",{"data-svelte-h":!0}),m(k)!=="svelte-j9f3ms"&&(k.textContent=Ra),$s=n(s),C=c(s,"P",{"data-svelte-h":!0}),m(C)!=="svelte-wuauow"&&(C.innerHTML=ka),Rs=n(s),i(G.$$.fragment,s),ks=n(s),i(T.$$.fragment,s),Cs=n(s),_=c(s,"P",{"data-svelte-h":!0}),m(_)!=="svelte-1ezbzoy"&&(_.innerHTML=Ca),Gs=n(s),i(Z.$$.fragment,s),_s=n(s),i(X.$$.fragment,s),Zs=n(s),v=c(s,"P",{"data-svelte-h":!0}),m(v)!=="svelte-smjp9l"&&(v.textContent=Ga),Xs=n(s),i(Q.$$.fragment,s),vs=n(s),i(Y.$$.fragment,s),Qs=n(s),F=c(s,"P",{"data-svelte-h":!0}),m(F)!=="svelte-1gw41y9"&&(F.innerHTML=_a),Ys=n(s),i(z.$$.fragment,s),Fs=n(s),i(x.$$.fragment,s),zs=n(s),N=c(s,"P",{"data-svelte-h":!0}),m(N)!=="svelte-9al131"&&(N.innerHTML=Za),xs=n(s),i(V.$$.fragment,s),Ns=n(s),W=c(s,"P",{"data-svelte-h":!0}),m(W)!=="svelte-1hobffv"&&(W.textContent=Xa),Vs=n(s),q=c(s,"P",{"data-svelte-h":!0}),m(q)!=="svelte-1g2r59q"&&(q.innerHTML=va),Ws=n(s),i(f.$$.fragment,s),qs=n(s),i(E.$$.fragment,s),Es=n(s),i(U.$$.fragment,s),Bs=n(s),i(B.$$.fragment,s),Ds=n(s),i(D.$$.fragment,s),Hs=n(s),H=c(s,"P",{"data-svelte-h":!0}),m(H)!=="svelte-12en5kh"&&(H.innerHTML=Qa),Ss=n(s),i(S.$$.fragment,s),Ls=n(s),i(L.$$.fragment,s),As=n(s),A=c(s,"P",{"data-svelte-h":!0}),m(A)!=="svelte-nw1d6c"&&(A.textContent=Ya),Ps=n(s),i(P.$$.fragment,s),Ks=n(s),K=c(s,"P",{"data-svelte-h":!0}),m(K)!=="svelte-1wyypch"&&(K.innerHTML=Fa),Os=n(s),O=c(s,"P",{"data-svelte-h":!0}),m(O)!=="svelte-1u9vub"&&(O.innerHTML=za),sa=n(s),i(ss.$$.fragment,s),aa=n(s),i(as.$$.fragment,s),ta=n(s),ts=c(s,"P",{"data-svelte-h":!0}),m(ts)!=="svelte-a5l4ib"&&(ts.innerHTML=xa),ea=n(s),es=c(s,"P",{"data-svelte-h":!0}),m(es)!=="svelte-1t3p6zv"&&(es.innerHTML=Na),la=n(s),i(ls.$$.fragment,s),na=n(s),ns=c(s,"P",{"data-svelte-h":!0}),m(ns
)!=="svelte-gqi02o"&&(ns.innerHTML=Va),pa=n(s),i(ps.$$.fragment,s),ra=n(s),rs=c(s,"P",{"data-svelte-h":!0}),m(rs)!=="svelte-i7csup"&&(rs.textContent=Wa),ca=n(s),i(cs.$$.fragment,s),ma=n(s),ms=c(s,"P",{"data-svelte-h":!0}),m(ms)!=="svelte-1x5nhsq"&&(ms.innerHTML=qa),ha=n(s),i(hs.$$.fragment,s),ia=n(s),is=c(s,"P",{"data-svelte-h":!0}),m(is)!=="svelte-1f8p0ie"&&(is.innerHTML=Ea),da=n(s),i(ds.$$.fragment,s),oa=n(s),os=c(s,"P",{"data-svelte-h":!0}),m(os)!=="svelte-1xpq9l0"&&(os.innerHTML=Ba),ua=n(s),i(us.$$.fragment,s),ja=n(s),js=c(s,"P",{"data-svelte-h":!0}),m(js)!=="svelte-utgciv"&&(js.innerHTML=Da),ga=n(s),gs=c(s,"P",{"data-svelte-h":!0}),m(gs)!=="svelte-1a3gkys"&&(gs.textContent=Ha),Ja=n(s),Js=c(s,"P",{"data-svelte-h":!0}),m(Js)!=="svelte-41cx6v"&&(Js.textContent=Sa),ya=n(s),ys=c(s,"P",{"data-svelte-h":!0}),m(ys)!=="svelte-1kujsme"&&(ys.textContent=La),Ma=n(s),Ms=c(s,"P",{"data-svelte-h":!0}),m(Ms)!=="svelte-nsc411"&&(Ms.innerHTML=Aa),ba=n(s),bs=c(s,"P",{"data-svelte-h":!0}),m(bs)!=="svelte-19jtkan"&&(bs.innerHTML=Pa),wa=n(s),i(ws.$$.fragment,s),Ta=n(s),Ts=c(s,"P",{}),at(Ts).forEach(t),this.h()},h(){tt(p,"name","hf:doc:metadata"),tt(p,"content",ut)},m(s,a){ct(document.head,p),e(s,y,a),e(s,g,a),e(s,M,a),d(I,s,a),e(s,fs,a),e(s,$,a),e(s,Us,a),d(R,s,a),e(s,Is,a),e(s,k,a),e(s,$s,a),e(s,C,a),e(s,Rs,a),d(G,s,a),e(s,ks,a),d(T,s,a),e(s,Cs,a),e(s,_,a),e(s,Gs,a),d(Z,s,a),e(s,_s,a),d(X,s,a),e(s,Zs,a),e(s,v,a),e(s,Xs,a),d(Q,s,a),e(s,vs,a),d(Y,s,a),e(s,Qs,a),e(s,F,a),e(s,Ys,a),d(z,s,a),e(s,Fs,a),d(x,s,a),e(s,zs,a),e(s,N,a),e(s,xs,a),d(V,s,a),e(s,Ns,a),e(s,W,a),e(s,Vs,a),e(s,q,a),e(s,Ws,a),d(f,s,a),e(s,qs,a),d(E,s,a),e(s,Es,a),d(U,s,a),e(s,Bs,a),d(B,s,a),e(s,Ds,a),d(D,s,a),e(s,Hs,a),e(s,H,a),e(s,Ss,a),d(S,s,a),e(s,Ls,a),d(L,s,a),e(s,As,a),e(s,A,a),e(s,Ps,a),d(P,s,a),e(s,Ks,a),e(s,K,a),e(s,Os,a),e(s,O,a),e(s,sa,a),d(ss,s,a),e(s,aa,a),d(as,s,a),e(s,ta,a),e(s,ts,a),e(s,ea,a),e(s,es,a),e(s,la,a),d(ls,s,a),e(s,na,a),e(s,ns,a),e(s,pa,a),d(ps,s,a),e(s,ra,a),e(s,rs,a),e(s,ca,a),d(cs,s,a),e
(s,ma,a),e(s,ms,a),e(s,ha,a),d(hs,s,a),e(s,ia,a),e(s,is,a),e(s,da,a),d(ds,s,a),e(s,oa,a),e(s,os,a),e(s,ua,a),d(us,s,a),e(s,ja,a),e(s,js,a),e(s,ga,a),e(s,gs,a),e(s,Ja,a),e(s,Js,a),e(s,ya,a),e(s,ys,a),e(s,Ma,a),e(s,Ms,a),e(s,ba,a),e(s,bs,a),e(s,wa,a),d(ws,s,a),e(s,Ta,a),e(s,Ts,a),fa=!0},p(s,[a]){const Ka={};a&2&&(Ka.$$scope={dirty:a,ctx:s}),T.$set(Ka);const Oa={};a&2&&(Oa.$$scope={dirty:a,ctx:s}),f.$set(Oa);const st={};a&2&&(st.$$scope={dirty:a,ctx:s}),U.$set(st)},i(s){fa||(o(I.$$.fragment,s),o(R.$$.fragment,s),o(G.$$.fragment,s),o(T.$$.fragment,s),o(Z.$$.fragment,s),o(X.$$.fragment,s),o(Q.$$.fragment,s),o(Y.$$.fragment,s),o(z.$$.fragment,s),o(x.$$.fragment,s),o(V.$$.fragment,s),o(f.$$.fragment,s),o(E.$$.fragment,s),o(U.$$.fragment,s),o(B.$$.fragment,s),o(D.$$.fragment,s),o(S.$$.fragment,s),o(L.$$.fragment,s),o(P.$$.fragment,s),o(ss.$$.fragment,s),o(as.$$.fragment,s),o(ls.$$.fragment,s),o(ps.$$.fragment,s),o(cs.$$.fragment,s),o(hs.$$.fragment,s),o(ds.$$.fragment,s),o(us.$$.fragment,s),o(ws.$$.fragment,s),fa=!0)},o(s){u(I.$$.fragment,s),u(R.$$.fragment,s),u(G.$$.fragment,s),u(T.$$.fragment,s),u(Z.$$.fragment,s),u(X.$$.fragment,s),u(Q.$$.fragment,s),u(Y.$$.fragment,s),u(z.$$.fragment,s),u(x.$$.fragment,s),u(V.$$.fragment,s),u(f.$$.fragment,s),u(E.$$.fragment,s),u(U.$$.fragment,s),u(B.$$.fragment,s),u(D.$$.fragment,s),u(S.$$.fragment,s),u(L.$$.fragment,s),u(P.$$.fragment,s),u(ss.$$.fragment,s),u(as.$$.fragment,s),u(ls.$$.fragment,s),u(ps.$$.fragment,s),u(cs.$$.fragment,s),u(hs.$$.fragment,s),u(ds.$$.fragment,s),u(us.$$.fragment,s),u(ws.$$.fragment,s),fa=!1},d(s){s&&(t(y),t(g),t(M),t(fs),t($),t(Us),t(Is),t(k),t($s),t(C),t(Rs),t(ks),t(Cs),t(_),t(Gs),t(_s),t(Zs),t(v),t(Xs),t(vs),t(Qs),t(F),t(Ys),t(Fs),t(zs),t(N),t(xs),t(Ns),t(W),t(Vs),t(q),t(Ws),t(qs),t(Es),t(Bs),t(Ds),t(Hs),t(H),t(Ss),t(Ls),t(As),t(A),t(Ps),t(Ks),t(K),t(Os),t(O),t(sa),t(aa),t(ta),t(ts),t(ea),t(es),t(la),t(na),t(ns),t(pa),t(ra),t(rs),t(ca),t(ma),t(ms),t(ha),t(ia),t(is),t(da),t(oa),t(os),t(ua),t(ja),t(js),t(
ga),t(gs),t(Ja),t(Js),t(ya),t(ys),t(Ma),t(Ms),t(ba),t(bs),t(wa),t(Ta),t(Ts)),t(p),j(I,s),j(R,s),j(G,s),j(T,s),j(Z,s),j(X,s),j(Q,s),j(Y,s),j(z,s),j(x,s),j(V,s),j(f,s),j(E,s),j(U,s),j(B,s),j(D,s),j(S,s),j(L,s),j(P,s),j(ss,s),j(as,s),j(ls,s),j(ps,s),j(cs,s),j(hs,s),j(ds,s),j(us,s),j(ws,s)}}}/* ut: JSON string describing this page's title ("Use with PyTorch") and its nested sections (each entry has title, local, sections, depth). It is the value passed in tt(p,"content",ut) inside the fragment's h() hook, next to tt(p,"name","hf:doc:metadata"). */const ut='{"title":"Use with PyTorch","local":"use-with-pytorch","sections":[{"title":"Dataset format","local":"dataset-format","sections":[{"title":"N-dimensional arrays","local":"n-dimensional-arrays","sections":[],"depth":3},{"title":"Other feature types","local":"other-feature-types","sections":[],"depth":3}],"depth":2},{"title":"Data loading","local":"data-loading","sections":[{"title":"Optimize data loading","local":"optimize-data-loading","sections":[{"title":"Use multiple Workers","local":"use-multiple-workers","sections":[],"depth":4}],"depth":3},{"title":"Stream data","local":"stream-data","sections":[],"depth":3},{"title":"Checkpoint and resume","local":"checkpoint-and-resume","sections":[],"depth":3},{"title":"Distributed","local":"distributed","sections":[],"depth":3}],"depth":2}],"depth":1}';/* jt: instance function handed to pt() by the component class below. It registers a callback through lt() (imported as `o` from the scheduler chunk; presumably Svelte's onMount, TODO confirm) that looks up the "fw" query parameter of window.location.search and discards the value. Because of the comma expression the function always returns an empty array; the parameter w is unused. */function jt(w){return lt(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}/* wt: component class extending nt (imported as `S` from the index chunk; presumably SvelteComponent, TODO confirm). The constructor calls super() and then pt(this,p,jt,ot,et,{}), forwarding its argument p together with the instance function jt, the fragment factory ot and et. The class is exported under the name `component`. */class wt extends nt{constructor(p){super(),pt(this,p,jt,ot,et,{})}}export{wt as component}; | |
Xet Storage Details
- Size:
- 43.1 kB
- Xet hash:
- 71c9fd0c0a86ae54ade3a4fa8e245f2e5676e9aeb042c0b591d3e5b7ef8b965c
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.