Buckets:
| import{s as pt,n as rt,o as ct}from"../chunks/scheduler.d75c11ed.js";import{S as ht,i as it,e as p,s as l,c as h,h as mt,a as r,d as t,b as n,f as nt,g as i,j as c,k as bs,l as dt,m as e,n as m,t as d,o,p as u}from"../chunks/index.4ec9dfe9.js";import{C as ot,H as g,E as ut}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.ee0f129e.js";import{C as j}from"../chunks/CodeBlock.5919a092.js";function jt(Ca){let J,fs,Ts,Us,w,Is,f,Rs,U,$a=`This document is a quick introduction to using <code>datasets</code> with PyTorch, with a particular focus on how to get | |
| <code>torch.Tensor</code> objects out of our datasets, and how to use a PyTorch <code>DataLoader</code> and a Hugging Face <code>Dataset</code> | |
| with the best performance.`,ks,I,Cs,R,Ga="By default, datasets return regular python objects: integers, floats, strings, lists, etc.",$s,k,_a='To get PyTorch tensors instead, you can set the format of the dataset to <code>pytorch</code> using <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.Dataset.with_format">Dataset.with_format()</a>:',Gs,C,_s,y,Za='<p>A <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.Dataset">Dataset</a> object is a wrapper of an Arrow table, which allows fast zero-copy reads from arrays in the dataset to PyTorch tensors.</p>',Zs,$,va="To load the data as tensors on a GPU, specify the <code>device</code> argument:",vs,G,Qs,_,Xs,Z,Qa="If your dataset consists of N-dimensional arrays, you will see that by default they are considered as the same tensor if the shape is fixed:",Ys,v,Fs,Q,xs,X,Xa=`However this logic often requires slow shape comparisons and data copies. | |
| To avoid this, you must explicitly use the <code>Array</code> feature type and specify the shape of your tensors:`,zs,Y,Ns,F,Vs,x,Ya='<a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.ClassLabel">ClassLabel</a> data are properly converted to tensors:',Ws,z,qs,N,Fa="String and binary objects are unchanged, since PyTorch only supports numbers.",Es,V,xa='The <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.Image">Image</a> and <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.Audio">Audio</a> feature types are also supported.',Bs,M,za=`<p>To use the <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.Image">Image</a> feature type, you’ll need to install the <code>vision</code> extra as | |
| <code>pip install datasets[vision]</code>.</p>`,Ds,W,Hs,b,Na=`<p>To use the <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.Audio">Audio</a> feature type, you’ll need to install the <code>audio</code> extra as | |
| <code>pip install datasets[audio]</code>.</p>`,Ls,q,Ss,E,As,B,Va='Like <code>torch.utils.data.Dataset</code> objects, a <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.Dataset">Dataset</a> can be passed directly to a PyTorch <code>DataLoader</code>:',Ps,D,Ks,H,Os,L,Wa=`There are several ways you can increase the speed your data is loaded which can save you time, especially if you are working with large datasets. | |
| PyTorch offers parallelized data loading, retrieving batches of indices instead of individually, and streaming to iterate over the dataset without downloading it on disk.`,sa,S,aa,A,qa="You can parallelize data loading with the <code>num_workers</code> argument of a PyTorch <code>DataLoader</code> and get a higher throughput.",ta,P,Ea=`Under the hood, the <code>DataLoader</code> starts <code>num_workers</code> processes. | |
| Each process reloads the dataset passed to the <code>DataLoader</code> and is used to query examples. | |
| Reloading the dataset inside a worker doesn’t fill up your RAM, since it simply memory-maps the dataset again from your disk.`,ea,K,la,O,na,ss,Ba=`Stream a dataset by loading it as an <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.IterableDataset">IterableDataset</a>. This allows you to progressively iterate over a remote dataset without downloading it on disk and or over local data files. | |
| Learn more about which type of dataset is best for your use case in the <a href="./about_mapstyle_vs_iterable">choosing between a regular dataset or an iterable dataset</a> guide.`,pa,as,Da="An iterable dataset from <code>datasets</code> inherits from <code>torch.utils.data.IterableDataset</code> so you can pass it to a <code>torch.utils.data.DataLoader</code>:",ra,ts,ca,es,Ha="If the dataset is split in several shards (i.e. if the dataset consists of multiple data files), then you can stream in parallel using <code>num_workers</code>:",ha,ls,ia,ns,La="In this case each worker is given a subset of the list of shards to stream from.",ma,ps,da,rs,Sa='If you need a DataLoader that you can checkpoint and resume in the middle of training, you can use the <code>StatefulDataLoader</code> from <a href="https://github.com/pytorch/data" rel="nofollow">torchdata</a>:',oa,cs,ua,hs,Aa='This is possible thanks to <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.IterableDataset.state_dict">IterableDataset.state_dict()</a> and <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.IterableDataset.load_state_dict">IterableDataset.load_state_dict()</a>.',ja,is,ga,ms,Pa='To split your dataset across your training nodes, you can use <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.distributed.split_dataset_by_node">datasets.distributed.split_dataset_by_node()</a>:',Ja,ds,ya,os,Ka=`This works for both map-style datasets and iterable datasets. | |
| The dataset is split for the node at rank <code>rank</code> in a pool of nodes of size <code>world_size</code>.`,Ma,us,Oa="For map-style datasets:",ba,js,st="Each node is assigned a chunk of data, e.g. rank 0 is given the first chunk of the dataset.",Ta,gs,at="For iterable datasets:",wa,Js,tt=`If the dataset has a number of shards that is a factor of <code>world_size</code> (i.e. if <code>dataset.num_shards % world_size == 0</code>), | |
| then the shards are evenly assigned across the nodes, which is the most optimized. | |
| Otherwise, each node keeps 1 example out of <code>world_size</code>, skipping the other examples.`,fa,ys,et="This can also be combined with a <code>torch.utils.data.DataLoader</code> if you want each node to use multiple workers to load the data.",Ua,T,lt='<p>If you shuffle your iterable dataset in a distributed setup, make sure to set a fixed <code>seed</code> in <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.IterableDataset.shuffle">IterableDataset.shuffle()</a> so the same shuffled list of shards is used on every node to know which shards the node should skip.</p>',Ia,Ms,Ra,ws,ka;return w=new ot({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),f=new g({props:{title:"Use with PyTorch",local:"use-with-pytorch",headingTag:"h1"}}),I=new g({props:{title:"Dataset format",local:"dataset-format",headingTag:"h2"}}),C=new j({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwRGF0YXNldCUwQWRhdGElMjAlM0QlMjAlNUIlNUIxJTJDJTIwMiU1RCUyQyU1QjMlMkMlMjA0JTVEJTVEJTBBZHMlMjAlM0QlMjBEYXRhc2V0LmZyb21fZGljdCglN0IlMjJkYXRhJTIyJTNBJTIwZGF0YSU3RCklMEFkcyUyMCUzRCUyMGRzLndpdGhfZm9ybWF0KCUyMnRvcmNoJTIyKSUwQWRzJTVCMCU1RCUwQWRzJTVCJTNBMiU1RA==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| <span class="hljs-meta">>>> </span>data = [[<span class="hljs-number">1</span>, <span class="hljs-number">2</span>],[<span class="hljs-number">3</span>, <span class="hljs-number">4</span>]] | |
| <span class="hljs-meta">>>> </span>ds = Dataset.from_dict({<span class="hljs-string">"data"</span>: data}) | |
| <span class="hljs-meta">>>> </span>ds = ds.with_format(<span class="hljs-string">"torch"</span>) | |
| <span class="hljs-meta">>>> </span>ds[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'data'</span>: tensor([<span class="hljs-number">1</span>, <span class="hljs-number">2</span>])} | |
| <span class="hljs-meta">>>> </span>ds[:<span class="hljs-number">2</span>] | |
| {<span class="hljs-string">'data'</span>: tensor([[<span class="hljs-number">1</span>, <span class="hljs-number">2</span>], | |
| [<span class="hljs-number">3</span>, <span class="hljs-number">4</span>]])}`,wrap:!1}}),G=new j({props:{code:"aW1wb3J0JTIwdG9yY2glMEFkZXZpY2UlMjAlM0QlMjB0b3JjaC5kZXZpY2UoJTIyY3VkYSUyMiUyMGlmJTIwdG9yY2guY3VkYS5pc19hdmFpbGFibGUoKSUyMGVsc2UlMjAlMjJjcHUlMjIpJTBBZHMlMjAlM0QlMjBkcy53aXRoX2Zvcm1hdCglMjJ0b3JjaCUyMiUyQyUyMGRldmljZSUzRGRldmljZSklMEFkcyU1QjAlNUQ=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span>device = torch.device(<span class="hljs-string">"cuda"</span> <span class="hljs-keyword">if</span> torch.cuda.is_available() <span class="hljs-keyword">else</span> <span class="hljs-string">"cpu"</span>) | |
| <span class="hljs-meta">>>> </span>ds = ds.with_format(<span class="hljs-string">"torch"</span>, device=device) | |
| <span class="hljs-meta">>>> </span>ds[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'data'</span>: tensor([<span class="hljs-number">1</span>, <span class="hljs-number">2</span>], device=<span class="hljs-string">'cuda:0'</span>)}`,wrap:!1}}),_=new g({props:{title:"N-dimensional arrays",local:"n-dimensional-arrays",headingTag:"h3"}}),v=new j({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwRGF0YXNldCUwQWRhdGElMjAlM0QlMjAlNUIlNUIlNUIxJTJDJTIwMiU1RCUyQyU1QjMlMkMlMjA0JTVEJTVEJTJDJTVCJTVCNSUyQyUyMDYlNUQlMkMlNUI3JTJDJTIwOCU1RCU1RCU1RCUyMCUyMCUyMyUyMGZpeGVkJTIwc2hhcGUlMEFkcyUyMCUzRCUyMERhdGFzZXQuZnJvbV9kaWN0KCU3QiUyMmRhdGElMjIlM0ElMjBkYXRhJTdEKSUwQWRzJTIwJTNEJTIwZHMud2l0aF9mb3JtYXQoJTIydG9yY2glMjIpJTBBZHMlNUIwJTVE",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| <span class="hljs-meta">>>> </span>data = [[[<span class="hljs-number">1</span>, <span class="hljs-number">2</span>],[<span class="hljs-number">3</span>, <span class="hljs-number">4</span>]],[[<span class="hljs-number">5</span>, <span class="hljs-number">6</span>],[<span class="hljs-number">7</span>, <span class="hljs-number">8</span>]]] <span class="hljs-comment"># fixed shape</span> | |
| <span class="hljs-meta">>>> </span>ds = Dataset.from_dict({<span class="hljs-string">"data"</span>: data}) | |
| <span class="hljs-meta">>>> </span>ds = ds.with_format(<span class="hljs-string">"torch"</span>) | |
| <span class="hljs-meta">>>> </span>ds[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'data'</span>: tensor([[<span class="hljs-number">1</span>, <span class="hljs-number">2</span>], | |
| [<span class="hljs-number">3</span>, <span class="hljs-number">4</span>]])}`,wrap:!1}}),Q=new j({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwRGF0YXNldCUwQWRhdGElMjAlM0QlMjAlNUIlNUIlNUIxJTJDJTIwMiU1RCUyQyU1QjMlNUQlNUQlMkMlNUIlNUI0JTJDJTIwNSUyQyUyMDYlNUQlMkMlNUI3JTJDJTIwOCU1RCU1RCU1RCUyMCUyMCUyMyUyMHZhcnlpbmclMjBzaGFwZSUwQWRzJTIwJTNEJTIwRGF0YXNldC5mcm9tX2RpY3QoJTdCJTIyZGF0YSUyMiUzQSUyMGRhdGElN0QpJTBBZHMlMjAlM0QlMjBkcy53aXRoX2Zvcm1hdCglMjJ0b3JjaCUyMiklMEFkcyU1QjAlNUQ=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| <span class="hljs-meta">>>> </span>data = [[[<span class="hljs-number">1</span>, <span class="hljs-number">2</span>],[<span class="hljs-number">3</span>]],[[<span class="hljs-number">4</span>, <span class="hljs-number">5</span>, <span class="hljs-number">6</span>],[<span class="hljs-number">7</span>, <span class="hljs-number">8</span>]]] <span class="hljs-comment"># varying shape</span> | |
| <span class="hljs-meta">>>> </span>ds = Dataset.from_dict({<span class="hljs-string">"data"</span>: data}) | |
| <span class="hljs-meta">>>> </span>ds = ds.with_format(<span class="hljs-string">"torch"</span>) | |
| <span class="hljs-meta">>>> </span>ds[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'data'</span>: [tensor([<span class="hljs-number">1</span>, <span class="hljs-number">2</span>]), tensor([<span class="hljs-number">3</span>])]}`,wrap:!1}}),Y=new j({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwRGF0YXNldCUyQyUyMEZlYXR1cmVzJTJDJTIwQXJyYXkyRCUwQWRhdGElMjAlM0QlMjAlNUIlNUIlNUIxJTJDJTIwMiU1RCUyQyU1QjMlMkMlMjA0JTVEJTVEJTJDJTVCJTVCNSUyQyUyMDYlNUQlMkMlNUI3JTJDJTIwOCU1RCU1RCU1RCUwQWZlYXR1cmVzJTIwJTNEJTIwRmVhdHVyZXMoJTdCJTIyZGF0YSUyMiUzQSUyMEFycmF5MkQoc2hhcGUlM0QoMiUyQyUyMDIpJTJDJTIwZHR5cGUlM0QnaW50MzInKSU3RCklMEFkcyUyMCUzRCUyMERhdGFzZXQuZnJvbV9kaWN0KCU3QiUyMmRhdGElMjIlM0ElMjBkYXRhJTdEJTJDJTIwZmVhdHVyZXMlM0RmZWF0dXJlcyklMEFkcyUyMCUzRCUyMGRzLndpdGhfZm9ybWF0KCUyMnRvcmNoJTIyKSUwQWRzJTVCMCU1RCUwQWRzJTVCJTNBMiU1RA==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset, Features, Array2D | |
| <span class="hljs-meta">>>> </span>data = [[[<span class="hljs-number">1</span>, <span class="hljs-number">2</span>],[<span class="hljs-number">3</span>, <span class="hljs-number">4</span>]],[[<span class="hljs-number">5</span>, <span class="hljs-number">6</span>],[<span class="hljs-number">7</span>, <span class="hljs-number">8</span>]]] | |
| <span class="hljs-meta">>>> </span>features = Features({<span class="hljs-string">"data"</span>: Array2D(shape=(<span class="hljs-number">2</span>, <span class="hljs-number">2</span>), dtype=<span class="hljs-string">'int32'</span>)}) | |
| <span class="hljs-meta">>>> </span>ds = Dataset.from_dict({<span class="hljs-string">"data"</span>: data}, features=features) | |
| <span class="hljs-meta">>>> </span>ds = ds.with_format(<span class="hljs-string">"torch"</span>) | |
| <span class="hljs-meta">>>> </span>ds[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'data'</span>: tensor([[<span class="hljs-number">1</span>, <span class="hljs-number">2</span>], | |
| [<span class="hljs-number">3</span>, <span class="hljs-number">4</span>]])} | |
| <span class="hljs-meta">>>> </span>ds[:<span class="hljs-number">2</span>] | |
| {<span class="hljs-string">'data'</span>: tensor([[[<span class="hljs-number">1</span>, <span class="hljs-number">2</span>], | |
| [<span class="hljs-number">3</span>, <span class="hljs-number">4</span>]], | |
| [[<span class="hljs-number">5</span>, <span class="hljs-number">6</span>], | |
| [<span class="hljs-number">7</span>, <span class="hljs-number">8</span>]]])}`,wrap:!1}}),F=new g({props:{title:"Other feature types",local:"other-feature-types",headingTag:"h3"}}),z=new j({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwRGF0YXNldCUyQyUyMEZlYXR1cmVzJTJDJTIwQ2xhc3NMYWJlbCUwQWxhYmVscyUyMCUzRCUyMCU1QjAlMkMlMjAwJTJDJTIwMSU1RCUwQWZlYXR1cmVzJTIwJTNEJTIwRmVhdHVyZXMoJTdCJTIybGFiZWwlMjIlM0ElMjBDbGFzc0xhYmVsKG5hbWVzJTNEJTVCJTIybmVnYXRpdmUlMjIlMkMlMjAlMjJwb3NpdGl2ZSUyMiU1RCklN0QpJTBBZHMlMjAlM0QlMjBEYXRhc2V0LmZyb21fZGljdCglN0IlMjJsYWJlbCUyMiUzQSUyMGxhYmVscyU3RCUyQyUyMGZlYXR1cmVzJTNEZmVhdHVyZXMpJTIwJTBBZHMlMjAlM0QlMjBkcy53aXRoX2Zvcm1hdCglMjJ0b3JjaCUyMiklMjAlMjAlMEFkcyU1QiUzQTMlNUQ=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset, Features, ClassLabel | |
| <span class="hljs-meta">>>> </span>labels = [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">1</span>] | |
| <span class="hljs-meta">>>> </span>features = Features({<span class="hljs-string">"label"</span>: ClassLabel(names=[<span class="hljs-string">"negative"</span>, <span class="hljs-string">"positive"</span>])}) | |
| <span class="hljs-meta">>>> </span>ds = Dataset.from_dict({<span class="hljs-string">"label"</span>: labels}, features=features) | |
| <span class="hljs-meta">>>> </span>ds = ds.with_format(<span class="hljs-string">"torch"</span>) | |
| <span class="hljs-meta">>>> </span>ds[:<span class="hljs-number">3</span>] | |
| {<span class="hljs-string">'label'</span>: tensor([<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">1</span>])}`,wrap:!1}}),W=new j({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwRGF0YXNldCUyQyUyMEZlYXR1cmVzJTJDJTIwQXVkaW8lMkMlMjBJbWFnZSUwQWltYWdlcyUyMCUzRCUyMCU1QiUyMnBhdGglMkZ0byUyRmltYWdlLnBuZyUyMiU1RCUyMColMjAxMCUwQWZlYXR1cmVzJTIwJTNEJTIwRmVhdHVyZXMoJTdCJTIyaW1hZ2UlMjIlM0ElMjBJbWFnZSgpJTdEKSUwQWRzJTIwJTNEJTIwRGF0YXNldC5mcm9tX2RpY3QoJTdCJTIyaW1hZ2UlMjIlM0ElMjBpbWFnZXMlN0QlMkMlMjBmZWF0dXJlcyUzRGZlYXR1cmVzKSUyMCUwQWRzJTIwJTNEJTIwZHMud2l0aF9mb3JtYXQoJTIydG9yY2glMjIpJTBBZHMlNUIwJTVEJTVCJTIyaW1hZ2UlMjIlNUQuc2hhcGUlMEFkcyU1QjAlNUQlMEFkcyU1QiUzQTIlNUQlNUIlMjJpbWFnZSUyMiU1RC5zaGFwZSUwQWRzJTVCJTNBMiU1RA==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset, Features, Audio, Image | |
| <span class="hljs-meta">>>> </span>images = [<span class="hljs-string">"path/to/image.png"</span>] * <span class="hljs-number">10</span> | |
| <span class="hljs-meta">>>> </span>features = Features({<span class="hljs-string">"image"</span>: Image()}) | |
| <span class="hljs-meta">>>> </span>ds = Dataset.from_dict({<span class="hljs-string">"image"</span>: images}, features=features) | |
| <span class="hljs-meta">>>> </span>ds = ds.with_format(<span class="hljs-string">"torch"</span>) | |
| <span class="hljs-meta">>>> </span>ds[<span class="hljs-number">0</span>][<span class="hljs-string">"image"</span>].shape | |
| torch.Size([<span class="hljs-number">512</span>, <span class="hljs-number">512</span>, <span class="hljs-number">4</span>]) | |
| <span class="hljs-meta">>>> </span>ds[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'image'</span>: tensor([[[<span class="hljs-number">255</span>, <span class="hljs-number">215</span>, <span class="hljs-number">106</span>, <span class="hljs-number">255</span>], | |
| [<span class="hljs-number">255</span>, <span class="hljs-number">215</span>, <span class="hljs-number">106</span>, <span class="hljs-number">255</span>], | |
| ..., | |
| [<span class="hljs-number">255</span>, <span class="hljs-number">255</span>, <span class="hljs-number">255</span>, <span class="hljs-number">255</span>], | |
| [<span class="hljs-number">255</span>, <span class="hljs-number">255</span>, <span class="hljs-number">255</span>, <span class="hljs-number">255</span>]]], dtype=torch.uint8)} | |
| <span class="hljs-meta">>>> </span>ds[:<span class="hljs-number">2</span>][<span class="hljs-string">"image"</span>].shape | |
| torch.Size([<span class="hljs-number">2</span>, <span class="hljs-number">512</span>, <span class="hljs-number">512</span>, <span class="hljs-number">4</span>]) | |
| <span class="hljs-meta">>>> </span>ds[:<span class="hljs-number">2</span>] | |
| {<span class="hljs-string">'image'</span>: tensor([[[[<span class="hljs-number">255</span>, <span class="hljs-number">215</span>, <span class="hljs-number">106</span>, <span class="hljs-number">255</span>], | |
| [<span class="hljs-number">255</span>, <span class="hljs-number">215</span>, <span class="hljs-number">106</span>, <span class="hljs-number">255</span>], | |
| ..., | |
| [<span class="hljs-number">255</span>, <span class="hljs-number">255</span>, <span class="hljs-number">255</span>, <span class="hljs-number">255</span>], | |
| [<span class="hljs-number">255</span>, <span class="hljs-number">255</span>, <span class="hljs-number">255</span>, <span class="hljs-number">255</span>]]]], dtype=torch.uint8)}`,wrap:!1}}),q=new j({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwRGF0YXNldCUyQyUyMEZlYXR1cmVzJTJDJTIwQXVkaW8lMkMlMjBJbWFnZSUwQWF1ZGlvJTIwJTNEJTIwJTVCJTIycGF0aCUyRnRvJTJGYXVkaW8ud2F2JTIyJTVEJTIwKiUyMDEwJTBBZmVhdHVyZXMlMjAlM0QlMjBGZWF0dXJlcyglN0IlMjJhdWRpbyUyMiUzQSUyMEF1ZGlvKCklN0QpJTBBZHMlMjAlM0QlMjBEYXRhc2V0LmZyb21fZGljdCglN0IlMjJhdWRpbyUyMiUzQSUyMGF1ZGlvJTdEJTJDJTIwZmVhdHVyZXMlM0RmZWF0dXJlcyklMjAlMEFkcyUyMCUzRCUyMGRzLndpdGhfZm9ybWF0KCUyMnRvcmNoJTIyKSUyMCUyMCUwQWRzJTVCMCU1RCU1QiUyMmF1ZGlvJTIyJTVEJTVCJTIyYXJyYXklMjIlNUQlMEFkcyU1QjAlNUQlNUIlMjJhdWRpbyUyMiU1RCU1QiUyMnNhbXBsaW5nX3JhdGUlMjIlNUQ=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset, Features, Audio, Image | |
| <span class="hljs-meta">>>> </span>audio = [<span class="hljs-string">"path/to/audio.wav"</span>] * <span class="hljs-number">10</span> | |
| <span class="hljs-meta">>>> </span>features = Features({<span class="hljs-string">"audio"</span>: Audio()}) | |
| <span class="hljs-meta">>>> </span>ds = Dataset.from_dict({<span class="hljs-string">"audio"</span>: audio}, features=features) | |
| <span class="hljs-meta">>>> </span>ds = ds.with_format(<span class="hljs-string">"torch"</span>) | |
| <span class="hljs-meta">>>> </span>ds[<span class="hljs-number">0</span>][<span class="hljs-string">"audio"</span>][<span class="hljs-string">"array"</span>] | |
| tensor([ <span class="hljs-number">6.1035e-05</span>, <span class="hljs-number">1.5259e-05</span>, <span class="hljs-number">1.6785e-04</span>, ..., -<span class="hljs-number">1.5259e-05</span>, | |
| -<span class="hljs-number">1.5259e-05</span>, <span class="hljs-number">1.5259e-05</span>]) | |
| <span class="hljs-meta">>>> </span>ds[<span class="hljs-number">0</span>][<span class="hljs-string">"audio"</span>][<span class="hljs-string">"sampling_rate"</span>] | |
| tensor(<span class="hljs-number">44100</span>)`,wrap:!1}}),E=new g({props:{title:"Data loading",local:"data-loading",headingTag:"h2"}}),D=new j({props:{code:"aW1wb3J0JTIwbnVtcHklMjBhcyUyMG5wJTBBZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwRGF0YXNldCUyMCUwQWZyb20lMjB0b3JjaC51dGlscy5kYXRhJTIwaW1wb3J0JTIwRGF0YUxvYWRlciUwQWRhdGElMjAlM0QlMjBucC5yYW5kb20ucmFuZCgxNiklMEFsYWJlbCUyMCUzRCUyMG5wLnJhbmRvbS5yYW5kaW50KDAlMkMlMjAyJTJDJTIwc2l6ZSUzRDE2KSUwQWRzJTIwJTNEJTIwRGF0YXNldC5mcm9tX2RpY3QoJTdCJTIyZGF0YSUyMiUzQSUyMGRhdGElMkMlMjAlMjJsYWJlbCUyMiUzQSUyMGxhYmVsJTdEKS53aXRoX2Zvcm1hdCglMjJ0b3JjaCUyMiklMEFkYXRhbG9hZGVyJTIwJTNEJTIwRGF0YUxvYWRlcihkcyUyQyUyMGJhdGNoX3NpemUlM0Q0KSUwQWZvciUyMGJhdGNoJTIwaW4lMjBkYXRhbG9hZGVyJTNBJTBBJTIwJTIwJTIwJTIwcHJpbnQoYmF0Y2gpJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIw",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> torch.utils.data <span class="hljs-keyword">import</span> DataLoader | |
| <span class="hljs-meta">>>> </span>data = np.random.rand(<span class="hljs-number">16</span>) | |
| <span class="hljs-meta">>>> </span>label = np.random.randint(<span class="hljs-number">0</span>, <span class="hljs-number">2</span>, size=<span class="hljs-number">16</span>) | |
| <span class="hljs-meta">>>> </span>ds = Dataset.from_dict({<span class="hljs-string">"data"</span>: data, <span class="hljs-string">"label"</span>: label}).with_format(<span class="hljs-string">"torch"</span>) | |
| <span class="hljs-meta">>>> </span>dataloader = DataLoader(ds, batch_size=<span class="hljs-number">4</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">for</span> batch <span class="hljs-keyword">in</span> dataloader: | |
| <span class="hljs-meta">... </span> <span class="hljs-built_in">print</span>(batch) | |
| {<span class="hljs-string">'data'</span>: tensor([<span class="hljs-number">0.0047</span>, <span class="hljs-number">0.4979</span>, <span class="hljs-number">0.6726</span>, <span class="hljs-number">0.8105</span>]), <span class="hljs-string">'label'</span>: tensor([<span class="hljs-number">0</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">1</span>])} | |
| {<span class="hljs-string">'data'</span>: tensor([<span class="hljs-number">0.4832</span>, <span class="hljs-number">0.2723</span>, <span class="hljs-number">0.4259</span>, <span class="hljs-number">0.2224</span>]), <span class="hljs-string">'label'</span>: tensor([<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>])} | |
| {<span class="hljs-string">'data'</span>: tensor([<span class="hljs-number">0.5837</span>, <span class="hljs-number">0.3444</span>, <span class="hljs-number">0.4658</span>, <span class="hljs-number">0.6417</span>]), <span class="hljs-string">'label'</span>: tensor([<span class="hljs-number">0</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>])} | |
| {<span class="hljs-string">'data'</span>: tensor([<span class="hljs-number">0.7022</span>, <span class="hljs-number">0.1225</span>, <span class="hljs-number">0.7228</span>, <span class="hljs-number">0.8259</span>]), <span class="hljs-string">'label'</span>: tensor([<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>])}`,wrap:!1}}),H=new g({props:{title:"Optimize data loading",local:"optimize-data-loading",headingTag:"h3"}}),S=new g({props:{title:"Use multiple Workers",local:"use-multiple-workers",headingTag:"h4"}}),K=new j({props:{code:"aW1wb3J0JTIwbnVtcHklMjBhcyUyMG5wJTBBZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwRGF0YXNldCUyQyUyMGxvYWRfZnJvbV9kaXNrJTBBZnJvbSUyMHRvcmNoLnV0aWxzLmRhdGElMjBpbXBvcnQlMjBEYXRhTG9hZGVyJTBBZGF0YSUyMCUzRCUyMG5wLnJhbmRvbS5yYW5kKDEwXzAwMCklMEFEYXRhc2V0LmZyb21fZGljdCglN0IlMjJkYXRhJTIyJTNBJTIwZGF0YSU3RCkuc2F2ZV90b19kaXNrKCUyMm15X2RhdGFzZXQlMjIpJTBBZHMlMjAlM0QlMjBsb2FkX2Zyb21fZGlzayglMjJteV9kYXRhc2V0JTIyKS53aXRoX2Zvcm1hdCglMjJ0b3JjaCUyMiklMEFkYXRhbG9hZGVyJTIwJTNEJTIwRGF0YUxvYWRlcihkcyUyQyUyMGJhdGNoX3NpemUlM0QzMiUyQyUyMG51bV93b3JrZXJzJTNENCk=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset, load_from_disk | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> torch.utils.data <span class="hljs-keyword">import</span> DataLoader | |
| <span class="hljs-meta">>>> </span>data = np.random.rand(<span class="hljs-number">10_000</span>) | |
| <span class="hljs-meta">>>> </span>Dataset.from_dict({<span class="hljs-string">"data"</span>: data}).save_to_disk(<span class="hljs-string">"my_dataset"</span>) | |
| <span class="hljs-meta">>>> </span>ds = load_from_disk(<span class="hljs-string">"my_dataset"</span>).with_format(<span class="hljs-string">"torch"</span>) | |
| <span class="hljs-meta">>>> </span>dataloader = DataLoader(ds, batch_size=<span class="hljs-number">32</span>, num_workers=<span class="hljs-number">4</span>)`,wrap:!1}}),O=new g({props:{title:"Stream data",local:"stream-data",headingTag:"h3"}}),ts=new j({props:{code:"aW1wb3J0JTIwbnVtcHklMjBhcyUyMG5wJTBBZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwRGF0YXNldCUyQyUyMGxvYWRfZGF0YXNldCUwQWZyb20lMjB0b3JjaC51dGlscy5kYXRhJTIwaW1wb3J0JTIwRGF0YUxvYWRlciUwQWRhdGElMjAlM0QlMjBucC5yYW5kb20ucmFuZCgxMF8wMDApJTBBRGF0YXNldC5mcm9tX2RpY3QoJTdCJTIyZGF0YSUyMiUzQSUyMGRhdGElN0QpLnB1c2hfdG9faHViKCUyMiUzQ3VzZXJuYW1lJTNFJTJGbXlfZGF0YXNldCUyMiklMjAlMjAlMjMlMjBVcGxvYWQlMjB0byUyMHRoZSUyMEh1Z2dpbmclMjBGYWNlJTIwSHViJTBBbXlfaXRlcmFibGVfZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjIlM0N1c2VybmFtZSUzRSUyRm15X2RhdGFzZXQlMjIlMkMlMjBzdHJlYW1pbmclM0RUcnVlJTJDJTIwc3BsaXQlM0QlMjJ0cmFpbiUyMiklMEFkYXRhbG9hZGVyJTIwJTNEJTIwRGF0YUxvYWRlcihteV9pdGVyYWJsZV9kYXRhc2V0JTJDJTIwYmF0Y2hfc2l6ZSUzRDMyKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset, load_dataset | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> torch.utils.data <span class="hljs-keyword">import</span> DataLoader | |
| <span class="hljs-meta">>>> </span>data = np.random.rand(<span class="hljs-number">10_000</span>) | |
| <span class="hljs-meta">>>> </span>Dataset.from_dict({<span class="hljs-string">"data"</span>: data}).push_to_hub(<span class="hljs-string">"<username>/my_dataset"</span>) <span class="hljs-comment"># Upload to the Hugging Face Hub</span> | |
| <span class="hljs-meta">>>> </span>my_iterable_dataset = load_dataset(<span class="hljs-string">"<username>/my_dataset"</span>, streaming=<span class="hljs-literal">True</span>, split=<span class="hljs-string">"train"</span>) | |
| <span class="hljs-meta">>>> </span>dataloader = DataLoader(my_iterable_dataset, batch_size=<span class="hljs-number">32</span>)`,wrap:!1}}),ls=new j({props:{code:"bXlfaXRlcmFibGVfZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJkZWVwbWluZCUyRmNvZGVfY29udGVzdHMlMjIlMkMlMjBzdHJlYW1pbmclM0RUcnVlJTJDJTIwc3BsaXQlM0QlMjJ0cmFpbiUyMiklMEFteV9pdGVyYWJsZV9kYXRhc2V0Lm51bV9zaGFyZHMlMEFkYXRhbG9hZGVyJTIwJTNEJTIwRGF0YUxvYWRlcihteV9pdGVyYWJsZV9kYXRhc2V0JTJDJTIwYmF0Y2hfc2l6ZSUzRDMyJTJDJTIwbnVtX3dvcmtlcnMlM0Q0KQ==",highlighted:`<span class="hljs-meta">>>> </span>my_iterable_dataset = load_dataset(<span class="hljs-string">"deepmind/code_contests"</span>, streaming=<span class="hljs-literal">True</span>, split=<span class="hljs-string">"train"</span>) | |
| <span class="hljs-meta">>>> </span>my_iterable_dataset.num_shards | |
| <span class="hljs-number">39</span> | |
| <span class="hljs-meta">>>> </span>dataloader = DataLoader(my_iterable_dataset, batch_size=<span class="hljs-number">32</span>, num_workers=<span class="hljs-number">4</span>)`,wrap:!1}}),ps=new g({props:{title:"Checkpoint and resume",local:"checkpoint-and-resume",headingTag:"h3"}}),cs=new j({props:{code:"ZnJvbSUyMHRvcmNoZGF0YS5zdGF0ZWZ1bF9kYXRhbG9hZGVyJTIwaW1wb3J0JTIwU3RhdGVmdWxEYXRhTG9hZGVyJTBBbXlfaXRlcmFibGVfZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJkZWVwbWluZCUyRmNvZGVfY29udGVzdHMlMjIlMkMlMjBzdHJlYW1pbmclM0RUcnVlJTJDJTIwc3BsaXQlM0QlMjJ0cmFpbiUyMiklMEFkYXRhbG9hZGVyJTIwJTNEJTIwU3RhdGVmdWxEYXRhTG9hZGVyKG15X2l0ZXJhYmxlX2RhdGFzZXQlMkMlMjBiYXRjaF9zaXplJTNEMzIlMkMlMjBudW1fd29ya2VycyUzRDQpJTBBJTIzJTIwc2F2ZSUyMGluJTIwdGhlJTIwbWlkZGxlJTIwb2YlMjB0cmFpbmluZyUwQXN0YXRlX2RpY3QlMjAlM0QlMjBkYXRhbG9hZGVyLnN0YXRlX2RpY3QoKSUwQSUyMyUyMGFuZCUyMHJlc3VtZSUyMGxhdGVyJTBBZGF0YWxvYWRlci5sb2FkX3N0YXRlX2RpY3Qoc3RhdGVfZGljdCk=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> torchdata.stateful_dataloader <span class="hljs-keyword">import</span> StatefulDataLoader | |
| <span class="hljs-meta">>>> </span>my_iterable_dataset = load_dataset(<span class="hljs-string">"deepmind/code_contests"</span>, streaming=<span class="hljs-literal">True</span>, split=<span class="hljs-string">"train"</span>) | |
| <span class="hljs-meta">>>> </span>dataloader = StatefulDataLoader(my_iterable_dataset, batch_size=<span class="hljs-number">32</span>, num_workers=<span class="hljs-number">4</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># save in the middle of training</span> | |
| <span class="hljs-meta">>>> </span>state_dict = dataloader.state_dict() | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># and resume later</span> | |
| <span class="hljs-meta">>>> </span>dataloader.load_state_dict(state_dict)`,wrap:!1}}),is=new g({props:{title:"Distributed",local:"distributed",headingTag:"h3"}}),ds=new j({props:{code:"aW1wb3J0JTIwb3MlMEFmcm9tJTIwZGF0YXNldHMuZGlzdHJpYnV0ZWQlMjBpbXBvcnQlMjBzcGxpdF9kYXRhc2V0X2J5X25vZGUlMEElMEFkcyUyMCUzRCUyMHNwbGl0X2RhdGFzZXRfYnlfbm9kZShkcyUyQyUyMHJhbmslM0RpbnQob3MuZW52aXJvbiU1QiUyMlJBTkslMjIlNUQpJTJDJTIwd29ybGRfc2l6ZSUzRGludChvcy5lbnZpcm9uJTVCJTIyV09STERfU0laRSUyMiU1RCkp",highlighted:`<span class="hljs-keyword">import</span> os | |
| <span class="hljs-keyword">from</span> datasets.distributed <span class="hljs-keyword">import</span> split_dataset_by_node | |
| ds = split_dataset_by_node(ds, rank=<span class="hljs-built_in">int</span>(os.environ[<span class="hljs-string">"RANK"</span>]), world_size=<span class="hljs-built_in">int</span>(os.environ[<span class="hljs-string">"WORLD_SIZE"</span>]))`,wrap:!1}}),Ms=new ut({props:{source:"https://github.com/huggingface/datasets/blob/main/docs/source/use_with_pytorch.mdx"}}),{c(){J=p("meta"),fs=l(),Ts=p("p"),Us=l(),h(w.$$.fragment),Is=l(),h(f.$$.fragment),Rs=l(),U=p("p"),U.innerHTML=$a,ks=l(),h(I.$$.fragment),Cs=l(),R=p("p"),R.textContent=Ga,$s=l(),k=p("p"),k.innerHTML=_a,Gs=l(),h(C.$$.fragment),_s=l(),y=p("blockquote"),y.innerHTML=Za,Zs=l(),$=p("p"),$.innerHTML=va,vs=l(),h(G.$$.fragment),Qs=l(),h(_.$$.fragment),Xs=l(),Z=p("p"),Z.textContent=Qa,Ys=l(),h(v.$$.fragment),Fs=l(),h(Q.$$.fragment),xs=l(),X=p("p"),X.innerHTML=Xa,zs=l(),h(Y.$$.fragment),Ns=l(),h(F.$$.fragment),Vs=l(),x=p("p"),x.innerHTML=Ya,Ws=l(),h(z.$$.fragment),qs=l(),N=p("p"),N.textContent=Fa,Es=l(),V=p("p"),V.innerHTML=xa,Bs=l(),M=p("blockquote"),M.innerHTML=za,Ds=l(),h(W.$$.fragment),Hs=l(),b=p("blockquote"),b.innerHTML=Na,Ls=l(),h(q.$$.fragment),Ss=l(),h(E.$$.fragment),As=l(),B=p("p"),B.innerHTML=Va,Ps=l(),h(D.$$.fragment),Ks=l(),h(H.$$.fragment),Os=l(),L=p("p"),L.textContent=Wa,sa=l(),h(S.$$.fragment),aa=l(),A=p("p"),A.innerHTML=qa,ta=l(),P=p("p"),P.innerHTML=Ea,ea=l(),h(K.$$.fragment),la=l(),h(O.$$.fragment),na=l(),ss=p("p"),ss.innerHTML=Ba,pa=l(),as=p("p"),as.innerHTML=Da,ra=l(),h(ts.$$.fragment),ca=l(),es=p("p"),es.innerHTML=Ha,ha=l(),h(ls.$$.fragment),ia=l(),ns=p("p"),ns.textContent=La,ma=l(),h(ps.$$.fragment),da=l(),rs=p("p"),rs.innerHTML=Sa,oa=l(),h(cs.$$.fragment),ua=l(),hs=p("p"),hs.innerHTML=Aa,ja=l(),h(is.$$.fragment),ga=l(),ms=p("p"),ms.innerHTML=Pa,Ja=l(),h(ds.$$.fragment),ya=l(),os=p("p"),os.innerHTML=Ka,Ma=l(),us=p("p"),us.textContent=Oa,ba=l(),js=p("p"),js.textContent=st,Ta=l(),gs=p("p"),gs.textContent=at,wa=l(),Js=p("p"),Js.innerHTML=tt,fa=l(),ys=p("p"),ys.innerHTML=et,Ua=l(),T=p("blockquote"),T.innerHTML=lt,Ia=l(),h(Ms.$$.fragment),Ra=l(),ws=p("p"),this.h()},l(s){const a=mt("svelte-u9bgzb",document.head);J=r(a,"META",{name:!0,content:!0}),a.forEach(t),fs=n(s),Ts=r(s,"P",{}),nt(Ts).forEach(t),Us=n(s),i(w.$$.fragment,s),Is=n(s),i(f.$$.fragment,s),Rs=n(s),U=r(s,"P",{"data-svelte-h":!0}),c(U)!=="svelte-1116k3w"&&(U.innerHTML=$a),ks=n(s),i(I.$$.fragment,s),Cs=n(s),R=r(s,"P",{"data-svelte-h":!0}),c(R)!=="svelte-j9f3ms"&&(R.textContent=Ga),$s=n(s),k=r(s,"P",{"data-svelte-h":!0}),c(k)!=="svelte-16qfs1n"&&(k.innerHTML=_a),Gs=n(s),i(C.$$.fragment,s),_s=n(s),y=r(s,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),c(y)!=="svelte-9wh9ue"&&(y.innerHTML=Za),Zs=n(s),$=r(s,"P",{"data-svelte-h":!0}),c($)!=="svelte-1ezbzoy"&&($.innerHTML=va),vs=n(s),i(G.$$.fragment,s),Qs=n(s),i(_.$$.fragment,s),Xs=n(s),Z=r(s,"P",{"data-svelte-h":!0}),c(Z)!=="svelte-smjp9l"&&(Z.textContent=Qa),Ys=n(s),i(v.$$.fragment,s),Fs=n(s),i(Q.$$.fragment,s),xs=n(s),X=r(s,"P",{"data-svelte-h":!0}),c(X)!=="svelte-1gw41y9"&&(X.innerHTML=Xa),zs=n(s),i(Y.$$.fragment,s),Ns=n(s),i(F.$$.fragment,s),Vs=n(s),x=r(s,"P",{"data-svelte-h":!0}),c(x)!=="svelte-4rrzns"&&(x.innerHTML=Ya),Ws=n(s),i(z.$$.fragment,s),qs=n(s),N=r(s,"P",{"data-svelte-h":!0}),c(N)!=="svelte-1hobffv"&&(N.textContent=Fa),Es=n(s),V=r(s,"P",{"data-svelte-h":!0}),c(V)!=="svelte-1lwbecc"&&(V.innerHTML=xa),Bs=n(s),M=r(s,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),c(M)!=="svelte-hl86i8"&&(M.innerHTML=za),Ds=n(s),i(W.$$.fragment,s),Hs=n(s),b=r(s,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),c(b)!=="svelte-18st58y"&&(b.innerHTML=Na),Ls=n(s),i(q.$$.fragment,s),Ss=n(s),i(E.$$.fragment,s),As=n(s),B=r(s,"P",{"data-svelte-h":!0}),c(B)!=="svelte-djsuo4"&&(B.innerHTML=Va),Ps=n(s),i(D.$$.fragment,s),Ks=n(s),i(H.$$.fragment,s),Os=n(s),L=r(s,"P",{"data-svelte-h":!0}),c(L)!=="svelte-nw1d6c"&&(L.textContent=Wa),sa=n(s),i(S.$$.fragment,s),aa=n(s),A=r(s,"P",{"data-svelte-h":!0}),c(A)!=="svelte-1wyypch"&&(A.innerHTML=qa),ta=n(s),P=r(s,"P",{"data-svelte-h":!0}),c(P)!=="svelte-1u9vub"&&(P.innerHTML=Ea),ea=n(s),i(K.$$.fragment,s),la=n(s),i(O.$$.fragment,s),na=n(s),ss=r(s,"P",{"data-svelte-h":!0}),c(ss)!=="svelte-yf92us"&&(ss.innerHTML=Ba),pa=n(s),as=r(s,"P",{"data-svelte-h":!0}),c(as)!=="svelte-1t3p6zv"&&(as.innerHTML=Da),ra=n(s),i(ts.$$.fragment,s),ca=n(s),es=r(s,"P",{"data-svelte-h":!0}),c(es)!=="svelte-gqi02o"&&(es.innerHTML=Ha),ha=n(s),i(ls.$$.fragment,s),ia=n(s),ns=r(s,"P",{"data-svelte-h":!0}),c(ns)!=="svelte-i7csup"&&(ns.textContent=La),ma=n(s),i(ps.$$.fragment,s),da=n(s),rs=r(s,"P",{"data-svelte-h":!0}),c(rs)!=="svelte-1x5nhsq"&&(rs.innerHTML=Sa),oa=n(s),i(cs.$$.fragment,s),ua=n(s),hs=r(s,"P",{"data-svelte-h":!0}),c(hs)!=="svelte-1v6oi1e"&&(hs.innerHTML=Aa),ja=n(s),i(is.$$.fragment,s),ga=n(s),ms=r(s,"P",{"data-svelte-h":!0}),c(ms)!=="svelte-1slqirp"&&(ms.innerHTML=Pa),Ja=n(s),i(ds.$$.fragment,s),ya=n(s),os=r(s,"P",{"data-svelte-h":!0}),c(os)!=="svelte-utgciv"&&(os.innerHTML=Ka),Ma=n(s),us=r(s,"P",{"data-svelte-h":!0}),c(us)!=="svelte-1a3gkys"&&(us.textContent=Oa),ba=n(s),js=r(s,"P",{"data-svelte-h":!0}),c(js)!=="svelte-41cx6v"&&(js.textContent=st),Ta=n(s),gs=r(s,"P",{"data-svelte-h":!0}),c(gs)!=="svelte-1kujsme"&&(gs.textContent=at),wa=n(s),Js=r(s,"P",{"data-svelte-h":!0}),c(Js)!=="svelte-tqahqj"&&(Js.innerHTML=tt),fa=n(s),ys=r(s,"P",{"data-svelte-h":!0}),c(ys)!=="svelte-19jtkan"&&(ys.innerHTML=et),Ua=n(s),T=r(s,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),c(T)!=="svelte-j6g1c5"&&(T.innerHTML=lt),Ia=n(s),i(Ms.$$.fragment,s),Ra=n(s),ws=r(s,"P",{}),nt(ws).forEach(t),this.h()},h(){bs(J,"name","hf:doc:metadata"),bs(J,"content",gt),bs(y,"class","tip"),bs(M,"class","tip"),bs(b,"class","tip"),bs(T,"class","warning")},m(s,a){dt(document.head,J),e(s,fs,a),e(s,Ts,a),e(s,Us,a),m(w,s,a),e(s,Is,a),m(f,s,a),e(s,Rs,a),e(s,U,a),e(s,ks,a),m(I,s,a),e(s,Cs,a),e(s,R,a),e(s,$s,a),e(s,k,a),e(s,Gs,a),m(C,s,a),e(s,_s,a),e(s,y,a),e(s,Zs,a),e(s,$,a),e(s,vs,a),m(G,s,a),e(s,Qs,a),m(_,s,a),e(s,Xs,a),e(s,Z,a),e(s,Ys,a),m(v,s,a),e(s,Fs,a),m(Q,s,a),e(s,xs,a),e(s,X,a),e(s,zs,a),m(Y,s,a),e(s,Ns,a),m(F,s,a),e(s,Vs,a),e(s,x,a),e(s,Ws,a),m(z,s,a),e(s,qs,a),e(s,N,a),e(s,Es,a),e(s,V,a),e(s,Bs,a),e(s,M,a),e(s,Ds,a),m(W,s,a),e(s,Hs,a),e(s,b,a),e(s,Ls,a),m(q,s,a),e(s,Ss,a),m(E,s,a),e(s,As,a),e(s,B,a),e(s,Ps,a),m(D,s,a),e(s,Ks,a),m(H,s,a),e(s,Os,a),e(s,L,a),e(s,sa,a),m(S,s,a),e(s,aa,a),e(s,A,a),e(s,ta,a),e(s,P,a),e(s,ea,a),m(K,s,a),e(s,la,a),m(O,s,a),e(s,na,a),e(s,ss,a),e(s,pa,a),e(s,as,a),e(s,ra,a),m(ts,s,a),e(s,ca,a),e(s,es,a),e(s,ha,a),m(ls,s,a),e(s,ia,a),e(s,ns,a),e(s,ma,a),m(ps,s,a),e(s,da,a),e(s,rs,a),e(s,oa,a),m(cs,s,a),e(s,ua,a),e(s,hs,a),e(s,ja,a),m(is,s,a),e(s,ga,a),e(s,ms,a),e(s,Ja,a),m(ds,s,a),e(s,ya,a),e(s,os,a),e(s,Ma,a),e(s,us,a),e(s,ba,a),e(s,js,a),e(s,Ta,a),e(s,gs,a),e(s,wa,a),e(s,Js,a),e(s,fa,a),e(s,ys,a),e(s,Ua,a),e(s,T,a),e(s,Ia,a),m(Ms,s,a),e(s,Ra,a),e(s,ws,a),ka=!0},p:rt,i(s){ka||(d(w.$$.fragment,s),d(f.$$.fragment,s),d(I.$$.fragment,s),d(C.$$.fragment,s),d(G.$$.fragment,s),d(_.$$.fragment,s),d(v.$$.fragment,s),d(Q.$$.fragment,s),d(Y.$$.fragment,s),d(F.$$.fragment,s),d(z.$$.fragment,s),d(W.$$.fragment,s),d(q.$$.fragment,s),d(E.$$.fragment,s),d(D.$$.fragment,s),d(H.$$.fragment,s),d(S.$$.fragment,s),d(K.$$.fragment,s),d(O.$$.fragment,s),d(ts.$$.fragment,s),d(ls.$$.fragment,s),d(ps.$$.fragment,s),d(cs.$$.fragment,s),d(is.$$.fragment,s),d(ds.$$.fragment,s),d(Ms.$$.fragment,s),ka=!0)},o(s){o(w.$$.fragment,s),o(f.$$.fragment,s),o(I.$$.fragment,s),o(C.$$.fragment,s),o(G.$$.fragment,s),o(_.$$.fragment,s),o(v.$$.fragment,s),o(Q.$$.fragment,s),o(Y.$$.fragment,s),o(F.$$.fragment,s),o(z.$$.fragment,s),o(W.$$.fragment,s),o(q.$$.fragment,s),o(E.$$.fragment,s),o(D.$$.fragment,s),o(H.$$.fragment,s),o(S.$$.fragment,s),o(K.$$.fragment,s),o(O.$$.fragment,s),o(ts.$$.fragment,s),o(ls.$$.fragment,s),o(ps.$$.fragment,s),o(cs.$$.fragment,s),o(is.$$.fragment,s),o(ds.$$.fragment,s),o(Ms.$$.fragment,s),ka=!1},d(s){s&&(t(fs),t(Ts),t(Us),t(Is),t(Rs),t(U),t(ks),t(Cs),t(R),t($s),t(k),t(Gs),t(_s),t(y),t(Zs),t($),t(vs),t(Qs),t(Xs),t(Z),t(Ys),t(Fs),t(xs),t(X),t(zs),t(Ns),t(Vs),t(x),t(Ws),t(qs),t(N),t(Es),t(V),t(Bs),t(M),t(Ds),t(Hs),t(b),t(Ls),t(Ss),t(As),t(B),t(Ps),t(Ks),t(Os),t(L),t(sa),t(aa),t(A),t(ta),t(P),t(ea),t(la),t(na),t(ss),t(pa),t(as),t(ra),t(ca),t(es),t(ha),t(ia),t(ns),t(ma),t(da),t(rs),t(oa),t(ua),t(hs),t(ja),t(ga),t(ms),t(Ja),t(ya),t(os),t(Ma),t(us),t(ba),t(js),t(Ta),t(gs),t(wa),t(Js),t(fa),t(ys),t(Ua),t(T),t(Ia),t(Ra),t(ws)),t(J),u(w,s),u(f,s),u(I,s),u(C,s),u(G,s),u(_,s),u(v,s),u(Q,s),u(Y,s),u(F,s),u(z,s),u(W,s),u(q,s),u(E,s),u(D,s),u(H,s),u(S,s),u(K,s),u(O,s),u(ts,s),u(ls,s),u(ps,s),u(cs,s),u(is,s),u(ds,s),u(Ms,s)}}}const gt='{"title":"Use with PyTorch","local":"use-with-pytorch","sections":[{"title":"Dataset format","local":"dataset-format","sections":[{"title":"N-dimensional arrays","local":"n-dimensional-arrays","sections":[],"depth":3},{"title":"Other feature types","local":"other-feature-types","sections":[],"depth":3}],"depth":2},{"title":"Data loading","local":"data-loading","sections":[{"title":"Optimize data loading","local":"optimize-data-loading","sections":[{"title":"Use multiple Workers","local":"use-multiple-workers","sections":[],"depth":4}],"depth":3},{"title":"Stream data","local":"stream-data","sections":[],"depth":3},{"title":"Checkpoint and resume","local":"checkpoint-and-resume","sections":[],"depth":3},{"title":"Distributed","local":"distributed","sections":[],"depth":3}],"depth":2}],"depth":1}';function Jt(Ca){return ct(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class wt extends ht{constructor(J){super(),it(this,J,Jt,jt,pt,{})}}export{wt as component}; | |
Xet Storage Details
- Size:
- 43.3 kB
- Xet hash:
- 37a074bc98568252d8a9611459cb4794df18c772a5a181998f292c34d412dc55
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.