Buckets:

HuggingFaceDocBuilder's picture
download
raw
46.1 kB
import{s as Ba,n as Sa,o as xa}from"../chunks/scheduler.d75c11ed.js";import{S as Aa,i as Wa,e as p,s as e,c as M,h as Ra,a as i,d as a,b as n,f as va,g as y,j as o,k as vs,l as _a,m as l,n as c,t as d,o as u,p as r}from"../chunks/index.4ec9dfe9.js";import{C as Qa,H as qs,E as Ya}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.12c641e2.js";import{C as m}from"../chunks/CodeBlock.64e007a4.js";function Va(zt){let h,Bs,ks,Ss,w,xs,f,As,U,Pt="There are two methods for creating and sharing an image dataset. This guide will show you how to:",Ws,g,Ot='<li><p>Create an image dataset from local files in python with <a href="/docs/datasets/pr_8170/en/package_reference/main_classes#datasets.Dataset.push_to_hub">Dataset.push_to_hub()</a>. This is an easy way that requires only a few steps in python.</p></li> <li><p>Create an image dataset with <code>ImageFolder</code> and some metadata. This is a no-code solution for quickly creating an image dataset with several thousand images.</p></li>',Rs,j,Kt='<p>You can control access to your dataset by requiring users to share their contact information first. Check out the <a href="https://huggingface.co/docs/hub/datasets-gated" rel="nofollow">Gated datasets</a> guide for more information about how to enable this feature on the Hub.</p>',_s,b,Qs,I,sa="The <code>ImageFolder</code> is a dataset builder designed to quickly load an image dataset with several thousand images without requiring you to write any code.",Ys,J,ta='<p>💡 Take a look at the <a href="repository_structure#split-pattern-hierarchy">Split pattern hierarchy</a> to learn more about how <code>ImageFolder</code> creates dataset splits based on your dataset repository structure.</p>',Vs,C,aa="<code>ImageFolder</code> automatically infers the class labels of your dataset based on the directory name. Store your dataset in a directory structure like:",Es,Z,Hs,G,la='If the dataset follows the <code>ImageFolder</code> structure, then you can load it directly with <a href="/docs/datasets/pr_8170/en/package_reference/loading_methods#datasets.load_dataset">load_dataset()</a>:',Ns,$,Ds,q,ea='This is equivalent to passing <code>imagefolder</code> manually in <a href="/docs/datasets/pr_8170/en/package_reference/loading_methods#datasets.load_dataset">load_dataset()</a> and the directory in <code>data_dir</code>:',Fs,k,Xs,L,na="You can also use <code>imagefolder</code> to load datasets involving multiple splits. To do so, your dataset directory should have the following structure:",zs,v,Ps,T,pa="<p>If all image files are contained in a single directory or if they are not on the same level of directory structure, <code>label</code> column won’t be added automatically. If you need it, set <code>drop_labels=False</code> explicitly.</p>",Os,B,ia="If there is additional information you’d like to include about your dataset, like text captions or bounding boxes, add it as a <code>metadata.csv</code> file in your folder. This lets you quickly create datasets for different computer vision tasks like text captioning or object detection. You can also use a JSONL file <code>metadata.jsonl</code> or a Parquet file <code>metadata.parquet</code>.",Ks,S,st,x,oa="You can also zip your images, and in this case each zip should contain both the images and the metadata",tt,A,at,W,Ma="Your <code>metadata.csv</code> file must have a <code>file_name</code> or <code>*_file_name</code> field which links image files with their metadata:",lt,R,et,_,ya="or using <code>metadata.jsonl</code>:",nt,Q,pt,Y,ca="Here the <code>file_name</code> must be the name of the image file next to the metadata file. More generally, it must be the relative path from the directory containing the metadata to the image file.",it,V,da="It’s possible to point to more than one image in each row in your dataset, for example if both your input and output are images:",ot,E,Mt,H,ua="You can also define lists of images. In that case you need to name the field <code>file_names</code> or <code>*_file_names</code>. Here is an example:",yt,N,ct,D,dt,F,ra="Image captioning datasets have text describing an image. An example <code>metadata.csv</code> may look like:",ut,X,rt,z,ma="Load the dataset with <code>ImageFolder</code>, and it will create a <code>text</code> column for the image captions:",mt,P,ht,O,jt,K,ha="Object detection datasets have bounding boxes and categories identifying objects in an image. An example <code>metadata.jsonl</code> may look like:",Jt,ss,Tt,ts,ja="Load the dataset with <code>ImageFolder</code>, and it will create a <code>objects</code> column with the bounding boxes and the categories:",wt,as,ft,ls,Ut,es,Ja='Once you’ve created a dataset, you can share it to the Hub with the <a href="/docs/datasets/pr_8170/en/package_reference/main_classes#datasets.DatasetDict.push_to_hub">push_to_hub()</a> method. Make sure you have the <a href="https://huggingface.co/docs/huggingface_hub/index" rel="nofollow">huggingface_hub</a> library installed and you’re logged in to your Hugging Face account (see the <a href="upload_dataset#upload-with-python">Upload with Python tutorial</a> for more details).',gt,ns,Ta='Upload your dataset with <a href="/docs/datasets/pr_8170/en/package_reference/main_classes#datasets.DatasetDict.push_to_hub">push_to_hub()</a>:',bt,ps,It,is,Ct,os,wa=`The <a href="https://github.com/webdataset/webdataset" rel="nofollow">WebDataset</a> format is based on TAR archives and is suitable for big image datasets.
Indeed you can group your images in TAR archives (e.g. 1GB of images per TAR archive) and have thousands of TAR archives:`,Zt,Ms,Gt,ys,fa="In the archives, each example is made of files sharing the same prefix:",$t,cs,qt,ds,Ua="You can put your images labels/captions/bounding boxes using JSON or text files for example.",kt,us,ga="Load your WebDataset and it will create on column per file suffix (here “jpg” and “json”):",Lt,rs,vt,ms,ba="It’s also possible to have several images per example like this:",Bt,hs,St,js,Ia='For more details on the WebDataset format and the python library, please check the <a href="https://webdataset.github.io/webdataset" rel="nofollow">WebDataset documentation</a>.',xt,Js,At,Ts,Ca=`<a href="https://lance.org" rel="nofollow">Lance</a> is an open multimodal lakehouse table format. Lance tables can natively store not only text and scalar values,
but also large binary objects (blobs) such as images, audio, and video alongside your tabular data.`,Wt,ws,Za=`Starting from image files on disk plus associated metadata (for example, captions and dimensions), you can write a self-contained Lance dataset to a
local <code>*.lance</code> directory. The resulting table can store your metadata columns alongside an <code>image</code> column containing the encoded image bytes.`,Rt,fs,Ga="For example, you might start with metadata like:",_t,Us,Qt,gs,$a="You can define a <code>pyarrow</code> schema for your metadata and image bytes, build a table, and write it as a Lance dataset:",Yt,bs,Vt,Is,qa="Here’s a representative view of what a Lance table storing images might look like (the <code>image</code> column contains encoded bytes):",Et,Cs,Ht,Zs,ka=`Using this approach, you can store arbitrarily large image datasets in Lance. The resulting <code>images.lance/</code> directory with
its <code>*.lance</code> files can be uploaded to the Hugging Face Hub, just like the other examples above. See the <code>lance-format/laion-1m</code> <a href="https://huggingface.co/datasets/lance-format/laion-1m" rel="nofollow">on the Hub</a> dataset for an example of a Lance image dataset.`,Nt,Gs,La='For more details on working with Lance datasets, see the <a href="https://lance.org" rel="nofollow">Lance documentation</a>.',Dt,$s,Ft,Ls,Xt;return w=new Qa({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),f=new qs({props:{title:"Create an image dataset",local:"create-an-image-dataset",headingTag:"h1"}}),b=new qs({props:{title:"ImageFolder",local:"imagefolder",headingTag:"h2"}}),Z=new m({props:{code:"Zm9sZGVyJTJGdHJhaW4lMkZkb2clMkZnb2xkZW5fcmV0cmlldmVyLnBuZyUwQWZvbGRlciUyRnRyYWluJTJGZG9nJTJGZ2VybWFuX3NoZXBoZXJkLnBuZyUwQWZvbGRlciUyRnRyYWluJTJGZG9nJTJGY2hpaHVhaHVhLnBuZyUwQSUwQWZvbGRlciUyRnRyYWluJTJGY2F0JTJGbWFpbmVfY29vbi5wbmclMEFmb2xkZXIlMkZ0cmFpbiUyRmNhdCUyRmJlbmdhbC5wbmclMEFmb2xkZXIlMkZ0cmFpbiUyRmNhdCUyRmJpcm1hbi5wbmc=",highlighted:`folder<span class="hljs-regexp">/train/</span>dog/golden_retriever.png
folder<span class="hljs-regexp">/train/</span>dog/german_shepherd.png
folder<span class="hljs-regexp">/train/</span>dog/chihuahua.png
folder<span class="hljs-regexp">/train/</span>cat/maine_coon.png
folder<span class="hljs-regexp">/train/</span>cat/bengal.png
folder<span class="hljs-regexp">/train/</span>cat/birman.png`,wrap:!1}}),$=new m({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJwYXRoJTJGdG8lMkZmb2xkZXIlMjIp",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = load_dataset(<span class="hljs-string">&quot;path/to/folder&quot;</span>)`,wrap:!1}}),k=new m({props:{code:"ZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJpbWFnZWZvbGRlciUyMiUyQyUyMGRhdGFfZGlyJTNEJTIyJTJGcGF0aCUyRnRvJTJGZm9sZGVyJTIyKQ==",highlighted:'<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = load_dataset(<span class="hljs-string">&quot;imagefolder&quot;</span>, data_dir=<span class="hljs-string">&quot;/path/to/folder&quot;</span>)',wrap:!1}}),v=new m({props:{code:"Zm9sZGVyJTJGdHJhaW4lMkZkb2clMkZnb2xkZW5fcmV0cmlldmVyLnBuZyUwQWZvbGRlciUyRnRyYWluJTJGY2F0JTJGbWFpbmVfY29vbi5wbmclMEFmb2xkZXIlMkZ0ZXN0JTJGZG9nJTJGZ2VybWFuX3NoZXBoZXJkLnBuZyUwQWZvbGRlciUyRnRlc3QlMkZjYXQlMkZiZW5nYWwucG5n",highlighted:`folder<span class="hljs-regexp">/train/</span>dog/golden_retriever.png
folder<span class="hljs-regexp">/train/</span>cat/maine_coon.png
folder<span class="hljs-regexp">/test/</span>dog/german_shepherd.png
folder<span class="hljs-regexp">/test/</span>cat/bengal.png`,wrap:!1}}),S=new m({props:{code:"Zm9sZGVyJTJGdHJhaW4lMkZtZXRhZGF0YS5jc3YlMEFmb2xkZXIlMkZ0cmFpbiUyRjAwMDEucG5nJTBBZm9sZGVyJTJGdHJhaW4lMkYwMDAyLnBuZyUwQWZvbGRlciUyRnRyYWluJTJGMDAwMy5wbmc=",highlighted:`folder<span class="hljs-regexp">/train/m</span>etadata.csv
folder<span class="hljs-regexp">/train/</span><span class="hljs-number">0001</span>.png
folder<span class="hljs-regexp">/train/</span><span class="hljs-number">0002</span>.png
folder<span class="hljs-regexp">/train/</span><span class="hljs-number">0003</span>.png`,wrap:!1}}),A=new m({props:{code:"Zm9sZGVyJTJGdHJhaW4uemlwJTBBZm9sZGVyJTJGdGVzdC56aXAlMEFmb2xkZXIlMkZ2YWxpZGF0aW9uLnppcA==",highlighted:`folder/train.<span class="hljs-keyword">zip</span>
folder/<span class="hljs-keyword">test</span>.<span class="hljs-keyword">zip</span>
folder/validation.<span class="hljs-keyword">zip</span>`,wrap:!1}}),R=new m({props:{code:"ZmlsZV9uYW1lJTJDYWRkaXRpb25hbF9mZWF0dXJlJTBBMDAwMS5wbmclMkNUaGlzJTIwaXMlMjBhJTIwZmlyc3QlMjB2YWx1ZSUyMG9mJTIwYSUyMHRleHQlMjBmZWF0dXJlJTIweW91JTIwYWRkZWQlMjB0byUyMHlvdXIlMjBpbWFnZXMlMEEwMDAyLnBuZyUyQ1RoaXMlMjBpcyUyMGElMjBzZWNvbmQlMjB2YWx1ZSUyMG9mJTIwYSUyMHRleHQlMjBmZWF0dXJlJTIweW91JTIwYWRkZWQlMjB0byUyMHlvdXIlMjBpbWFnZXMlMEEwMDAzLnBuZyUyQ1RoaXMlMjBpcyUyMGElMjB0aGlyZCUyMHZhbHVlJTIwb2YlMjBhJTIwdGV4dCUyMGZlYXR1cmUlMjB5b3UlMjBhZGRlZCUyMHRvJTIweW91ciUyMGltYWdlcw==",highlighted:`file_name,additional_feature
<span class="hljs-number">0001.</span>png,This is <span class="hljs-keyword">a</span> <span class="hljs-keyword">first</span> <span class="hljs-built_in">value</span> <span class="hljs-keyword">of</span> <span class="hljs-keyword">a</span> <span class="hljs-keyword">text</span> feature you added <span class="hljs-built_in">to</span> your images
<span class="hljs-number">0002.</span>png,This is <span class="hljs-keyword">a</span> <span class="hljs-keyword">second</span> <span class="hljs-built_in">value</span> <span class="hljs-keyword">of</span> <span class="hljs-keyword">a</span> <span class="hljs-keyword">text</span> feature you added <span class="hljs-built_in">to</span> your images
<span class="hljs-number">0003.</span>png,This is <span class="hljs-keyword">a</span> <span class="hljs-keyword">third</span> <span class="hljs-built_in">value</span> <span class="hljs-keyword">of</span> <span class="hljs-keyword">a</span> <span class="hljs-keyword">text</span> feature you added <span class="hljs-built_in">to</span> your images`,wrap:!1}}),Q=new m({props:{code:"JTdCJTIyZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMS5wbmclMjIlMkMlMjAlMjJhZGRpdGlvbmFsX2ZlYXR1cmUlMjIlM0ElMjAlMjJUaGlzJTIwaXMlMjBhJTIwZmlyc3QlMjB2YWx1ZSUyMG9mJTIwYSUyMHRleHQlMjBmZWF0dXJlJTIweW91JTIwYWRkZWQlMjB0byUyMHlvdXIlMjBpbWFnZXMlMjIlN0QlMEElN0IlMjJmaWxlX25hbWUlMjIlM0ElMjAlMjIwMDAyLnBuZyUyMiUyQyUyMCUyMmFkZGl0aW9uYWxfZmVhdHVyZSUyMiUzQSUyMCUyMlRoaXMlMjBpcyUyMGElMjBzZWNvbmQlMjB2YWx1ZSUyMG9mJTIwYSUyMHRleHQlMjBmZWF0dXJlJTIweW91JTIwYWRkZWQlMjB0byUyMHlvdXIlMjBpbWFnZXMlMjIlN0QlMEElN0IlMjJmaWxlX25hbWUlMjIlM0ElMjAlMjIwMDAzLnBuZyUyMiUyQyUyMCUyMmFkZGl0aW9uYWxfZmVhdHVyZSUyMiUzQSUyMCUyMlRoaXMlMjBpcyUyMGElMjB0aGlyZCUyMHZhbHVlJTIwb2YlMjBhJTIwdGV4dCUyMGZlYXR1cmUlMjB5b3UlMjBhZGRlZCUyMHRvJTIweW91ciUyMGltYWdlcyUyMiU3RA==",highlighted:`{<span class="hljs-comment">&quot;file_name&quot;</span>: <span class="hljs-comment">&quot;0001.png&quot;</span>, <span class="hljs-comment">&quot;additional_feature&quot;</span>: <span class="hljs-comment">&quot;This is a first value of a text feature you added to your images&quot;</span>}
{<span class="hljs-comment">&quot;file_name&quot;</span>: <span class="hljs-comment">&quot;0002.png&quot;</span>, <span class="hljs-comment">&quot;additional_feature&quot;</span>: <span class="hljs-comment">&quot;This is a second value of a text feature you added to your images&quot;</span>}
{<span class="hljs-comment">&quot;file_name&quot;</span>: <span class="hljs-comment">&quot;0003.png&quot;</span>, <span class="hljs-comment">&quot;additional_feature&quot;</span>: <span class="hljs-comment">&quot;This is a third value of a text feature you added to your images&quot;</span>}`,wrap:!1}}),E=new m({props:{code:"JTdCJTIyaW5wdXRfZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMS5wbmclMjIlMkMlMjAlMjJvdXRwdXRfZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMV9vdXRwdXQucG5nJTIyJTdEJTBBJTdCJTIyaW5wdXRfZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMi5wbmclMjIlMkMlMjAlMjJvdXRwdXRfZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMl9vdXRwdXQucG5nJTIyJTdEJTBBJTdCJTIyaW5wdXRfZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMy5wbmclMjIlMkMlMjAlMjJvdXRwdXRfZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwM19vdXRwdXQucG5nJTIyJTdE",highlighted:`<span class="hljs-punctuation">{</span><span class="hljs-attr">&quot;input_file_name&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;0001.png&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-attr">&quot;output_file_name&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;0001_output.png&quot;</span><span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">{</span><span class="hljs-attr">&quot;input_file_name&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;0002.png&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-attr">&quot;output_file_name&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;0002_output.png&quot;</span><span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">{</span><span class="hljs-attr">&quot;input_file_name&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;0003.png&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-attr">&quot;output_file_name&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;0003_output.png&quot;</span><span class="hljs-punctuation">}</span>`,wrap:!1}}),N=new m({props:{code:"JTdCJTIyZnJhbWVzX2ZpbGVfbmFtZXMlMjIlM0ElMjAlNUIlMjIwMDAxX3QwLnBuZyUyMiUyQyUyMCUyMjAwMDFfdDEucG5nJTIyJTVEJTJDJTIwbGFiZWwlM0ElMjAlMjJtb3ZpbmdfdXAlMjIlN0QlMEElN0IlMjJmcmFtZXNfZmlsZV9uYW1lcyUyMiUzQSUyMCU1QiUyMjAwMDJfdDAucG5nJTIyJTJDJTIwJTIyMDAwMl90MS5wbmclMjIlNUQlMkMlMjBsYWJlbCUzQSUyMCUyMm1vdmluZ19kb3duJTIyJTdEJTBBJTdCJTIyZnJhbWVzX2ZpbGVfbmFtZXMlMjIlM0ElMjAlNUIlMjIwMDAzX3QwLnBuZyUyMiUyQyUyMCUyMjAwMDNfdDEucG5nJTIyJTVEJTJDJTIwbGFiZWwlM0ElMjAlMjJtb3ZpbmdfcmlnaHQlMjIlN0Q=",highlighted:`{<span class="hljs-string">&quot;frames_file_names&quot;</span>: <span class="hljs-selector-attr">[<span class="hljs-string">&quot;0001_t0.png&quot;</span>, <span class="hljs-string">&quot;0001_t1.png&quot;</span>]</span>, <span class="hljs-selector-tag">label</span>: <span class="hljs-string">&quot;moving_up&quot;</span>}
{<span class="hljs-string">&quot;frames_file_names&quot;</span>: <span class="hljs-selector-attr">[<span class="hljs-string">&quot;0002_t0.png&quot;</span>, <span class="hljs-string">&quot;0002_t1.png&quot;</span>]</span>, <span class="hljs-selector-tag">label</span>: <span class="hljs-string">&quot;moving_down&quot;</span>}
{<span class="hljs-string">&quot;frames_file_names&quot;</span>: <span class="hljs-selector-attr">[<span class="hljs-string">&quot;0003_t0.png&quot;</span>, <span class="hljs-string">&quot;0003_t1.png&quot;</span>]</span>, <span class="hljs-selector-tag">label</span>: <span class="hljs-string">&quot;moving_right&quot;</span>}`,wrap:!1}}),D=new qs({props:{title:"Image captioning",local:"image-captioning",headingTag:"h3"}}),X=new m({props:{code:"ZmlsZV9uYW1lJTJDdGV4dCUwQTAwMDEucG5nJTJDVGhpcyUyMGlzJTIwYSUyMGdvbGRlbiUyMHJldHJpZXZlciUyMHBsYXlpbmclMjB3aXRoJTIwYSUyMGJhbGwlMEEwMDAyLnBuZyUyQ0ElMjBnZXJtYW4lMjBzaGVwaGVyZCUwQTAwMDMucG5nJTJDT25lJTIwY2hpaHVhaHVh",highlighted:`file_name,text
<span class="hljs-number">0001</span><span class="hljs-selector-class">.png</span>,This is <span class="hljs-selector-tag">a</span> golden retriever playing with <span class="hljs-selector-tag">a</span> ball
<span class="hljs-number">0002</span><span class="hljs-selector-class">.png</span>,A german shepherd
<span class="hljs-number">0003</span><span class="hljs-selector-class">.png</span>,One chihuahua`,wrap:!1}}),P=new m({props:{code:"ZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJpbWFnZWZvbGRlciUyMiUyQyUyMGRhdGFfZGlyJTNEJTIyJTJGcGF0aCUyRnRvJTJGZm9sZGVyJTIyJTJDJTIwc3BsaXQlM0QlMjJ0cmFpbiUyMiklMEFkYXRhc2V0JTVCMCU1RCU1QiUyMnRleHQlMjIlNUQ=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = load_dataset(<span class="hljs-string">&quot;imagefolder&quot;</span>, data_dir=<span class="hljs-string">&quot;/path/to/folder&quot;</span>, split=<span class="hljs-string">&quot;train&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;text&quot;</span>]
<span class="hljs-string">&quot;This is a golden retriever playing with a ball&quot;</span>`,wrap:!1}}),O=new qs({props:{title:"Object detection",local:"object-detection",headingTag:"h3"}}),ss=new m({props:{code:"JTdCJTIyZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMS5wbmclMjIlMkMlMjAlMjJvYmplY3RzJTIyJTNBJTIwJTdCJTIyYmJveCUyMiUzQSUyMCU1QiU1QjMwMi4wJTJDJTIwMTA5LjAlMkMlMjA3My4wJTJDJTIwNTIuMCU1RCU1RCUyQyUyMCUyMmNhdGVnb3JpZXMlMjIlM0ElMjAlNUIwJTVEJTdEJTdEJTBBJTdCJTIyZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMi5wbmclMjIlMkMlMjAlMjJvYmplY3RzJTIyJTNBJTIwJTdCJTIyYmJveCUyMiUzQSUyMCU1QiU1QjgxMC4wJTJDJTIwMTAwLjAlMkMlMjA1Ny4wJTJDJTIwMjguMCU1RCU1RCUyQyUyMCUyMmNhdGVnb3JpZXMlMjIlM0ElMjAlNUIxJTVEJTdEJTdEJTBBJTdCJTIyZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMy5wbmclMjIlMkMlMjAlMjJvYmplY3RzJTIyJTNBJTIwJTdCJTIyYmJveCUyMiUzQSUyMCU1QiU1QjE2MC4wJTJDJTIwMzEuMCUyQyUyMDI0OC4wJTJDJTIwNjE2LjAlNUQlMkMlMjAlNUI3NDEuMCUyQyUyMDY4LjAlMkMlMjAyMDIuMCUyQyUyMDQwMS4wJTVEJTVEJTJDJTIwJTIyY2F0ZWdvcmllcyUyMiUzQSUyMCU1QjIlMkMlMjAyJTVEJTdEJTdE",highlighted:`{<span class="hljs-string">&quot;file_name&quot;</span>: <span class="hljs-string">&quot;0001.png&quot;</span>, <span class="hljs-string">&quot;objects&quot;</span>: {<span class="hljs-string">&quot;bbox&quot;</span>: <span class="hljs-string">[[302.0, 109.0, 73.0, 52.0]]</span>, <span class="hljs-string">&quot;categories&quot;</span>: [<span class="hljs-number">0</span>]}}
{<span class="hljs-string">&quot;file_name&quot;</span>: <span class="hljs-string">&quot;0002.png&quot;</span>, <span class="hljs-string">&quot;objects&quot;</span>: {<span class="hljs-string">&quot;bbox&quot;</span>: <span class="hljs-string">[[810.0, 100.0, 57.0, 28.0]]</span>, <span class="hljs-string">&quot;categories&quot;</span>: [<span class="hljs-number">1</span>]}}
{<span class="hljs-string">&quot;file_name&quot;</span>: <span class="hljs-string">&quot;0003.png&quot;</span>, <span class="hljs-string">&quot;objects&quot;</span>: {<span class="hljs-string">&quot;bbox&quot;</span>: <span class="hljs-string">[[160.0, 31.0, 248.0, 616.0], [741.0, 68.0, 202.0, 401.0]]</span>, <span class="hljs-string">&quot;categories&quot;</span>: [<span class="hljs-number">2</span>, <span class="hljs-number">2</span>]}}`,wrap:!1}}),as=new m({props:{code:"ZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJpbWFnZWZvbGRlciUyMiUyQyUyMGRhdGFfZGlyJTNEJTIyJTJGcGF0aCUyRnRvJTJGZm9sZGVyJTIyJTJDJTIwc3BsaXQlM0QlMjJ0cmFpbiUyMiklMEFkYXRhc2V0JTVCMCU1RCU1QiUyMm9iamVjdHMlMjIlNUQ=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = load_dataset(<span class="hljs-string">&quot;imagefolder&quot;</span>, data_dir=<span class="hljs-string">&quot;/path/to/folder&quot;</span>, split=<span class="hljs-string">&quot;train&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;objects&quot;</span>]
{<span class="hljs-string">&quot;bbox&quot;</span>: [[<span class="hljs-number">302.0</span>, <span class="hljs-number">109.0</span>, <span class="hljs-number">73.0</span>, <span class="hljs-number">52.0</span>]], <span class="hljs-string">&quot;categories&quot;</span>: [<span class="hljs-number">0</span>]}`,wrap:!1}}),ls=new qs({props:{title:"Upload dataset to the Hub",local:"upload-dataset-to-the-hub",headingTag:"h3"}}),ps=new m({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJpbWFnZWZvbGRlciUyMiUyQyUyMGRhdGFfZGlyJTNEJTIyJTJGcGF0aCUyRnRvJTJGZm9sZGVyJTIyJTJDJTIwc3BsaXQlM0QlMjJ0cmFpbiUyMiklMEFkYXRhc2V0LnB1c2hfdG9faHViKCUyMnN0ZXZobGl1JTJGbXktaW1hZ2UtY2FwdGlvbmluZy1kYXRhc2V0JTIyKQ==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = load_dataset(<span class="hljs-string">&quot;imagefolder&quot;</span>, data_dir=<span class="hljs-string">&quot;/path/to/folder&quot;</span>, split=<span class="hljs-string">&quot;train&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset.push_to_hub(<span class="hljs-string">&quot;stevhliu/my-image-captioning-dataset&quot;</span>)`,wrap:!1}}),is=new qs({props:{title:"WebDataset",local:"webdataset",headingTag:"h2"}}),Ms=new m({props:{code:"Zm9sZGVyJTJGdHJhaW4lMkYwMDAwMC50YXIlMEFmb2xkZXIlMkZ0cmFpbiUyRjAwMDAxLnRhciUwQWZvbGRlciUyRnRyYWluJTJGMDAwMDIudGFyJTBBLi4u",highlighted:`folder<span class="hljs-regexp">/train/</span><span class="hljs-number">00000</span>.tar
folder<span class="hljs-regexp">/train/</span><span class="hljs-number">00001</span>.tar
folder<span class="hljs-regexp">/train/</span><span class="hljs-number">00002</span>.tar
...`,wrap:!1}}),cs=new m({props:{code:"ZTM5ODcxZmQ5ZmQ3NGY1NS5qcGclMEFlMzk4NzFmZDlmZDc0ZjU1Lmpzb24lMEFmMThiOTE1ODVjNGQzZjNlLmpwZyUwQWYxOGI5MTU4NWM0ZDNmM2UuanNvbiUwQWVkZTZlNjZiMmZiNTlhYWIuanBnJTBBZWRlNmU2NmIyZmI1OWFhYi5qc29uJTBBZWQ2MDBkNTdmY2VlNGY5NC5qcGclMEFlZDYwMGQ1N2ZjZWU0Zjk0Lmpzb24lMEEuLi4=",highlighted:`e<span class="hljs-symbol">39871f</span>d<span class="hljs-symbol">9f</span>d<span class="hljs-symbol">74f</span>55.<span class="hljs-keyword">jpg
</span>e<span class="hljs-symbol">39871f</span>d<span class="hljs-symbol">9f</span>d<span class="hljs-symbol">74f</span>55.<span class="hljs-keyword">json
</span>f<span class="hljs-symbol">18b</span>91585c4d<span class="hljs-symbol">3f</span>3e.<span class="hljs-keyword">jpg
</span>f<span class="hljs-symbol">18b</span>91585c4d<span class="hljs-symbol">3f</span>3e.<span class="hljs-keyword">json
</span>ede6e<span class="hljs-symbol">66b</span><span class="hljs-symbol">2f</span>b59aab.<span class="hljs-keyword">jpg
</span>ede6e<span class="hljs-symbol">66b</span><span class="hljs-symbol">2f</span>b59aab.<span class="hljs-keyword">json
</span>ed600d<span class="hljs-symbol">57f</span>cee<span class="hljs-symbol">4f</span>94.<span class="hljs-keyword">jpg
</span>ed600d<span class="hljs-symbol">57f</span>cee<span class="hljs-symbol">4f</span>94.<span class="hljs-keyword">json
</span>...`,wrap:!1}}),rs=new m({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJ3ZWJkYXRhc2V0JTIyJTJDJTIwZGF0YV9kaXIlM0QlMjIlMkZwYXRoJTJGdG8lMkZmb2xkZXIlMjIlMkMlMjBzcGxpdCUzRCUyMnRyYWluJTIyKSUwQWRhdGFzZXQlNUIwJTVEJTVCJTIyanNvbiUyMiU1RA==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = load_dataset(<span class="hljs-string">&quot;webdataset&quot;</span>, data_dir=<span class="hljs-string">&quot;/path/to/folder&quot;</span>, split=<span class="hljs-string">&quot;train&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;json&quot;</span>]
{<span class="hljs-string">&quot;bbox&quot;</span>: [[<span class="hljs-number">302.0</span>, <span class="hljs-number">109.0</span>, <span class="hljs-number">73.0</span>, <span class="hljs-number">52.0</span>]], <span class="hljs-string">&quot;categories&quot;</span>: [<span class="hljs-number">0</span>]}`,wrap:!1}}),hs=new m({props:{code:"ZTM5ODcxZmQ5ZmQ3NGY1NS5pbnB1dC5qcGclMEFlMzk4NzFmZDlmZDc0ZjU1Lm91dHB1dC5qcGclMEFlMzk4NzFmZDlmZDc0ZjU1Lmpzb24lMEFmMThiOTE1ODVjNGQzZjNlLmlucHV0LmpwZyUwQWYxOGI5MTU4NWM0ZDNmM2Uub3V0cHV0LmpwZyUwQWYxOGI5MTU4NWM0ZDNmM2UuanNvbiUwQS4uLg==",highlighted:`e39871fd9fd74f55<span class="hljs-selector-class">.input</span><span class="hljs-selector-class">.jpg</span>
e39871fd9fd74f55<span class="hljs-selector-class">.output</span><span class="hljs-selector-class">.jpg</span>
e39871fd9fd74f55<span class="hljs-selector-class">.json</span>
f18b91585c4d3f3e<span class="hljs-selector-class">.input</span><span class="hljs-selector-class">.jpg</span>
f18b91585c4d3f3e<span class="hljs-selector-class">.output</span><span class="hljs-selector-class">.jpg</span>
f18b91585c4d3f3e<span class="hljs-selector-class">.json</span>
...`,wrap:!1}}),Js=new qs({props:{title:"Lance",local:"lance",headingTag:"h2"}}),Us=new m({props:{code:"JTdCJ2NhcHRpb24nJTNBJTIwJ0NvcmRlbGlhJTIwYW5kJTIwRHVkbGV5JTIwb24lMjB0aGVpciUyMHdlZGRpbmclMjAlMjBkYXklMjBsYXN0JTIweWVhciclMkMlMjAnaGVpZ2h0JyUzQSUyMDMxNSUyQyUyMCd3aWR0aCclM0ElMjAyMzMlN0QlMEElN0InY2FwdGlvbiclM0ElMjAnU3RhdGlzdGljcyUyMG9uJTIwY2hhbGxlbmdlcyUyMGZvciUyMGF1dG9tYXRpb24lMjBpbiUyMDIwMjEnJTJDJTIwJ2hlaWdodCclM0ElMjAyOTklMkMlMjAnd2lkdGgnJTNBJTIwNzAxJTdE",highlighted:`{&#x27;caption&#x27;: &#x27;Cordelia and Dudley on their wedding day last year&#x27;, &#x27;height&#x27;: 315, &#x27;width&#x27;: 233}
{&#x27;caption&#x27;: &#x27;Statistics on challenges for automation in 2021&#x27;, &#x27;height&#x27;: 299, &#x27;width&#x27;: 701}`,wrap:!1}}),bs=new m({props:{code:"aW1wb3J0JTIwbGFuY2UlMEFpbXBvcnQlMjBweWFycm93JTIwYXMlMjBwYSUwQSUwQXNjaGVtYSUyMCUzRCUyMHBhLnNjaGVtYSglMEElMjAlMjAlMjAlMjAlNUIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBwYS5maWVsZCglMjJjYXB0aW9uJTIyJTJDJTIwcGEudXRmOCgpKSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHBhLmZpZWxkKCUyMmhlaWdodCUyMiUyQyUyMHBhLmludDMyKCkpJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcGEuZmllbGQoJTIyd2lkdGglMjIlMkMlMjBwYS5pbnQzMigpKSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMyUyMC4uLiUyMGFkZCUyMGFueSUyMGFkZGl0aW9uYWwlMjBtZXRhZGF0YSUyMGNvbHVtbnMlMjB5b3UlMjB3YW50JTIwaGVyZSUyMC4uLiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHBhLmZpZWxkKCUyMmltYWdlJTIyJTJDJTIwcGEuYmluYXJ5KCkpJTJDJTBBJTIwJTIwJTIwJTIwJTVEJTBBKSUwQSUwQSUyMyUyMFByb3ZpZGUlMjBpbWFnZSUyMGZpbGVzJTIwYWxvbmdzaWRlJTIwbWV0YWRhdGElMEFyb3dzJTIwJTNEJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyaW1hZ2VfcGF0aCUyMiUzQSUyMCUyMiUyRnBhdGglMkZ0byUyRmltYWdlcyUyRjAwMDEuanBnJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyY2FwdGlvbiUyMiUzQSUyMCUyMkNvcmRlbGlhJTIwYW5kJTIwRHVkbGV5JTIwb24lMjB0aGVpciUyMHdlZGRpbmclMjAlMjBkYXklMjBsYXN0JTIweWVhciUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmhlaWdodCUyMiUzQSUyMDMxNSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMndpZHRoJTIyJTNBJTIwMjMzJTJDJTBBJTIwJTIwJTIwJTIwJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyaW1hZ2VfcGF0aCUyMiUzQSUyMCUyMiUyRnBhdGglMkZ0byUyRmltYWdlcyUyRjAwMDIuanBnJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyY2FwdGlvbiUyMiUzQSUyMCUyMlN0YXRpc3RpY3MlMjBvbiUyMGNoYWxsZW5nZXMlMjBmb3IlMjBhdXRvbWF0aW9uJTIwaW4lMjAyMDIxJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyaGVpZ2h0JTIyJTNBJTIwMjk5JTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyd2lkdGglMjIlM0ElMjA3MDElMkMlMEElMjAlMjAlMjAlMjAlN0QlMkMlMEElNUQlMEElMEFpbWFnZV9ieXRlcyUyMCUzRCUyMCU1QiU1RCUwQWZvciUyMHIlMjBpbiUyMHJvd3MlM0ElMEElMjAlMjAlMjAlMjB3aXRoJTIwb3BlbihyJTVCJTIyaW1hZ2VfcGF0aCUyMiU1RCUyQyUyMCUyMnJiJTIyKSUyMGFzJTIwZiUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGltYWdlX2J5dGVzLmFwcGVuZChmLnJlYWQoKSklMEElMEF0YWJsZSUyMCUzRCUyMHBhLnRhYmxlKCUwQSUyMCUyMCUyMCUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmNhcHRpb24lMjIlM0ElMjAlNUJyJTVCJTIyY2FwdGlvbiUyMiU1RCUyMGZvciUyMHIlMjBpbiUyMHJvd3MlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJoZWlnaHQlMjIlM0ElMjAlNUJyJTVCJTIyaGVpZ2h0JTIyJTVEJTIwZm9yJTIwciUyMGluJTIwcm93cyU1RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMndpZHRoJTIyJTNBJTIwJTVCciU1QiUyMndpZHRoJTIyJTVEJTIwZm9yJTIwciUyMGluJTIwcm93cyU1RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmltYWdlJTIyJTNBJTIwaW1hZ2VfYnl0ZXMlMkMlMEElMjAlMjAlMjAlMjAlN0QlMkMlMEElMjAlMjAlMjAlMjBzY2hlbWElM0RzY2hlbWElMkMlMEEpJTBBJTBBZHMlMjAlM0QlMjBsYW5jZS53cml0ZV9kYXRhc2V0KCUwQSUyMCUyMCUyMCUyMHRhYmxlJTJDJTBBJTIwJTIwJTIwJTIwJTIyLiUyRmltYWdlcy5sYW5jZSUyMiUyQyUwQSUyMCUyMCUyMCUyMHNjaGVtYSUzRHNjaGVtYSUyQyUwQSUyMCUyMCUyMCUyMG1vZGUlM0QlMjJjcmVhdGUlMjIlMkMlMEEp",highlighted:`<span class="hljs-keyword">import</span> lance
<span class="hljs-keyword">import</span> pyarrow <span class="hljs-keyword">as</span> pa
schema = pa.schema(
[
pa.field(<span class="hljs-string">&quot;caption&quot;</span>, pa.utf8()),
pa.field(<span class="hljs-string">&quot;height&quot;</span>, pa.int32()),
pa.field(<span class="hljs-string">&quot;width&quot;</span>, pa.int32()),
<span class="hljs-comment"># ... add any additional metadata columns you want here ...</span>
pa.field(<span class="hljs-string">&quot;image&quot;</span>, pa.binary()),
]
)
<span class="hljs-comment"># Provide image files alongside metadata</span>
rows = [
{
<span class="hljs-string">&quot;image_path&quot;</span>: <span class="hljs-string">&quot;/path/to/images/0001.jpg&quot;</span>,
<span class="hljs-string">&quot;caption&quot;</span>: <span class="hljs-string">&quot;Cordelia and Dudley on their wedding day last year&quot;</span>,
<span class="hljs-string">&quot;height&quot;</span>: <span class="hljs-number">315</span>,
<span class="hljs-string">&quot;width&quot;</span>: <span class="hljs-number">233</span>,
},
{
<span class="hljs-string">&quot;image_path&quot;</span>: <span class="hljs-string">&quot;/path/to/images/0002.jpg&quot;</span>,
<span class="hljs-string">&quot;caption&quot;</span>: <span class="hljs-string">&quot;Statistics on challenges for automation in 2021&quot;</span>,
<span class="hljs-string">&quot;height&quot;</span>: <span class="hljs-number">299</span>,
<span class="hljs-string">&quot;width&quot;</span>: <span class="hljs-number">701</span>,
},
]
image_bytes = []
<span class="hljs-keyword">for</span> r <span class="hljs-keyword">in</span> rows:
<span class="hljs-keyword">with</span> <span class="hljs-built_in">open</span>(r[<span class="hljs-string">&quot;image_path&quot;</span>], <span class="hljs-string">&quot;rb&quot;</span>) <span class="hljs-keyword">as</span> f:
image_bytes.append(f.read())
table = pa.table(
{
<span class="hljs-string">&quot;caption&quot;</span>: [r[<span class="hljs-string">&quot;caption&quot;</span>] <span class="hljs-keyword">for</span> r <span class="hljs-keyword">in</span> rows],
<span class="hljs-string">&quot;height&quot;</span>: [r[<span class="hljs-string">&quot;height&quot;</span>] <span class="hljs-keyword">for</span> r <span class="hljs-keyword">in</span> rows],
<span class="hljs-string">&quot;width&quot;</span>: [r[<span class="hljs-string">&quot;width&quot;</span>] <span class="hljs-keyword">for</span> r <span class="hljs-keyword">in</span> rows],
<span class="hljs-string">&quot;image&quot;</span>: image_bytes,
},
schema=schema,
)
ds = lance.write_dataset(
table,
<span class="hljs-string">&quot;./images.lance&quot;</span>,
schema=schema,
mode=<span class="hljs-string">&quot;create&quot;</span>,
)`,wrap:!1}}),Cs=new m({props:{code:"JTJCLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0lMkItLS0tLS0tLSUyQi0tLS0tLS0lMkItLS0tLSUyQi0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLSUyQiUwQSU3QyUyMGNhcHRpb24lMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0MlMjBoZWlnaHQlMjAlN0MlMjB3aWR0aCUyMCU3QyUyMC4uLiUyMCU3QyUyMGltYWdlJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdDJTBBJTJCLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0lMkItLS0tLS0tLSUyQi0tLS0tLS0lMkItLS0tLSUyQi0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLSUyQiUwQSU3QyUyMCUyMkNvcmRlbGlhJTIwYW5kJTIwRHVkbGV5JTIwb24lMjB0aGVpciUyMHdlZGRpbmclMjAuLi4lMjIlMjAlMjAlMjAlMjAlN0MlMjAzMTUlMjAlMjAlMjAlMjAlN0MlMjAyMzMlMjAlMjAlMjAlN0MlMjAuLi4lMjAlN0MlMjBiJTIyJTVDJTVDeGZmJTVDJTVDeGQ4JTVDJTVDeGZmLi4uJTVDJTVDeGQ5JTIyJTIwJTIwJTIwJTdDJTBBJTdDJTIwJTIyU3RhdGlzdGljcyUyMG9uJTIwY2hhbGxlbmdlcyUyMGZvciUyMGF1dG9tYXRpb24lMjAuLi4lMjIlMjAlN0MlMjAyOTklMjAlMjAlMjAlMjAlN0MlMjA3MDElMjAlMjAlMjAlN0MlMjAuLi4lMjAlN0MlMjBiJTIyJTVDJTVDeGZmJTVDJTVDeGQ4JTVDJTVDeGZmLi4uJTVDJTVDeGQ5JTIyJTIwJTIwJTIwJTdDJTBBJTJCLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0lMkItLS0tLS0tLSUyQi0tLS0tLS0lMkItLS0tLSUyQi0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLSUyQg==",highlighted:`+-----------------------------------------------+--------+-------+-----+------------------------------+
| caption | height | width | ... | image |
+-----------------------------------------------+--------+-------+-----+------------------------------+
| &quot;Cordelia and Dudley on their wedding ...&quot; | 315 | 233 | ... | b&quot;\\\\xff\\\\xd8\\\\xff...\\\\xd9&quot; |
| &quot;Statistics on challenges for automation ...&quot; | 299 | 701 | ... | b&quot;\\\\xff\\\\xd8\\\\xff...\\\\xd9&quot; |
+-----------------------------------------------+--------+-------+-----+------------------------------+`,wrap:!1}}),$s=new Ya({props:{source:"https://github.com/huggingface/datasets/blob/main/docs/source/image_dataset.mdx"}}),{c(){h=p("meta"),Bs=e(),ks=p("p"),Ss=e(),M(w.$$.fragment),xs=e(),M(f.$$.fragment),As=e(),U=p("p"),U.textContent=Pt,Ws=e(),g=p("ul"),g.innerHTML=Ot,Rs=e(),j=p("blockquote"),j.innerHTML=Kt,_s=e(),M(b.$$.fragment),Qs=e(),I=p("p"),I.innerHTML=sa,Ys=e(),J=p("blockquote"),J.innerHTML=ta,Vs=e(),C=p("p"),C.innerHTML=aa,Es=e(),M(Z.$$.fragment),Hs=e(),G=p("p"),G.innerHTML=la,Ns=e(),M($.$$.fragment),Ds=e(),q=p("p"),q.innerHTML=ea,Fs=e(),M(k.$$.fragment),Xs=e(),L=p("p"),L.innerHTML=na,zs=e(),M(v.$$.fragment),Ps=e(),T=p("blockquote"),T.innerHTML=pa,Os=e(),B=p("p"),B.innerHTML=ia,Ks=e(),M(S.$$.fragment),st=e(),x=p("p"),x.textContent=oa,tt=e(),M(A.$$.fragment),at=e(),W=p("p"),W.innerHTML=Ma,lt=e(),M(R.$$.fragment),et=e(),_=p("p"),_.innerHTML=ya,nt=e(),M(Q.$$.fragment),pt=e(),Y=p("p"),Y.innerHTML=ca,it=e(),V=p("p"),V.textContent=da,ot=e(),M(E.$$.fragment),Mt=e(),H=p("p"),H.innerHTML=ua,yt=e(),M(N.$$.fragment),ct=e(),M(D.$$.fragment),dt=e(),F=p("p"),F.innerHTML=ra,ut=e(),M(X.$$.fragment),rt=e(),z=p("p"),z.innerHTML=ma,mt=e(),M(P.$$.fragment),ht=e(),M(O.$$.fragment),jt=e(),K=p("p"),K.innerHTML=ha,Jt=e(),M(ss.$$.fragment),Tt=e(),ts=p("p"),ts.innerHTML=ja,wt=e(),M(as.$$.fragment),ft=e(),M(ls.$$.fragment),Ut=e(),es=p("p"),es.innerHTML=Ja,gt=e(),ns=p("p"),ns.innerHTML=Ta,bt=e(),M(ps.$$.fragment),It=e(),M(is.$$.fragment),Ct=e(),os=p("p"),os.innerHTML=wa,Zt=e(),M(Ms.$$.fragment),Gt=e(),ys=p("p"),ys.textContent=fa,$t=e(),M(cs.$$.fragment),qt=e(),ds=p("p"),ds.textContent=Ua,kt=e(),us=p("p"),us.textContent=ga,Lt=e(),M(rs.$$.fragment),vt=e(),ms=p("p"),ms.textContent=ba,Bt=e(),M(hs.$$.fragment),St=e(),js=p("p"),js.innerHTML=Ia,xt=e(),M(Js.$$.fragment),At=e(),Ts=p("p"),Ts.innerHTML=Ca,Wt=e(),ws=p("p"),ws.innerHTML=Za,Rt=e(),fs=p("p"),fs.textContent=Ga,_t=e(),M(Us.$$.fragment),Qt=e(),gs=p("p"),gs.innerHTML=$a,Yt=e(),M(bs.$$.fragment),Vt=e(),Is=p("p"),Is.innerHTML=qa,Et=e(),M(Cs.$$.fragment),Ht=e(),Zs=p("p"),Zs.innerHTML=ka,Nt=e(),Gs=p("p"),Gs.innerHTML=La,Dt=e(),M($s.$$.fragment),Ft=e(),Ls=p("p"),this.h()},l(s){const t=Ra("svelte-u9bgzb",document.head);h=i(t,"META",{name:!0,content:!0}),t.forEach(a),Bs=n(s),ks=i(s,"P",{}),va(ks).forEach(a),Ss=n(s),y(w.$$.fragment,s),xs=n(s),y(f.$$.fragment,s),As=n(s),U=i(s,"P",{"data-svelte-h":!0}),o(U)!=="svelte-wdlz34"&&(U.textContent=Pt),Ws=n(s),g=i(s,"UL",{"data-svelte-h":!0}),o(g)!=="svelte-9696au"&&(g.innerHTML=Ot),Rs=n(s),j=i(s,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),o(j)!=="svelte-diqosn"&&(j.innerHTML=Kt),_s=n(s),y(b.$$.fragment,s),Qs=n(s),I=i(s,"P",{"data-svelte-h":!0}),o(I)!=="svelte-wsqlh"&&(I.innerHTML=sa),Ys=n(s),J=i(s,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),o(J)!=="svelte-bwfs6b"&&(J.innerHTML=ta),Vs=n(s),C=i(s,"P",{"data-svelte-h":!0}),o(C)!=="svelte-14au70e"&&(C.innerHTML=aa),Es=n(s),y(Z.$$.fragment,s),Hs=n(s),G=i(s,"P",{"data-svelte-h":!0}),o(G)!=="svelte-1b2etui"&&(G.innerHTML=la),Ns=n(s),y($.$$.fragment,s),Ds=n(s),q=i(s,"P",{"data-svelte-h":!0}),o(q)!=="svelte-1i01wgw"&&(q.innerHTML=ea),Fs=n(s),y(k.$$.fragment,s),Xs=n(s),L=i(s,"P",{"data-svelte-h":!0}),o(L)!=="svelte-16167ku"&&(L.innerHTML=na),zs=n(s),y(v.$$.fragment,s),Ps=n(s),T=i(s,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),o(T)!=="svelte-ewt4b0"&&(T.innerHTML=pa),Os=n(s),B=i(s,"P",{"data-svelte-h":!0}),o(B)!=="svelte-69vkpo"&&(B.innerHTML=ia),Ks=n(s),y(S.$$.fragment,s),st=n(s),x=i(s,"P",{"data-svelte-h":!0}),o(x)!=="svelte-141fb16"&&(x.textContent=oa),tt=n(s),y(A.$$.fragment,s),at=n(s),W=i(s,"P",{"data-svelte-h":!0}),o(W)!=="svelte-r2z4wq"&&(W.innerHTML=Ma),lt=n(s),y(R.$$.fragment,s),et=n(s),_=i(s,"P",{"data-svelte-h":!0}),o(_)!=="svelte-16ywdcf"&&(_.innerHTML=ya),nt=n(s),y(Q.$$.fragment,s),pt=n(s),Y=i(s,"P",{"data-svelte-h":!0}),o(Y)!=="svelte-z70awt"&&(Y.innerHTML=ca),it=n(s),V=i(s,"P",{"data-svelte-h":!0}),o(V)!=="svelte-1dtduh9"&&(V.textContent=da),ot=n(s),y(E.$$.fragment,s),Mt=n(s),H=i(s,"P",{"data-svelte-h":!0}),o(H)!=="svelte-7gw7jc"&&(H.innerHTML=ua),yt=n(s),y(N.$$.fragment,s),ct=n(s),y(D.$$.fragment,s),dt=n(s),F=i(s,"P",{"data-svelte-h":!0}),o(F)!=="svelte-1dhft8d"&&(F.innerHTML=ra),ut=n(s),y(X.$$.fragment,s),rt=n(s),z=i(s,"P",{"data-svelte-h":!0}),o(z)!=="svelte-16eg7ay"&&(z.innerHTML=ma),mt=n(s),y(P.$$.fragment,s),ht=n(s),y(O.$$.fragment,s),jt=n(s),K=i(s,"P",{"data-svelte-h":!0}),o(K)!=="svelte-12voze"&&(K.innerHTML=ha),Jt=n(s),y(ss.$$.fragment,s),Tt=n(s),ts=i(s,"P",{"data-svelte-h":!0}),o(ts)!=="svelte-bvqyp7"&&(ts.innerHTML=ja),wt=n(s),y(as.$$.fragment,s),ft=n(s),y(ls.$$.fragment,s),Ut=n(s),es=i(s,"P",{"data-svelte-h":!0}),o(es)!=="svelte-15z6k9y"&&(es.innerHTML=Ja),gt=n(s),ns=i(s,"P",{"data-svelte-h":!0}),o(ns)!=="svelte-1qlvso2"&&(ns.innerHTML=Ta),bt=n(s),y(ps.$$.fragment,s),It=n(s),y(is.$$.fragment,s),Ct=n(s),os=i(s,"P",{"data-svelte-h":!0}),o(os)!=="svelte-5q4zkd"&&(os.innerHTML=wa),Zt=n(s),y(Ms.$$.fragment,s),Gt=n(s),ys=i(s,"P",{"data-svelte-h":!0}),o(ys)!=="svelte-69nbou"&&(ys.textContent=fa),$t=n(s),y(cs.$$.fragment,s),qt=n(s),ds=i(s,"P",{"data-svelte-h":!0}),o(ds)!=="svelte-17ij2da"&&(ds.textContent=Ua),kt=n(s),us=i(s,"P",{"data-svelte-h":!0}),o(us)!=="svelte-qj3os9"&&(us.textContent=ga),Lt=n(s),y(rs.$$.fragment,s),vt=n(s),ms=i(s,"P",{"data-svelte-h":!0}),o(ms)!=="svelte-5bh63m"&&(ms.textContent=ba),Bt=n(s),y(hs.$$.fragment,s),St=n(s),js=i(s,"P",{"data-svelte-h":!0}),o(js)!=="svelte-12q6x7f"&&(js.innerHTML=Ia),xt=n(s),y(Js.$$.fragment,s),At=n(s),Ts=i(s,"P",{"data-svelte-h":!0}),o(Ts)!=="svelte-1dxqxjd"&&(Ts.innerHTML=Ca),Wt=n(s),ws=i(s,"P",{"data-svelte-h":!0}),o(ws)!=="svelte-g233yy"&&(ws.innerHTML=Za),Rt=n(s),fs=i(s,"P",{"data-svelte-h":!0}),o(fs)!=="svelte-1plp7iv"&&(fs.textContent=Ga),_t=n(s),y(Us.$$.fragment,s),Qt=n(s),gs=i(s,"P",{"data-svelte-h":!0}),o(gs)!=="svelte-1shj45d"&&(gs.innerHTML=$a),Yt=n(s),y(bs.$$.fragment,s),Vt=n(s),Is=i(s,"P",{"data-svelte-h":!0}),o(Is)!=="svelte-1vmvd2j"&&(Is.innerHTML=qa),Et=n(s),y(Cs.$$.fragment,s),Ht=n(s),Zs=i(s,"P",{"data-svelte-h":!0}),o(Zs)!=="svelte-130lexo"&&(Zs.innerHTML=ka),Nt=n(s),Gs=i(s,"P",{"data-svelte-h":!0}),o(Gs)!=="svelte-1vpwwpu"&&(Gs.innerHTML=La),Dt=n(s),y($s.$$.fragment,s),Ft=n(s),Ls=i(s,"P",{}),va(Ls).forEach(a),this.h()},h(){vs(h,"name","hf:doc:metadata"),vs(h,"content",Ea),vs(j,"class","tip"),vs(J,"class","tip"),vs(T,"class","warning")},m(s,t){_a(document.head,h),l(s,Bs,t),l(s,ks,t),l(s,Ss,t),c(w,s,t),l(s,xs,t),c(f,s,t),l(s,As,t),l(s,U,t),l(s,Ws,t),l(s,g,t),l(s,Rs,t),l(s,j,t),l(s,_s,t),c(b,s,t),l(s,Qs,t),l(s,I,t),l(s,Ys,t),l(s,J,t),l(s,Vs,t),l(s,C,t),l(s,Es,t),c(Z,s,t),l(s,Hs,t),l(s,G,t),l(s,Ns,t),c($,s,t),l(s,Ds,t),l(s,q,t),l(s,Fs,t),c(k,s,t),l(s,Xs,t),l(s,L,t),l(s,zs,t),c(v,s,t),l(s,Ps,t),l(s,T,t),l(s,Os,t),l(s,B,t),l(s,Ks,t),c(S,s,t),l(s,st,t),l(s,x,t),l(s,tt,t),c(A,s,t),l(s,at,t),l(s,W,t),l(s,lt,t),c(R,s,t),l(s,et,t),l(s,_,t),l(s,nt,t),c(Q,s,t),l(s,pt,t),l(s,Y,t),l(s,it,t),l(s,V,t),l(s,ot,t),c(E,s,t),l(s,Mt,t),l(s,H,t),l(s,yt,t),c(N,s,t),l(s,ct,t),c(D,s,t),l(s,dt,t),l(s,F,t),l(s,ut,t),c(X,s,t),l(s,rt,t),l(s,z,t),l(s,mt,t),c(P,s,t),l(s,ht,t),c(O,s,t),l(s,jt,t),l(s,K,t),l(s,Jt,t),c(ss,s,t),l(s,Tt,t),l(s,ts,t),l(s,wt,t),c(as,s,t),l(s,ft,t),c(ls,s,t),l(s,Ut,t),l(s,es,t),l(s,gt,t),l(s,ns,t),l(s,bt,t),c(ps,s,t),l(s,It,t),c(is,s,t),l(s,Ct,t),l(s,os,t),l(s,Zt,t),c(Ms,s,t),l(s,Gt,t),l(s,ys,t),l(s,$t,t),c(cs,s,t),l(s,qt,t),l(s,ds,t),l(s,kt,t),l(s,us,t),l(s,Lt,t),c(rs,s,t),l(s,vt,t),l(s,ms,t),l(s,Bt,t),c(hs,s,t),l(s,St,t),l(s,js,t),l(s,xt,t),c(Js,s,t),l(s,At,t),l(s,Ts,t),l(s,Wt,t),l(s,ws,t),l(s,Rt,t),l(s,fs,t),l(s,_t,t),c(Us,s,t),l(s,Qt,t),l(s,gs,t),l(s,Yt,t),c(bs,s,t),l(s,Vt,t),l(s,Is,t),l(s,Et,t),c(Cs,s,t),l(s,Ht,t),l(s,Zs,t),l(s,Nt,t),l(s,Gs,t),l(s,Dt,t),c($s,s,t),l(s,Ft,t),l(s,Ls,t),Xt=!0},p:Sa,i(s){Xt||(d(w.$$.fragment,s),d(f.$$.fragment,s),d(b.$$.fragment,s),d(Z.$$.fragment,s),d($.$$.fragment,s),d(k.$$.fragment,s),d(v.$$.fragment,s),d(S.$$.fragment,s),d(A.$$.fragment,s),d(R.$$.fragment,s),d(Q.$$.fragment,s),d(E.$$.fragment,s),d(N.$$.fragment,s),d(D.$$.fragment,s),d(X.$$.fragment,s),d(P.$$.fragment,s),d(O.$$.fragment,s),d(ss.$$.fragment,s),d(as.$$.fragment,s),d(ls.$$.fragment,s),d(ps.$$.fragment,s),d(is.$$.fragment,s),d(Ms.$$.fragment,s),d(cs.$$.fragment,s),d(rs.$$.fragment,s),d(hs.$$.fragment,s),d(Js.$$.fragment,s),d(Us.$$.fragment,s),d(bs.$$.fragment,s),d(Cs.$$.fragment,s),d($s.$$.fragment,s),Xt=!0)},o(s){u(w.$$.fragment,s),u(f.$$.fragment,s),u(b.$$.fragment,s),u(Z.$$.fragment,s),u($.$$.fragment,s),u(k.$$.fragment,s),u(v.$$.fragment,s),u(S.$$.fragment,s),u(A.$$.fragment,s),u(R.$$.fragment,s),u(Q.$$.fragment,s),u(E.$$.fragment,s),u(N.$$.fragment,s),u(D.$$.fragment,s),u(X.$$.fragment,s),u(P.$$.fragment,s),u(O.$$.fragment,s),u(ss.$$.fragment,s),u(as.$$.fragment,s),u(ls.$$.fragment,s),u(ps.$$.fragment,s),u(is.$$.fragment,s),u(Ms.$$.fragment,s),u(cs.$$.fragment,s),u(rs.$$.fragment,s),u(hs.$$.fragment,s),u(Js.$$.fragment,s),u(Us.$$.fragment,s),u(bs.$$.fragment,s),u(Cs.$$.fragment,s),u($s.$$.fragment,s),Xt=!1},d(s){s&&(a(Bs),a(ks),a(Ss),a(xs),a(As),a(U),a(Ws),a(g),a(Rs),a(j),a(_s),a(Qs),a(I),a(Ys),a(J),a(Vs),a(C),a(Es),a(Hs),a(G),a(Ns),a(Ds),a(q),a(Fs),a(Xs),a(L),a(zs),a(Ps),a(T),a(Os),a(B),a(Ks),a(st),a(x),a(tt),a(at),a(W),a(lt),a(et),a(_),a(nt),a(pt),a(Y),a(it),a(V),a(ot),a(Mt),a(H),a(yt),a(ct),a(dt),a(F),a(ut),a(rt),a(z),a(mt),a(ht),a(jt),a(K),a(Jt),a(Tt),a(ts),a(wt),a(ft),a(Ut),a(es),a(gt),a(ns),a(bt),a(It),a(Ct),a(os),a(Zt),a(Gt),a(ys),a($t),a(qt),a(ds),a(kt),a(us),a(Lt),a(vt),a(ms),a(Bt),a(St),a(js),a(xt),a(At),a(Ts),a(Wt),a(ws),a(Rt),a(fs),a(_t),a(Qt),a(gs),a(Yt),a(Vt),a(Is),a(Et),a(Ht),a(Zs),a(Nt),a(Gs),a(Dt),a(Ft),a(Ls)),a(h),r(w,s),r(f,s),r(b,s),r(Z,s),r($,s),r(k,s),r(v,s),r(S,s),r(A,s),r(R,s),r(Q,s),r(E,s),r(N,s),r(D,s),r(X,s),r(P,s),r(O,s),r(ss,s),r(as,s),r(ls,s),r(ps,s),r(is,s),r(Ms,s),r(cs,s),r(rs,s),r(hs,s),r(Js,s),r(Us,s),r(bs,s),r(Cs,s),r($s,s)}}}const Ea='{"title":"Create an image dataset","local":"create-an-image-dataset","sections":[{"title":"ImageFolder","local":"imagefolder","sections":[{"title":"Image captioning","local":"image-captioning","sections":[],"depth":3},{"title":"Object detection","local":"object-detection","sections":[],"depth":3},{"title":"Upload dataset to the Hub","local":"upload-dataset-to-the-hub","sections":[],"depth":3}],"depth":2},{"title":"WebDataset","local":"webdataset","sections":[],"depth":2},{"title":"Lance","local":"lance","sections":[],"depth":2}],"depth":1}';function Ha(zt){return xa(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class za extends Aa{constructor(h){super(),Wa(this,h,Ha,Va,Ba,{})}}export{za as component};

Xet Storage Details

Size:
46.1 kB
·
Xet hash:
837f3b24f90c81f62b07bf4b4d8e73f8dbf32cd882d8a355f8541d409dd1d70c

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.