Buckets:

rtrm's picture
download
raw
43 kB
import{s as Ie,n as Ue,o as be}from"../chunks/scheduler.d75c11ed.js";import{S as Le,i as Se,e as i,s as a,c as M,h as ge,a as o,d as e,b as n,f as fe,g as d,j as p,k as St,l as Ae,m as s,n as J,t as T,o as r,p as c}from"../chunks/index.4ec9dfe9.js";import{C as ve,H as Lt,E as Ze}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.ee0f129e.js";import{C as j}from"../chunks/CodeBlock.5919a092.js";function Ce(Rl){let u,vt,gt,Zt,f,Ct,I,$t,U,Yl="This guide will show you how to create a video dataset with <code>VideoFolder</code> and some metadata. This is a no-code solution for quickly creating a video dataset with several thousand videos.",Gt,w,xl='<p>You can control access to your dataset by requiring users to share their contact information first. Check out the <a href="https://huggingface.co/docs/hub/datasets-gated" rel="nofollow">Gated datasets</a> guide for more information about how to enable this feature on the Hub.</p>',Bt,b,kt,L,Nl="The <code>VideoFolder</code> is a dataset builder designed to quickly load a video dataset with several thousand videos without requiring you to write any code.",qt,y,Xl='<p>💡 Take a look at the <a href="repository_structure#split-pattern-hierarchy">Split pattern hierarchy</a> to learn more about how <code>VideoFolder</code> creates dataset splits based on your dataset repository structure.</p>',_t,S,El="<code>VideoFolder</code> automatically infers the class labels of your dataset based on the directory name. 
Store your dataset in a directory structure like:",Wt,g,Qt,A,Hl='If the dataset follows the <code>VideoFolder</code> structure, then you can load it directly with <a href="/docs/datasets/pr_8021/en/package_reference/loading_methods#datasets.load_dataset">load_dataset()</a>:',Rt,v,Yt,Z,Vl='This is equivalent to passing <code>videofolder</code> manually in <a href="/docs/datasets/pr_8021/en/package_reference/loading_methods#datasets.load_dataset">load_dataset()</a> and the directory in <code>data_dir</code>:',xt,C,Nt,$,Dl="You can also use <code>videofolder</code> to load datasets involving multiple splits. To do so, your dataset directory should have the following structure:",Xt,G,Et,m,Fl="<p>If all video files are contained in a single directory or if they are not on the same level of directory structure, <code>label</code> column won’t be added automatically. If you need it, set <code>drop_labels=False</code> explicitly.</p>",Ht,B,zl="If there is additional information you’d like to include about your dataset, like text captions or bounding boxes, add it as a <code>metadata.csv</code> file in your folder. This lets you quickly create datasets for different computer vision tasks like text captioning or object detection. You can also use a JSONL file <code>metadata.jsonl</code> or a Parquet file <code>metadata.parquet</code>.",Vt,k,Dt,q,Pl="Your <code>metadata.csv</code> file must have a <code>file_name</code> or <code>*_file_name</code> field which links video files with their metadata:",Ft,_,zt,W,Ol="or using <code>metadata.jsonl</code>:",Pt,Q,Ot,R,Kl="Here the <code>file_name</code> must be the name of the video file next to the metadata file. More generally, it must be the relative path from the directory containing the metadata to the video file.",Kt,Y,te="It’s possible to point to more than one video in each row in your dataset, for example if both your input and output are videos:",tl,x,ll,N,le="You can also define lists of videos. 
In that case you need to name the field <code>file_names</code> or <code>*_file_names</code>. Here is an example:",el,X,sl,E,al,H,ee="Video captioning datasets have text describing a video. An example <code>metadata.csv</code> may look like:",nl,V,il,D,se="Load the dataset with <code>VideoFolder</code>, and it will create a <code>text</code> column for the video captions:",ol,F,pl,z,Ml,P,ae='Once you’ve created a dataset, you can share it to the using <code>huggingface_hub</code> for example. Make sure you have the <a href="https://huggingface.co/docs/huggingface_hub/index" rel="nofollow">huggingface_hub</a> library installed and you’re logged in to your Hugging Face account (see the <a href="upload_dataset#upload-with-python">Upload with Python tutorial</a> for more details).',dl,O,ne="Upload your dataset with <code>huggingface_hub.HfApi.upload_folder</code>:",Jl,K,Tl,tt,rl,lt,ie=`The <a href="https://github.com/webdataset/webdataset" rel="nofollow">WebDataset</a> format is based on TAR archives and is suitable for big video datasets.
Indeed you can group your videos in TAR archives (e.g. 1GB of videos per TAR archive) and have thousands of TAR archives:`,cl,et,jl,st,oe="In the archives, each example is made of files sharing the same prefix:",ul,at,wl,nt,pe="You can put your videos labels/captions/features using JSON or text files for example.",yl,it,Me='For more details on the WebDataset format and the python library, please check the <a href="https://webdataset.github.io/webdataset" rel="nofollow">WebDataset documentation</a>.',ml,ot,de="Load your WebDataset and it will create on column per file suffix (here “mp4” and “json”):",hl,pt,fl,Mt,Il,dt,Je=`<a href="https://lance.org" rel="nofollow">Lance</a> is an open multimodal lakehouse table format. Lance tables can natively store not only text and scalar values,
but also large binary objects (blobs) such as images, audio, and video alongside your tabular data.`,Ul,Jt,Te=`Lance provides a <a href="https://lance.org/guide/blob/" rel="nofollow">blob API</a> that makes it convenient to store and retrieve large blobs in Lance datasets.
The following example shows how to efficiently browse metadata without loading the heavier video blobs, then fetch the relevant video
blobs on demand.`,bl,Tt,re="Here’s a representative view of what a Lance table storing videos might look like (the <code>video_blob</code> column uses Lance’s blob encoding):",Ll,rt,Sl,ct,gl,jt,ce=`Starting from raw video files on disk plus associated metadata (for example, captions and scores), you can write a self-contained Lance dataset
to a local <code>*.lance</code> directory (a Lance dataset is a directory on disk, and it’s common to name it with a <code>.lance</code> suffix):`,Al,ut,vl,wt,je=`This stores your metadata and video bytes together inside <code>videos.lance/</code>, so you can move/copy a single directory without having to keep
separate <code>*.mp4</code> files in sync.`,Zl,yt,ue=`Here’s a representative view of what a Lance table storing videos might look like (the <code>video_blob</code> column contains data that’s
stored natively as blobs inside the Lance dataset):`,Cl,mt,$l,ht,we=`You can upload the resulting <code>videos.lance/</code> directory to the Hub (for example with <code>huggingface_hub.HfApi.upload_folder</code>) and share it as a
dataset repository, keeping the metadata and videos together as a single artifact.`,Gl,h,ye=`<p>Lance datasets scale to very large sizes (terabytes and beyond) since the data is stored in a columnar format on disk.
See the <a href="https://lance.org/guide/blob/" rel="nofollow">blob API</a> guide for the latest information on best practices for storing and retrieving
large blobs in Lance.</p>`,Bl,ft,me=`When writing large datasets, it’s typically best to limit the size of each individual <code>*.lance</code> file to a few gigabytes at most.
Simply gather the data via an iterator and specify the <code>max_bytes_per_file</code> parameter when writing the dataset:`,kl,It,ql,Ut,he='For more details on working with Lance datasets, see the <a href="https://lance.org" rel="nofollow">Lance documentation</a>.',_l,bt,Wl,At,Ql;return f=new ve({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),I=new Lt({props:{title:"Create a video dataset",local:"create-a-video-dataset",headingTag:"h1"}}),b=new Lt({props:{title:"VideoFolder",local:"videofolder",headingTag:"h2"}}),g=new j({props:{code:"Zm9sZGVyJTJGdHJhaW4lMkZkb2clMkZnb2xkZW5fcmV0cmlldmVyLm1wNCUwQWZvbGRlciUyRnRyYWluJTJGZG9nJTJGZ2VybWFuX3NoZXBoZXJkLm1wNCUwQWZvbGRlciUyRnRyYWluJTJGZG9nJTJGY2hpaHVhaHVhLm1wNCUwQSUwQWZvbGRlciUyRnRyYWluJTJGY2F0JTJGbWFpbmVfY29vbi5tcDQlMEFmb2xkZXIlMkZ0cmFpbiUyRmNhdCUyRmJlbmdhbC5tcDQlMEFmb2xkZXIlMkZ0cmFpbiUyRmNhdCUyRmJpcm1hbi5tcDQ=",highlighted:`folder<span class="hljs-regexp">/train/</span>dog/golden_retriever.mp4
folder<span class="hljs-regexp">/train/</span>dog/german_shepherd.mp4
folder<span class="hljs-regexp">/train/</span>dog/chihuahua.mp4
folder<span class="hljs-regexp">/train/</span>cat/maine_coon.mp4
folder<span class="hljs-regexp">/train/</span>cat/bengal.mp4
folder<span class="hljs-regexp">/train/</span>cat/birman.mp4`,wrap:!1}}),v=new j({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJwYXRoJTJGdG8lMkZmb2xkZXIlMjIp",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = load_dataset(<span class="hljs-string">&quot;path/to/folder&quot;</span>)`,wrap:!1}}),C=new j({props:{code:"ZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJ2aWRlb2ZvbGRlciUyMiUyQyUyMGRhdGFfZGlyJTNEJTIyJTJGcGF0aCUyRnRvJTJGZm9sZGVyJTIyKQ==",highlighted:'<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = load_dataset(<span class="hljs-string">&quot;videofolder&quot;</span>, data_dir=<span class="hljs-string">&quot;/path/to/folder&quot;</span>)',wrap:!1}}),G=new j({props:{code:"Zm9sZGVyJTJGdHJhaW4lMkZkb2clMkZnb2xkZW5fcmV0cmlldmVyLm1wNCUwQWZvbGRlciUyRnRyYWluJTJGY2F0JTJGbWFpbmVfY29vbi5tcDQlMEFmb2xkZXIlMkZ0ZXN0JTJGZG9nJTJGZ2VybWFuX3NoZXBoZXJkLm1wNCUwQWZvbGRlciUyRnRlc3QlMkZjYXQlMkZiZW5nYWwubXA0",highlighted:`folder<span class="hljs-regexp">/train/</span>dog/golden_retriever.mp4
folder<span class="hljs-regexp">/train/</span>cat/maine_coon.mp4
folder<span class="hljs-regexp">/test/</span>dog/german_shepherd.mp4
folder<span class="hljs-regexp">/test/</span>cat/bengal.mp4`,wrap:!1}}),k=new j({props:{code:"Zm9sZGVyJTJGdHJhaW4lMkZtZXRhZGF0YS5jc3YlMEFmb2xkZXIlMkZ0cmFpbiUyRjAwMDEubXA0JTBBZm9sZGVyJTJGdHJhaW4lMkYwMDAyLm1wNCUwQWZvbGRlciUyRnRyYWluJTJGMDAwMy5tcDQ=",highlighted:`folder<span class="hljs-regexp">/train/m</span>etadata.csv
folder<span class="hljs-regexp">/train/</span><span class="hljs-number">0001</span>.mp4
folder<span class="hljs-regexp">/train/</span><span class="hljs-number">0002</span>.mp4
folder<span class="hljs-regexp">/train/</span><span class="hljs-number">0003</span>.mp4`,wrap:!1}}),_=new j({props:{code:"ZmlsZV9uYW1lJTJDYWRkaXRpb25hbF9mZWF0dXJlJTBBMDAwMS5tcDQlMkNUaGlzJTIwaXMlMjBhJTIwZmlyc3QlMjB2YWx1ZSUyMG9mJTIwYSUyMHRleHQlMjBmZWF0dXJlJTIweW91JTIwYWRkZWQlMjB0byUyMHlvdXIlMjB2aWRlb3MlMEEwMDAyLm1wNCUyQ1RoaXMlMjBpcyUyMGElMjBzZWNvbmQlMjB2YWx1ZSUyMG9mJTIwYSUyMHRleHQlMjBmZWF0dXJlJTIweW91JTIwYWRkZWQlMjB0byUyMHlvdXIlMjB2aWRlb3MlMEEwMDAzLm1wNCUyQ1RoaXMlMjBpcyUyMGElMjB0aGlyZCUyMHZhbHVlJTIwb2YlMjBhJTIwdGV4dCUyMGZlYXR1cmUlMjB5b3UlMjBhZGRlZCUyMHRvJTIweW91ciUyMHZpZGVvcw==",highlighted:`file_name,additional_feature
<span class="hljs-number">0001.</span>mp4,This is <span class="hljs-keyword">a</span> <span class="hljs-keyword">first</span> <span class="hljs-built_in">value</span> <span class="hljs-keyword">of</span> <span class="hljs-keyword">a</span> <span class="hljs-keyword">text</span> feature you added <span class="hljs-built_in">to</span> your videos
<span class="hljs-number">0002.</span>mp4,This is <span class="hljs-keyword">a</span> <span class="hljs-keyword">second</span> <span class="hljs-built_in">value</span> <span class="hljs-keyword">of</span> <span class="hljs-keyword">a</span> <span class="hljs-keyword">text</span> feature you added <span class="hljs-built_in">to</span> your videos
<span class="hljs-number">0003.</span>mp4,This is <span class="hljs-keyword">a</span> <span class="hljs-keyword">third</span> <span class="hljs-built_in">value</span> <span class="hljs-keyword">of</span> <span class="hljs-keyword">a</span> <span class="hljs-keyword">text</span> feature you added <span class="hljs-built_in">to</span> your videos`,wrap:!1}}),Q=new j({props:{code:"JTdCJTIyZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMS5tcDQlMjIlMkMlMjAlMjJhZGRpdGlvbmFsX2ZlYXR1cmUlMjIlM0ElMjAlMjJUaGlzJTIwaXMlMjBhJTIwZmlyc3QlMjB2YWx1ZSUyMG9mJTIwYSUyMHRleHQlMjBmZWF0dXJlJTIweW91JTIwYWRkZWQlMjB0byUyMHlvdXIlMjB2aWRlb3MlMjIlN0QlMEElN0IlMjJmaWxlX25hbWUlMjIlM0ElMjAlMjIwMDAyLm1wNCUyMiUyQyUyMCUyMmFkZGl0aW9uYWxfZmVhdHVyZSUyMiUzQSUyMCUyMlRoaXMlMjBpcyUyMGElMjBzZWNvbmQlMjB2YWx1ZSUyMG9mJTIwYSUyMHRleHQlMjBmZWF0dXJlJTIweW91JTIwYWRkZWQlMjB0byUyMHlvdXIlMjB2aWRlb3MlMjIlN0QlMEElN0IlMjJmaWxlX25hbWUlMjIlM0ElMjAlMjIwMDAzLm1wNCUyMiUyQyUyMCUyMmFkZGl0aW9uYWxfZmVhdHVyZSUyMiUzQSUyMCUyMlRoaXMlMjBpcyUyMGElMjB0aGlyZCUyMHZhbHVlJTIwb2YlMjBhJTIwdGV4dCUyMGZlYXR1cmUlMjB5b3UlMjBhZGRlZCUyMHRvJTIweW91ciUyMHZpZGVvcyUyMiU3RA==",highlighted:`{<span class="hljs-comment">&quot;file_name&quot;</span>: <span class="hljs-comment">&quot;0001.mp4&quot;</span>, <span class="hljs-comment">&quot;additional_feature&quot;</span>: <span class="hljs-comment">&quot;This is a first value of a text feature you added to your videos&quot;</span>}
{<span class="hljs-comment">&quot;file_name&quot;</span>: <span class="hljs-comment">&quot;0002.mp4&quot;</span>, <span class="hljs-comment">&quot;additional_feature&quot;</span>: <span class="hljs-comment">&quot;This is a second value of a text feature you added to your videos&quot;</span>}
{<span class="hljs-comment">&quot;file_name&quot;</span>: <span class="hljs-comment">&quot;0003.mp4&quot;</span>, <span class="hljs-comment">&quot;additional_feature&quot;</span>: <span class="hljs-comment">&quot;This is a third value of a text feature you added to your videos&quot;</span>}`,wrap:!1}}),x=new j({props:{code:"JTdCJTIyaW5wdXRfZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMS5tcDQlMjIlMkMlMjAlMjJvdXRwdXRfZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMV9vdXRwdXQubXA0JTIyJTdEJTBBJTdCJTIyaW5wdXRfZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMi5tcDQlMjIlMkMlMjAlMjJvdXRwdXRfZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMl9vdXRwdXQubXA0JTIyJTdEJTBBJTdCJTIyaW5wdXRfZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMy5tcDQlMjIlMkMlMjAlMjJvdXRwdXRfZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwM19vdXRwdXQubXA0JTIyJTdE",highlighted:`<span class="hljs-punctuation">{</span><span class="hljs-attr">&quot;input_file_name&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;0001.mp4&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-attr">&quot;output_file_name&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;0001_output.mp4&quot;</span><span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">{</span><span class="hljs-attr">&quot;input_file_name&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;0002.mp4&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-attr">&quot;output_file_name&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;0002_output.mp4&quot;</span><span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">{</span><span class="hljs-attr">&quot;input_file_name&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;0003.mp4&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-attr">&quot;output_file_name&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;0003_output.mp4&quot;</span><span class="hljs-punctuation">}</span>`,wrap:!1}}),X=new j({props:{code:"JTdCJTIydmlkZW9zX2ZpbGVfbmFtZXMlMjIlM0ElMjAlNUIlMjIwMDAxX2xlZnQubXA0JTIyJTJDJTIwJTIyMDAwMV9yaWdodC5tcDQlMjIlNUQlMkMlMjAlMjJsYWJlbCUyMiUzQSUyMCUyMm1vdmluZ191cCUyMiU3RCUwQSU3QiUyMnZpZGVvc19maWxlX25hbWVzJTIyJTNBJTIwJTVCJTIyMDAwMl9sZWZ0Lm1wNCUyMiUyQyUyMCUyMjAwMDJfcmlnaHQubXA0JTIyJTVEJTJDJTIwJTIybGFiZWwlMjIlM0ElMjAlMjJtb3ZpbmdfZG93biUyMiU3RCUwQSU3QiUyMnZpZGVvc19maWxlX25hbWVzJTIyJTNBJTIwJTVCJTIyMDAwM19sZWZ0Lm1wNCUyMiUyQyUyMCUyMjAwMDNfcmlnaHQubXA0JTIyJTVEJTJDJTIwJTIybGFiZWwlMjIlM0ElMjAlMjJtb3ZpbmdfcmlnaHQlMjIlN0Q=",highlighted:`{<span class="hljs-string">&quot;videos_file_names&quot;</span>: [<span class="hljs-string">&quot;0001_left.mp4&quot;</span>, <span class="hljs-string">&quot;0001_right.mp4&quot;</span>], <span class="hljs-string">&quot;label&quot;</span>: <span class="hljs-string">&quot;moving_up&quot;</span>}
{<span class="hljs-string">&quot;videos_file_names&quot;</span>: [<span class="hljs-string">&quot;0002_left.mp4&quot;</span>, <span class="hljs-string">&quot;0002_right.mp4&quot;</span>], <span class="hljs-string">&quot;label&quot;</span>: <span class="hljs-string">&quot;moving_down&quot;</span>}
{<span class="hljs-string">&quot;videos_file_names&quot;</span>: [<span class="hljs-string">&quot;0003_left.mp4&quot;</span>, <span class="hljs-string">&quot;0003_right.mp4&quot;</span>], <span class="hljs-string">&quot;label&quot;</span>: <span class="hljs-string">&quot;moving_right&quot;</span>}`,wrap:!1}}),E=new Lt({props:{title:"Video captioning",local:"video-captioning",headingTag:"h3"}}),V=new j({props:{code:"ZmlsZV9uYW1lJTJDdGV4dCUwQTAwMDEubXA0JTJDVGhpcyUyMGlzJTIwYSUyMGdvbGRlbiUyMHJldHJpZXZlciUyMHBsYXlpbmclMjB3aXRoJTIwYSUyMGJhbGwlMEEwMDAyLm1wNCUyQ0ElMjBnZXJtYW4lMjBzaGVwaGVyZCUwQTAwMDMubXA0JTJDT25lJTIwY2hpaHVhaHVh",highlighted:`file_name,text
<span class="hljs-number">0001</span><span class="hljs-selector-class">.mp4</span>,This is <span class="hljs-selector-tag">a</span> golden retriever playing with <span class="hljs-selector-tag">a</span> ball
<span class="hljs-number">0002</span><span class="hljs-selector-class">.mp4</span>,A german shepherd
<span class="hljs-number">0003</span><span class="hljs-selector-class">.mp4</span>,One chihuahua`,wrap:!1}}),F=new j({props:{code:"ZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJ2aWRlb2ZvbGRlciUyMiUyQyUyMGRhdGFfZGlyJTNEJTIyJTJGcGF0aCUyRnRvJTJGZm9sZGVyJTIyJTJDJTIwc3BsaXQlM0QlMjJ0cmFpbiUyMiklMEFkYXRhc2V0JTVCMCU1RCU1QiUyMnRleHQlMjIlNUQ=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = load_dataset(<span class="hljs-string">&quot;videofolder&quot;</span>, data_dir=<span class="hljs-string">&quot;/path/to/folder&quot;</span>, split=<span class="hljs-string">&quot;train&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;text&quot;</span>]
<span class="hljs-string">&quot;This is a golden retriever playing with a ball&quot;</span>`,wrap:!1}}),z=new Lt({props:{title:"Upload dataset to the Hub",local:"upload-dataset-to-the-hub",headingTag:"h3"}}),K=new j({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMEhmQXBpJTBBYXBpJTIwJTNEJTIwSGZBcGkoKSUwQSUwQWFwaS51cGxvYWRfZm9sZGVyKCUwQSUyMCUyMCUyMCUyMGZvbGRlcl9wYXRoJTNEJTIyJTJGcGF0aCUyRnRvJTJGbG9jYWwlMkZkYXRhc2V0JTIyJTJDJTBBJTIwJTIwJTIwJTIwcmVwb19pZCUzRCUyMnVzZXJuYW1lJTJGbXktY29vbC1kYXRhc2V0JTIyJTJDJTBBJTIwJTIwJTIwJTIwcmVwb190eXBlJTNEJTIyZGF0YXNldCUyMiUyQyUwQSk=",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> HfApi
api = HfApi()
api.upload_folder(
folder_path=<span class="hljs-string">&quot;/path/to/local/dataset&quot;</span>,
repo_id=<span class="hljs-string">&quot;username/my-cool-dataset&quot;</span>,
repo_type=<span class="hljs-string">&quot;dataset&quot;</span>,
)`,wrap:!1}}),tt=new Lt({props:{title:"WebDataset",local:"webdataset",headingTag:"h2"}}),et=new j({props:{code:"Zm9sZGVyJTJGdHJhaW4lMkYwMDAwMC50YXIlMEFmb2xkZXIlMkZ0cmFpbiUyRjAwMDAxLnRhciUwQWZvbGRlciUyRnRyYWluJTJGMDAwMDIudGFyJTBBLi4u",highlighted:`folder<span class="hljs-regexp">/train/</span><span class="hljs-number">00000</span>.tar
folder<span class="hljs-regexp">/train/</span><span class="hljs-number">00001</span>.tar
folder<span class="hljs-regexp">/train/</span><span class="hljs-number">00002</span>.tar
...`,wrap:!1}}),at=new j({props:{code:"ZTM5ODcxZmQ5ZmQ3NGY1NS5tcDQlMEFlMzk4NzFmZDlmZDc0ZjU1Lmpzb24lMEFmMThiOTE1ODVjNGQzZjNlLm1wNCUwQWYxOGI5MTU4NWM0ZDNmM2UuanNvbiUwQWVkZTZlNjZiMmZiNTlhYWIubXA0JTBBZWRlNmU2NmIyZmI1OWFhYi5qc29uJTBBZWQ2MDBkNTdmY2VlNGY5NC5tcDQlMEFlZDYwMGQ1N2ZjZWU0Zjk0Lmpzb24lMEEuLi4=",highlighted:`e39871fd9fd74f55<span class="hljs-selector-class">.mp4</span>
e39871fd9fd74f55<span class="hljs-selector-class">.json</span>
f18b91585c4d3f3e<span class="hljs-selector-class">.mp4</span>
f18b91585c4d3f3e<span class="hljs-selector-class">.json</span>
ede6e66b2fb59aab<span class="hljs-selector-class">.mp4</span>
ede6e66b2fb59aab<span class="hljs-selector-class">.json</span>
ed600d57fcee4f94<span class="hljs-selector-class">.mp4</span>
ed600d57fcee4f94<span class="hljs-selector-class">.json</span>
...`,wrap:!1}}),pt=new j({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJ3ZWJkYXRhc2V0JTIyJTJDJTIwZGF0YV9kaXIlM0QlMjIlMkZwYXRoJTJGdG8lMkZmb2xkZXIlMjIlMkMlMjBzcGxpdCUzRCUyMnRyYWluJTIyKSUwQWRhdGFzZXQlNUIwJTVEJTVCJTIyanNvbiUyMiU1RA==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = load_dataset(<span class="hljs-string">&quot;webdataset&quot;</span>, data_dir=<span class="hljs-string">&quot;/path/to/folder&quot;</span>, split=<span class="hljs-string">&quot;train&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;json&quot;</span>]
{<span class="hljs-string">&quot;bbox&quot;</span>: [[<span class="hljs-number">302.0</span>, <span class="hljs-number">109.0</span>, <span class="hljs-number">73.0</span>, <span class="hljs-number">52.0</span>]], <span class="hljs-string">&quot;categories&quot;</span>: [<span class="hljs-number">0</span>]}`,wrap:!1}}),Mt=new Lt({props:{title:"Lance",local:"lance",headingTag:"h2"}}),rt=new j({props:{code:"JTJCLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tJTJCLS0tLS0tLS0tLS0tLS0tLS0lMkItLS0tLSUyQi0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLSUyQiUwQSU3QyUyMGNhcHRpb24lMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0MlMjBhZXN0aGV0aWNfc2NvcmUlMjAlN0MlMjAuLi4lMjAlN0MlMjB2aWRlb19ibG9iJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdDJTBBJTJCLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tJTJCLS0tLS0tLS0tLS0tLS0tLS0lMkItLS0tLSUyQi0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLSUyQiUwQSU3QyUyMCUyMmElMjBicmVhdGh0YWtpbmclMjB2aWV3JTIwb2YlMjBhJTIwbW91bnRhLi4uJTIyJTIwJTIwJTIwJTIwJTIwJTdDJTIwNS4yNDAxJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdDJTIwLi4uJTIwJTdDJTIwJTdCcG9zaXRpb24lM0ElMjAwJTJDJTIwc2l6ZSUzQSUyMDQ4NzM4NzklN0QlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0MlMEElN0MlMjAlMjJhJTIwY2FwdGl2YXRpbmclMjB2aWV3JTIwb2YlMjB0aGUlMjBzdW4lMkMlMjBiLi4uJTIyJTIwJTIwJTIwJTIwJTdDJTIwNS4yNDAxJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdDJTIwLi4uJTIwJTdDJTIwJTdCcG9zaXRpb24lM0ElMjA0ODczOTIwJTJDJTIwc2l6ZSUzQSUyMDMzNzA1NzElN0QlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0MlMEElMkItLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0lMkItLS0tLS0tLS0tLS0tLS0tLSUyQi0tLS0tJTJCLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tJTJC",highlighted:`+------------------------------------------+-----------------+-----+------------------------------------------+
| caption | aesthetic_score | ... | video_blob |
+------------------------------------------+-----------------+-----+------------------------------------------+
| &quot;a breathtaking view of a mounta...&quot; | 5.2401 | ... | {position: 0, size: 4873879} |
| &quot;a captivating view of the sun, b...&quot; | 5.2401 | ... | {position: 4873920, size: 3370571} |
+------------------------------------------+-----------------+-----+------------------------------------------+`,wrap:!1}}),ct=new Lt({props:{title:"Write a Lance dataset from raw video files",local:"write-a-lance-dataset-from-raw-video-files",headingTag:"h3"}}),ut=new j({props:{code:"aW1wb3J0JTIwbGFuY2UlMEFpbXBvcnQlMjBweWFycm93JTIwYXMlMjBwYSUwQSUwQWltcG9ydCUyMHVybGxpYi5yZXF1ZXN0JTBBJTBBc2NoZW1hJTIwJTNEJTIwcGEuc2NoZW1hKCUwQSUyMCUyMCUyMCUyMCU1QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHBhLmZpZWxkKCUyMmNhcHRpb24lMjIlMkMlMjBwYS51dGY4KCkpJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcGEuZmllbGQoJTIyYWVzdGhldGljX3Njb3JlJTIyJTJDJTIwcGEuZmxvYXQ2NCgpKSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHBhLmZpZWxkKCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnZpZGVvX2Jsb2IlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBwYS5sYXJnZV9iaW5hcnkoKSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG1ldGFkYXRhJTNEJTdCJTIybGFuY2UtZW5jb2RpbmclM0FibG9iJTIyJTNBJTIwJTIydHJ1ZSUyMiU3RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCklMkMlMEElMjAlMjAlMjAlMjAlNUQlMEEpJTBBJTBBJTIzJTIwUHJvdmlkZSUyMHZpZGVvJTIwZmlsZXMlMjBhbG9uZ3NpZGUlMjBtZXRhZGF0YSUwQXJvd3MlMjAlM0QlMjAlNUIlMEElMjAlMjAlMjAlMjAlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJ2aWRlb19wYXRoJTIyJTNBJTIwJTIyJTJGcGF0aCUyRnRvJTJGdmlkZW9zJTJGMDAwMS5tcDQlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJjYXB0aW9uJTIyJTNBJTIwJTIyYSUyMGJyZWF0aHRha2luZyUyMHZpZXclMjBvZiUyMGElMjBtb3VudGFpbm91cyUyMGxhbmRzY2FwZSUyMC4uLiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmFlc3RoZXRpY19zY29yZSUyMiUzQSUyMDUuMjQwMTM4MDUzODk0MDQzJTJDJTBBJTIwJTIwJTIwJTIwJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIydmlkZW9fcGF0aCUyMiUzQSUyMCUyMjAwMDIubXA0JTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyY2FwdGlvbiUyMiUzQSUyMCUyMmElMjBjYXB0aXZhdGluZyUyMHZpZXclMjBvZiUyMHRoZSUyMHN1biUyQyUyMGJhdGhlZCUyMGluJTIwaHVlcyUyMC4uLiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmFlc3RoZXRpY19zY29yZSUyMiUzQSUyMDUuMjQwMTM3MTAwMjE5NzI3JTJDJTBBJTIwJTIwJTIwJTIwJTdEJTJDJTBBJTVEJT
BBJTBBdmlkZW9fYnl0ZXMlMjAlM0QlMjAlNUIlNUQlMEFmb3IlMjByJTIwaW4lMjByb3dzJTNBJTBBJTIwJTIwJTIwJTIwd2l0aCUyMG9wZW4ociU1QiUyMnZpZGVvX3BhdGglMjIlNUQlMkMlMjAlMjJyYiUyMiklMjBhcyUyMGYlM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjB2aWRlb19ieXRlcy5hcHBlbmQoZi5yZWFkKCkpJTBBJTBBdGFibGUlMjAlM0QlMjBwYS50YWJsZSglMEElMjAlMjAlMjAlMjAlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJjYXB0aW9uJTIyJTNBJTIwJTVCciU1QiUyMmNhcHRpb24lMjIlNUQlMjBmb3IlMjByJTIwaW4lMjByb3dzJTVEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyYWVzdGhldGljX3Njb3JlJTIyJTNBJTIwJTVCciU1QiUyMmFlc3RoZXRpY19zY29yZSUyMiU1RCUyMGZvciUyMHIlMjBpbiUyMHJvd3MlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJ2aWRlb19ibG9iJTIyJTNBJTIwdmlkZW9fYnl0ZXMlMkMlMEElMjAlMjAlMjAlMjAlN0QlMkMlMEElMjAlMjAlMjAlMjBzY2hlbWElM0RzY2hlbWElMkMlMEEpJTBBJTBBZHMlMjAlM0QlMjBsYW5jZS53cml0ZV9kYXRhc2V0KCUwQSUyMCUyMCUyMCUyMHRhYmxlJTJDJTBBJTIwJTIwJTIwJTIwJTIyLiUyRnZpZGVvcy5sYW5jZSUyMiUyQyUwQSUyMCUyMCUyMCUyMHNjaGVtYSUzRHNjaGVtYSUyQyUwQSUyMCUyMCUyMCUyMG1vZGUlM0QlMjJjcmVhdGUlMjIlMkMlMEEp",highlighted:`<span class="hljs-keyword">import</span> lance
<span class="hljs-keyword">import</span> pyarrow <span class="hljs-keyword">as</span> pa
<span class="hljs-keyword">import</span> urllib.request
schema = pa.schema(
[
pa.field(<span class="hljs-string">&quot;caption&quot;</span>, pa.utf8()),
pa.field(<span class="hljs-string">&quot;aesthetic_score&quot;</span>, pa.float64()),
pa.field(
<span class="hljs-string">&quot;video_blob&quot;</span>,
pa.large_binary(),
metadata={<span class="hljs-string">&quot;lance-encoding:blob&quot;</span>: <span class="hljs-string">&quot;true&quot;</span>},
),
]
)
<span class="hljs-comment"># Provide video files alongside metadata</span>
rows = [
{
<span class="hljs-string">&quot;video_path&quot;</span>: <span class="hljs-string">&quot;/path/to/videos/0001.mp4&quot;</span>,
<span class="hljs-string">&quot;caption&quot;</span>: <span class="hljs-string">&quot;a breathtaking view of a mountainous landscape ...&quot;</span>,
<span class="hljs-string">&quot;aesthetic_score&quot;</span>: <span class="hljs-number">5.240138053894043</span>,
},
{
<span class="hljs-string">&quot;video_path&quot;</span>: <span class="hljs-string">&quot;0002.mp4&quot;</span>,
<span class="hljs-string">&quot;caption&quot;</span>: <span class="hljs-string">&quot;a captivating view of the sun, bathed in hues ...&quot;</span>,
<span class="hljs-string">&quot;aesthetic_score&quot;</span>: <span class="hljs-number">5.240137100219727</span>,
},
]
video_bytes = []
<span class="hljs-keyword">for</span> r <span class="hljs-keyword">in</span> rows:
<span class="hljs-keyword">with</span> <span class="hljs-built_in">open</span>(r[<span class="hljs-string">&quot;video_path&quot;</span>], <span class="hljs-string">&quot;rb&quot;</span>) <span class="hljs-keyword">as</span> f:
video_bytes.append(f.read())
table = pa.table(
{
<span class="hljs-string">&quot;caption&quot;</span>: [r[<span class="hljs-string">&quot;caption&quot;</span>] <span class="hljs-keyword">for</span> r <span class="hljs-keyword">in</span> rows],
<span class="hljs-string">&quot;aesthetic_score&quot;</span>: [r[<span class="hljs-string">&quot;aesthetic_score&quot;</span>] <span class="hljs-keyword">for</span> r <span class="hljs-keyword">in</span> rows],
<span class="hljs-string">&quot;video_blob&quot;</span>: video_bytes,
},
schema=schema,
)
ds = lance.write_dataset(
table,
<span class="hljs-string">&quot;./videos.lance&quot;</span>,
schema=schema,
mode=<span class="hljs-string">&quot;create&quot;</span>,
)`,wrap:!1}}),mt=new j({props:{code:"JTJCLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tJTJCLS0tLS0tLS0tLS0tLS0tLS0lMkItLS0tLSUyQi0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLSUyQiUwQSU3QyUyMGNhcHRpb24lMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0MlMjBhZXN0aGV0aWNfc2NvcmUlMjAlN0MlMjAuLi4lMjAlN0MlMjB2aWRlb19ibG9iJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdDJTBBJTJCLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tJTJCLS0tLS0tLS0tLS0tLS0tLS0lMkItLS0tLSUyQi0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLSUyQiUwQSU3QyUyMCUyMmElMjBicmVhdGh0YWtpbmclMjB2aWV3JTIwb2YlMjBhJTIwbW91bnRhLi4uJTIyJTIwJTIwJTIwJTIwJTIwJTdDJTIwNS4yNDAxJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdDJTIwLi4uJTIwJTdDJTIwJTdCcG9zaXRpb24lM0ElMjAwJTJDJTIwc2l6ZSUzQSUyMDQ4NzM4NzklN0QlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0MlMEElN0MlMjAlMjJhJTIwY2FwdGl2YXRpbmclMjB2aWV3JTIwb2YlMjB0aGUlMjBzdW4lMkMlMjBiLi4uJTIyJTIwJTIwJTIwJTIwJTdDJTIwNS4yNDAxJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdDJTIwLi4uJTIwJTdDJTIwJTdCcG9zaXRpb24lM0ElMjA0ODczOTIwJTJDJTIwc2l6ZSUzQSUyMDMzNzA1NzElN0QlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0MlMEElMkItLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0lMkItLS0tLS0tLS0tLS0tLS0tLSUyQi0tLS0tJTJCLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tJTJC",highlighted:`+------------------------------------------+-----------------+-----+------------------------------------------+
| caption | aesthetic_score | ... | video_blob |
+------------------------------------------+-----------------+-----+------------------------------------------+
| &quot;a breathtaking view of a mounta...&quot; | 5.2401 | ... | {position: 0, size: 4873879} |
| &quot;a captivating view of the sun, b...&quot; | 5.2401 | ... | {position: 4873920, size: 3370571} |
+------------------------------------------+-----------------+-----+------------------------------------------+`,wrap:!1}}),It=new j({props:{code:"TUFYX0JZVEVTX1BFUl9GSUxFJTIwJTNEJTIwNSUyMColMjAxMDI0JTIwKiUyMDEwMjQlMjAqJTIwMTAyNCUyMCUyMCUyMyUyMH41JTIwR0IlMjBwZXIlMjBmaWxlJTBBJTBBJTIzJTIwV3JpdGUlMjBhcyUyMExhbmNlJTIwZGF0YXNldCUyMHdpdGglMjBmaWxlJTIwc2l6ZSUyMGxpbWl0cyUyMGZvciUyMGVhY2glMjAqLmxhbmNlJTIwZmlsZSUwQWRzJTIwJTNEJTIwbGFuY2Uud3JpdGVfZGF0YXNldCglMEElMjAlMjAlMjAlMjB0YWJsZSUyQyUwQSUyMCUyMCUyMCUyMCUyMi4lMkZ2aWRlb3MubGFuY2UlMjIlMkMlMEElMjAlMjAlMjAlMjBzY2hlbWElM0RzY2hlbWElMkMlMEElMjAlMjAlMjAlMjBtb2RlJTNEJTIyY3JlYXRlJTIyJTJDJTBBJTIwJTIwJTIwJTIwbWF4X2J5dGVzX3Blcl9maWxlJTNETUFYX0JZVEVTX1BFUl9GSUxFJTJDJTBBKQ==",highlighted:`MAX_BYTES_PER_FILE = <span class="hljs-number">5</span> * <span class="hljs-number">1024</span> * <span class="hljs-number">1024</span> * <span class="hljs-number">1024</span> <span class="hljs-comment"># ~5 GB per file</span>
<span class="hljs-comment"># Write as Lance dataset with file size limits for each *.lance file</span>
ds = lance.write_dataset(
table,
<span class="hljs-string">&quot;./videos.lance&quot;</span>,
schema=schema,
mode=<span class="hljs-string">&quot;create&quot;</span>,
max_bytes_per_file=MAX_BYTES_PER_FILE,
)`,wrap:!1}}),bt=new Ze({props:{source:"https://github.com/huggingface/datasets/blob/main/docs/source/video_dataset.mdx"}}),{c(){u=i("meta"),vt=a(),gt=i("p"),Zt=a(),M(f.$$.fragment),Ct=a(),M(I.$$.fragment),$t=a(),U=i("p"),U.innerHTML=Yl,Gt=a(),w=i("blockquote"),w.innerHTML=xl,Bt=a(),M(b.$$.fragment),kt=a(),L=i("p"),L.innerHTML=Nl,qt=a(),y=i("blockquote"),y.innerHTML=Xl,_t=a(),S=i("p"),S.innerHTML=El,Wt=a(),M(g.$$.fragment),Qt=a(),A=i("p"),A.innerHTML=Hl,Rt=a(),M(v.$$.fragment),Yt=a(),Z=i("p"),Z.innerHTML=Vl,xt=a(),M(C.$$.fragment),Nt=a(),$=i("p"),$.innerHTML=Dl,Xt=a(),M(G.$$.fragment),Et=a(),m=i("blockquote"),m.innerHTML=Fl,Ht=a(),B=i("p"),B.innerHTML=zl,Vt=a(),M(k.$$.fragment),Dt=a(),q=i("p"),q.innerHTML=Pl,Ft=a(),M(_.$$.fragment),zt=a(),W=i("p"),W.innerHTML=Ol,Pt=a(),M(Q.$$.fragment),Ot=a(),R=i("p"),R.innerHTML=Kl,Kt=a(),Y=i("p"),Y.textContent=te,tl=a(),M(x.$$.fragment),ll=a(),N=i("p"),N.innerHTML=le,el=a(),M(X.$$.fragment),sl=a(),M(E.$$.fragment),al=a(),H=i("p"),H.innerHTML=ee,nl=a(),M(V.$$.fragment),il=a(),D=i("p"),D.innerHTML=se,ol=a(),M(F.$$.fragment),pl=a(),M(z.$$.fragment),Ml=a(),P=i("p"),P.innerHTML=ae,dl=a(),O=i("p"),O.innerHTML=ne,Jl=a(),M(K.$$.fragment),Tl=a(),M(tt.$$.fragment),rl=a(),lt=i("p"),lt.innerHTML=ie,cl=a(),M(et.$$.fragment),jl=a(),st=i("p"),st.textContent=oe,ul=a(),M(at.$$.fragment),wl=a(),nt=i("p"),nt.textContent=pe,yl=a(),it=i("p"),it.innerHTML=Me,ml=a(),ot=i("p"),ot.textContent=de,hl=a(),M(pt.$$.fragment),fl=a(),M(Mt.$$.fragment),Il=a(),dt=i("p"),dt.innerHTML=Je,Ul=a(),Jt=i("p"),Jt.innerHTML=Te,bl=a(),Tt=i("p"),Tt.innerHTML=re,Ll=a(),M(rt.$$.fragment),Sl=a(),M(ct.$$.fragment),gl=a(),jt=i("p"),jt.innerHTML=ce,Al=a(),M(ut.$$.fragment),vl=a(),wt=i("p"),wt.innerHTML=je,Zl=a(),yt=i("p"),yt.innerHTML=ue,Cl=a(),M(mt.$$.fragment),$l=a(),ht=i("p"),ht.innerHTML=we,Gl=a(),h=i("blockquote"),h.innerHTML=ye,Bl=a(),ft=i("p"),ft.innerHTML=me,kl=a(),M(It.$$.fragment),ql=a(),Ut=i("p"),Ut.innerHTML=he,_l=a(),M(bt.$$.fragment),Wl=a(),At=i("p"),this.h()},l(t){c
onst l=ge("svelte-u9bgzb",document.head);u=o(l,"META",{name:!0,content:!0}),l.forEach(e),vt=n(t),gt=o(t,"P",{}),fe(gt).forEach(e),Zt=n(t),d(f.$$.fragment,t),Ct=n(t),d(I.$$.fragment,t),$t=n(t),U=o(t,"P",{"data-svelte-h":!0}),p(U)!=="svelte-1bv94t6"&&(U.innerHTML=Yl),Gt=n(t),w=o(t,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),p(w)!=="svelte-diqosn"&&(w.innerHTML=xl),Bt=n(t),d(b.$$.fragment,t),kt=n(t),L=o(t,"P",{"data-svelte-h":!0}),p(L)!=="svelte-1ycgm9f"&&(L.innerHTML=Nl),qt=n(t),y=o(t,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),p(y)!=="svelte-1hbx2zl"&&(y.innerHTML=Xl),_t=n(t),S=o(t,"P",{"data-svelte-h":!0}),p(S)!=="svelte-17l0fb0"&&(S.innerHTML=El),Wt=n(t),d(g.$$.fragment,t),Qt=n(t),A=o(t,"P",{"data-svelte-h":!0}),p(A)!=="svelte-1ng411t"&&(A.innerHTML=Hl),Rt=n(t),d(v.$$.fragment,t),Yt=n(t),Z=o(t,"P",{"data-svelte-h":!0}),p(Z)!=="svelte-3rm08h"&&(Z.innerHTML=Vl),xt=n(t),d(C.$$.fragment,t),Nt=n(t),$=o(t,"P",{"data-svelte-h":!0}),p($)!=="svelte-hzh3ks"&&($.innerHTML=Dl),Xt=n(t),d(G.$$.fragment,t),Et=n(t),m=o(t,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),p(m)!=="svelte-1rjlwfm"&&(m.innerHTML=Fl),Ht=n(t),B=o(t,"P",{"data-svelte-h":!0}),p(B)!=="svelte-69vkpo"&&(B.innerHTML=zl),Vt=n(t),d(k.$$.fragment,t),Dt=n(t),q=o(t,"P",{"data-svelte-h":!0}),p(q)!=="svelte-dyxa8w"&&(q.innerHTML=Pl),Ft=n(t),d(_.$$.fragment,t),zt=n(t),W=o(t,"P",{"data-svelte-h":!0}),p(W)!=="svelte-16ywdcf"&&(W.innerHTML=Ol),Pt=n(t),d(Q.$$.fragment,t),Ot=n(t),R=o(t,"P",{"data-svelte-h":!0}),p(R)!=="svelte-1stbr8p"&&(R.innerHTML=Kl),Kt=n(t),Y=o(t,"P",{"data-svelte-h":!0}),p(Y)!=="svelte-ft8iu1"&&(Y.textContent=te),tl=n(t),d(x.$$.fragment,t),ll=n(t),N=o(t,"P",{"data-svelte-h":!0}),p(N)!=="svelte-a7fbwu"&&(N.innerHTML=le),el=n(t),d(X.$$.fragment,t),sl=n(t),d(E.$$.fragment,t),al=n(t),H=o(t,"P",{"data-svelte-h":!0}),p(H)!=="svelte-wjodyx"&&(H.innerHTML=ee),nl=n(t),d(V.$$.fragment,t),il=n(t),D=o(t,"P",{"data-svelte-h":!0}),p(D)!=="svelte-10mkyte"&&(D.innerHTML=se),ol=n(t),d(F.$$.fragment,t),pl=n(t),d(z.$$.fragment
,t),Ml=n(t),P=o(t,"P",{"data-svelte-h":!0}),p(P)!=="svelte-lvrx1l"&&(P.innerHTML=ae),dl=n(t),O=o(t,"P",{"data-svelte-h":!0}),p(O)!=="svelte-1y2guln"&&(O.innerHTML=ne),Jl=n(t),d(K.$$.fragment,t),Tl=n(t),d(tt.$$.fragment,t),rl=n(t),lt=o(t,"P",{"data-svelte-h":!0}),p(lt)!=="svelte-vihcr3"&&(lt.innerHTML=ie),cl=n(t),d(et.$$.fragment,t),jl=n(t),st=o(t,"P",{"data-svelte-h":!0}),p(st)!=="svelte-69nbou"&&(st.textContent=oe),ul=n(t),d(at.$$.fragment,t),wl=n(t),nt=o(t,"P",{"data-svelte-h":!0}),p(nt)!=="svelte-15hkn5q"&&(nt.textContent=pe),yl=n(t),it=o(t,"P",{"data-svelte-h":!0}),p(it)!=="svelte-12q6x7f"&&(it.innerHTML=Me),ml=n(t),ot=o(t,"P",{"data-svelte-h":!0}),p(ot)!=="svelte-1dk8isd"&&(ot.textContent=de),hl=n(t),d(pt.$$.fragment,t),fl=n(t),d(Mt.$$.fragment,t),Il=n(t),dt=o(t,"P",{"data-svelte-h":!0}),p(dt)!=="svelte-1dxqxjd"&&(dt.innerHTML=Je),Ul=n(t),Jt=o(t,"P",{"data-svelte-h":!0}),p(Jt)!=="svelte-1geu5wj"&&(Jt.innerHTML=Te),bl=n(t),Tt=o(t,"P",{"data-svelte-h":!0}),p(Tt)!=="svelte-apmgdq"&&(Tt.innerHTML=re),Ll=n(t),d(rt.$$.fragment,t),Sl=n(t),d(ct.$$.fragment,t),gl=n(t),jt=o(t,"P",{"data-svelte-h":!0}),p(jt)!=="svelte-xoe3ld"&&(jt.innerHTML=ce),Al=n(t),d(ut.$$.fragment,t),vl=n(t),wt=o(t,"P",{"data-svelte-h":!0}),p(wt)!=="svelte-1tqt4gx"&&(wt.innerHTML=je),Zl=n(t),yt=o(t,"P",{"data-svelte-h":!0}),p(yt)!=="svelte-255jny"&&(yt.innerHTML=ue),Cl=n(t),d(mt.$$.fragment,t),$l=n(t),ht=o(t,"P",{"data-svelte-h":!0}),p(ht)!=="svelte-m80pxh"&&(ht.innerHTML=we),Gl=n(t),h=o(t,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),p(h)!=="svelte-1w2e20b"&&(h.innerHTML=ye),Bl=n(t),ft=o(t,"P",{"data-svelte-h":!0}),p(ft)!=="svelte-4dsbx0"&&(ft.innerHTML=me),kl=n(t),d(It.$$.fragment,t),ql=n(t),Ut=o(t,"P",{"data-svelte-h":!0}),p(Ut)!=="svelte-1vpwwpu"&&(Ut.innerHTML=he),_l=n(t),d(bt.$$.fragment,t),Wl=n(t),At=o(t,"P",{}),fe(At).forEach(e),this.h()},h(){St(u,"name","hf:doc:metadata"),St(u,"content",$e),St(w,"class","tip"),St(y,"class","tip"),St(m,"class","warning"),St(h,"class","tip")},m(t,l){Ae(document.
head,u),s(t,vt,l),s(t,gt,l),s(t,Zt,l),J(f,t,l),s(t,Ct,l),J(I,t,l),s(t,$t,l),s(t,U,l),s(t,Gt,l),s(t,w,l),s(t,Bt,l),J(b,t,l),s(t,kt,l),s(t,L,l),s(t,qt,l),s(t,y,l),s(t,_t,l),s(t,S,l),s(t,Wt,l),J(g,t,l),s(t,Qt,l),s(t,A,l),s(t,Rt,l),J(v,t,l),s(t,Yt,l),s(t,Z,l),s(t,xt,l),J(C,t,l),s(t,Nt,l),s(t,$,l),s(t,Xt,l),J(G,t,l),s(t,Et,l),s(t,m,l),s(t,Ht,l),s(t,B,l),s(t,Vt,l),J(k,t,l),s(t,Dt,l),s(t,q,l),s(t,Ft,l),J(_,t,l),s(t,zt,l),s(t,W,l),s(t,Pt,l),J(Q,t,l),s(t,Ot,l),s(t,R,l),s(t,Kt,l),s(t,Y,l),s(t,tl,l),J(x,t,l),s(t,ll,l),s(t,N,l),s(t,el,l),J(X,t,l),s(t,sl,l),J(E,t,l),s(t,al,l),s(t,H,l),s(t,nl,l),J(V,t,l),s(t,il,l),s(t,D,l),s(t,ol,l),J(F,t,l),s(t,pl,l),J(z,t,l),s(t,Ml,l),s(t,P,l),s(t,dl,l),s(t,O,l),s(t,Jl,l),J(K,t,l),s(t,Tl,l),J(tt,t,l),s(t,rl,l),s(t,lt,l),s(t,cl,l),J(et,t,l),s(t,jl,l),s(t,st,l),s(t,ul,l),J(at,t,l),s(t,wl,l),s(t,nt,l),s(t,yl,l),s(t,it,l),s(t,ml,l),s(t,ot,l),s(t,hl,l),J(pt,t,l),s(t,fl,l),J(Mt,t,l),s(t,Il,l),s(t,dt,l),s(t,Ul,l),s(t,Jt,l),s(t,bl,l),s(t,Tt,l),s(t,Ll,l),J(rt,t,l),s(t,Sl,l),J(ct,t,l),s(t,gl,l),s(t,jt,l),s(t,Al,l),J(ut,t,l),s(t,vl,l),s(t,wt,l),s(t,Zl,l),s(t,yt,l),s(t,Cl,l),J(mt,t,l),s(t,$l,l),s(t,ht,l),s(t,Gl,l),s(t,h,l),s(t,Bl,l),s(t,ft,l),s(t,kl,l),J(It,t,l),s(t,ql,l),s(t,Ut,l),s(t,_l,l),J(bt,t,l),s(t,Wl,l),s(t,At,l),Ql=!0},p:Ue,i(t){Ql||(T(f.$$.fragment,t),T(I.$$.fragment,t),T(b.$$.fragment,t),T(g.$$.fragment,t),T(v.$$.fragment,t),T(C.$$.fragment,t),T(G.$$.fragment,t),T(k.$$.fragment,t),T(_.$$.fragment,t),T(Q.$$.fragment,t),T(x.$$.fragment,t),T(X.$$.fragment,t),T(E.$$.fragment,t),T(V.$$.fragment,t),T(F.$$.fragment,t),T(z.$$.fragment,t),T(K.$$.fragment,t),T(tt.$$.fragment,t),T(et.$$.fragment,t),T(at.$$.fragment,t),T(pt.$$.fragment,t),T(Mt.$$.fragment,t),T(rt.$$.fragment,t),T(ct.$$.fragment,t),T(ut.$$.fragment,t),T(mt.$$.fragment,t),T(It.$$.fragment,t),T(bt.$$.fragment,t),Ql=!0)},o(t){r(f.$$.fragment,t),r(I.$$.fragment,t),r(b.$$.fragment,t),r(g.$$.fragment,t),r(v.$$.fragment,t),r(C.$$.fragment,t),r(G.$$.fragment,t),r(k.$$.fragment,t),r(_.$$.fragment,t),
r(Q.$$.fragment,t),r(x.$$.fragment,t),r(X.$$.fragment,t),r(E.$$.fragment,t),r(V.$$.fragment,t),r(F.$$.fragment,t),r(z.$$.fragment,t),r(K.$$.fragment,t),r(tt.$$.fragment,t),r(et.$$.fragment,t),r(at.$$.fragment,t),r(pt.$$.fragment,t),r(Mt.$$.fragment,t),r(rt.$$.fragment,t),r(ct.$$.fragment,t),r(ut.$$.fragment,t),r(mt.$$.fragment,t),r(It.$$.fragment,t),r(bt.$$.fragment,t),Ql=!1},d(t){t&&(e(vt),e(gt),e(Zt),e(Ct),e($t),e(U),e(Gt),e(w),e(Bt),e(kt),e(L),e(qt),e(y),e(_t),e(S),e(Wt),e(Qt),e(A),e(Rt),e(Yt),e(Z),e(xt),e(Nt),e($),e(Xt),e(Et),e(m),e(Ht),e(B),e(Vt),e(Dt),e(q),e(Ft),e(zt),e(W),e(Pt),e(Ot),e(R),e(Kt),e(Y),e(tl),e(ll),e(N),e(el),e(sl),e(al),e(H),e(nl),e(il),e(D),e(ol),e(pl),e(Ml),e(P),e(dl),e(O),e(Jl),e(Tl),e(rl),e(lt),e(cl),e(jl),e(st),e(ul),e(wl),e(nt),e(yl),e(it),e(ml),e(ot),e(hl),e(fl),e(Il),e(dt),e(Ul),e(Jt),e(bl),e(Tt),e(Ll),e(Sl),e(gl),e(jt),e(Al),e(vl),e(wt),e(Zl),e(yt),e(Cl),e($l),e(ht),e(Gl),e(h),e(Bl),e(ft),e(kl),e(ql),e(Ut),e(_l),e(Wl),e(At)),e(u),c(f,t),c(I,t),c(b,t),c(g,t),c(v,t),c(C,t),c(G,t),c(k,t),c(_,t),c(Q,t),c(x,t),c(X,t),c(E,t),c(V,t),c(F,t),c(z,t),c(K,t),c(tt,t),c(et,t),c(at,t),c(pt,t),c(Mt,t),c(rt,t),c(ct,t),c(ut,t),c(mt,t),c(It,t),c(bt,t)}}}const $e='{"title":"Create a video dataset","local":"create-a-video-dataset","sections":[{"title":"VideoFolder","local":"videofolder","sections":[{"title":"Video captioning","local":"video-captioning","sections":[],"depth":3},{"title":"Upload dataset to the Hub","local":"upload-dataset-to-the-hub","sections":[],"depth":3}],"depth":2},{"title":"WebDataset","local":"webdataset","sections":[],"depth":2},{"title":"Lance","local":"lance","sections":[{"title":"Write a Lance dataset from raw video files","local":"write-a-lance-dataset-from-raw-video-files","sections":[],"depth":3}],"depth":2}],"depth":1}';function Ge(Rl){return be(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class We extends Le{constructor(u){super(),Se(this,u,Ge,Ce,Ie,{})}}export{We as component};

Xet Storage Details

Size:
43 kB
·
Xet hash:
b7ec9af4db3e582657cc1745bb3caece647572768a7723b49d30ba4d8914e34e

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.