Buckets:
| import{s as os,n as is,o as ds}from"../chunks/scheduler.d75c11ed.js";import{S as ps,i as cs,e as o,s as l,c as p,h as rs,a as i,d as s,b as n,f as ns,g as c,j as d,k as je,l as ms,m as a,n as r,t as m,o as h,p as f}from"../chunks/index.4ec9dfe9.js";import{C as hs,H as be,E as fs}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.97acd56d.js";import{C as u}from"../chunks/CodeBlock.827d2233.js";function us(_t){let g,Je,Te,$e,T,ve,w,Ue,M,kt="<p>Video support is experimental and is subject to change.</p>",Ze,J,Gt='Video datasets have <a href="/docs/datasets/pr_8153/en/package_reference/main_classes#datasets.Video">Video</a> type columns, which contain <code>torchcodec</code> objects.',_e,y,Rt='<p>To work with video datasets, you need to have the <code>torchcodec</code> and <code>ffmpeg</code> packages installed. Check out the <a href="https://github.com/meta-pytorch/torchcodec#installing-torchcodec" rel="nofollow">installation</a> guide to learn how to install them.</p>',ke,$,Ct="When you load a video dataset and call the video column, the videos are decoded as <code>torchcodec</code> Videos:",Ge,v,Re,b,Ft="<p>Index into a video dataset using the row index first and then the <code>video</code> column - <code>dataset[0]["video"]</code> - to avoid creating all the video objects in the dataset. Otherwise, this can be a slow and time-consuming process if you have a large dataset.</p>",Ce,U,Vt='For a guide on how to load any type of dataset, take a look at the <a class="underline decoration-sky-400 decoration-2 font-semibold" href="./loading">general loading guide</a>.',Fe,Z,Ve,_,It="Access frames directly from a video using the <code>VideoReader</code> using <code>next()</code>:",Ie,k,xe,G,xt=`To get multiple frames at once, you can call <code>.get_frames_in_range(start: int, stop: int, step: int)</code>. This will return a frame batch. | |
| This is the efficient way to obtain a long list of frames refer to the <a href="https://docs.pytorch.org/torchcodec/stable/generated/torchcodec.decoders.VideoDecoder.html" rel="nofollow">torchcodec docs</a> to see more functions for effiently accessing the data:`,Xe,R,We,C,Xt="There is also <code>.get_frames_played_in_range(start_seconds: float, stop_seconds: float)</code> to access all frames played whithin a certain time range.",qe,F,Le,V,Ye,I,Wt='You can load a dataset from the video path. Use the <a href="/docs/datasets/pr_8153/en/package_reference/main_classes#datasets.Dataset.cast_column">cast_column()</a> function to accept a column of video file paths, and decode it into a <code>torchcodec</code> video with the <a href="/docs/datasets/pr_8153/en/package_reference/main_classes#datasets.Video">Video</a> feature:',Be,x,He,X,qt='If you only want to load the underlying path to the video dataset without decoding the video object, set <code>decode=False</code> in the <a href="/docs/datasets/pr_8153/en/package_reference/main_classes#datasets.Video">Video</a> feature:',Ne,W,ze,q,Qe,L,Lt="You can also load a dataset with an <code>VideoFolder</code> dataset builder which does not require writing a custom dataloader. This makes <code>VideoFolder</code> ideal for quickly creating and loading video datasets with several thousand videos for different vision tasks. Your video dataset structure should look like this:",Ee,Y,Se,B,Yt='If the dataset follows the <code>VideoFolder</code> structure, then you can load it directly with <a href="/docs/datasets/pr_8153/en/package_reference/loading_methods#datasets.load_dataset">load_dataset()</a>:',Ae,H,Pe,N,Bt='For local datasets, this is equivalent to passing <code>videofolder</code> manually in <a href="/docs/datasets/pr_8153/en/package_reference/loading_methods#datasets.load_dataset">load_dataset()</a> and the directory in <code>data_dir</code>:',De,z,Ke,Q,Ht="Then you can access the videos as <code>torchcodec.decoders._video_decoder.VideoDecoder</code> objects:",Oe,E,et,S,Nt='To ignore the information in the metadata file, set <code>drop_metadata=True</code> in <a href="/docs/datasets/pr_8153/en/package_reference/loading_methods#datasets.load_dataset">load_dataset()</a>:',tt,A,st,P,zt=`If you don’t have a metadata file, <code>VideoFolder</code> automatically infers the label name from the directory name. | |
| If you want to drop automatically created labels, set <code>drop_labels=True</code>. | |
| In this case, your dataset will only contain a video column:`,at,D,lt,K,Qt="Finally the <code>filters</code> argument lets you load only a subset of the dataset, based on a condition on the label or the metadata. This is especially useful if the metadata is in Parquet format, since this format enables fast filtering. It is also recommended to use this argument with <code>streaming=True</code>, because by default the dataset is fully downloaded before filtering.",nt,O,ot,j,Et='<p>For more information about creating your own <code>VideoFolder</code> dataset, take a look at the <a href="./video_dataset">Create a video dataset</a> guide.</p>',it,ee,dt,te,St=`The <a href="https://github.com/webdataset/webdataset" rel="nofollow">WebDataset</a> format is based on a folder of TAR archives and is suitable for big video datasets. | |
| Because of their size, WebDatasets are generally loaded in streaming mode (using <code>streaming=True</code>).`,pt,se,At="You can load a WebDataset like this:",ct,ae,rt,le,mt,ne,Pt=`<a href="https://lance.org" rel="nofollow">Lance</a> is an open multimodal lakehouse table format. Lance tables can natively store not only text and scalar values, | |
| but also large binary objects (blobs) such as images, audio, and video alongside your tabular data. Inside a Lance table, large | |
| blobs like videos are stored as bytes with offsets (see the <a href="https://lance.org/guide/blob/" rel="nofollow">blob guide</a> for more details), so this | |
| makes it easy to scan and filter metadata without loading heavier video blobs, and then fetch only the specific video blobs you need on demand.`,ht,oe,Dt=`Also, because Lance is a columnar columnar format, you can project and filter only the metadata columns you care about | |
| (without fetching large video files), and then retrieve a small subset of rows (including the video) when you’re ready. This | |
| keeps your metadata and videos in one place, without needing a separate file store or an external index.`,ft,ie,ut,de,Kt=`In this example, the video is stored natively (as its encoded bytes) in the Lance table, so you can write it directly to an <code>mp4</code> file on your local | |
| filesystem without any extra conversion step.`,gt,pe,Ot='For more details on working with Lance datasets, see the <a href="https://lance.org" rel="nofollow">Lance documentation</a>.',Mt,ce,yt,re,es=`By default, videos are decoded sequentially as torchcodec <code>VideoDecoders</code> when you iterate on a dataset. | |
| It sequentially decodes the metadata of the videos, and doesn’t read the video frames until you access them.`,bt,me,ts="However it is possible to speed up the dataset significantly using multithreaded decoding:",jt,he,Tt,fe,ss=`You can enable multithreading using <code>num_threads</code>. This is especially useful to speed up remote data streaming. | |
| However it can be slower than <code>num_threads=0</code> for local data on fast disks.`,wt,ue,as="If you are not interested in the videos decoded as torchcodec <code>VideoDecoders</code> and would like to access the path/bytes instead, you can disable decoding:",Jt,ge,$t,Me,ls='Note: <a href="/docs/datasets/pr_8153/en/package_reference/main_classes#datasets.IterableDataset.decode">IterableDataset.decode()</a> is only available for streaming datasets at the moment.',vt,ye,Ut,we,Zt;return T=new hs({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),w=new be({props:{title:"Load video data",local:"load-video-data",headingTag:"h1"}}),v=new u({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTJDJTIwVmlkZW8lMEElMEFkYXRhc2V0JTIwJTNEJTIwbG9hZF9kYXRhc2V0KCUyMnBhdGglMkZ0byUyRnZpZGVvJTJGZm9sZGVyJTIyJTJDJTIwc3BsaXQlM0QlMjJ0cmFpbiUyMiklMEFkYXRhc2V0JTVCMCU1RCU1QiUyMnZpZGVvJTIyJTVE",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset, Video | |
| <span class="hljs-meta">>>> </span>dataset = load_dataset(<span class="hljs-string">"path/to/video/folder"</span>, split=<span class="hljs-string">"train"</span>) | |
| <span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">"video"</span>] | |
| <torchcodec.decoders._video_decoder.VideoDecoder <span class="hljs-built_in">object</span> at <span class="hljs-number">0x14a61d5a0</span>>`,wrap:!1}}),Z=new be({props:{title:"Read frames",local:"read-frames",headingTag:"h2"}}),k=new u({props:{code:"dmlkZW8lMjAlM0QlMjBkYXRhc2V0JTVCMCU1RCU1QiUyMnZpZGVvJTIyJTVEJTBBZmlyc3RfZnJhbWUlMjAlM0QlMjB2aWRlby5nZXRfZnJhbWVfYXQoMCklMEFmaXJzdF9mcmFtZS5kYXRhLnNoYXBlJTBBZmlyc3RfZnJhbWUucHRzX3NlY29uZHMlMjAlMjAlMjMlMjB0aW1lc3RhbXA=",highlighted:`<span class="hljs-meta">>>> </span>video = dataset[<span class="hljs-number">0</span>][<span class="hljs-string">"video"</span>] | |
| <span class="hljs-meta">>>> </span>first_frame = video.get_frame_at(<span class="hljs-number">0</span>) | |
| <span class="hljs-meta">>>> </span>first_frame.data.shape | |
| (<span class="hljs-number">3</span>, <span class="hljs-number">240</span>, <span class="hljs-number">320</span>) | |
| <span class="hljs-meta">>>> </span>first_frame.pts_seconds <span class="hljs-comment"># timestamp</span> | |
| <span class="hljs-number">0.0</span>`,wrap:!1}}),R=new u({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcmFtZXMlMjAlM0QlMjB2aWRlby5nZXRfZnJhbWVzX2luX3JhbmdlKDAlMkMlMjA2JTJDJTIwMSklMEFmcmFtZXMuZGF0YS5zaGFwZQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span>frames = video.get_frames_in_range(<span class="hljs-number">0</span>, <span class="hljs-number">6</span>, <span class="hljs-number">1</span>) | |
| <span class="hljs-meta">>>> </span>frames.data.shape | |
| torch.Size([<span class="hljs-number">5</span>, <span class="hljs-number">3</span>, <span class="hljs-number">240</span>, <span class="hljs-number">320</span>])`,wrap:!1}}),F=new u({props:{code:"ZnJhbWVzJTIwJTNEJTIwdmlkZW8uZ2V0X2ZyYW1lc19wbGF5ZWRfaW5fcmFuZ2UoLjUlMkMlMjAxLjIpJTBBZnJhbWVzLmRhdGEuc2hhcGU=",highlighted:`<span class="hljs-meta">>>> </span>frames = video.get_frames_played_in_range(<span class="hljs-number">.5</span>, <span class="hljs-number">1.2</span>) | |
| <span class="hljs-meta">>>> </span>frames.data.shape | |
| torch.Size([<span class="hljs-number">42</span>, <span class="hljs-number">3</span>, <span class="hljs-number">240</span>, <span class="hljs-number">320</span>])`,wrap:!1}}),V=new be({props:{title:"Local files",local:"local-files",headingTag:"h2"}}),x=new u({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwRGF0YXNldCUyQyUyMFZpZGVvJTBBJTBBZGF0YXNldCUyMCUzRCUyMERhdGFzZXQuZnJvbV9kaWN0KCU3QiUyMnZpZGVvJTIyJTNBJTIwJTVCJTIycGF0aCUyRnRvJTJGdmlkZW9fMSUyMiUyQyUyMCUyMnBhdGglMkZ0byUyRnZpZGVvXzIlMjIlMkMlMjAuLi4lMkMlMjAlMjJwYXRoJTJGdG8lMkZ2aWRlb19uJTIyJTVEJTdEKS5jYXN0X2NvbHVtbiglMjJ2aWRlbyUyMiUyQyUyMFZpZGVvKCkpJTBBZGF0YXNldCU1QjAlNUQlNUIlMjJ2aWRlbyUyMiU1RA==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset, Video | |
| <span class="hljs-meta">>>> </span>dataset = Dataset.from_dict({<span class="hljs-string">"video"</span>: [<span class="hljs-string">"path/to/video_1"</span>, <span class="hljs-string">"path/to/video_2"</span>, ..., <span class="hljs-string">"path/to/video_n"</span>]}).cast_column(<span class="hljs-string">"video"</span>, Video()) | |
| <span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">"video"</span>] | |
| <torchcodec.decoders._video_decoder.VideoDecoder <span class="hljs-built_in">object</span> at <span class="hljs-number">0x14a61e080</span>>`,wrap:!1}}),W=new u({props:{code:"ZGF0YXNldCUyMCUzRCUyMGRhdGFzZXQuY2FzdF9jb2x1bW4oJTIydmlkZW8lMjIlMkMlMjBWaWRlbyhkZWNvZGUlM0RGYWxzZSkpJTBBZGF0YXNldCU1QjAlNUQlNUIlMjJ2aWRlbyUyMiU1RA==",highlighted:`<span class="hljs-meta">>>> </span>dataset = dataset.cast_column(<span class="hljs-string">"video"</span>, Video(decode=<span class="hljs-literal">False</span>)) | |
| <span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">"video"</span>] | |
| {<span class="hljs-string">'bytes'</span>: <span class="hljs-literal">None</span>, | |
| <span class="hljs-string">'path'</span>: <span class="hljs-string">'path/to/video/folder/video0.mp4'</span>}`,wrap:!1}}),q=new be({props:{title:"VideoFolder",local:"videofolder",headingTag:"h2"}}),Y=new u({props:{code:"Zm9sZGVyJTJGdHJhaW4lMkZkb2clMkZnb2xkZW5fcmV0cmlldmVyLm1wNCUwQWZvbGRlciUyRnRyYWluJTJGZG9nJTJGZ2VybWFuX3NoZXBoZXJkLm1wNCUwQWZvbGRlciUyRnRyYWluJTJGZG9nJTJGY2hpaHVhaHVhLm1wNCUwQSUwQWZvbGRlciUyRnRyYWluJTJGY2F0JTJGbWFpbmVfY29vbi5tcDQlMEFmb2xkZXIlMkZ0cmFpbiUyRmNhdCUyRmJlbmdhbC5tcDQlMEFmb2xkZXIlMkZ0cmFpbiUyRmNhdCUyRmJpcm1hbi5tcDQ=",highlighted:`folder<span class="hljs-regexp">/train/</span>dog/golden_retriever.mp4 | |
| folder<span class="hljs-regexp">/train/</span>dog/german_shepherd.mp4 | |
| folder<span class="hljs-regexp">/train/</span>dog/chihuahua.mp4 | |
| folder<span class="hljs-regexp">/train/</span>cat/maine_coon.mp4 | |
| folder<span class="hljs-regexp">/train/</span>cat/bengal.mp4 | |
| folder<span class="hljs-regexp">/train/</span>cat/birman.mp4`,wrap:!1}}),H=new u({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJ1c2VybmFtZSUyRmRhdGFzZXRfbmFtZSUyMiklMEElMjMlMjBPUiUyMGxvY2FsbHklM0ElMEFkYXRhc2V0JTIwJTNEJTIwbG9hZF9kYXRhc2V0KCUyMiUyRnBhdGglMkZ0byUyRmZvbGRlciUyMik=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-meta">>>> </span>dataset = load_dataset(<span class="hljs-string">"username/dataset_name"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># OR locally:</span> | |
| <span class="hljs-meta">>>> </span>dataset = load_dataset(<span class="hljs-string">"/path/to/folder"</span>)`,wrap:!1}}),z=new u({props:{code:"ZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJ2aWRlb2ZvbGRlciUyMiUyQyUyMGRhdGFfZGlyJTNEJTIyJTJGcGF0aCUyRnRvJTJGZm9sZGVyJTIyKQ==",highlighted:'<span class="hljs-meta">>>> </span>dataset = load_dataset(<span class="hljs-string">"videofolder"</span>, data_dir=<span class="hljs-string">"/path/to/folder"</span>)',wrap:!1}}),E=new u({props:{code:"ZGF0YXNldCU1QiUyMnRyYWluJTIyJTVEJTVCMCU1RCUwQSUwQWRhdGFzZXQlNUIlMjJ0cmFpbiUyMiU1RCU1Qi0xJTVE",highlighted:`>>> dataset<span class="hljs-selector-attr">[<span class="hljs-string">"train"</span>]</span><span class="hljs-selector-attr">[0]</span> | |
| {<span class="hljs-string">"video"</span>: <torchcodec<span class="hljs-selector-class">.decoders</span>._video_decoder<span class="hljs-selector-class">.VideoDecoder</span> <span class="hljs-selector-tag">object</span> at <span class="hljs-number">0</span>x14a61e080>, <span class="hljs-string">"label"</span>: <span class="hljs-number">0</span>} | |
| >>> dataset<span class="hljs-selector-attr">[<span class="hljs-string">"train"</span>]</span><span class="hljs-selector-attr">[-1]</span> | |
| {<span class="hljs-string">"video"</span>: <torchcodec<span class="hljs-selector-class">.decoders</span>._video_decoder<span class="hljs-selector-class">.VideoDecoder</span> <span class="hljs-selector-tag">object</span> at <span class="hljs-number">0</span>x14a61e090>, <span class="hljs-string">"label"</span>: <span class="hljs-number">1</span>}`,wrap:!1}}),A=new u({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJ1c2VybmFtZSUyRmRhdGFzZXRfd2l0aF9tZXRhZGF0YSUyMiUyQyUyMGRyb3BfbWV0YWRhdGElM0RUcnVlKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-meta">>>> </span>dataset = load_dataset(<span class="hljs-string">"username/dataset_with_metadata"</span>, drop_metadata=<span class="hljs-literal">True</span>)`,wrap:!1}}),D=new u({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJ1c2VybmFtZSUyRmRhdGFzZXRfd2l0aG91dF9tZXRhZGF0YSUyMiUyQyUyMGRyb3BfbGFiZWxzJTNEVHJ1ZSk=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-meta">>>> </span>dataset = load_dataset(<span class="hljs-string">"username/dataset_without_metadata"</span>, drop_labels=<span class="hljs-literal">True</span>)`,wrap:!1}}),O=new u({props:{code:"ZmlsdGVycyUyMCUzRCUyMCU1QiglMjJsYWJlbCUyMiUyQyUyMCUyMiUzRCUyMiUyQyUyMDApJTVEJTBBZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJ1c2VybmFtZSUyRmRhdGFzZXRfbmFtZSUyMiUyQyUyMHN0cmVhbWluZyUzRFRydWUlMkMlMjBmaWx0ZXJzJTNEZmlsdGVycyk=",highlighted:`<span class="hljs-meta">>>> </span>filters = [(<span class="hljs-string">"label"</span>, <span class="hljs-string">"="</span>, <span class="hljs-number">0</span>)] | |
| <span class="hljs-meta">>>> </span>dataset = load_dataset(<span class="hljs-string">"username/dataset_name"</span>, streaming=<span class="hljs-literal">True</span>, filters=filters)`,wrap:!1}}),ee=new be({props:{title:"WebDataset",local:"webdataset",headingTag:"h2"}}),ae=new u({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJ3ZWJkYXRhc2V0JTIyJTJDJTIwZGF0YV9kaXIlM0QlMjIlMkZwYXRoJTJGdG8lMkZmb2xkZXIlMjIlMkMlMjBzdHJlYW1pbmclM0RUcnVlKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-meta">>>> </span>dataset = load_dataset(<span class="hljs-string">"webdataset"</span>, data_dir=<span class="hljs-string">"/path/to/folder"</span>, streaming=<span class="hljs-literal">True</span>)`,wrap:!1}}),le=new be({props:{title:"Lance",local:"lance",headingTag:"h2"}}),ie=new u({props:{code:"aW1wb3J0JTIwbGFuY2UlMEElMEFkcyUyMCUzRCUyMGxhbmNlLmRhdGFzZXQoJTIyaGYlM0ElMkYlMkZkYXRhc2V0cyUyRmxhbmNlLWZvcm1hdCUyRm9wZW52aWQtbGFuY2UlMkZkYXRhJTJGdHJhaW4ubGFuY2UlMjIpJTBBJTBBJTIzJTIwMS4lMjBCcm93c2UlMjBtZXRhZGF0YSUyMHdpdGhvdXQlMjBsb2FkaW5nJTIwdmlkZW8lMjBibG9icy4lMEFtZXRhZGF0YSUyMCUzRCUyMGRzLnNjYW5uZXIoJTBBJTIwJTIwJTIwJTIwY29sdW1ucyUzRCU1QiUyMmNhcHRpb24lMjIlMkMlMjAlMjJhZXN0aGV0aWNfc2NvcmUlMjIlNUQlMkMlMEElMjAlMjAlMjAlMjBmaWx0ZXIlM0QlMjJhZXN0aGV0aWNfc2NvcmUlMjAlM0UlM0QlMjA0LjUlMjIlMkMlMEElMjAlMjAlMjAlMjBsaW1pdCUzRDIlMkMlMEEpLnRvX3RhYmxlKCkudG9fcHlsaXN0KCklMEElMEElMjMlMjAyLiUyMEZldGNoJTIwYSUyMHNpbmdsZSUyMHZpZGVvJTIwYmxvYiUyMGJ5JTIwcm93JTIwaW5kZXguJTBBc2VsZWN0ZWRfaW5kZXglMjAlM0QlMjAwJTBBYmxvYl9maWxlJTIwJTNEJTIwZHMudGFrZV9ibG9icyglMjJ2aWRlb19ibG9iJTIyJTJDJTIwaWRzJTNEJTVCc2VsZWN0ZWRfaW5kZXglNUQpJTVCMCU1RCUwQXdpdGglMjBvcGVuKCUyMnZpZGVvXzAubXA0JTIyJTJDJTIwJTIyd2IlMjIpJTIwYXMlMjBmJTNBJTBBJTIwJTIwJTIwJTIwZi53cml0ZShibG9iX2ZpbGUucmVhZCgpKQ==",highlighted:`<span class="hljs-keyword">import</span> lance | |
| ds = lance.dataset(<span class="hljs-string">"hf://datasets/lance-format/openvid-lance/data/train.lance"</span>) | |
| <span class="hljs-comment"># 1. Browse metadata without loading video blobs.</span> | |
| metadata = ds.scanner( | |
| columns=[<span class="hljs-string">"caption"</span>, <span class="hljs-string">"aesthetic_score"</span>], | |
| <span class="hljs-built_in">filter</span>=<span class="hljs-string">"aesthetic_score >= 4.5"</span>, | |
| limit=<span class="hljs-number">2</span>, | |
| ).to_table().to_pylist() | |
| <span class="hljs-comment"># 2. Fetch a single video blob by row index.</span> | |
| selected_index = <span class="hljs-number">0</span> | |
| blob_file = ds.take_blobs(<span class="hljs-string">"video_blob"</span>, ids=[selected_index])[<span class="hljs-number">0</span>] | |
| <span class="hljs-keyword">with</span> <span class="hljs-built_in">open</span>(<span class="hljs-string">"video_0.mp4"</span>, <span class="hljs-string">"wb"</span>) <span class="hljs-keyword">as</span> f: | |
| f.write(blob_file.read())`,wrap:!1}}),ce=new be({props:{title:"Video decoding",local:"video-decoding",headingTag:"h2"}}),he=new u({props:{code:"aW1wb3J0JTIwb3MlMEFudW1fdGhyZWFkcyUyMCUzRCUyMG51bV90aHJlYWRzJTIwJTNEJTIwbWluKDMyJTJDJTIwKG9zLmNwdV9jb3VudCgpJTIwb3IlMjAxKSUyMCUyQiUyMDQpJTBBZGF0YXNldCUyMCUzRCUyMGRhdGFzZXQuZGVjb2RlKG51bV90aHJlYWRzJTNEbnVtX3RocmVhZHMpJTBBZm9yJTIwZXhhbXBsZSUyMGluJTIwZGF0YXNldCUzQSUyMCUyMCUyMyUyMHVwJTIwdG8lMjAyMCUyMHRpbWVzJTIwZmFzdGVyJTIwISUwQSUyMCUyMCUyMCUyMC4uLg==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> os | |
| <span class="hljs-meta">>>> </span>num_threads = num_threads = <span class="hljs-built_in">min</span>(<span class="hljs-number">32</span>, (os.cpu_count() <span class="hljs-keyword">or</span> <span class="hljs-number">1</span>) + <span class="hljs-number">4</span>) | |
| <span class="hljs-meta">>>> </span>dataset = dataset.decode(num_threads=num_threads) | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">for</span> example <span class="hljs-keyword">in</span> dataset: <span class="hljs-comment"># up to 20 times faster !</span> | |
| <span class="hljs-meta">... </span> ...`,wrap:!1}}),ge=new u({props:{code:"ZGF0YXNldCUyMCUzRCUyMGRhdGFzZXQuZGVjb2RlKEZhbHNlKQ==",highlighted:'<span class="hljs-meta">>>> </span>dataset = dataset.decode(<span class="hljs-literal">False</span>)',wrap:!1}}),ye=new fs({props:{source:"https://github.com/huggingface/datasets/blob/main/docs/source/video_load.mdx"}}),{c(){g=o("meta"),Je=l(),Te=o("p"),$e=l(),p(T.$$.fragment),ve=l(),p(w.$$.fragment),Ue=l(),M=o("blockquote"),M.innerHTML=kt,Ze=l(),J=o("p"),J.innerHTML=Gt,_e=l(),y=o("blockquote"),y.innerHTML=Rt,ke=l(),$=o("p"),$.innerHTML=Ct,Ge=l(),p(v.$$.fragment),Re=l(),b=o("blockquote"),b.innerHTML=Ft,Ce=l(),U=o("p"),U.innerHTML=Vt,Fe=l(),p(Z.$$.fragment),Ve=l(),_=o("p"),_.innerHTML=It,Ie=l(),p(k.$$.fragment),xe=l(),G=o("p"),G.innerHTML=xt,Xe=l(),p(R.$$.fragment),We=l(),C=o("p"),C.innerHTML=Xt,qe=l(),p(F.$$.fragment),Le=l(),p(V.$$.fragment),Ye=l(),I=o("p"),I.innerHTML=Wt,Be=l(),p(x.$$.fragment),He=l(),X=o("p"),X.innerHTML=qt,Ne=l(),p(W.$$.fragment),ze=l(),p(q.$$.fragment),Qe=l(),L=o("p"),L.innerHTML=Lt,Ee=l(),p(Y.$$.fragment),Se=l(),B=o("p"),B.innerHTML=Yt,Ae=l(),p(H.$$.fragment),Pe=l(),N=o("p"),N.innerHTML=Bt,De=l(),p(z.$$.fragment),Ke=l(),Q=o("p"),Q.innerHTML=Ht,Oe=l(),p(E.$$.fragment),et=l(),S=o("p"),S.innerHTML=Nt,tt=l(),p(A.$$.fragment),st=l(),P=o("p"),P.innerHTML=zt,at=l(),p(D.$$.fragment),lt=l(),K=o("p"),K.innerHTML=Qt,nt=l(),p(O.$$.fragment),ot=l(),j=o("blockquote"),j.innerHTML=Et,it=l(),p(ee.$$.fragment),dt=l(),te=o("p"),te.innerHTML=St,pt=l(),se=o("p"),se.textContent=At,ct=l(),p(ae.$$.fragment),rt=l(),p(le.$$.fragment),mt=l(),ne=o("p"),ne.innerHTML=Pt,ht=l(),oe=o("p"),oe.textContent=Dt,ft=l(),p(ie.$$.fragment),ut=l(),de=o("p"),de.innerHTML=Kt,gt=l(),pe=o("p"),pe.innerHTML=Ot,Mt=l(),p(ce.$$.fragment),yt=l(),re=o("p"),re.innerHTML=es,bt=l(),me=o("p"),me.textContent=ts,jt=l(),p(he.$$.fragment),Tt=l(),fe=o("p"),fe.innerHTML=ss,wt=l(),ue=o("p"),ue.innerHTML=as,Jt=l(),p(ge.$$.fragment),$t=l(),Me=o("p"),Me.innerHTML=ls,vt=l(),p(ye.$$.fragment),Ut=l(),we=o("p"),this.h()},l(e){const t=rs("svelte-u9bgzb",document.head);g=i(t,"META",{name:!0,content:!0}),t.forEach(s),Je=n(e),Te=i(e,"P",{}),ns(Te).forEach(s),$e=n(e),c(T.$$.fragment,e),ve=n(e),c(w.$$.fragment,e),Ue=n(e),M=i(e,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),d(M)!=="svelte-pit5yc"&&(M.innerHTML=kt),Ze=n(e),J=i(e,"P",{"data-svelte-h":!0}),d(J)!=="svelte-19vr2yr"&&(J.innerHTML=Gt),_e=n(e),y=i(e,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),d(y)!=="svelte-1op4off"&&(y.innerHTML=Rt),ke=n(e),$=i(e,"P",{"data-svelte-h":!0}),d($)!=="svelte-8qkahe"&&($.innerHTML=Ct),Ge=n(e),c(v.$$.fragment,e),Re=n(e),b=i(e,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),d(b)!=="svelte-1wiph16"&&(b.innerHTML=Ft),Ce=n(e),U=i(e,"P",{"data-svelte-h":!0}),d(U)!=="svelte-12zk6g2"&&(U.innerHTML=Vt),Fe=n(e),c(Z.$$.fragment,e),Ve=n(e),_=i(e,"P",{"data-svelte-h":!0}),d(_)!=="svelte-nrlgn7"&&(_.innerHTML=It),Ie=n(e),c(k.$$.fragment,e),xe=n(e),G=i(e,"P",{"data-svelte-h":!0}),d(G)!=="svelte-i0luyb"&&(G.innerHTML=xt),Xe=n(e),c(R.$$.fragment,e),We=n(e),C=i(e,"P",{"data-svelte-h":!0}),d(C)!=="svelte-11lu8aj"&&(C.innerHTML=Xt),qe=n(e),c(F.$$.fragment,e),Le=n(e),c(V.$$.fragment,e),Ye=n(e),I=i(e,"P",{"data-svelte-h":!0}),d(I)!=="svelte-ai0w8e"&&(I.innerHTML=Wt),Be=n(e),c(x.$$.fragment,e),He=n(e),X=i(e,"P",{"data-svelte-h":!0}),d(X)!=="svelte-ohum64"&&(X.innerHTML=qt),Ne=n(e),c(W.$$.fragment,e),ze=n(e),c(q.$$.fragment,e),Qe=n(e),L=i(e,"P",{"data-svelte-h":!0}),d(L)!=="svelte-1m60srx"&&(L.innerHTML=Lt),Ee=n(e),c(Y.$$.fragment,e),Se=n(e),B=i(e,"P",{"data-svelte-h":!0}),d(B)!=="svelte-16jmtkb"&&(B.innerHTML=Yt),Ae=n(e),c(H.$$.fragment,e),Pe=n(e),N=i(e,"P",{"data-svelte-h":!0}),d(N)!=="svelte-gz9s4o"&&(N.innerHTML=Bt),De=n(e),c(z.$$.fragment,e),Ke=n(e),Q=i(e,"P",{"data-svelte-h":!0}),d(Q)!=="svelte-1x8wxub"&&(Q.innerHTML=Ht),Oe=n(e),c(E.$$.fragment,e),et=n(e),S=i(e,"P",{"data-svelte-h":!0}),d(S)!=="svelte-cnb373"&&(S.innerHTML=Nt),tt=n(e),c(A.$$.fragment,e),st=n(e),P=i(e,"P",{"data-svelte-h":!0}),d(P)!=="svelte-svotpi"&&(P.innerHTML=zt),at=n(e),c(D.$$.fragment,e),lt=n(e),K=i(e,"P",{"data-svelte-h":!0}),d(K)!=="svelte-1jur300"&&(K.innerHTML=Qt),nt=n(e),c(O.$$.fragment,e),ot=n(e),j=i(e,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),d(j)!=="svelte-1523k5v"&&(j.innerHTML=Et),it=n(e),c(ee.$$.fragment,e),dt=n(e),te=i(e,"P",{"data-svelte-h":!0}),d(te)!=="svelte-irvxkn"&&(te.innerHTML=St),pt=n(e),se=i(e,"P",{"data-svelte-h":!0}),d(se)!=="svelte-1e5cszd"&&(se.textContent=At),ct=n(e),c(ae.$$.fragment,e),rt=n(e),c(le.$$.fragment,e),mt=n(e),ne=i(e,"P",{"data-svelte-h":!0}),d(ne)!=="svelte-i0pb1i"&&(ne.innerHTML=Pt),ht=n(e),oe=i(e,"P",{"data-svelte-h":!0}),d(oe)!=="svelte-1b7s8ts"&&(oe.textContent=Dt),ft=n(e),c(ie.$$.fragment,e),ut=n(e),de=i(e,"P",{"data-svelte-h":!0}),d(de)!=="svelte-5c9be6"&&(de.innerHTML=Kt),gt=n(e),pe=i(e,"P",{"data-svelte-h":!0}),d(pe)!=="svelte-1vpwwpu"&&(pe.innerHTML=Ot),Mt=n(e),c(ce.$$.fragment,e),yt=n(e),re=i(e,"P",{"data-svelte-h":!0}),d(re)!=="svelte-11nbdtc"&&(re.innerHTML=es),bt=n(e),me=i(e,"P",{"data-svelte-h":!0}),d(me)!=="svelte-fdii9b"&&(me.textContent=ts),jt=n(e),c(he.$$.fragment,e),Tt=n(e),fe=i(e,"P",{"data-svelte-h":!0}),d(fe)!=="svelte-1hi5tci"&&(fe.innerHTML=ss),wt=n(e),ue=i(e,"P",{"data-svelte-h":!0}),d(ue)!=="svelte-1n6aqp1"&&(ue.innerHTML=as),Jt=n(e),c(ge.$$.fragment,e),$t=n(e),Me=i(e,"P",{"data-svelte-h":!0}),d(Me)!=="svelte-c5n68w"&&(Me.innerHTML=ls),vt=n(e),c(ye.$$.fragment,e),Ut=n(e),we=i(e,"P",{}),ns(we).forEach(s),this.h()},h(){je(g,"name","hf:doc:metadata"),je(g,"content",gs),je(M,"class","warning"),je(y,"class","tip"),je(b,"class","warning"),je(j,"class","tip")},m(e,t){ms(document.head,g),a(e,Je,t),a(e,Te,t),a(e,$e,t),r(T,e,t),a(e,ve,t),r(w,e,t),a(e,Ue,t),a(e,M,t),a(e,Ze,t),a(e,J,t),a(e,_e,t),a(e,y,t),a(e,ke,t),a(e,$,t),a(e,Ge,t),r(v,e,t),a(e,Re,t),a(e,b,t),a(e,Ce,t),a(e,U,t),a(e,Fe,t),r(Z,e,t),a(e,Ve,t),a(e,_,t),a(e,Ie,t),r(k,e,t),a(e,xe,t),a(e,G,t),a(e,Xe,t),r(R,e,t),a(e,We,t),a(e,C,t),a(e,qe,t),r(F,e,t),a(e,Le,t),r(V,e,t),a(e,Ye,t),a(e,I,t),a(e,Be,t),r(x,e,t),a(e,He,t),a(e,X,t),a(e,Ne,t),r(W,e,t),a(e,ze,t),r(q,e,t),a(e,Qe,t),a(e,L,t),a(e,Ee,t),r(Y,e,t),a(e,Se,t),a(e,B,t),a(e,Ae,t),r(H,e,t),a(e,Pe,t),a(e,N,t),a(e,De,t),r(z,e,t),a(e,Ke,t),a(e,Q,t),a(e,Oe,t),r(E,e,t),a(e,et,t),a(e,S,t),a(e,tt,t),r(A,e,t),a(e,st,t),a(e,P,t),a(e,at,t),r(D,e,t),a(e,lt,t),a(e,K,t),a(e,nt,t),r(O,e,t),a(e,ot,t),a(e,j,t),a(e,it,t),r(ee,e,t),a(e,dt,t),a(e,te,t),a(e,pt,t),a(e,se,t),a(e,ct,t),r(ae,e,t),a(e,rt,t),r(le,e,t),a(e,mt,t),a(e,ne,t),a(e,ht,t),a(e,oe,t),a(e,ft,t),r(ie,e,t),a(e,ut,t),a(e,de,t),a(e,gt,t),a(e,pe,t),a(e,Mt,t),r(ce,e,t),a(e,yt,t),a(e,re,t),a(e,bt,t),a(e,me,t),a(e,jt,t),r(he,e,t),a(e,Tt,t),a(e,fe,t),a(e,wt,t),a(e,ue,t),a(e,Jt,t),r(ge,e,t),a(e,$t,t),a(e,Me,t),a(e,vt,t),r(ye,e,t),a(e,Ut,t),a(e,we,t),Zt=!0},p:is,i(e){Zt||(m(T.$$.fragment,e),m(w.$$.fragment,e),m(v.$$.fragment,e),m(Z.$$.fragment,e),m(k.$$.fragment,e),m(R.$$.fragment,e),m(F.$$.fragment,e),m(V.$$.fragment,e),m(x.$$.fragment,e),m(W.$$.fragment,e),m(q.$$.fragment,e),m(Y.$$.fragment,e),m(H.$$.fragment,e),m(z.$$.fragment,e),m(E.$$.fragment,e),m(A.$$.fragment,e),m(D.$$.fragment,e),m(O.$$.fragment,e),m(ee.$$.fragment,e),m(ae.$$.fragment,e),m(le.$$.fragment,e),m(ie.$$.fragment,e),m(ce.$$.fragment,e),m(he.$$.fragment,e),m(ge.$$.fragment,e),m(ye.$$.fragment,e),Zt=!0)},o(e){h(T.$$.fragment,e),h(w.$$.fragment,e),h(v.$$.fragment,e),h(Z.$$.fragment,e),h(k.$$.fragment,e),h(R.$$.fragment,e),h(F.$$.fragment,e),h(V.$$.fragment,e),h(x.$$.fragment,e),h(W.$$.fragment,e),h(q.$$.fragment,e),h(Y.$$.fragment,e),h(H.$$.fragment,e),h(z.$$.fragment,e),h(E.$$.fragment,e),h(A.$$.fragment,e),h(D.$$.fragment,e),h(O.$$.fragment,e),h(ee.$$.fragment,e),h(ae.$$.fragment,e),h(le.$$.fragment,e),h(ie.$$.fragment,e),h(ce.$$.fragment,e),h(he.$$.fragment,e),h(ge.$$.fragment,e),h(ye.$$.fragment,e),Zt=!1},d(e){e&&(s(Je),s(Te),s($e),s(ve),s(Ue),s(M),s(Ze),s(J),s(_e),s(y),s(ke),s($),s(Ge),s(Re),s(b),s(Ce),s(U),s(Fe),s(Ve),s(_),s(Ie),s(xe),s(G),s(Xe),s(We),s(C),s(qe),s(Le),s(Ye),s(I),s(Be),s(He),s(X),s(Ne),s(ze),s(Qe),s(L),s(Ee),s(Se),s(B),s(Ae),s(Pe),s(N),s(De),s(Ke),s(Q),s(Oe),s(et),s(S),s(tt),s(st),s(P),s(at),s(lt),s(K),s(nt),s(ot),s(j),s(it),s(dt),s(te),s(pt),s(se),s(ct),s(rt),s(mt),s(ne),s(ht),s(oe),s(ft),s(ut),s(de),s(gt),s(pe),s(Mt),s(yt),s(re),s(bt),s(me),s(jt),s(Tt),s(fe),s(wt),s(ue),s(Jt),s($t),s(Me),s(vt),s(Ut),s(we)),s(g),f(T,e),f(w,e),f(v,e),f(Z,e),f(k,e),f(R,e),f(F,e),f(V,e),f(x,e),f(W,e),f(q,e),f(Y,e),f(H,e),f(z,e),f(E,e),f(A,e),f(D,e),f(O,e),f(ee,e),f(ae,e),f(le,e),f(ie,e),f(ce,e),f(he,e),f(ge,e),f(ye,e)}}}const gs='{"title":"Load video data","local":"load-video-data","sections":[{"title":"Read frames","local":"read-frames","sections":[],"depth":2},{"title":"Local files","local":"local-files","sections":[],"depth":2},{"title":"VideoFolder","local":"videofolder","sections":[],"depth":2},{"title":"WebDataset","local":"webdataset","sections":[],"depth":2},{"title":"Lance","local":"lance","sections":[],"depth":2},{"title":"Video decoding","local":"video-decoding","sections":[],"depth":2}],"depth":1}';function Ms(_t){return ds(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class ws extends ps{constructor(g){super(),cs(this,g,Ms,us,os,{})}}export{ws as component}; | |
Xet Storage Details
- Size:
- 31.4 kB
- Xet hash:
- 86cc09b8b81e454bd080ab0136289b4a3785704bd1dde7489f0f80d121414b90
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.