Buckets:
| import{s as so,o as oo,n as I}from"../chunks/scheduler.d75c11ed.js";import{S as io,i as lo,e as d,s as o,c,h as ro,a as p,d as a,b as i,f as w,g,j as $,k as N,l as v,m as s,n as m,t as f,o as u,p as _}from"../chunks/index.4ec9dfe9.js";import{C as po,H as U,E as co}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.1fd9202e.js";import{D as k}from"../chunks/Docstring.52dae968.js";import{C as M}from"../chunks/CodeBlock.9181a37c.js";import{E as J}from"../chunks/ExampleCodeBlock.2122371b.js";function go(D){let l,x="Load a dataset from the Hugging Face Hub:",y,r,h;return r=new M({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBZHMlMjAlM0QlMjBsb2FkX2RhdGFzZXQoJ2Nvcm5lbGwtbW92aWUtcmV2aWV3LWRhdGElMkZyb3R0ZW5fdG9tYXRvZXMnJTJDJTIwc3BsaXQlM0QndHJhaW4nKSUwQSUwQWZyb20lMjBkYXRhc2V0cyUyMGltcG9ydCUyMGxvYWRfZGF0YXNldCUwQWRzJTIwJTNEJTIwbG9hZF9kYXRhc2V0KCdueXUtbWxsJTJGZ2x1ZSclMkMlMjAnc3N0MiclMkMlMjBzcGxpdCUzRCd0cmFpbicpJTBBJTBBZGF0YV9maWxlcyUyMCUzRCUyMCU3Qid0cmFpbiclM0ElMjAndHJhaW4uY3N2JyUyQyUyMCd0ZXN0JyUzQSUyMCd0ZXN0LmNzdiclN0QlMEFkcyUyMCUzRCUyMGxvYWRfZGF0YXNldCgnbmFtZXNwYWNlJTJGeW91cl9kYXRhc2V0X25hbWUnJTJDJTIwZGF0YV9maWxlcyUzRGRhdGFfZmlsZXMpJTBBJTBBZHMlMjAlM0QlMjBsb2FkX2RhdGFzZXQoJ25hbWVzcGFjZSUyRnlvdXJfZGF0YXNldF9uYW1lJyUyQyUyMGRhdGFfZGlyJTNEJ2ZvbGRlcl9uYW1lJyk=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-meta">>>> </span>ds = load_dataset(<span class="hljs-string">'cornell-movie-review-data/rotten_tomatoes'</span>, split=<span class="hljs-string">'train'</span>) | |
| <span class="hljs-comment"># Load a subset or dataset configuration (here 'sst2')</span> | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-meta">>>> </span>ds = load_dataset(<span class="hljs-string">'nyu-mll/glue'</span>, <span class="hljs-string">'sst2'</span>, split=<span class="hljs-string">'train'</span>) | |
| <span class="hljs-comment"># Manual mapping of data files to splits</span> | |
| <span class="hljs-meta">>>> </span>data_files = {<span class="hljs-string">'train'</span>: <span class="hljs-string">'train.csv'</span>, <span class="hljs-string">'test'</span>: <span class="hljs-string">'test.csv'</span>} | |
| <span class="hljs-meta">>>> </span>ds = load_dataset(<span class="hljs-string">'namespace/your_dataset_name'</span>, data_files=data_files) | |
| <span class="hljs-comment"># Manual selection of a directory to load</span> | |
| <span class="hljs-meta">>>> </span>ds = load_dataset(<span class="hljs-string">'namespace/your_dataset_name'</span>, data_dir=<span class="hljs-string">'folder_name'</span>)`,wrap:!1}}),{c(){l=d("p"),l.textContent=x,y=o(),c(r.$$.fragment)},l(t){l=p(t,"P",{"data-svelte-h":!0}),$(l)!=="svelte-cpjyx5"&&(l.textContent=x),y=i(t),g(r.$$.fragment,t)},m(t,b){s(t,l,b),s(t,y,b),m(r,t,b),h=!0},p:I,i(t){h||(f(r.$$.fragment,t),h=!0)},o(t){u(r.$$.fragment,t),h=!1},d(t){t&&(a(l),a(y)),_(r,t)}}}function mo(D){let l,x="Load a dataset from a Storage Bucket on the Hugging Face Hub:",y,r,h;return r=new M({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBZHMlMjAlM0QlMjBsb2FkX2RhdGFzZXQoJ2J1Y2tldHMlMkZ1c2VybmFtZSUyRmJ1Y2tldF9uYW1lJTJGcm90dGVuX3RvbWF0b2VzJyUyQyUyMHNwbGl0JTNEJ3RyYWluJyk=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-meta">>>> </span>ds = load_dataset(<span class="hljs-string">'buckets/username/bucket_name/rotten_tomatoes'</span>, split=<span class="hljs-string">'train'</span>)`,wrap:!1}}),{c(){l=d("p"),l.textContent=x,y=o(),c(r.$$.fragment)},l(t){l=p(t,"P",{"data-svelte-h":!0}),$(l)!=="svelte-uychkg"&&(l.textContent=x),y=i(t),g(r.$$.fragment,t)},m(t,b){s(t,l,b),s(t,y,b),m(r,t,b),h=!0},p:I,i(t){h||(f(r.$$.fragment,t),h=!0)},o(t){u(r.$$.fragment,t),h=!1},d(t){t&&(a(l),a(y)),_(r,t)}}}function fo(D){let l,x="Load a local dataset:",y,r,h;return r=new M({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBZHMlMjAlM0QlMjBsb2FkX2RhdGFzZXQoJ2NzdiclMkMlMjBkYXRhX2ZpbGVzJTNEJ3BhdGglMkZ0byUyRmxvY2FsJTJGbXlfZGF0YXNldC5jc3YnKSUwQSUwQWZyb20lMjBkYXRhc2V0cyUyMGltcG9ydCUyMGxvYWRfZGF0YXNldCUwQWRzJTIwJTNEJTIwbG9hZF9kYXRhc2V0KCdqc29uJyUyQyUyMGRhdGFfZmlsZXMlM0QncGF0aCUyRnRvJTJGbG9jYWwlMkZteV9kYXRhc2V0Lmpzb24nKQ==",highlighted:`<span class="hljs-comment"># Load a CSV file</span> | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-meta">>>> </span>ds = load_dataset(<span class="hljs-string">'csv'</span>, data_files=<span class="hljs-string">'path/to/local/my_dataset.csv'</span>) | |
| <span class="hljs-comment"># Load a JSON file</span> | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-meta">>>> </span>ds = load_dataset(<span class="hljs-string">'json'</span>, data_files=<span class="hljs-string">'path/to/local/my_dataset.json'</span>)`,wrap:!1}}),{c(){l=d("p"),l.textContent=x,y=o(),c(r.$$.fragment)},l(t){l=p(t,"P",{"data-svelte-h":!0}),$(l)!=="svelte-18tmtyu"&&(l.textContent=x),y=i(t),g(r.$$.fragment,t)},m(t,b){s(t,l,b),s(t,y,b),m(r,t,b),h=!0},p:I,i(t){h||(f(r.$$.fragment,t),h=!0)},o(t){u(r.$$.fragment,t),h=!1},d(t){t&&(a(l),a(y)),_(r,t)}}}function uo(D){let l,x='Load an <a href="/docs/datasets/pr_8137/en/package_reference/main_classes#datasets.IterableDataset">IterableDataset</a>:',y,r,h;return r=new M({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBZHMlMjAlM0QlMjBsb2FkX2RhdGFzZXQoJ2Nvcm5lbGwtbW92aWUtcmV2aWV3LWRhdGElMkZyb3R0ZW5fdG9tYXRvZXMnJTJDJTIwc3BsaXQlM0QndHJhaW4nJTJDJTIwc3RyZWFtaW5nJTNEVHJ1ZSk=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-meta">>>> </span>ds = load_dataset(<span class="hljs-string">'cornell-movie-review-data/rotten_tomatoes'</span>, split=<span class="hljs-string">'train'</span>, streaming=<span class="hljs-literal">True</span>)`,wrap:!1}}),{c(){l=d("p"),l.innerHTML=x,y=o(),c(r.$$.fragment)},l(t){l=p(t,"P",{"data-svelte-h":!0}),$(l)!=="svelte-l8auu3"&&(l.innerHTML=x),y=i(t),g(r.$$.fragment,t)},m(t,b){s(t,l,b),s(t,y,b),m(r,t,b),h=!0},p:I,i(t){h||(f(r.$$.fragment,t),h=!0)},o(t){u(r.$$.fragment,t),h=!1},d(t){t&&(a(l),a(y)),_(r,t)}}}function _o(D){let l,x="Load an image dataset with the <code>ImageFolder</code> dataset builder:",y,r,h;return r=new M({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBZHMlMjAlM0QlMjBsb2FkX2RhdGFzZXQoJ2ltYWdlZm9sZGVyJyUyQyUyMGRhdGFfZGlyJTNEJyUyRnBhdGglMkZ0byUyRmltYWdlcyclMkMlMjBzcGxpdCUzRCd0cmFpbicp",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-meta">>>> </span>ds = load_dataset(<span class="hljs-string">'imagefolder'</span>, data_dir=<span class="hljs-string">'/path/to/images'</span>, split=<span class="hljs-string">'train'</span>)`,wrap:!1}}),{c(){l=d("p"),l.innerHTML=x,y=o(),c(r.$$.fragment)},l(t){l=p(t,"P",{"data-svelte-h":!0}),$(l)!=="svelte-9alpt2"&&(l.innerHTML=x),y=i(t),g(r.$$.fragment,t)},m(t,b){s(t,l,b),s(t,y,b),m(r,t,b),h=!0},p:I,i(t){h||(f(r.$$.fragment,t),h=!0)},o(t){u(r.$$.fragment,t),h=!1},d(t){t&&(a(l),a(y)),_(r,t)}}}function ho(D){let l,x="Example:",y,r,h;return r=new M({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9mcm9tX2Rpc2slMEFkcyUyMCUzRCUyMGxvYWRfZnJvbV9kaXNrKCdwYXRoJTJGdG8lMkZkYXRhc2V0JTJGZGlyZWN0b3J5Jyk=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_from_disk | |
| <span class="hljs-meta">>>> </span>ds = load_from_disk(<span class="hljs-string">'path/to/dataset/directory'</span>)`,wrap:!1}}),{c(){l=d("p"),l.textContent=x,y=o(),c(r.$$.fragment)},l(t){l=p(t,"P",{"data-svelte-h":!0}),$(l)!=="svelte-11lpom8"&&(l.textContent=x),y=i(t),g(r.$$.fragment,t)},m(t,b){s(t,l,b),s(t,y,b),m(r,t,b),h=!0},p:I,i(t){h||(f(r.$$.fragment,t),h=!0)},o(t){u(r.$$.fragment,t),h=!1},d(t){t&&(a(l),a(y)),_(r,t)}}}function vo(D){let l,x="Example:",y,r,h;return r=new M({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0X2J1aWxkZXIlMEFkc19idWlsZGVyJTIwJTNEJTIwbG9hZF9kYXRhc2V0X2J1aWxkZXIoJ2Nvcm5lbGwtbW92aWUtcmV2aWV3LWRhdGElMkZyb3R0ZW5fdG9tYXRvZXMnKSUwQWRzX2J1aWxkZXIuaW5mby5mZWF0dXJlcw==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset_builder | |
| <span class="hljs-meta">>>> </span>ds_builder = load_dataset_builder(<span class="hljs-string">'cornell-movie-review-data/rotten_tomatoes'</span>) | |
| <span class="hljs-meta">>>> </span>ds_builder.info.features | |
| {<span class="hljs-string">'label'</span>: ClassLabel(names=[<span class="hljs-string">'neg'</span>, <span class="hljs-string">'pos'</span>]), | |
| <span class="hljs-string">'text'</span>: Value(<span class="hljs-string">'string'</span>)}`,wrap:!1}}),{c(){l=d("p"),l.textContent=x,y=o(),c(r.$$.fragment)},l(t){l=p(t,"P",{"data-svelte-h":!0}),$(l)!=="svelte-11lpom8"&&(l.textContent=x),y=i(t),g(r.$$.fragment,t)},m(t,b){s(t,l,b),s(t,y,b),m(r,t,b),h=!0},p:I,i(t){h||(f(r.$$.fragment,t),h=!0)},o(t){u(r.$$.fragment,t),h=!1},d(t){t&&(a(l),a(y)),_(r,t)}}}function yo(D){let l,x="Example:",y,r,h;return r=new M({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwZ2V0X2RhdGFzZXRfY29uZmlnX25hbWVzJTBBZ2V0X2RhdGFzZXRfY29uZmlnX25hbWVzKCUyMm55dS1tbGwlMkZnbHVlJTIyKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> get_dataset_config_names | |
| <span class="hljs-meta">>>> </span>get_dataset_config_names(<span class="hljs-string">"nyu-mll/glue"</span>) | |
| [<span class="hljs-string">'cola'</span>, | |
| <span class="hljs-string">'sst2'</span>, | |
| <span class="hljs-string">'mrpc'</span>, | |
| <span class="hljs-string">'qqp'</span>, | |
| <span class="hljs-string">'stsb'</span>, | |
| <span class="hljs-string">'mnli'</span>, | |
| <span class="hljs-string">'mnli_mismatched'</span>, | |
| <span class="hljs-string">'mnli_matched'</span>, | |
| <span class="hljs-string">'qnli'</span>, | |
| <span class="hljs-string">'rte'</span>, | |
| <span class="hljs-string">'wnli'</span>, | |
| <span class="hljs-string">'ax'</span>]`,wrap:!1}}),{c(){l=d("p"),l.textContent=x,y=o(),c(r.$$.fragment)},l(t){l=p(t,"P",{"data-svelte-h":!0}),$(l)!=="svelte-11lpom8"&&(l.textContent=x),y=i(t),g(r.$$.fragment,t)},m(t,b){s(t,l,b),s(t,y,b),m(r,t,b),h=!0},p:I,i(t){h||(f(r.$$.fragment,t),h=!0)},o(t){u(r.$$.fragment,t),h=!1},d(t){t&&(a(l),a(y)),_(r,t)}}}function bo(D){let l,x="Example:",y,r,h;return r=new M({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwZ2V0X2RhdGFzZXRfaW5mb3MlMEFnZXRfZGF0YXNldF9pbmZvcygnY29ybmVsbC1tb3ZpZS1yZXZpZXctZGF0YSUyRnJvdHRlbl90b21hdG9lcycp",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> get_dataset_infos | |
| <span class="hljs-meta">>>> </span>get_dataset_infos(<span class="hljs-string">'cornell-movie-review-data/rotten_tomatoes'</span>) | |
| {<span class="hljs-string">'default'</span>: DatasetInfo(description=<span class="hljs-string">"Movie Review Dataset. | |
| is a dataset of containing 5,331 positive and 5,331 negative processed | |
| ences from Rotten Tomatoes movie reviews...), ...}</span>`,wrap:!1}}),{c(){l=d("p"),l.textContent=x,y=o(),c(r.$$.fragment)},l(t){l=p(t,"P",{"data-svelte-h":!0}),$(l)!=="svelte-11lpom8"&&(l.textContent=x),y=i(t),g(r.$$.fragment,t)},m(t,b){s(t,l,b),s(t,y,b),m(r,t,b),h=!0},p:I,i(t){h||(f(r.$$.fragment,t),h=!0)},o(t){u(r.$$.fragment,t),h=!1},d(t){t&&(a(l),a(y)),_(r,t)}}}function $o(D){let l,x="Example:",y,r,h;return r=new M({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwZ2V0X2RhdGFzZXRfc3BsaXRfbmFtZXMlMEFnZXRfZGF0YXNldF9zcGxpdF9uYW1lcygnY29ybmVsbC1tb3ZpZS1yZXZpZXctZGF0YSUyRnJvdHRlbl90b21hdG9lcycp",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> get_dataset_split_names | |
| <span class="hljs-meta">>>> </span>get_dataset_split_names(<span class="hljs-string">'cornell-movie-review-data/rotten_tomatoes'</span>) | |
| [<span class="hljs-string">'train'</span>, <span class="hljs-string">'validation'</span>, <span class="hljs-string">'test'</span>]`,wrap:!1}}),{c(){l=d("p"),l.textContent=x,y=o(),c(r.$$.fragment)},l(t){l=p(t,"P",{"data-svelte-h":!0}),$(l)!=="svelte-11lpom8"&&(l.textContent=x),y=i(t),g(r.$$.fragment,t)},m(t,b){s(t,l,b),s(t,y,b),m(r,t,b),h=!0},p:I,i(t){h||(f(r.$$.fragment,t),h=!0)},o(t){u(r.$$.fragment,t),h=!1},d(t){t&&(a(l),a(y)),_(r,t)}}}function xo(D){let l,x="Load a subset of columns:",y,r,h;return r=new M({props:{code:"ZHMlMjAlM0QlMjBsb2FkX2RhdGFzZXQocGFycXVldF9kYXRhc2V0X2lkJTJDJTIwY29sdW1ucyUzRCU1QiUyMmNvbF8wJTIyJTJDJTIwJTIyY29sXzElMjIlNUQp",highlighted:'<span class="hljs-meta">>>> </span>ds = load_dataset(parquet_dataset_id, columns=[<span class="hljs-string">"col_0"</span>, <span class="hljs-string">"col_1"</span>])',wrap:!1}}),{c(){l=d("p"),l.textContent=x,y=o(),c(r.$$.fragment)},l(t){l=p(t,"P",{"data-svelte-h":!0}),$(l)!=="svelte-rs9qaj"&&(l.textContent=x),y=i(t),g(r.$$.fragment,t)},m(t,b){s(t,l,b),s(t,y,b),m(r,t,b),h=!0},p:I,i(t){h||(f(r.$$.fragment,t),h=!0)},o(t){u(r.$$.fragment,t),h=!1},d(t){t&&(a(l),a(y)),_(r,t)}}}function wo(D){let l,x="Stream data and efficiently filter data, possibly skipping entire files or row groups:",y,r,h;return r=new M({props:{code:"ZmlsdGVycyUyMCUzRCUyMCU1QiglMjJjb2xfMCUyMiUyQyUyMCUyMiUzRCUzRCUyMiUyQyUyMDApJTVEJTBBZHMlMjAlM0QlMjBsb2FkX2RhdGFzZXQocGFycXVldF9kYXRhc2V0X2lkJTJDJTIwc3RyZWFtaW5nJTNEVHJ1ZSUyQyUyMGZpbHRlcnMlM0RmaWx0ZXJzKQ==",highlighted:`<span class="hljs-meta">>>> </span>filters = [(<span class="hljs-string">"col_0"</span>, <span class="hljs-string">"=="</span>, <span class="hljs-number">0</span>)] | |
| <span class="hljs-meta">>>> </span>ds = load_dataset(parquet_dataset_id, streaming=<span class="hljs-literal">True</span>, filters=filters)`,wrap:!1}}),{c(){l=d("p"),l.textContent=x,y=o(),c(r.$$.fragment)},l(t){l=p(t,"P",{"data-svelte-h":!0}),$(l)!=="svelte-e0sf1a"&&(l.textContent=x),y=i(t),g(r.$$.fragment,t)},m(t,b){s(t,l,b),s(t,y,b),m(r,t,b),h=!0},p:I,i(t){h||(f(r.$$.fragment,t),h=!0)},o(t){u(r.$$.fragment,t),h=!1},d(t){t&&(a(l),a(y)),_(r,t)}}}function No(D){let l,x="Increase the minimum request size when streaming from 32MiB (default) to 128MiB and enable prefetching:",y,r,h;return r=new M({props:{code:"aW1wb3J0JTIwcHlhcnJvdyUwQWltcG9ydCUyMHB5YXJyb3cuZGF0YXNldCUwQWZyYWdtZW50X3NjYW5fb3B0aW9ucyUyMCUzRCUyMHB5YXJyb3cuZGF0YXNldC5QYXJxdWV0RnJhZ21lbnRTY2FuT3B0aW9ucyglMEElMjAlMjAlMjAlMjBjYWNoZV9vcHRpb25zJTNEcHlhcnJvdy5DYWNoZU9wdGlvbnMoJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcHJlZmV0Y2hfbGltaXQlM0QxJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcmFuZ2Vfc2l6ZV9saW1pdCUzRDEyOCUyMCUzQyUzQyUyMDIwJTBBJTIwJTIwJTIwJTIwKSUyQyUwQSklMEFkcyUyMCUzRCUyMGxvYWRfZGF0YXNldChwYXJxdWV0X2RhdGFzZXRfaWQlMkMlMjBzdHJlYW1pbmclM0RUcnVlJTJDJTIwZnJhZ21lbnRfc2Nhbl9vcHRpb25zJTNEZnJhZ21lbnRfc2Nhbl9vcHRpb25zKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> pyarrow | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> pyarrow.dataset | |
| <span class="hljs-meta">>>> </span>fragment_scan_options = pyarrow.dataset.ParquetFragmentScanOptions( | |
| <span class="hljs-meta">... </span> cache_options=pyarrow.CacheOptions( | |
| <span class="hljs-meta">... </span> prefetch_limit=<span class="hljs-number">1</span>, | |
| <span class="hljs-meta">... </span> range_size_limit=<span class="hljs-number">128</span> << <span class="hljs-number">20</span> | |
| <span class="hljs-meta">... </span> ), | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>ds = load_dataset(parquet_dataset_id, streaming=<span class="hljs-literal">True</span>, fragment_scan_options=fragment_scan_options)`,wrap:!1}}),{c(){l=d("p"),l.textContent=x,y=o(),c(r.$$.fragment)},l(t){l=p(t,"P",{"data-svelte-h":!0}),$(l)!=="svelte-1s7mwad"&&(l.textContent=x),y=i(t),g(r.$$.fragment,t)},m(t,b){s(t,l,b),s(t,y,b),m(r,t,b),h=!0},p:I,i(t){h||(f(r.$$.fragment,t),h=!0)},o(t){u(r.$$.fragment,t),h=!1},d(t){t&&(a(l),a(y)),_(r,t)}}}function ko(D){let l,x,y,r,h,t,b,fa,he,us="Methods for listing and loading datasets:",ua,ve,_a,C,ye,Dn,It,_s="Load a dataset from the Hugging Face Hub, or a local dataset.",Tn,qt,hs='You can find the list of datasets on the <a href="https://huggingface.co/datasets" rel="nofollow">Hub</a> or with <code>huggingface_hub.list_datasets</code>.',On,Zt,vs=`A dataset is a directory that contains some data files in generic formats (JSON, CSV, Parquet, etc.) and possibly | |
| in a generic structure (Webdataset, ImageFolder, AudioFolder, VideoFolder, etc.)`,Fn,Rt,ys="This function does the following under the hood:",Un,Vt,bs=`<li><p>Load a dataset builder:</p> <ul><li>Find the most common data format in the dataset and pick its associated builder (JSON, CSV, Parquet, Webdataset, ImageFolder, AudioFolder, etc.)</li> <li>Find which file goes into which split (e.g. train/test) based on file and directory names or on the YAML configuration</li> <li>It is also possible to specify <code>data_files</code> manually, and which dataset builder to use (e.g. “parquet”).</li></ul></li> <li><p>Run the dataset builder:</p> <p>In the general case:</p> <ul><li><p>Download the data files from the dataset if they are not already available locally or cached.</p></li> <li><p>Process and cache the dataset in typed Arrow tables for caching.</p> <p>Arrow table are arbitrarily long, typed tables which can store nested objects and be mapped to numpy/pandas/python generic types. | |
| They can be directly accessed from disk, loaded in RAM or even streamed over the web.</p></li></ul> <p>In the streaming case:</p> <ul><li>Don’t download or cache anything. Instead, the dataset is lazily loaded and will be streamed on-the-fly when iterating on it.</li></ul></li> <li><p>Return a dataset built from the requested splits in <code>split</code> (default: all).</p></li>`,jn,Gt,$s="Example:",Mn,se,Jn,oe,In,ie,qn,le,Zn,re,ha,Z,be,Rn,Xt,xs=`Loads a dataset that was previously saved using <a href="/docs/datasets/pr_8137/en/package_reference/main_classes#datasets.Dataset.save_to_disk">save_to_disk()</a> from a dataset directory, or | |
| from a filesystem using any implementation of <code>fsspec.spec.AbstractFileSystem</code>.`,Vn,de,va,O,$e,Gn,St,ws="Load a dataset builder which can be used to:",Xn,zt,Ns="<li>Inspect general information that is required to build a dataset (cache directory, config, dataset info, features, data files, etc.)</li> <li>Download and prepare the dataset as Arrow files in the cache</li> <li>Get a streaming dataset without downloading or caching anything</li>",Sn,Et,ks='You can find the list of datasets on the <a href="https://huggingface.co/datasets" rel="nofollow">Hub</a> or with <code>huggingface_hub.list_datasets</code>.',zn,Pt,Cs=`A dataset is a directory that contains some data files in generic formats (JSON, CSV, Parquet, etc.) and possibly | |
| in a generic structure (Webdataset, ImageFolder, AudioFolder, VideoFolder, etc.)`,En,pe,ya,R,xe,Pn,Wt,Ds="Get the list of available config names for a particular dataset.",Wn,ce,ba,V,we,Hn,Ht,Ts="Get the meta information about a dataset, returned as a dict mapping config name to DatasetInfoDict.",Ln,ge,$a,G,Ne,Bn,Lt,Os="Get the list of available splits for a particular config and dataset.",Yn,me,xa,ke,wa,Ce,Fs=`Configurations used to load data files. | |
| They are used when loading local files or a dataset repository:`,Na,De,Us="<li>local files: <code>load_dataset("parquet", data_dir="path/to/data/dir")</code></li> <li>dataset repository: <code>load_dataset("allenai/c4")</code></li>",ka,Te,js=`You can pass arguments to <code>load_dataset</code> to configure data loading. | |
| For example you can specify the <code>sep</code> parameter to define the <a href="/docs/datasets/pr_8137/en/package_reference/loading_methods#datasets.packaged_modules.csv.CsvConfig">CsvConfig</a> that is used to load the data:`,Ca,Oe,Da,Fe,Ta,X,Ue,An,Bt,Ms="BuilderConfig for text files.",Oa,je,Me,Fa,Je,Ua,S,Ie,Qn,Yt,Js="BuilderConfig for CSV.",ja,qe,Ze,Ma,Re,Ja,z,Ve,Kn,At,Is="BuilderConfig for JSON.",Ia,Ge,Xe,qa,Se,Za,E,ze,es,Qt,qs="BuilderConfig for xml files.",Ra,Ee,Pe,Va,We,Ga,F,He,ts,Kt,Zs="BuilderConfig for Parquet.",as,ea,Rs="Example:",ns,fe,ss,ue,os,_e,Xa,Le,Be,Sa,Ye,za,P,Ae,is,ta,Vs="BuilderConfig for Arrow.",Ea,Qe,Ke,Pa,et,Wa,W,tt,ls,aa,Gs="BuilderConfig for SQL.",Ha,at,nt,La,st,Ba,H,ot,rs,na,Xs="BuilderConfig for ImageFolder.",Ya,it,lt,Aa,rt,Qa,L,dt,ds,sa,Ss="Builder Config for AudioFolder.",Ka,pt,ct,en,gt,tn,B,mt,ps,oa,zs="BuilderConfig for ImageFolder.",an,ft,ut,nn,_t,sn,Y,ht,cs,ia,Es="BuilderConfig for HDF5.",on,A,vt,gs,la,Ps="ArrowBasedBuilder that converts HDF5 files to Arrow tables using the HF extension types.",ln,yt,rn,Q,bt,ms,ra,Ws="BuilderConfig for ImageFolder.",dn,$t,xt,pn,wt,cn,K,Nt,fs,da,Hs="BuilderConfig for NiftiFolder.",gn,kt,Ct,mn,Dt,fn,Tt,Ot,un,Ft,_n,pa,hn;return h=new po({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),b=new U({props:{title:"Loading methods",local:"loading-methods",headingTag:"h1"}}),ve=new U({props:{title:"Datasets",local:"datasets.load_dataset",headingTag:"h2"}}),ye=new k({props:{name:"datasets.load_dataset",anchor:"datasets.load_dataset",parameters:[{name:"path",val:": str"},{name:"name",val:": typing.Optional[str] = None"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[str, collections.abc.Sequence[str], collections.abc.Mapping[str, typing.Union[str, collections.abc.Sequence[str]]], NoneType] = None"},{name:"split",val:": typing.Union[str, datasets.splits.Split, list[str], list[datasets.splits.Split], 
NoneType] = None"},{name:"cache_dir",val:": typing.Optional[str] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"download_config",val:": typing.Optional[datasets.download.download_config.DownloadConfig] = None"},{name:"download_mode",val:": typing.Union[datasets.download.download_manager.DownloadMode, str, NoneType] = None"},{name:"verification_mode",val:": typing.Union[datasets.utils.info_utils.VerificationMode, str, NoneType] = None"},{name:"keep_in_memory",val:": typing.Optional[bool] = None"},{name:"save_infos",val:": bool = False"},{name:"revision",val:": typing.Union[datasets.utils.version.Version, str, NoneType] = None"},{name:"token",val:": typing.Union[bool, str, NoneType] = None"},{name:"streaming",val:": bool = False"},{name:"num_proc",val:": typing.Optional[int] = None"},{name:"storage_options",val:": typing.Optional[dict] = None"},{name:"**config_kwargs",val:""}],parametersDescription:[{anchor:"datasets.load_dataset.path",description:`<strong>path</strong> (<code>str</code>) — | |
| Path or name of the dataset.</p> | |
| <ul> | |
| <li> | |
| <p>if <code>path</code> is a dataset repository on the HF hub (list all available datasets with <code>huggingface_hub.list_datasets</code>) | |
| -> load the dataset from supported files in the repository (csv, json, parquet, etc.) | |
| e.g. <code>'username/dataset_name'</code>, a dataset repository on the HF hub containing the data files.</p> | |
| </li> | |
| <li> | |
| <p>if <code>path</code> is a directory within a Storage Bucket on the HF Hub (list your buckets with <code>huggingface_hub.list_buckets</code>) | |
| -> load the dataset from supported files in the directory (csv, json, parquet, etc.) | |
| e.g. <code>'buckets/username/bucket_name/my_dataset'</code>.</p> | |
| </li> | |
| <li> | |
| <p>if <code>path</code> is a local directory | |
| -> load the dataset from supported files in the directory (csv, json, parquet, etc.) | |
| e.g. <code>'./path/to/directory/with/my/csv/data'</code>.</p> | |
| </li> | |
| <li> | |
| <p>if <code>path</code> is the name of a dataset builder and <code>data_files</code> or <code>data_dir</code> is specified | |
| (available builders are “json”, “csv”, “parquet”, “arrow”, “text”, “xml”, “webdataset”, “imagefolder”, “audiofolder”, “videofolder”) | |
| -> load the dataset from the files in <code>data_files</code> or <code>data_dir</code> | |
| e.g. <code>'parquet'</code>.</p> | |
| </li> | |
| </ul> | |
| <p>Use a <code>hf://</code> path like <code>'hf://datasets/username/dataset_name'</code> to allow remote only. | |
| Use an absolute path to allow local only.`,name:"path"},{anchor:"datasets.load_dataset.name",description:`<strong>name</strong> (<code>str</code>, <em>optional</em>) — | |
| Defining the name of the dataset configuration.`,name:"name"},{anchor:"datasets.load_dataset.data_dir",description:`<strong>data_dir</strong> (<code>str</code>, <em>optional</em>) — | |
| Defining the <code>data_dir</code> of the dataset configuration. If specified for the generic builders (csv, text etc.) or the Hub datasets and <code>data_files</code> is <code>None</code>, | |
| the behavior is equal to passing <code>os.path.join(data_dir, **)</code> as <code>data_files</code> to reference all the files in a directory.`,name:"data_dir"},{anchor:"datasets.load_dataset.data_files",description:`<strong>data_files</strong> (<code>str</code> or <code>Sequence</code> or <code>Mapping</code>, <em>optional</em>) — | |
| Path(s) to source data file(s).`,name:"data_files"},{anchor:"datasets.load_dataset.split",description:`<strong>split</strong> (<code>Split</code> or <code>str</code>) — | |
| Which split of the data to load. | |
| If <code>None</code>, will return a <code>dict</code> with all splits (typically <code>datasets.Split.TRAIN</code> and <code>datasets.Split.TEST</code>). | |
| If given, will return a single Dataset. | |
| Splits can be combined and specified like in tensorflow-datasets.`,name:"split"},{anchor:"datasets.load_dataset.cache_dir",description:`<strong>cache_dir</strong> (<code>str</code>, <em>optional</em>) — | |
| Directory to read/write data. Defaults to <code>"~/.cache/huggingface/datasets"</code>.`,name:"cache_dir"},{anchor:"datasets.load_dataset.features",description:`<strong>features</strong> (<code>Features</code>, <em>optional</em>) — | |
| Set the features type to use for this dataset.`,name:"features"},{anchor:"datasets.load_dataset.download_config",description:`<strong>download_config</strong> (<a href="/docs/datasets/pr_8137/en/package_reference/builder_classes#datasets.DownloadConfig">DownloadConfig</a>, <em>optional</em>) — | |
| Specific download configuration parameters.`,name:"download_config"},{anchor:"datasets.load_dataset.download_mode",description:`<strong>download_mode</strong> (<a href="/docs/datasets/pr_8137/en/package_reference/builder_classes#datasets.DownloadMode">DownloadMode</a> or <code>str</code>, defaults to <code>REUSE_DATASET_IF_EXISTS</code>) — | |
| Download/generate mode.`,name:"download_mode"},{anchor:"datasets.load_dataset.verification_mode",description:`<strong>verification_mode</strong> (<a href="/docs/datasets/pr_8137/en/package_reference/builder_classes#datasets.VerificationMode">VerificationMode</a> or <code>str</code>, defaults to <code>BASIC_CHECKS</code>) — | |
| Verification mode determining the checks to run on the downloaded/processed dataset information (checksums/size/splits/…).</p> | |
| <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"> | |
| <p class="font-medium">Added in 2.9.1</p> | |
| </div>`,name:"verification_mode"},{anchor:"datasets.load_dataset.keep_in_memory",description:`<strong>keep_in_memory</strong> (<code>bool</code>, defaults to <code>None</code>) — | |
| Whether to copy the dataset in-memory. If <code>None</code>, the dataset | |
| will not be copied in-memory unless explicitly enabled by setting <code>datasets.config.IN_MEMORY_MAX_SIZE</code> to | |
| nonzero. See more details in the <a href="../cache#improve-performance">improve performance</a> section.`,name:"keep_in_memory"},{anchor:"datasets.load_dataset.revision",description:`<strong>revision</strong> (<a href="/docs/datasets/pr_8137/en/package_reference/builder_classes#datasets.Version">Version</a> or <code>str</code>, <em>optional</em>) — | |
| Version of the dataset to load. | |
| As datasets have their own git repository on the Datasets Hub, the default version “main” corresponds to their “main” branch. | |
| You can specify a different version than the default “main” by using a commit SHA or a git tag of the dataset repository.`,name:"revision"},{anchor:"datasets.load_dataset.token",description:`<strong>token</strong> (<code>str</code> or <code>bool</code>, <em>optional</em>) — | |
| Optional string or boolean to use as Bearer token for remote files on the Datasets Hub. | |
| If <code>True</code>, or not specified, will get token from <code>"~/.huggingface"</code>.`,name:"token"},{anchor:"datasets.load_dataset.streaming",description:`<strong>streaming</strong> (<code>bool</code>, defaults to <code>False</code>) — | |
| If set to <code>True</code>, don’t download the data files. Instead, it streams the data progressively while | |
| iterating on the dataset. An <a href="/docs/datasets/pr_8137/en/package_reference/main_classes#datasets.IterableDataset">IterableDataset</a> or <a href="/docs/datasets/pr_8137/en/package_reference/main_classes#datasets.IterableDatasetDict">IterableDatasetDict</a> is returned instead in this case.</p> | |
| <p>Note that streaming works for datasets that use data formats that support being iterated over like txt, csv, jsonl for example. | |
| Json files may be downloaded completely. Also streaming from remote zip or gzip files is supported but other compressed formats | |
| like rar and xz are not yet supported. The tgz format doesn’t allow streaming.`,name:"streaming"},{anchor:"datasets.load_dataset.num_proc",description:`<strong>num_proc</strong> (<code>int</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| Number of processes when downloading and generating the dataset locally. | |
| Multiprocessing is disabled by default.</p> | |
| <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"> | |
| <p class="font-medium">Added in 2.7.0</p> | |
| </div>`,name:"num_proc"},{anchor:"datasets.load_dataset.storage_options",description:`<strong>storage_options</strong> (<code>dict</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| <strong>Experimental</strong>. Key/value pairs to be passed on to the dataset file-system backend, if any.</p> | |
| <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"> | |
| <p class="font-medium">Added in 2.11.0</p> | |
| </div>`,name:"storage_options"},{anchor:"datasets.load_dataset.*config_kwargs",description:`*<strong>*config_kwargs</strong> (additional keyword arguments) — | |
| Keyword arguments to be passed to the <code>BuilderConfig</code> | |
| and used in the <a href="/docs/datasets/pr_8137/en/package_reference/builder_classes#datasets.DatasetBuilder">DatasetBuilder</a>.`,name:"*config_kwargs"}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/load.py#L1467",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <ul> | |
| <li>if <code>split</code> is not <code>None</code>: the dataset requested,</li> | |
| <li>if <code>split</code> is <code>None</code>, a <a | |
| href="/docs/datasets/pr_8137/en/package_reference/main_classes#datasets.DatasetDict" | |
| >DatasetDict</a> with each split.</li> | |
| </ul> | |
| <p>or <a | |
| href="/docs/datasets/pr_8137/en/package_reference/main_classes#datasets.IterableDataset" | |
| >IterableDataset</a> or <a | |
| href="/docs/datasets/pr_8137/en/package_reference/main_classes#datasets.IterableDatasetDict" | |
| >IterableDatasetDict</a>: if <code>streaming=True</code></p> | |
| <ul> | |
| <li>if <code>split</code> is not <code>None</code>, the dataset requested</li> | |
| <li>if <code>split</code> is <code>None</code>, a <code>~datasets.streaming.IterableDatasetDict</code> with each split.</li> | |
| </ul> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/datasets/pr_8137/en/package_reference/main_classes#datasets.Dataset" | |
| >Dataset</a> or <a | |
| href="/docs/datasets/pr_8137/en/package_reference/main_classes#datasets.DatasetDict" | |
| >DatasetDict</a></p> | |
| `}}),se=new J({props:{anchor:"datasets.load_dataset.example",$$slots:{default:[go]},$$scope:{ctx:D}}}),oe=new J({props:{anchor:"datasets.load_dataset.example-2",$$slots:{default:[mo]},$$scope:{ctx:D}}}),ie=new J({props:{anchor:"datasets.load_dataset.example-3",$$slots:{default:[fo]},$$scope:{ctx:D}}}),le=new J({props:{anchor:"datasets.load_dataset.example-4",$$slots:{default:[uo]},$$scope:{ctx:D}}}),re=new J({props:{anchor:"datasets.load_dataset.example-5",$$slots:{default:[_o]},$$scope:{ctx:D}}}),be=new k({props:{name:"datasets.load_from_disk",anchor:"datasets.load_from_disk",parameters:[{name:"dataset_path",val:": typing.Union[str, bytes, os.PathLike]"},{name:"keep_in_memory",val:": typing.Optional[bool] = None"},{name:"storage_options",val:": typing.Optional[dict] = None"}],parametersDescription:[{anchor:"datasets.load_from_disk.dataset_path",description:`<strong>dataset_path</strong> (<code>path-like</code>) — | |
| Path (e.g. <code>"dataset/train"</code>) or remote URI (e.g. <code>"s3://my-bucket/dataset/train"</code>) | |
| of the <a href="/docs/datasets/pr_8137/en/package_reference/main_classes#datasets.Dataset">Dataset</a> or <a href="/docs/datasets/pr_8137/en/package_reference/main_classes#datasets.DatasetDict">DatasetDict</a> directory where the dataset/dataset-dict will be | |
| loaded from.`,name:"dataset_path"},{anchor:"datasets.load_from_disk.keep_in_memory",description:`<strong>keep_in_memory</strong> (<code>bool</code>, defaults to <code>None</code>) — | |
| Whether to copy the dataset in-memory. If <code>None</code>, the dataset | |
| will not be copied in-memory unless explicitly enabled by setting <code>datasets.config.IN_MEMORY_MAX_SIZE</code> to | |
| nonzero. See more details in the <a href="../cache#improve-performance">improve performance</a> section.`,name:"keep_in_memory"},{anchor:"datasets.load_from_disk.storage_options",description:`<strong>storage_options</strong> (<code>dict</code>, <em>optional</em>) — | |
| Key/value pairs to be passed on to the file-system backend, if any.</p> | |
| <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"> | |
| <p class="font-medium">Added in 2.9.0</p> | |
| </div>`,name:"storage_options"}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/load.py#L1725",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <ul> | |
| <li>If <code>dataset_path</code> is a path of a dataset directory: the dataset requested.</li> | |
| <li>If <code>dataset_path</code> is a path of a dataset dict directory, a <a | |
| href="/docs/datasets/pr_8137/en/package_reference/main_classes#datasets.DatasetDict" | |
| >DatasetDict</a> with each split.</li> | |
| </ul> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/datasets/pr_8137/en/package_reference/main_classes#datasets.Dataset" | |
| >Dataset</a> or <a | |
| href="/docs/datasets/pr_8137/en/package_reference/main_classes#datasets.DatasetDict" | |
| >DatasetDict</a></p> | |
| `}}),de=new J({props:{anchor:"datasets.load_from_disk.example",$$slots:{default:[ho]},$$scope:{ctx:D}}}),$e=new k({props:{name:"datasets.load_dataset_builder",anchor:"datasets.load_dataset_builder",parameters:[{name:"path",val:": str"},{name:"name",val:": typing.Optional[str] = None"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[str, collections.abc.Sequence[str], collections.abc.Mapping[str, typing.Union[str, collections.abc.Sequence[str]]], NoneType] = None"},{name:"cache_dir",val:": typing.Optional[str] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"download_config",val:": typing.Optional[datasets.download.download_config.DownloadConfig] = None"},{name:"download_mode",val:": typing.Union[datasets.download.download_manager.DownloadMode, str, NoneType] = None"},{name:"revision",val:": typing.Union[datasets.utils.version.Version, str, NoneType] = None"},{name:"token",val:": typing.Union[bool, str, NoneType] = None"},{name:"storage_options",val:": typing.Optional[dict] = None"},{name:"**config_kwargs",val:""}],parametersDescription:[{anchor:"datasets.load_dataset_builder.path",description:`<strong>path</strong> (<code>str</code>) — | |
| Path or name of the dataset.</p> | |
| <ul> | |
| <li> | |
| <p>if <code>path</code> is a dataset repository on the HF hub (list all available datasets with <code>huggingface_hub.list_datasets</code>) | |
| -> load the dataset builder from supported files in the repository (csv, json, parquet, etc.) | |
| e.g. <code>'username/dataset_name'</code>, a dataset repository on the HF hub containing the data files.</p> | |
| </li> | |
| <li> | |
| <p>if <code>path</code> is a directory within a Storage Bucket on the HF Hub (list your buckets with <code>huggingface_hub.list_buckets</code>) | |
| -> load the dataset from supported files in the directory (csv, json, parquet, etc.) | |
| e.g. <code>'buckets/username/bucket_name/my_dataset'</code>.</p> | |
| </li> | |
| <li> | |
| <p>if <code>path</code> is a local directory | |
| -> load the dataset builder from supported files in the directory (csv, json, parquet, etc.) | |
| e.g. <code>'./path/to/directory/with/my/csv/data'</code>.</p> | |
| </li> | |
| <li> | |
| <p>if <code>path</code> is the name of a dataset builder and <code>data_files</code> or <code>data_dir</code> is specified | |
| (available builders are “json”, “csv”, “parquet”, “arrow”, “text”, “xml”, “webdataset”, “imagefolder”, “audiofolder”, “videofolder”) | |
| -> load the dataset builder from the files in <code>data_files</code> or <code>data_dir</code> | |
| e.g. <code>'parquet'</code>.</p> | |
| </li> | |
| </ul> | |
| <p>Use a <code>hf://</code> path like <code>'hf://datasets/username/dataset_name'</code> to allow remote only. | |
| Use an absolute path to allow local only.`,name:"path"},{anchor:"datasets.load_dataset_builder.name",description:`<strong>name</strong> (<code>str</code>, <em>optional</em>) — | |
| Defining the name of the dataset configuration.`,name:"name"},{anchor:"datasets.load_dataset_builder.data_dir",description:`<strong>data_dir</strong> (<code>str</code>, <em>optional</em>) — | |
| Defining the <code>data_dir</code> of the dataset configuration. If specified for the generic builders (csv, text etc.) or the Hub datasets and <code>data_files</code> is <code>None</code>, | |
| the behavior is equal to passing <code>os.path.join(data_dir, **)</code> as <code>data_files</code> to reference all the files in a directory.`,name:"data_dir"},{anchor:"datasets.load_dataset_builder.data_files",description:`<strong>data_files</strong> (<code>str</code> or <code>Sequence</code> or <code>Mapping</code>, <em>optional</em>) — | |
| Path(s) to source data file(s).`,name:"data_files"},{anchor:"datasets.load_dataset_builder.cache_dir",description:`<strong>cache_dir</strong> (<code>str</code>, <em>optional</em>) — | |
| Directory to read/write data. Defaults to <code>"~/.cache/huggingface/datasets"</code>.`,name:"cache_dir"},{anchor:"datasets.load_dataset_builder.features",description:`<strong>features</strong> (<a href="/docs/datasets/pr_8137/en/package_reference/main_classes#datasets.Features">Features</a>, <em>optional</em>) — | |
| Set the features type to use for this dataset.`,name:"features"},{anchor:"datasets.load_dataset_builder.download_config",description:`<strong>download_config</strong> (<a href="/docs/datasets/pr_8137/en/package_reference/builder_classes#datasets.DownloadConfig">DownloadConfig</a>, <em>optional</em>) — | |
| Specific download configuration parameters.`,name:"download_config"},{anchor:"datasets.load_dataset_builder.download_mode",description:`<strong>download_mode</strong> (<a href="/docs/datasets/pr_8137/en/package_reference/builder_classes#datasets.DownloadMode">DownloadMode</a> or <code>str</code>, defaults to <code>REUSE_DATASET_IF_EXISTS</code>) — | |
| Download/generate mode.`,name:"download_mode"},{anchor:"datasets.load_dataset_builder.revision",description:`<strong>revision</strong> (<a href="/docs/datasets/pr_8137/en/package_reference/builder_classes#datasets.Version">Version</a> or <code>str</code>, <em>optional</em>) — | |
| Version of the dataset to load. | |
| As datasets have their own git repository on the Datasets Hub, the default version “main” corresponds to their “main” branch. | |
| You can specify a different version than the default “main” by using a commit SHA or a git tag of the dataset repository.`,name:"revision"},{anchor:"datasets.load_dataset_builder.token",description:`<strong>token</strong> (<code>str</code> or <code>bool</code>, <em>optional</em>) — | |
| Optional string or boolean to use as Bearer token for remote files on the Datasets Hub. | |
| If <code>True</code>, or not specified, will get token from <code>"~/.huggingface"</code>.`,name:"token"},{anchor:"datasets.load_dataset_builder.storage_options",description:`<strong>storage_options</strong> (<code>dict</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| <strong>Experimental</strong>. Key/value pairs to be passed on to the dataset file-system backend, if any.</p> | |
| <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"> | |
| <p class="font-medium">Added in 2.11.0</p> | |
| </div>`,name:"storage_options"},{anchor:"datasets.load_dataset_builder.*config_kwargs",description:`*<strong>*config_kwargs</strong> (additional keyword arguments) — | |
| Keyword arguments to be passed to the <a href="/docs/datasets/pr_8137/en/package_reference/builder_classes#datasets.BuilderConfig">BuilderConfig</a> | |
| and used in the <a href="/docs/datasets/pr_8137/en/package_reference/builder_classes#datasets.DatasetBuilder">DatasetBuilder</a>.`,name:"*config_kwargs"}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/load.py#L1212",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/datasets/pr_8137/en/package_reference/builder_classes#datasets.DatasetBuilder" | |
| >DatasetBuilder</a></p> | |
| `}}),pe=new J({props:{anchor:"datasets.load_dataset_builder.example",$$slots:{default:[vo]},$$scope:{ctx:D}}}),xe=new k({props:{name:"datasets.get_dataset_config_names",anchor:"datasets.get_dataset_config_names",parameters:[{name:"path",val:": str"},{name:"revision",val:": typing.Union[datasets.utils.version.Version, str, NoneType] = None"},{name:"download_config",val:": typing.Optional[datasets.download.download_config.DownloadConfig] = None"},{name:"download_mode",val:": typing.Union[datasets.download.download_manager.DownloadMode, str, NoneType] = None"},{name:"data_files",val:": typing.Union[str, list, dict, NoneType] = None"},{name:"**download_kwargs",val:""}],parametersDescription:[{anchor:"datasets.get_dataset_config_names.path",description:`<strong>path</strong> (<code>str</code>) — path to the dataset repository. Can be either:</p> | |
| <ul> | |
| <li>a local path to the dataset directory containing the data files, | |
| e.g. <code>'./dataset/squad'</code></li> | |
| <li>a dataset identifier on the Hugging Face Hub (list all available datasets and ids with <code>huggingface_hub.list_datasets</code>), | |
| e.g. <code>'rajpurkar/squad'</code>, <code>'nyu-mll/glue'</code> or <code>'openai/webtext'</code></li> | |
| </ul>`,name:"path"},{anchor:"datasets.get_dataset_config_names.revision",description:`<strong>revision</strong> (<code>Union[str, datasets.Version]</code>, <em>optional</em>) — | |
| If specified, the dataset module will be loaded from the datasets repository at this version. | |
| By default:</p> | |
| <ul> | |
| <li>it is set to the local version of the lib.</li> | |
| <li>it will also try to load it from the main branch if it’s not available at the local version of the lib. | |
| Specifying a version that is different from your local version of the lib might cause compatibility issues.</li> | |
| </ul>`,name:"revision"},{anchor:"datasets.get_dataset_config_names.download_config",description:`<strong>download_config</strong> (<a href="/docs/datasets/pr_8137/en/package_reference/builder_classes#datasets.DownloadConfig">DownloadConfig</a>, <em>optional</em>) — | |
| Specific download configuration parameters.`,name:"download_config"},{anchor:"datasets.get_dataset_config_names.download_mode",description:`<strong>download_mode</strong> (<a href="/docs/datasets/pr_8137/en/package_reference/builder_classes#datasets.DownloadMode">DownloadMode</a> or <code>str</code>, defaults to <code>REUSE_DATASET_IF_EXISTS</code>) — | |
| Download/generate mode.`,name:"download_mode"},{anchor:"datasets.get_dataset_config_names.data_files",description:`<strong>data_files</strong> (<code>Union[Dict, List, str]</code>, <em>optional</em>) — | |
| Defining the data_files of the dataset configuration.`,name:"data_files"},{anchor:"datasets.get_dataset_config_names.*download_kwargs",description:`*<strong>*download_kwargs</strong> (additional keyword arguments) — | |
| Optional attributes for <a href="/docs/datasets/pr_8137/en/package_reference/builder_classes#datasets.DownloadConfig">DownloadConfig</a> which will override the attributes in <code>download_config</code> if supplied, | |
| for example <code>token</code>.`,name:"*download_kwargs"}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/inspect.py#L109"}}),ce=new J({props:{anchor:"datasets.get_dataset_config_names.example",$$slots:{default:[yo]},$$scope:{ctx:D}}}),we=new k({props:{name:"datasets.get_dataset_infos",anchor:"datasets.get_dataset_infos",parameters:[{name:"path",val:": str"},{name:"data_files",val:": typing.Union[str, list, dict, NoneType] = None"},{name:"download_config",val:": typing.Optional[datasets.download.download_config.DownloadConfig] = None"},{name:"download_mode",val:": typing.Union[datasets.download.download_manager.DownloadMode, str, NoneType] = None"},{name:"revision",val:": typing.Union[datasets.utils.version.Version, str, NoneType] = None"},{name:"token",val:": typing.Union[bool, str, NoneType] = None"},{name:"**config_kwargs",val:""}],parametersDescription:[{anchor:"datasets.get_dataset_infos.path",description:`<strong>path</strong> (<code>str</code>) — path to the dataset repository. Can be either:</p> | |
| <ul> | |
| <li>a local path to the dataset directory containing the data files, | |
| e.g. <code>'./dataset/squad'</code></li> | |
| <li>a dataset identifier on the Hugging Face Hub (list all available datasets and ids with <code>huggingface_hub.list_datasets</code>), | |
| e.g. <code>'rajpurkar/squad'</code>, <code>'nyu-mll/glue'</code> or <code>'openai/webtext'</code></li> | |
| </ul>`,name:"path"},{anchor:"datasets.get_dataset_infos.revision",description:`<strong>revision</strong> (<code>Union[str, datasets.Version]</code>, <em>optional</em>) — | |
| If specified, the dataset module will be loaded from the datasets repository at this version. | |
| By default:</p> | |
| <ul> | |
| <li>it is set to the local version of the lib.</li> | |
| <li>it will also try to load it from the main branch if it’s not available at the local version of the lib. | |
| Specifying a version that is different from your local version of the lib might cause compatibility issues.</li> | |
| </ul>`,name:"revision"},{anchor:"datasets.get_dataset_infos.download_config",description:`<strong>download_config</strong> (<a href="/docs/datasets/pr_8137/en/package_reference/builder_classes#datasets.DownloadConfig">DownloadConfig</a>, <em>optional</em>) — | |
| Specific download configuration parameters.`,name:"download_config"},{anchor:"datasets.get_dataset_infos.download_mode",description:`<strong>download_mode</strong> (<a href="/docs/datasets/pr_8137/en/package_reference/builder_classes#datasets.DownloadMode">DownloadMode</a> or <code>str</code>, defaults to <code>REUSE_DATASET_IF_EXISTS</code>) — | |
| Download/generate mode.`,name:"download_mode"},{anchor:"datasets.get_dataset_infos.data_files",description:`<strong>data_files</strong> (<code>Union[Dict, List, str]</code>, <em>optional</em>) — | |
| Defining the data_files of the dataset configuration.`,name:"data_files"},{anchor:"datasets.get_dataset_infos.token",description:`<strong>token</strong> (<code>str</code> or <code>bool</code>, <em>optional</em>) — | |
| Optional string or boolean to use as Bearer token for remote files on the Datasets Hub. | |
| If <code>True</code>, or not specified, will get token from <code>"~/.huggingface"</code>.`,name:"token"},{anchor:"datasets.get_dataset_infos.*config_kwargs",description:`*<strong>*config_kwargs</strong> (additional keyword arguments) — | |
| Optional attributes for builder class which will override the attributes if supplied.`,name:"*config_kwargs"}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/inspect.py#L42"}}),ge=new J({props:{anchor:"datasets.get_dataset_infos.example",$$slots:{default:[bo]},$$scope:{ctx:D}}}),Ne=new k({props:{name:"datasets.get_dataset_split_names",anchor:"datasets.get_dataset_split_names",parameters:[{name:"path",val:": str"},{name:"config_name",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[str, collections.abc.Sequence[str], collections.abc.Mapping[str, typing.Union[str, collections.abc.Sequence[str]]], NoneType] = None"},{name:"download_config",val:": typing.Optional[datasets.download.download_config.DownloadConfig] = None"},{name:"download_mode",val:": typing.Union[datasets.download.download_manager.DownloadMode, str, NoneType] = None"},{name:"revision",val:": typing.Union[datasets.utils.version.Version, str, NoneType] = None"},{name:"token",val:": typing.Union[bool, str, NoneType] = None"},{name:"**config_kwargs",val:""}],parametersDescription:[{anchor:"datasets.get_dataset_split_names.path",description:`<strong>path</strong> (<code>str</code>) — path to the dataset repository. Can be either:</p> | |
| <ul> | |
| <li>a local path to the dataset directory containing the data files, | |
| e.g. <code>'./dataset/squad'</code></li> | |
| <li>a dataset identifier on the Hugging Face Hub (list all available datasets and ids with <code>huggingface_hub.list_datasets</code>), | |
| e.g. <code>'rajpurkar/squad'</code>, <code>'nyu-mll/glue'</code> or <code>'openai/webtext'</code></li> | |
| </ul>`,name:"path"},{anchor:"datasets.get_dataset_split_names.config_name",description:`<strong>config_name</strong> (<code>str</code>, <em>optional</em>) — | |
| Defining the name of the dataset configuration.`,name:"config_name"},{anchor:"datasets.get_dataset_split_names.data_files",description:`<strong>data_files</strong> (<code>str</code> or <code>Sequence</code> or <code>Mapping</code>, <em>optional</em>) — | |
| Path(s) to source data file(s).`,name:"data_files"},{anchor:"datasets.get_dataset_split_names.download_config",description:`<strong>download_config</strong> (<a href="/docs/datasets/pr_8137/en/package_reference/builder_classes#datasets.DownloadConfig">DownloadConfig</a>, <em>optional</em>) — | |
| Specific download configuration parameters.`,name:"download_config"},{anchor:"datasets.get_dataset_split_names.download_mode",description:`<strong>download_mode</strong> (<a href="/docs/datasets/pr_8137/en/package_reference/builder_classes#datasets.DownloadMode">DownloadMode</a> or <code>str</code>, defaults to <code>REUSE_DATASET_IF_EXISTS</code>) — | |
| Download/generate mode.`,name:"download_mode"},{anchor:"datasets.get_dataset_split_names.revision",description:`<strong>revision</strong> (<a href="/docs/datasets/pr_8137/en/package_reference/builder_classes#datasets.Version">Version</a> or <code>str</code>, <em>optional</em>) — | |
| Version of the dataset to load. | |
| As datasets have their own git repository on the Datasets Hub, the default version “main” corresponds to their “main” branch. | |
| You can specify a different version than the default “main” by using a commit SHA or a git tag of the dataset repository.`,name:"revision"},{anchor:"datasets.get_dataset_split_names.token",description:`<strong>token</strong> (<code>str</code> or <code>bool</code>, <em>optional</em>) — | |
| Optional string or boolean to use as Bearer token for remote files on the Datasets Hub. | |
| If <code>True</code>, or not specified, will get token from <code>"~/.huggingface"</code>.`,name:"token"},{anchor:"datasets.get_dataset_split_names.*config_kwargs",description:`*<strong>*config_kwargs</strong> (additional keyword arguments) — | |
| Optional attributes for builder class which will override the attributes if supplied.`,name:"*config_kwargs"}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/inspect.py#L295"}}),me=new J({props:{anchor:"datasets.get_dataset_split_names.example",$$slots:{default:[$o]},$$scope:{ctx:D}}}),ke=new U({props:{title:"From files",local:"from-files",headingTag:"h2"}}),Oe=new M({props:{code:"bG9hZF9kYXRhc2V0KCUyMmNzdiUyMiUyQyUyMGRhdGFfZGlyJTNEJTIycGF0aCUyRnRvJTJGZGF0YSUyRmRpciUyMiUyQyUyMHNlcCUzRCUyMiU1Q3QlMjIp",highlighted:'load_dataset(<span class="hljs-string">"csv"</span>, data_dir=<span class="hljs-string">"path/to/data/dir"</span>, sep=<span class="hljs-string">"\\t"</span>)',wrap:!1}}),Fe=new U({props:{title:"Text",local:"datasets.packaged_modules.text.TextConfig",headingTag:"h3"}}),Ue=new k({props:{name:"class datasets.packaged_modules.text.TextConfig",anchor:"datasets.packaged_modules.text.TextConfig",parameters:[{name:"name",val:": str = 'default'"},{name:"version",val:": typing.Union[datasets.utils.version.Version, str, NoneType] = 0.0.0"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[datasets.data_files.DataFilesDict, datasets.data_files.DataFilesPatternsDict, NoneType] = None"},{name:"description",val:": typing.Optional[str] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"encoding",val:": str = 'utf-8'"},{name:"encoding_errors",val:": typing.Optional[str] = None"},{name:"chunksize",val:": int = 10485760"},{name:"keep_linebreaks",val:": bool = False"},{name:"sample_by",val:": typing.Literal['line', 'paragraph', 'document'] = 'line'"}],parametersDescription:[{anchor:"datasets.packaged_modules.text.TextConfig.features",description:`<strong>features</strong> — (<code>Features</code>, <em>optional</em>): | |
| Cast the data to <code>features</code>.`,name:"features"},{anchor:"datasets.packaged_modules.text.TextConfig.encoding",description:`<strong>encoding</strong> — (<code>str</code>, defaults to “utf-8”): | |
| Encoding to decode the file.`,name:"encoding"},{anchor:"datasets.packaged_modules.text.TextConfig.encoding_errors",description:`<strong>encoding_errors</strong> — (<code>str</code>, <em>optional</em>): | |
| Argument to define what to do in case of encoding error. | |
| This is the same as the <code>errors</code> argument in <code>open()</code>.`,name:"encoding_errors"},{anchor:"datasets.packaged_modules.text.TextConfig.chunksize",description:`<strong>chunksize</strong> — (<code>int</code>, <em>optional</em>, defaults to “10MB”): | |
| Chunk size to read the data.`,name:"chunksize"},{anchor:"datasets.packaged_modules.text.TextConfig.keep_linebreaks",description:`<strong>keep_linebreaks</strong> — (<code>bool</code>, defaults to False): | |
| Whether to keep line breaks.`,name:"keep_linebreaks"},{anchor:"datasets.packaged_modules.text.TextConfig.sample_by",description:`<strong>sample_by</strong> (<code>Literal["line", "paragraph", "document"]</code>, defaults to “line”) — | |
| Whether to load data per line, paragraph or document. | |
| By default one row in the dataset = one line.`,name:"sample_by"}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/text/text.py#L17"}}),Me=new k({props:{name:"class datasets.packaged_modules.text.Text",anchor:"datasets.packaged_modules.text.Text",parameters:[{name:"cache_dir",val:": typing.Optional[str] = None"},{name:"dataset_name",val:": typing.Optional[str] = None"},{name:"config_name",val:": typing.Optional[str] = None"},{name:"hash",val:": typing.Optional[str] = None"},{name:"base_path",val:": typing.Optional[str] = None"},{name:"info",val:": typing.Optional[datasets.info.DatasetInfo] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"token",val:": typing.Union[bool, str, NoneType] = None"},{name:"repo_id",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[str, list, dict, datasets.data_files.DataFilesDict, NoneType] = None"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"storage_options",val:": typing.Optional[dict] = None"},{name:"writer_batch_size",val:": typing.Optional[int] = None"},{name:"config_id",val:": typing.Optional[str] = None"},{name:"**config_kwargs",val:""}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/text/text.py#L45"}}),Je=new U({props:{title:"CSV",local:"datasets.packaged_modules.csv.CsvConfig",headingTag:"h3"}}),Ie=new k({props:{name:"class datasets.packaged_modules.csv.CsvConfig",anchor:"datasets.packaged_modules.csv.CsvConfig",parameters:[{name:"name",val:": str = 'default'"},{name:"version",val:": typing.Union[datasets.utils.version.Version, str, NoneType] = 0.0.0"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[datasets.data_files.DataFilesDict, datasets.data_files.DataFilesPatternsDict, NoneType] = None"},{name:"description",val:": typing.Optional[str] = None"},{name:"sep",val:": str = 
','"},{name:"delimiter",val:": typing.Optional[str] = None"},{name:"header",val:": typing.Union[int, list[int], str, NoneType] = 'infer'"},{name:"names",val:": typing.Optional[list[str]] = None"},{name:"column_names",val:": typing.Optional[list[str]] = None"},{name:"index_col",val:": typing.Union[int, str, list[int], list[str], NoneType] = None"},{name:"usecols",val:": typing.Union[list[int], list[str], NoneType] = None"},{name:"prefix",val:": typing.Optional[str] = None"},{name:"mangle_dupe_cols",val:": bool = True"},{name:"engine",val:": typing.Optional[typing.Literal['c', 'python', 'pyarrow']] = None"},{name:"converters",val:": dict = None"},{name:"true_values",val:": typing.Optional[list] = None"},{name:"false_values",val:": typing.Optional[list] = None"},{name:"skipinitialspace",val:": bool = False"},{name:"skiprows",val:": typing.Union[int, list[int], NoneType] = None"},{name:"nrows",val:": typing.Optional[int] = None"},{name:"na_values",val:": typing.Union[str, list[str], NoneType] = None"},{name:"keep_default_na",val:": bool = True"},{name:"na_filter",val:": bool = True"},{name:"verbose",val:": bool = False"},{name:"skip_blank_lines",val:": bool = True"},{name:"thousands",val:": typing.Optional[str] = None"},{name:"decimal",val:": str = '.'"},{name:"lineterminator",val:": typing.Optional[str] = None"},{name:"quotechar",val:`: str = '"'`},{name:"quoting",val:": int = 0"},{name:"escapechar",val:": typing.Optional[str] = None"},{name:"comment",val:": typing.Optional[str] = None"},{name:"encoding",val:": typing.Optional[str] = None"},{name:"dialect",val:": typing.Optional[str] = None"},{name:"error_bad_lines",val:": bool = True"},{name:"warn_bad_lines",val:": bool = True"},{name:"skipfooter",val:": int = 0"},{name:"doublequote",val:": bool = True"},{name:"memory_map",val:": bool = False"},{name:"float_precision",val:": typing.Optional[str] = None"},{name:"chunksize",val:": int = 10000"},{name:"features",val:": 
typing.Optional[datasets.features.features.Features] = None"},{name:"encoding_errors",val:": typing.Optional[str] = 'strict'"},{name:"on_bad_lines",val:": typing.Literal['error', 'warn', 'skip'] = 'error'"},{name:"date_format",val:": typing.Optional[str] = None"}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/csv/csv.py#L25"}}),Ze=new k({props:{name:"class datasets.packaged_modules.csv.Csv",anchor:"datasets.packaged_modules.csv.Csv",parameters:[{name:"cache_dir",val:": typing.Optional[str] = None"},{name:"dataset_name",val:": typing.Optional[str] = None"},{name:"config_name",val:": typing.Optional[str] = None"},{name:"hash",val:": typing.Optional[str] = None"},{name:"base_path",val:": typing.Optional[str] = None"},{name:"info",val:": typing.Optional[datasets.info.DatasetInfo] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"token",val:": typing.Union[bool, str, NoneType] = None"},{name:"repo_id",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[str, list, dict, datasets.data_files.DataFilesDict, NoneType] = None"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"storage_options",val:": typing.Optional[dict] = None"},{name:"writer_batch_size",val:": typing.Optional[int] = None"},{name:"config_id",val:": typing.Optional[str] = None"},{name:"**config_kwargs",val:""}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/csv/csv.py#L145"}}),Re=new U({props:{title:"JSON",local:"datasets.packaged_modules.json.JsonConfig",headingTag:"h3"}}),Ve=new k({props:{name:"class datasets.packaged_modules.json.JsonConfig",anchor:"datasets.packaged_modules.json.JsonConfig",parameters:[{name:"name",val:": str = 'default'"},{name:"version",val:": typing.Union[datasets.utils.version.Version, str, NoneType] = 0.0.0"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"data_files",val:": 
typing.Union[datasets.data_files.DataFilesDict, datasets.data_files.DataFilesPatternsDict, NoneType] = None"},{name:"description",val:": typing.Optional[str] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"encoding",val:": str = 'utf-8'"},{name:"encoding_errors",val:": typing.Optional[str] = None"},{name:"field",val:": typing.Optional[str] = None"},{name:"use_threads",val:": bool = True"},{name:"block_size",val:": typing.Optional[int] = None"},{name:"chunksize",val:": int = 10485760"},{name:"newlines_in_values",val:": typing.Optional[bool] = None"},{name:"on_mixed_types",val:": typing.Optional[typing.Literal['use_json']] = 'use_json'"},{name:"parse_agent_traces",val:": bool = True"}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/json/json.py#L42"}}),Xe=new k({props:{name:"class datasets.Json",anchor:"datasets.Json",parameters:[{name:"cache_dir",val:": typing.Optional[str] = None"},{name:"dataset_name",val:": typing.Optional[str] = None"},{name:"config_name",val:": typing.Optional[str] = None"},{name:"hash",val:": typing.Optional[str] = None"},{name:"base_path",val:": typing.Optional[str] = None"},{name:"info",val:": typing.Optional[datasets.info.DatasetInfo] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"token",val:": typing.Union[bool, str, NoneType] = None"},{name:"repo_id",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[str, list, dict, datasets.data_files.DataFilesDict, NoneType] = None"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"storage_options",val:": typing.Optional[dict] = None"},{name:"writer_batch_size",val:": typing.Optional[int] = None"},{name:"config_id",val:": typing.Optional[str] = None"},{name:"**config_kwargs",val:""}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/json/json.py#L60"}}),Se=new 
U({props:{title:"XML",local:"datasets.packaged_modules.xml.XmlConfig",headingTag:"h3"}}),ze=new k({props:{name:"class datasets.packaged_modules.xml.XmlConfig",anchor:"datasets.packaged_modules.xml.XmlConfig",parameters:[{name:"name",val:": str = 'default'"},{name:"version",val:": typing.Union[datasets.utils.version.Version, str, NoneType] = 0.0.0"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[datasets.data_files.DataFilesDict, datasets.data_files.DataFilesPatternsDict, NoneType] = None"},{name:"description",val:": typing.Optional[str] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"encoding",val:": str = 'utf-8'"},{name:"encoding_errors",val:": typing.Optional[str] = None"}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/xml/xml.py#L15"}}),Pe=new k({props:{name:"class datasets.packaged_modules.xml.Xml",anchor:"datasets.packaged_modules.xml.Xml",parameters:[{name:"cache_dir",val:": typing.Optional[str] = None"},{name:"dataset_name",val:": typing.Optional[str] = None"},{name:"config_name",val:": typing.Optional[str] = None"},{name:"hash",val:": typing.Optional[str] = None"},{name:"base_path",val:": typing.Optional[str] = None"},{name:"info",val:": typing.Optional[datasets.info.DatasetInfo] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"token",val:": typing.Union[bool, str, NoneType] = None"},{name:"repo_id",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[str, list, dict, datasets.data_files.DataFilesDict, NoneType] = None"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"storage_options",val:": typing.Optional[dict] = None"},{name:"writer_batch_size",val:": typing.Optional[int] = None"},{name:"config_id",val:": typing.Optional[str] = 
None"},{name:"**config_kwargs",val:""}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/xml/xml.py#L23"}}),We=new U({props:{title:"Parquet",local:"datasets.packaged_modules.parquet.ParquetConfig",headingTag:"h3"}}),He=new k({props:{name:"class datasets.packaged_modules.parquet.ParquetConfig",anchor:"datasets.packaged_modules.parquet.ParquetConfig",parameters:[{name:"name",val:": str = 'default'"},{name:"version",val:": typing.Union[datasets.utils.version.Version, str, NoneType] = 0.0.0"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[datasets.data_files.DataFilesDict, datasets.data_files.DataFilesPatternsDict, NoneType] = None"},{name:"description",val:": typing.Optional[str] = None"},{name:"batch_size",val:": typing.Optional[int] = None"},{name:"columns",val:": typing.Optional[list[str]] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"filters",val:": typing.Union[pyarrow._compute.Expression, list[tuple], list[list[tuple]], NoneType] = None"},{name:"fragment_scan_options",val:": typing.Optional[pyarrow._dataset_parquet.ParquetFragmentScanOptions] = None"},{name:"on_bad_files",val:": typing.Literal['error', 'warn', 'skip'] = 'error'"}],parametersDescription:[{anchor:"datasets.packaged_modules.parquet.ParquetConfig.batch_size",description:`<strong>batch_size</strong> (<code>int</code>, <em>optional</em>) — | |
| Size of the RecordBatches to iterate on. | |
| The default is the row group size (defined by the first row group).`,name:"batch_size"},{anchor:"datasets.packaged_modules.parquet.ParquetConfig.columns",description:`<strong>columns</strong> (<code>list[str]</code>, <em>optional</em>) — | |
| List of columns to load, the other ones are ignored. | |
| All columns are loaded by default.`,name:"columns"},{anchor:"datasets.packaged_modules.parquet.ParquetConfig.features",description:`<strong>features</strong> — (<code>Features</code>, <em>optional</em>): | |
| Cast the data to <code>features</code>.`,name:"features"},{anchor:"datasets.packaged_modules.parquet.ParquetConfig.filters",description:`<strong>filters</strong> (<code>Union[pyarrow.dataset.Expression, list[tuple], list[list[tuple]]]</code>, <em>optional</em>) — | |
| Return only the rows matching the filter. | |
| If possible the predicate will be pushed down to exploit the partition information | |
| or internal metadata found in the data source, e.g. Parquet statistics. | |
| Otherwise filters the loaded RecordBatches before yielding them.`,name:"filters"},{anchor:"datasets.packaged_modules.parquet.ParquetConfig.fragment_scan_options",description:`<strong>fragment_scan_options</strong> (<code>pyarrow.dataset.ParquetFragmentScanOptions</code>, <em>optional</em>) — | |
| Scan-specific options for Parquet fragments. | |
| This is especially useful to configure buffering and caching.</p> | |
| <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"> | |
| <p class="font-medium">Added in 4.2.0</p> | |
| </div>`,name:"fragment_scan_options"},{anchor:"datasets.packaged_modules.parquet.ParquetConfig.on_bad_files",description:`<strong>on_bad_files</strong> (<code>Literal["error", "warn", "skip"]</code>, <em>optional</em>, defaults to “error”) — | |
| Specify what to do upon encountering a bad file (a file that can’t be read). Allowed values are :</p> | |
| <ul> | |
| <li>‘error’, raise an Exception when a bad file is encountered.</li> | |
| <li>‘warn’, raise a warning when a bad file is encountered and skip that file.</li> | |
| <li>‘skip’, skip bad files without raising or warning when they are encountered.</li> | |
| </ul> | |
| <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"> | |
| <p class="font-medium">Added in 4.2.0</p> | |
| </div>`,name:"on_bad_files"}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/parquet/parquet.py#L17"}}),fe=new J({props:{anchor:"datasets.packaged_modules.parquet.ParquetConfig.example",$$slots:{default:[xo]},$$scope:{ctx:D}}}),ue=new J({props:{anchor:"datasets.packaged_modules.parquet.ParquetConfig.example-2",$$slots:{default:[wo]},$$scope:{ctx:D}}}),_e=new J({props:{anchor:"datasets.packaged_modules.parquet.ParquetConfig.example-3",$$slots:{default:[No]},$$scope:{ctx:D}}}),Be=new k({props:{name:"class datasets.packaged_modules.parquet.Parquet",anchor:"datasets.packaged_modules.parquet.Parquet",parameters:[{name:"cache_dir",val:": typing.Optional[str] = None"},{name:"dataset_name",val:": typing.Optional[str] = None"},{name:"config_name",val:": typing.Optional[str] = None"},{name:"hash",val:": typing.Optional[str] = None"},{name:"base_path",val:": typing.Optional[str] = None"},{name:"info",val:": typing.Optional[datasets.info.DatasetInfo] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"token",val:": typing.Union[bool, str, NoneType] = None"},{name:"repo_id",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[str, list, dict, datasets.data_files.DataFilesDict, NoneType] = None"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"storage_options",val:": typing.Optional[dict] = None"},{name:"writer_batch_size",val:": typing.Optional[int] = None"},{name:"config_id",val:": typing.Optional[str] = None"},{name:"**config_kwargs",val:""}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/parquet/parquet.py#L90"}}),Ye=new U({props:{title:"Arrow",local:"datasets.packaged_modules.arrow.ArrowConfig",headingTag:"h3"}}),Ae=new k({props:{name:"class datasets.packaged_modules.arrow.ArrowConfig",anchor:"datasets.packaged_modules.arrow.ArrowConfig",parameters:[{name:"name",val:": str = 
'default'"},{name:"version",val:": typing.Union[datasets.utils.version.Version, str, NoneType] = 0.0.0"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[datasets.data_files.DataFilesDict, datasets.data_files.DataFilesPatternsDict, NoneType] = None"},{name:"description",val:": typing.Optional[str] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/arrow/arrow.py#L15"}}),Ke=new k({props:{name:"class datasets.packaged_modules.arrow.Arrow",anchor:"datasets.packaged_modules.arrow.Arrow",parameters:[{name:"cache_dir",val:": typing.Optional[str] = None"},{name:"dataset_name",val:": typing.Optional[str] = None"},{name:"config_name",val:": typing.Optional[str] = None"},{name:"hash",val:": typing.Optional[str] = None"},{name:"base_path",val:": typing.Optional[str] = None"},{name:"info",val:": typing.Optional[datasets.info.DatasetInfo] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"token",val:": typing.Union[bool, str, NoneType] = None"},{name:"repo_id",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[str, list, dict, datasets.data_files.DataFilesDict, NoneType] = None"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"storage_options",val:": typing.Optional[dict] = None"},{name:"writer_batch_size",val:": typing.Optional[int] = None"},{name:"config_id",val:": typing.Optional[str] = None"},{name:"**config_kwargs",val:""}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/arrow/arrow.py#L24"}}),et=new U({props:{title:"SQL",local:"datasets.packaged_modules.sql.SqlConfig",headingTag:"h3"}}),tt=new k({props:{name:"class datasets.packaged_modules.sql.SqlConfig",anchor:"datasets.packaged_modules.sql.SqlConfig",parameters:[{name:"name",val:": str = 
'default'"},{name:"version",val:": typing.Union[datasets.utils.version.Version, str, NoneType] = 0.0.0"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[datasets.data_files.DataFilesDict, datasets.data_files.DataFilesPatternsDict, NoneType] = None"},{name:"description",val:": typing.Optional[str] = None"},{name:"sql",val:": typing.Union[str, ForwardRef('sqlalchemy.sql.Selectable')] = None"},{name:"con",val:": typing.Union[str, ForwardRef('sqlalchemy.engine.Connection'), ForwardRef('sqlalchemy.engine.Engine'), ForwardRef('sqlite3.Connection')] = None"},{name:"index_col",val:": typing.Union[str, list[str], NoneType] = None"},{name:"coerce_float",val:": bool = True"},{name:"params",val:": typing.Union[list, tuple, dict, NoneType] = None"},{name:"parse_dates",val:": typing.Union[list, dict, NoneType] = None"},{name:"columns",val:": typing.Optional[list[str]] = None"},{name:"chunksize",val:": typing.Optional[int] = 10000"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/sql/sql.py#L25"}}),nt=new k({props:{name:"class datasets.packaged_modules.sql.Sql",anchor:"datasets.packaged_modules.sql.Sql",parameters:[{name:"cache_dir",val:": typing.Optional[str] = None"},{name:"dataset_name",val:": typing.Optional[str] = None"},{name:"config_name",val:": typing.Optional[str] = None"},{name:"hash",val:": typing.Optional[str] = None"},{name:"base_path",val:": typing.Optional[str] = None"},{name:"info",val:": typing.Optional[datasets.info.DatasetInfo] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"token",val:": typing.Union[bool, str, NoneType] = None"},{name:"repo_id",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[str, list, dict, datasets.data_files.DataFilesDict, NoneType] = None"},{name:"data_dir",val:": typing.Optional[str] = 
None"},{name:"storage_options",val:": typing.Optional[dict] = None"},{name:"writer_batch_size",val:": typing.Optional[int] = None"},{name:"config_id",val:": typing.Optional[str] = None"},{name:"**config_kwargs",val:""}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/sql/sql.py#L92"}}),st=new U({props:{title:"Images",local:"datasets.packaged_modules.imagefolder.ImageFolderConfig",headingTag:"h3"}}),ot=new k({props:{name:"class datasets.packaged_modules.imagefolder.ImageFolderConfig",anchor:"datasets.packaged_modules.imagefolder.ImageFolderConfig",parameters:[{name:"name",val:": str = 'default'"},{name:"version",val:": typing.Union[datasets.utils.version.Version, str, NoneType] = 0.0.0"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[datasets.data_files.DataFilesDict, datasets.data_files.DataFilesPatternsDict, NoneType] = None"},{name:"description",val:": typing.Optional[str] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"drop_labels",val:": bool = None"},{name:"drop_metadata",val:": bool = None"},{name:"metadata_filenames",val:": list = None"},{name:"filters",val:": typing.Union[pyarrow._compute.Expression, list[tuple], list[list[tuple]], NoneType] = None"}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/imagefolder/imagefolder.py#L9"}}),lt=new k({props:{name:"class datasets.packaged_modules.imagefolder.ImageFolder",anchor:"datasets.packaged_modules.imagefolder.ImageFolder",parameters:[{name:"cache_dir",val:": typing.Optional[str] = None"},{name:"dataset_name",val:": typing.Optional[str] = None"},{name:"config_name",val:": typing.Optional[str] = None"},{name:"hash",val:": typing.Optional[str] = None"},{name:"base_path",val:": typing.Optional[str] = None"},{name:"info",val:": typing.Optional[datasets.info.DatasetInfo] = None"},{name:"features",val:": 
typing.Optional[datasets.features.features.Features] = None"},{name:"token",val:": typing.Union[bool, str, NoneType] = None"},{name:"repo_id",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[str, list, dict, datasets.data_files.DataFilesDict, NoneType] = None"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"storage_options",val:": typing.Optional[dict] = None"},{name:"writer_batch_size",val:": typing.Optional[int] = None"},{name:"config_id",val:": typing.Optional[str] = None"},{name:"**config_kwargs",val:""}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/imagefolder/imagefolder.py#L19"}}),rt=new U({props:{title:"Audio",local:"datasets.packaged_modules.audiofolder.AudioFolderConfig",headingTag:"h3"}}),dt=new k({props:{name:"class datasets.packaged_modules.audiofolder.AudioFolderConfig",anchor:"datasets.packaged_modules.audiofolder.AudioFolderConfig",parameters:[{name:"name",val:": str = 'default'"},{name:"version",val:": typing.Union[datasets.utils.version.Version, str, NoneType] = 0.0.0"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[datasets.data_files.DataFilesDict, datasets.data_files.DataFilesPatternsDict, NoneType] = None"},{name:"description",val:": typing.Optional[str] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"drop_labels",val:": bool = None"},{name:"drop_metadata",val:": bool = None"},{name:"metadata_filenames",val:": list = None"},{name:"filters",val:": typing.Union[pyarrow._compute.Expression, list[tuple], list[list[tuple]], NoneType] = None"}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/audiofolder/audiofolder.py#L9"}}),ct=new k({props:{name:"class datasets.packaged_modules.audiofolder.AudioFolder",anchor:"datasets.packaged_modules.audiofolder.AudioFolder",parameters:[{name:"cache_dir",val:": typing.Optional[str] = 
None"},{name:"dataset_name",val:": typing.Optional[str] = None"},{name:"config_name",val:": typing.Optional[str] = None"},{name:"hash",val:": typing.Optional[str] = None"},{name:"base_path",val:": typing.Optional[str] = None"},{name:"info",val:": typing.Optional[datasets.info.DatasetInfo] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"token",val:": typing.Union[bool, str, NoneType] = None"},{name:"repo_id",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[str, list, dict, datasets.data_files.DataFilesDict, NoneType] = None"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"storage_options",val:": typing.Optional[dict] = None"},{name:"writer_batch_size",val:": typing.Optional[int] = None"},{name:"config_id",val:": typing.Optional[str] = None"},{name:"**config_kwargs",val:""}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/audiofolder/audiofolder.py#L19"}}),gt=new U({props:{title:"Videos",local:"datasets.packaged_modules.videofolder.VideoFolderConfig",headingTag:"h3"}}),mt=new k({props:{name:"class datasets.packaged_modules.videofolder.VideoFolderConfig",anchor:"datasets.packaged_modules.videofolder.VideoFolderConfig",parameters:[{name:"name",val:": str = 'default'"},{name:"version",val:": typing.Union[datasets.utils.version.Version, str, NoneType] = 0.0.0"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[datasets.data_files.DataFilesDict, datasets.data_files.DataFilesPatternsDict, NoneType] = None"},{name:"description",val:": typing.Optional[str] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"drop_labels",val:": bool = None"},{name:"drop_metadata",val:": bool = None"},{name:"metadata_filenames",val:": list = None"},{name:"filters",val:": typing.Union[pyarrow._compute.Expression, list[tuple], list[list[tuple]], NoneType] = 
None"}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/videofolder/videofolder.py#L9"}}),ut=new k({props:{name:"class datasets.packaged_modules.videofolder.VideoFolder",anchor:"datasets.packaged_modules.videofolder.VideoFolder",parameters:[{name:"cache_dir",val:": typing.Optional[str] = None"},{name:"dataset_name",val:": typing.Optional[str] = None"},{name:"config_name",val:": typing.Optional[str] = None"},{name:"hash",val:": typing.Optional[str] = None"},{name:"base_path",val:": typing.Optional[str] = None"},{name:"info",val:": typing.Optional[datasets.info.DatasetInfo] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"token",val:": typing.Union[bool, str, NoneType] = None"},{name:"repo_id",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[str, list, dict, datasets.data_files.DataFilesDict, NoneType] = None"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"storage_options",val:": typing.Optional[dict] = None"},{name:"writer_batch_size",val:": typing.Optional[int] = None"},{name:"config_id",val:": typing.Optional[str] = None"},{name:"**config_kwargs",val:""}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/videofolder/videofolder.py#L19"}}),_t=new U({props:{title:"HDF5",local:"datasets.packaged_modules.hdf5.HDF5Config",headingTag:"h3"}}),ht=new k({props:{name:"class datasets.packaged_modules.hdf5.HDF5Config",anchor:"datasets.packaged_modules.hdf5.HDF5Config",parameters:[{name:"name",val:": str = 'default'"},{name:"version",val:": typing.Union[datasets.utils.version.Version, str, NoneType] = 0.0.0"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[datasets.data_files.DataFilesDict, datasets.data_files.DataFilesPatternsDict, NoneType] = None"},{name:"description",val:": typing.Optional[str] = None"},{name:"batch_size",val:": typing.Optional[int] = 
None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/hdf5/hdf5.py#L33"}}),vt=new k({props:{name:"class datasets.packaged_modules.hdf5.HDF5",anchor:"datasets.packaged_modules.hdf5.HDF5",parameters:[{name:"cache_dir",val:": typing.Optional[str] = None"},{name:"dataset_name",val:": typing.Optional[str] = None"},{name:"config_name",val:": typing.Optional[str] = None"},{name:"hash",val:": typing.Optional[str] = None"},{name:"base_path",val:": typing.Optional[str] = None"},{name:"info",val:": typing.Optional[datasets.info.DatasetInfo] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"token",val:": typing.Union[bool, str, NoneType] = None"},{name:"repo_id",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[str, list, dict, datasets.data_files.DataFilesDict, NoneType] = None"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"storage_options",val:": typing.Optional[dict] = None"},{name:"writer_batch_size",val:": typing.Optional[int] = None"},{name:"config_id",val:": typing.Optional[str] = None"},{name:"**config_kwargs",val:""}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/hdf5/hdf5.py#L40"}}),yt=new U({props:{title:"Pdf",local:"datasets.packaged_modules.pdffolder.PdfFolderConfig",headingTag:"h3"}}),bt=new k({props:{name:"class datasets.packaged_modules.pdffolder.PdfFolderConfig",anchor:"datasets.packaged_modules.pdffolder.PdfFolderConfig",parameters:[{name:"name",val:": str = 'default'"},{name:"version",val:": typing.Union[datasets.utils.version.Version, str, NoneType] = 0.0.0"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[datasets.data_files.DataFilesDict, datasets.data_files.DataFilesPatternsDict, NoneType] = None"},{name:"description",val:": typing.Optional[str] = 
None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"drop_labels",val:": bool = None"},{name:"drop_metadata",val:": bool = None"},{name:"metadata_filenames",val:": list = None"},{name:"filters",val:": typing.Union[pyarrow._compute.Expression, list[tuple], list[list[tuple]], NoneType] = None"}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/pdffolder/pdffolder.py#L9"}}),xt=new k({props:{name:"class datasets.packaged_modules.pdffolder.PdfFolder",anchor:"datasets.packaged_modules.pdffolder.PdfFolder",parameters:[{name:"cache_dir",val:": typing.Optional[str] = None"},{name:"dataset_name",val:": typing.Optional[str] = None"},{name:"config_name",val:": typing.Optional[str] = None"},{name:"hash",val:": typing.Optional[str] = None"},{name:"base_path",val:": typing.Optional[str] = None"},{name:"info",val:": typing.Optional[datasets.info.DatasetInfo] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"token",val:": typing.Union[bool, str, NoneType] = None"},{name:"repo_id",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[str, list, dict, datasets.data_files.DataFilesDict, NoneType] = None"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"storage_options",val:": typing.Optional[dict] = None"},{name:"writer_batch_size",val:": typing.Optional[int] = None"},{name:"config_id",val:": typing.Optional[str] = None"},{name:"**config_kwargs",val:""}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/pdffolder/pdffolder.py#L19"}}),wt=new U({props:{title:"Nifti",local:"datasets.packaged_modules.niftifolder.NiftiFolderConfig",headingTag:"h3"}}),Nt=new k({props:{name:"class datasets.packaged_modules.niftifolder.NiftiFolderConfig",anchor:"datasets.packaged_modules.niftifolder.NiftiFolderConfig",parameters:[{name:"name",val:": str = 'default'"},{name:"version",val:": 
typing.Union[datasets.utils.version.Version, str, NoneType] = 0.0.0"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[datasets.data_files.DataFilesDict, datasets.data_files.DataFilesPatternsDict, NoneType] = None"},{name:"description",val:": typing.Optional[str] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"drop_labels",val:": bool = None"},{name:"drop_metadata",val:": bool = None"},{name:"metadata_filenames",val:": list = None"},{name:"filters",val:": typing.Union[pyarrow._compute.Expression, list[tuple], list[list[tuple]], NoneType] = None"}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/niftifolder/niftifolder.py#L9"}}),Ct=new k({props:{name:"class datasets.packaged_modules.niftifolder.NiftiFolder",anchor:"datasets.packaged_modules.niftifolder.NiftiFolder",parameters:[{name:"cache_dir",val:": typing.Optional[str] = None"},{name:"dataset_name",val:": typing.Optional[str] = None"},{name:"config_name",val:": typing.Optional[str] = None"},{name:"hash",val:": typing.Optional[str] = None"},{name:"base_path",val:": typing.Optional[str] = None"},{name:"info",val:": typing.Optional[datasets.info.DatasetInfo] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"token",val:": typing.Union[bool, str, NoneType] = None"},{name:"repo_id",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[str, list, dict, datasets.data_files.DataFilesDict, NoneType] = None"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"storage_options",val:": typing.Optional[dict] = None"},{name:"writer_batch_size",val:": typing.Optional[int] = None"},{name:"config_id",val:": typing.Optional[str] = None"},{name:"**config_kwargs",val:""}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/niftifolder/niftifolder.py#L19"}}),Dt=new 
U({props:{title:"WebDataset",local:"datasets.packaged_modules.webdataset.WebDataset",headingTag:"h3"}}),Ot=new k({props:{name:"class datasets.packaged_modules.webdataset.WebDataset",anchor:"datasets.packaged_modules.webdataset.WebDataset",parameters:[{name:"cache_dir",val:": typing.Optional[str] = None"},{name:"dataset_name",val:": typing.Optional[str] = None"},{name:"config_name",val:": typing.Optional[str] = None"},{name:"hash",val:": typing.Optional[str] = None"},{name:"base_path",val:": typing.Optional[str] = None"},{name:"info",val:": typing.Optional[datasets.info.DatasetInfo] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"token",val:": typing.Union[bool, str, NoneType] = None"},{name:"repo_id",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[str, list, dict, datasets.data_files.DataFilesDict, NoneType] = None"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"storage_options",val:": typing.Optional[dict] = None"},{name:"writer_batch_size",val:": typing.Optional[int] = None"},{name:"config_id",val:": typing.Optional[str] = None"},{name:"**config_kwargs",val:""}],source:"https://github.com/huggingface/datasets/blob/r_8137/src/datasets/packaged_modules/webdataset/webdataset.py#L20"}}),Ft=new 
co({props:{source:"https://github.com/huggingface/datasets/blob/main/docs/source/package_reference/loading_methods.mdx"}}),{c(){l=d("meta"),x=o(),y=d("p"),r=o(),c(h.$$.fragment),t=o(),c(b.$$.fragment),fa=o(),he=d("p"),he.textContent=us,ua=o(),c(ve.$$.fragment),_a=o(),C=d("div"),c(ye.$$.fragment),Dn=o(),It=d("p"),It.textContent=_s,Tn=o(),qt=d("p"),qt.innerHTML=hs,On=o(),Zt=d("p"),Zt.textContent=vs,Fn=o(),Rt=d("p"),Rt.textContent=ys,Un=o(),Vt=d("ol"),Vt.innerHTML=bs,jn=o(),Gt=d("p"),Gt.textContent=$s,Mn=o(),c(se.$$.fragment),Jn=o(),c(oe.$$.fragment),In=o(),c(ie.$$.fragment),qn=o(),c(le.$$.fragment),Zn=o(),c(re.$$.fragment),ha=o(),Z=d("div"),c(be.$$.fragment),Rn=o(),Xt=d("p"),Xt.innerHTML=xs,Vn=o(),c(de.$$.fragment),va=o(),O=d("div"),c($e.$$.fragment),Gn=o(),St=d("p"),St.textContent=ws,Xn=o(),zt=d("ul"),zt.innerHTML=Ns,Sn=o(),Et=d("p"),Et.innerHTML=ks,zn=o(),Pt=d("p"),Pt.textContent=Cs,En=o(),c(pe.$$.fragment),ya=o(),R=d("div"),c(xe.$$.fragment),Pn=o(),Wt=d("p"),Wt.textContent=Ds,Wn=o(),c(ce.$$.fragment),ba=o(),V=d("div"),c(we.$$.fragment),Hn=o(),Ht=d("p"),Ht.textContent=Ts,Ln=o(),c(ge.$$.fragment),$a=o(),G=d("div"),c(Ne.$$.fragment),Bn=o(),Lt=d("p"),Lt.textContent=Os,Yn=o(),c(me.$$.fragment),xa=o(),c(ke.$$.fragment),wa=o(),Ce=d("p"),Ce.textContent=Fs,Na=o(),De=d("ul"),De.innerHTML=Us,ka=o(),Te=d("p"),Te.innerHTML=js,Ca=o(),c(Oe.$$.fragment),Da=o(),c(Fe.$$.fragment),Ta=o(),X=d("div"),c(Ue.$$.fragment),An=o(),Bt=d("p"),Bt.textContent=Ms,Oa=o(),je=d("div"),c(Me.$$.fragment),Fa=o(),c(Je.$$.fragment),Ua=o(),S=d("div"),c(Ie.$$.fragment),Qn=o(),Yt=d("p"),Yt.textContent=Js,ja=o(),qe=d("div"),c(Ze.$$.fragment),Ma=o(),c(Re.$$.fragment),Ja=o(),z=d("div"),c(Ve.$$.fragment),Kn=o(),At=d("p"),At.textContent=Is,Ia=o(),Ge=d("div"),c(Xe.$$.fragment),qa=o(),c(Se.$$.fragment),Za=o(),E=d("div"),c(ze.$$.fragment),es=o(),Qt=d("p"),Qt.textContent=qs,Ra=o(),Ee=d("div"),c(Pe.$$.fragment),Va=o(),c(We.$$.fragment),Ga=o(),F=d("div"),c(He.$$.fragment),ts=o(),Kt=d("p"),Kt.textContent=Zs,as=o(),ea=d
("p"),ea.textContent=Rs,ns=o(),c(fe.$$.fragment),ss=o(),c(ue.$$.fragment),os=o(),c(_e.$$.fragment),Xa=o(),Le=d("div"),c(Be.$$.fragment),Sa=o(),c(Ye.$$.fragment),za=o(),P=d("div"),c(Ae.$$.fragment),is=o(),ta=d("p"),ta.textContent=Vs,Ea=o(),Qe=d("div"),c(Ke.$$.fragment),Pa=o(),c(et.$$.fragment),Wa=o(),W=d("div"),c(tt.$$.fragment),ls=o(),aa=d("p"),aa.textContent=Gs,Ha=o(),at=d("div"),c(nt.$$.fragment),La=o(),c(st.$$.fragment),Ba=o(),H=d("div"),c(ot.$$.fragment),rs=o(),na=d("p"),na.textContent=Xs,Ya=o(),it=d("div"),c(lt.$$.fragment),Aa=o(),c(rt.$$.fragment),Qa=o(),L=d("div"),c(dt.$$.fragment),ds=o(),sa=d("p"),sa.textContent=Ss,Ka=o(),pt=d("div"),c(ct.$$.fragment),en=o(),c(gt.$$.fragment),tn=o(),B=d("div"),c(mt.$$.fragment),ps=o(),oa=d("p"),oa.textContent=zs,an=o(),ft=d("div"),c(ut.$$.fragment),nn=o(),c(_t.$$.fragment),sn=o(),Y=d("div"),c(ht.$$.fragment),cs=o(),ia=d("p"),ia.textContent=Es,on=o(),A=d("div"),c(vt.$$.fragment),gs=o(),la=d("p"),la.textContent=Ps,ln=o(),c(yt.$$.fragment),rn=o(),Q=d("div"),c(bt.$$.fragment),ms=o(),ra=d("p"),ra.textContent=Ws,dn=o(),$t=d("div"),c(xt.$$.fragment),pn=o(),c(wt.$$.fragment),cn=o(),K=d("div"),c(Nt.$$.fragment),fs=o(),da=d("p"),da.textContent=Hs,gn=o(),kt=d("div"),c(Ct.$$.fragment),mn=o(),c(Dt.$$.fragment),fn=o(),Tt=d("div"),c(Ot.$$.fragment),un=o(),c(Ft.$$.fragment),_n=o(),pa=d("p"),this.h()},l(e){const n=ro("svelte-u9bgzb",document.head);l=p(n,"META",{name:!0,content:!0}),n.forEach(a),x=i(e),y=p(e,"P",{}),w(y).forEach(a),r=i(e),g(h.$$.fragment,e),t=i(e),g(b.$$.fragment,e),fa=i(e),he=p(e,"P",{"data-svelte-h":!0}),$(he)!=="svelte-spmsxp"&&(he.textContent=us),ua=i(e),g(ve.$$.fragment,e),_a=i(e),C=p(e,"DIV",{class:!0});var 
T=w(C);g(ye.$$.fragment,T),Dn=i(T),It=p(T,"P",{"data-svelte-h":!0}),$(It)!=="svelte-x6t4fm"&&(It.textContent=_s),Tn=i(T),qt=p(T,"P",{"data-svelte-h":!0}),$(qt)!=="svelte-1j3dvna"&&(qt.innerHTML=hs),On=i(T),Zt=p(T,"P",{"data-svelte-h":!0}),$(Zt)!=="svelte-scev7w"&&(Zt.textContent=vs),Fn=i(T),Rt=p(T,"P",{"data-svelte-h":!0}),$(Rt)!=="svelte-1osovlg"&&(Rt.textContent=ys),Un=i(T),Vt=p(T,"OL",{"data-svelte-h":!0}),$(Vt)!=="svelte-i4spcm"&&(Vt.innerHTML=bs),jn=i(T),Gt=p(T,"P",{"data-svelte-h":!0}),$(Gt)!=="svelte-11lpom8"&&(Gt.textContent=$s),Mn=i(T),g(se.$$.fragment,T),Jn=i(T),g(oe.$$.fragment,T),In=i(T),g(ie.$$.fragment,T),qn=i(T),g(le.$$.fragment,T),Zn=i(T),g(re.$$.fragment,T),T.forEach(a),ha=i(e),Z=p(e,"DIV",{class:!0});var ee=w(Z);g(be.$$.fragment,ee),Rn=i(ee),Xt=p(ee,"P",{"data-svelte-h":!0}),$(Xt)!=="svelte-14xp02v"&&(Xt.innerHTML=xs),Vn=i(ee),g(de.$$.fragment,ee),ee.forEach(a),va=i(e),O=p(e,"DIV",{class:!0});var j=w(O);g($e.$$.fragment,j),Gn=i(j),St=p(j,"P",{"data-svelte-h":!0}),$(St)!=="svelte-dbq44g"&&(St.textContent=ws),Xn=i(j),zt=p(j,"UL",{"data-svelte-h":!0}),$(zt)!=="svelte-82dds3"&&(zt.innerHTML=Ns),Sn=i(j),Et=p(j,"P",{"data-svelte-h":!0}),$(Et)!=="svelte-1j3dvna"&&(Et.innerHTML=ks),zn=i(j),Pt=p(j,"P",{"data-svelte-h":!0}),$(Pt)!=="svelte-scev7w"&&(Pt.textContent=Cs),En=i(j),g(pe.$$.fragment,j),j.forEach(a),ya=i(e),R=p(e,"DIV",{class:!0});var te=w(R);g(xe.$$.fragment,te),Pn=i(te),Wt=p(te,"P",{"data-svelte-h":!0}),$(Wt)!=="svelte-2twqqm"&&(Wt.textContent=Ds),Wn=i(te),g(ce.$$.fragment,te),te.forEach(a),ba=i(e),V=p(e,"DIV",{class:!0});var ae=w(V);g(we.$$.fragment,ae),Hn=i(ae),Ht=p(ae,"P",{"data-svelte-h":!0}),$(Ht)!=="svelte-vg9yfc"&&(Ht.textContent=Ts),Ln=i(ae),g(ge.$$.fragment,ae),ae.forEach(a),$a=i(e),G=p(e,"DIV",{class:!0});var 
ne=w(G);g(Ne.$$.fragment,ne),Bn=i(ne),Lt=p(ne,"P",{"data-svelte-h":!0}),$(Lt)!=="svelte-hrdkoe"&&(Lt.textContent=Os),Yn=i(ne),g(me.$$.fragment,ne),ne.forEach(a),xa=i(e),g(ke.$$.fragment,e),wa=i(e),Ce=p(e,"P",{"data-svelte-h":!0}),$(Ce)!=="svelte-psk543"&&(Ce.textContent=Fs),Na=i(e),De=p(e,"UL",{"data-svelte-h":!0}),$(De)!=="svelte-mua0wr"&&(De.innerHTML=Us),ka=i(e),Te=p(e,"P",{"data-svelte-h":!0}),$(Te)!=="svelte-15ounx8"&&(Te.innerHTML=js),Ca=i(e),g(Oe.$$.fragment,e),Da=i(e),g(Fe.$$.fragment,e),Ta=i(e),X=p(e,"DIV",{class:!0});var Ut=w(X);g(Ue.$$.fragment,Ut),An=i(Ut),Bt=p(Ut,"P",{"data-svelte-h":!0}),$(Bt)!=="svelte-jpahu0"&&(Bt.textContent=Ms),Ut.forEach(a),Oa=i(e),je=p(e,"DIV",{class:!0});var ca=w(je);g(Me.$$.fragment,ca),ca.forEach(a),Fa=i(e),g(Je.$$.fragment,e),Ua=i(e),S=p(e,"DIV",{class:!0});var jt=w(S);g(Ie.$$.fragment,jt),Qn=i(jt),Yt=p(jt,"P",{"data-svelte-h":!0}),$(Yt)!=="svelte-993wsc"&&(Yt.textContent=Js),jt.forEach(a),ja=i(e),qe=p(e,"DIV",{class:!0});var ga=w(qe);g(Ze.$$.fragment,ga),ga.forEach(a),Ma=i(e),g(Re.$$.fragment,e),Ja=i(e),z=p(e,"DIV",{class:!0});var Mt=w(z);g(Ve.$$.fragment,Mt),Kn=i(Mt),At=p(Mt,"P",{"data-svelte-h":!0}),$(At)!=="svelte-12lz3xe"&&(At.textContent=Is),Mt.forEach(a),Ia=i(e),Ge=p(e,"DIV",{class:!0});var ma=w(Ge);g(Xe.$$.fragment,ma),ma.forEach(a),qa=i(e),g(Se.$$.fragment,e),Za=i(e),E=p(e,"DIV",{class:!0});var Jt=w(E);g(ze.$$.fragment,Jt),es=i(Jt),Qt=p(Jt,"P",{"data-svelte-h":!0}),$(Qt)!=="svelte-1kpjrrq"&&(Qt.textContent=qs),Jt.forEach(a),Ra=i(e),Ee=p(e,"DIV",{class:!0});var Ls=w(Ee);g(Pe.$$.fragment,Ls),Ls.forEach(a),Va=i(e),g(We.$$.fragment,e),Ga=i(e),F=p(e,"DIV",{class:!0});var q=w(F);g(He.$$.fragment,q),ts=i(q),Kt=p(q,"P",{"data-svelte-h":!0}),$(Kt)!=="svelte-1kivgjw"&&(Kt.textContent=Zs),as=i(q),ea=p(q,"P",{"data-svelte-h":!0}),$(ea)!=="svelte-11lpom8"&&(ea.textContent=Rs),ns=i(q),g(fe.$$.fragment,q),ss=i(q),g(ue.$$.fragment,q),os=i(q),g(_e.$$.fragment,q),q.forEach(a),Xa=i(e),Le=p(e,"DIV",{class:!0});var 
Bs=w(Le);g(Be.$$.fragment,Bs),Bs.forEach(a),Sa=i(e),g(Ye.$$.fragment,e),za=i(e),P=p(e,"DIV",{class:!0});var vn=w(P);g(Ae.$$.fragment,vn),is=i(vn),ta=p(vn,"P",{"data-svelte-h":!0}),$(ta)!=="svelte-1ah41nl"&&(ta.textContent=Vs),vn.forEach(a),Ea=i(e),Qe=p(e,"DIV",{class:!0});var Ys=w(Qe);g(Ke.$$.fragment,Ys),Ys.forEach(a),Pa=i(e),g(et.$$.fragment,e),Wa=i(e),W=p(e,"DIV",{class:!0});var yn=w(W);g(tt.$$.fragment,yn),ls=i(yn),aa=p(yn,"P",{"data-svelte-h":!0}),$(aa)!=="svelte-18rutfs"&&(aa.textContent=Gs),yn.forEach(a),Ha=i(e),at=p(e,"DIV",{class:!0});var As=w(at);g(nt.$$.fragment,As),As.forEach(a),La=i(e),g(st.$$.fragment,e),Ba=i(e),H=p(e,"DIV",{class:!0});var bn=w(H);g(ot.$$.fragment,bn),rs=i(bn),na=p(bn,"P",{"data-svelte-h":!0}),$(na)!=="svelte-myiztp"&&(na.textContent=Xs),bn.forEach(a),Ya=i(e),it=p(e,"DIV",{class:!0});var Qs=w(it);g(lt.$$.fragment,Qs),Qs.forEach(a),Aa=i(e),g(rt.$$.fragment,e),Qa=i(e),L=p(e,"DIV",{class:!0});var $n=w(L);g(dt.$$.fragment,$n),ds=i($n),sa=p($n,"P",{"data-svelte-h":!0}),$(sa)!=="svelte-obgmtg"&&(sa.textContent=Ss),$n.forEach(a),Ka=i(e),pt=p(e,"DIV",{class:!0});var Ks=w(pt);g(ct.$$.fragment,Ks),Ks.forEach(a),en=i(e),g(gt.$$.fragment,e),tn=i(e),B=p(e,"DIV",{class:!0});var xn=w(B);g(mt.$$.fragment,xn),ps=i(xn),oa=p(xn,"P",{"data-svelte-h":!0}),$(oa)!=="svelte-myiztp"&&(oa.textContent=zs),xn.forEach(a),an=i(e),ft=p(e,"DIV",{class:!0});var eo=w(ft);g(ut.$$.fragment,eo),eo.forEach(a),nn=i(e),g(_t.$$.fragment,e),sn=i(e),Y=p(e,"DIV",{class:!0});var wn=w(Y);g(ht.$$.fragment,wn),cs=i(wn),ia=p(wn,"P",{"data-svelte-h":!0}),$(ia)!=="svelte-ffmm9v"&&(ia.textContent=Es),wn.forEach(a),on=i(e),A=p(e,"DIV",{class:!0});var Nn=w(A);g(vt.$$.fragment,Nn),gs=i(Nn),la=p(Nn,"P",{"data-svelte-h":!0}),$(la)!=="svelte-pm698w"&&(la.textContent=Ps),Nn.forEach(a),ln=i(e),g(yt.$$.fragment,e),rn=i(e),Q=p(e,"DIV",{class:!0});var 
kn=w(Q);g(bt.$$.fragment,kn),ms=i(kn),ra=p(kn,"P",{"data-svelte-h":!0}),$(ra)!=="svelte-myiztp"&&(ra.textContent=Ws),kn.forEach(a),dn=i(e),$t=p(e,"DIV",{class:!0});var to=w($t);g(xt.$$.fragment,to),to.forEach(a),pn=i(e),g(wt.$$.fragment,e),cn=i(e),K=p(e,"DIV",{class:!0});var Cn=w(K);g(Nt.$$.fragment,Cn),fs=i(Cn),da=p(Cn,"P",{"data-svelte-h":!0}),$(da)!=="svelte-olfluy"&&(da.textContent=Hs),Cn.forEach(a),gn=i(e),kt=p(e,"DIV",{class:!0});var ao=w(kt);g(Ct.$$.fragment,ao),ao.forEach(a),mn=i(e),g(Dt.$$.fragment,e),fn=i(e),Tt=p(e,"DIV",{class:!0});var no=w(Tt);g(Ot.$$.fragment,no),no.forEach(a),un=i(e),g(Ft.$$.fragment,e),_n=i(e),pa=p(e,"P",{}),w(pa).forEach(a),this.h()},h(){N(l,"name","hf:doc:metadata"),N(l,"content",Co),N(C,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(Z,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(O,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(R,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(V,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(G,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(X,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(je,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(S,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(qe,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(z,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(Ge,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(E,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 
rounded-tl-xl mb-6 mt-8"),N(Ee,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(F,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(Le,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(P,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(Qe,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(W,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(at,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(H,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(it,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(L,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(pt,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(B,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(ft,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(Y,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(A,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(Q,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N($t,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(K,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(kt,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),N(Tt,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 
rounded-tl-xl mb-6 mt-8")},m(e,n){v(document.head,l),s(e,x,n),s(e,y,n),s(e,r,n),m(h,e,n),s(e,t,n),m(b,e,n),s(e,fa,n),s(e,he,n),s(e,ua,n),m(ve,e,n),s(e,_a,n),s(e,C,n),m(ye,C,null),v(C,Dn),v(C,It),v(C,Tn),v(C,qt),v(C,On),v(C,Zt),v(C,Fn),v(C,Rt),v(C,Un),v(C,Vt),v(C,jn),v(C,Gt),v(C,Mn),m(se,C,null),v(C,Jn),m(oe,C,null),v(C,In),m(ie,C,null),v(C,qn),m(le,C,null),v(C,Zn),m(re,C,null),s(e,ha,n),s(e,Z,n),m(be,Z,null),v(Z,Rn),v(Z,Xt),v(Z,Vn),m(de,Z,null),s(e,va,n),s(e,O,n),m($e,O,null),v(O,Gn),v(O,St),v(O,Xn),v(O,zt),v(O,Sn),v(O,Et),v(O,zn),v(O,Pt),v(O,En),m(pe,O,null),s(e,ya,n),s(e,R,n),m(xe,R,null),v(R,Pn),v(R,Wt),v(R,Wn),m(ce,R,null),s(e,ba,n),s(e,V,n),m(we,V,null),v(V,Hn),v(V,Ht),v(V,Ln),m(ge,V,null),s(e,$a,n),s(e,G,n),m(Ne,G,null),v(G,Bn),v(G,Lt),v(G,Yn),m(me,G,null),s(e,xa,n),m(ke,e,n),s(e,wa,n),s(e,Ce,n),s(e,Na,n),s(e,De,n),s(e,ka,n),s(e,Te,n),s(e,Ca,n),m(Oe,e,n),s(e,Da,n),m(Fe,e,n),s(e,Ta,n),s(e,X,n),m(Ue,X,null),v(X,An),v(X,Bt),s(e,Oa,n),s(e,je,n),m(Me,je,null),s(e,Fa,n),m(Je,e,n),s(e,Ua,n),s(e,S,n),m(Ie,S,null),v(S,Qn),v(S,Yt),s(e,ja,n),s(e,qe,n),m(Ze,qe,null),s(e,Ma,n),m(Re,e,n),s(e,Ja,n),s(e,z,n),m(Ve,z,null),v(z,Kn),v(z,At),s(e,Ia,n),s(e,Ge,n),m(Xe,Ge,null),s(e,qa,n),m(Se,e,n),s(e,Za,n),s(e,E,n),m(ze,E,null),v(E,es),v(E,Qt),s(e,Ra,n),s(e,Ee,n),m(Pe,Ee,null),s(e,Va,n),m(We,e,n),s(e,Ga,n),s(e,F,n),m(He,F,null),v(F,ts),v(F,Kt),v(F,as),v(F,ea),v(F,ns),m(fe,F,null),v(F,ss),m(ue,F,null),v(F,os),m(_e,F,null),s(e,Xa,n),s(e,Le,n),m(Be,Le,null),s(e,Sa,n),m(Ye,e,n),s(e,za,n),s(e,P,n),m(Ae,P,null),v(P,is),v(P,ta),s(e,Ea,n),s(e,Qe,n),m(Ke,Qe,null),s(e,Pa,n),m(et,e,n),s(e,Wa,n),s(e,W,n),m(tt,W,null),v(W,ls),v(W,aa),s(e,Ha,n),s(e,at,n),m(nt,at,null),s(e,La,n),m(st,e,n),s(e,Ba,n),s(e,H,n),m(ot,H,null),v(H,rs),v(H,na),s(e,Ya,n),s(e,it,n),m(lt,it,null),s(e,Aa,n),m(rt,e,n),s(e,Qa,n),s(e,L,n),m(dt,L,null),v(L,ds),v(L,sa),s(e,Ka,n),s(e,pt,n),m(ct,pt,null),s(e,en,n),m(gt,e,n),s(e,tn,n),s(e,B,n),m(mt,B,null),v(B,ps),v(B,oa),s(e,an,n),s(e,ft,n),m(ut,ft,null),s(e,nn,n),m(_t,e,n),s(e,sn,n
),s(e,Y,n),m(ht,Y,null),v(Y,cs),v(Y,ia),s(e,on,n),s(e,A,n),m(vt,A,null),v(A,gs),v(A,la),s(e,ln,n),m(yt,e,n),s(e,rn,n),s(e,Q,n),m(bt,Q,null),v(Q,ms),v(Q,ra),s(e,dn,n),s(e,$t,n),m(xt,$t,null),s(e,pn,n),m(wt,e,n),s(e,cn,n),s(e,K,n),m(Nt,K,null),v(K,fs),v(K,da),s(e,gn,n),s(e,kt,n),m(Ct,kt,null),s(e,mn,n),m(Dt,e,n),s(e,fn,n),s(e,Tt,n),m(Ot,Tt,null),s(e,un,n),m(Ft,e,n),s(e,_n,n),s(e,pa,n),hn=!0},p(e,[n]){const T={};n&2&&(T.$$scope={dirty:n,ctx:e}),se.$set(T);const ee={};n&2&&(ee.$$scope={dirty:n,ctx:e}),oe.$set(ee);const j={};n&2&&(j.$$scope={dirty:n,ctx:e}),ie.$set(j);const te={};n&2&&(te.$$scope={dirty:n,ctx:e}),le.$set(te);const ae={};n&2&&(ae.$$scope={dirty:n,ctx:e}),re.$set(ae);const ne={};n&2&&(ne.$$scope={dirty:n,ctx:e}),de.$set(ne);const Ut={};n&2&&(Ut.$$scope={dirty:n,ctx:e}),pe.$set(Ut);const ca={};n&2&&(ca.$$scope={dirty:n,ctx:e}),ce.$set(ca);const jt={};n&2&&(jt.$$scope={dirty:n,ctx:e}),ge.$set(jt);const ga={};n&2&&(ga.$$scope={dirty:n,ctx:e}),me.$set(ga);const Mt={};n&2&&(Mt.$$scope={dirty:n,ctx:e}),fe.$set(Mt);const ma={};n&2&&(ma.$$scope={dirty:n,ctx:e}),ue.$set(ma);const 
Jt={};n&2&&(Jt.$$scope={dirty:n,ctx:e}),_e.$set(Jt)},i(e){hn||(f(h.$$.fragment,e),f(b.$$.fragment,e),f(ve.$$.fragment,e),f(ye.$$.fragment,e),f(se.$$.fragment,e),f(oe.$$.fragment,e),f(ie.$$.fragment,e),f(le.$$.fragment,e),f(re.$$.fragment,e),f(be.$$.fragment,e),f(de.$$.fragment,e),f($e.$$.fragment,e),f(pe.$$.fragment,e),f(xe.$$.fragment,e),f(ce.$$.fragment,e),f(we.$$.fragment,e),f(ge.$$.fragment,e),f(Ne.$$.fragment,e),f(me.$$.fragment,e),f(ke.$$.fragment,e),f(Oe.$$.fragment,e),f(Fe.$$.fragment,e),f(Ue.$$.fragment,e),f(Me.$$.fragment,e),f(Je.$$.fragment,e),f(Ie.$$.fragment,e),f(Ze.$$.fragment,e),f(Re.$$.fragment,e),f(Ve.$$.fragment,e),f(Xe.$$.fragment,e),f(Se.$$.fragment,e),f(ze.$$.fragment,e),f(Pe.$$.fragment,e),f(We.$$.fragment,e),f(He.$$.fragment,e),f(fe.$$.fragment,e),f(ue.$$.fragment,e),f(_e.$$.fragment,e),f(Be.$$.fragment,e),f(Ye.$$.fragment,e),f(Ae.$$.fragment,e),f(Ke.$$.fragment,e),f(et.$$.fragment,e),f(tt.$$.fragment,e),f(nt.$$.fragment,e),f(st.$$.fragment,e),f(ot.$$.fragment,e),f(lt.$$.fragment,e),f(rt.$$.fragment,e),f(dt.$$.fragment,e),f(ct.$$.fragment,e),f(gt.$$.fragment,e),f(mt.$$.fragment,e),f(ut.$$.fragment,e),f(_t.$$.fragment,e),f(ht.$$.fragment,e),f(vt.$$.fragment,e),f(yt.$$.fragment,e),f(bt.$$.fragment,e),f(xt.$$.fragment,e),f(wt.$$.fragment,e),f(Nt.$$.fragment,e),f(Ct.$$.fragment,e),f(Dt.$$.fragment,e),f(Ot.$$.fragment,e),f(Ft.$$.fragment,e),hn=!0)},o(e){u(h.$$.fragment,e),u(b.$$.fragment,e),u(ve.$$.fragment,e),u(ye.$$.fragment,e),u(se.$$.fragment,e),u(oe.$$.fragment,e),u(ie.$$.fragment,e),u(le.$$.fragment,e),u(re.$$.fragment,e),u(be.$$.fragment,e),u(de.$$.fragment,e),u($e.$$.fragment,e),u(pe.$$.fragment,e),u(xe.$$.fragment,e),u(ce.$$.fragment,e),u(we.$$.fragment,e),u(ge.$$.fragment,e),u(Ne.$$.fragment,e),u(me.$$.fragment,e),u(ke.$$.fragment,e),u(Oe.$$.fragment,e),u(Fe.$$.fragment,e),u(Ue.$$.fragment,e),u(Me.$$.fragment,e),u(Je.$$.fragment,e),u(Ie.$$.fragment,e),u(Ze.$$.fragment,e),u(Re.$$.fragment,e),u(Ve.$$.fragment,e),u(Xe.$$.fragment,e),u(Se.$$.
fragment,e),u(ze.$$.fragment,e),u(Pe.$$.fragment,e),u(We.$$.fragment,e),u(He.$$.fragment,e),u(fe.$$.fragment,e),u(ue.$$.fragment,e),u(_e.$$.fragment,e),u(Be.$$.fragment,e),u(Ye.$$.fragment,e),u(Ae.$$.fragment,e),u(Ke.$$.fragment,e),u(et.$$.fragment,e),u(tt.$$.fragment,e),u(nt.$$.fragment,e),u(st.$$.fragment,e),u(ot.$$.fragment,e),u(lt.$$.fragment,e),u(rt.$$.fragment,e),u(dt.$$.fragment,e),u(ct.$$.fragment,e),u(gt.$$.fragment,e),u(mt.$$.fragment,e),u(ut.$$.fragment,e),u(_t.$$.fragment,e),u(ht.$$.fragment,e),u(vt.$$.fragment,e),u(yt.$$.fragment,e),u(bt.$$.fragment,e),u(xt.$$.fragment,e),u(wt.$$.fragment,e),u(Nt.$$.fragment,e),u(Ct.$$.fragment,e),u(Dt.$$.fragment,e),u(Ot.$$.fragment,e),u(Ft.$$.fragment,e),hn=!1},d(e){e&&(a(x),a(y),a(r),a(t),a(fa),a(he),a(ua),a(_a),a(C),a(ha),a(Z),a(va),a(O),a(ya),a(R),a(ba),a(V),a($a),a(G),a(xa),a(wa),a(Ce),a(Na),a(De),a(ka),a(Te),a(Ca),a(Da),a(Ta),a(X),a(Oa),a(je),a(Fa),a(Ua),a(S),a(ja),a(qe),a(Ma),a(Ja),a(z),a(Ia),a(Ge),a(qa),a(Za),a(E),a(Ra),a(Ee),a(Va),a(Ga),a(F),a(Xa),a(Le),a(Sa),a(za),a(P),a(Ea),a(Qe),a(Pa),a(Wa),a(W),a(Ha),a(at),a(La),a(Ba),a(H),a(Ya),a(it),a(Aa),a(Qa),a(L),a(Ka),a(pt),a(en),a(tn),a(B),a(an),a(ft),a(nn),a(sn),a(Y),a(on),a(A),a(ln),a(rn),a(Q),a(dn),a($t),a(pn),a(cn),a(K),a(gn),a(kt),a(mn),a(fn),a(Tt),a(un),a(_n),a(pa)),a(l),_(h,e),_(b,e),_(ve,e),_(ye),_(se),_(oe),_(ie),_(le),_(re),_(be),_(de),_($e),_(pe),_(xe),_(ce),_(we),_(ge),_(Ne),_(me),_(ke,e),_(Oe,e),_(Fe,e),_(Ue),_(Me),_(Je,e),_(Ie),_(Ze),_(Re,e),_(Ve),_(Xe),_(Se,e),_(ze),_(Pe),_(We,e),_(He),_(fe),_(ue),_(_e),_(Be),_(Ye,e),_(Ae),_(Ke),_(et,e),_(tt),_(nt),_(st,e),_(ot),_(lt),_(rt,e),_(dt),_(ct),_(gt,e),_(mt),_(ut),_(_t,e),_(ht),_(vt),_(yt,e),_(bt),_(xt),_(wt,e),_(Nt),_(Ct),_(Dt,e),_(Ot),_(Ft,e)}}}const Co='{"title":"Loading methods","local":"loading-methods","sections":[{"title":"Datasets","local":"datasets.load_dataset","sections":[],"depth":2},{"title":"From 
files","local":"from-files","sections":[{"title":"Text","local":"datasets.packaged_modules.text.TextConfig","sections":[],"depth":3},{"title":"CSV","local":"datasets.packaged_modules.csv.CsvConfig","sections":[],"depth":3},{"title":"JSON","local":"datasets.packaged_modules.json.JsonConfig","sections":[],"depth":3},{"title":"XML","local":"datasets.packaged_modules.xml.XmlConfig","sections":[],"depth":3},{"title":"Parquet","local":"datasets.packaged_modules.parquet.ParquetConfig","sections":[],"depth":3},{"title":"Arrow","local":"datasets.packaged_modules.arrow.ArrowConfig","sections":[],"depth":3},{"title":"SQL","local":"datasets.packaged_modules.sql.SqlConfig","sections":[],"depth":3},{"title":"Images","local":"datasets.packaged_modules.imagefolder.ImageFolderConfig","sections":[],"depth":3},{"title":"Audio","local":"datasets.packaged_modules.audiofolder.AudioFolderConfig","sections":[],"depth":3},{"title":"Videos","local":"datasets.packaged_modules.videofolder.VideoFolderConfig","sections":[],"depth":3},{"title":"HDF5","local":"datasets.packaged_modules.hdf5.HDF5Config","sections":[],"depth":3},{"title":"Pdf","local":"datasets.packaged_modules.pdffolder.PdfFolderConfig","sections":[],"depth":3},{"title":"Nifti","local":"datasets.packaged_modules.niftifolder.NiftiFolderConfig","sections":[],"depth":3},{"title":"WebDataset","local":"datasets.packaged_modules.webdataset.WebDataset","sections":[],"depth":3}],"depth":2}],"depth":1}';function Do(D){return oo(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Jo extends io{constructor(l){super(),lo(this,l,Do,ko,so,{})}}export{Jo as component}; | |
Xet Storage Details
- Size:
- 115 kB
- Xet hash:
- fb4bcd73f45248802c69c842b86bdc74fb9a77509000a1ec29a59ac27b4d8e8e
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.