Buckets:
| import{s as dr,o as ir,n as k}from"../chunks/scheduler.d75c11ed.js";import{S as cr,i as pr,e as c,s as o,c as f,h as mr,a as p,d as l,b as r,f as M,g as h,j as y,k as T,l as n,m as u,n as _,t as b,o as $,p as x}from"../chunks/index.4ec9dfe9.js";import{C as gr,H as Ya,E as ur}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.903f9bf7.js";import{D as J}from"../chunks/Docstring.5dae47b8.js";import{C as U}from"../chunks/CodeBlock.77fa95e2.js";import{E as C}from"../chunks/ExampleCodeBlock.a373dc81.js";function fr(v){let a,w="Example:",i,s,d;return s=new U({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0X2J1aWxkZXIlMEFidWlsZGVyJTIwJTNEJTIwbG9hZF9kYXRhc2V0X2J1aWxkZXIoJ2Nvcm5lbGwtbW92aWUtcmV2aWV3LWRhdGElMkZyb3R0ZW5fdG9tYXRvZXMnKSUwQWJ1aWxkZXIuZG93bmxvYWRfYW5kX3ByZXBhcmUoKSUwQWRzJTIwJTNEJTIwYnVpbGRlci5hc19kYXRhc2V0KHNwbGl0JTNEJ3RyYWluJyklMEFkcw==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset_builder | |
| <span class="hljs-meta">>>> </span>builder = load_dataset_builder(<span class="hljs-string">'cornell-movie-review-data/rotten_tomatoes'</span>) | |
| <span class="hljs-meta">>>> </span>builder.download_and_prepare() | |
| <span class="hljs-meta">>>> </span>ds = builder.as_dataset(split=<span class="hljs-string">'train'</span>) | |
| <span class="hljs-meta">>>> </span>ds | |
| Dataset({ | |
| features: [<span class="hljs-string">'text'</span>, <span class="hljs-string">'label'</span>], | |
| num_rows: <span class="hljs-number">8530</span> | |
| })`,wrap:!1}}),{c(){a=c("p"),a.textContent=w,i=o(),f(s.$$.fragment)},l(e){a=p(e,"P",{"data-svelte-h":!0}),y(a)!=="svelte-11lpom8"&&(a.textContent=w),i=r(e),h(s.$$.fragment,e)},m(e,g){u(e,a,g),u(e,i,g),_(s,e,g),d=!0},p:k,i(e){d||(b(s.$$.fragment,e),d=!0)},o(e){$(s.$$.fragment,e),d=!1},d(e){e&&(l(a),l(i)),x(s,e)}}}function hr(v){let a,w="Download and prepare the dataset as Arrow files that can be loaded as a Dataset using <code>builder.as_dataset()</code>:",i,s,d;return s=new U({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0X2J1aWxkZXIlMEFidWlsZGVyJTIwJTNEJTIwbG9hZF9kYXRhc2V0X2J1aWxkZXIoJTIyY29ybmVsbC1tb3ZpZS1yZXZpZXctZGF0YSUyRnJvdHRlbl90b21hdG9lcyUyMiklMEFidWlsZGVyLmRvd25sb2FkX2FuZF9wcmVwYXJlKCk=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset_builder | |
| <span class="hljs-meta">>>> </span>builder = load_dataset_builder(<span class="hljs-string">"cornell-movie-review-data/rotten_tomatoes"</span>) | |
| <span class="hljs-meta">>>> </span>builder.download_and_prepare()`,wrap:!1}}),{c(){a=c("p"),a.innerHTML=w,i=o(),f(s.$$.fragment)},l(e){a=p(e,"P",{"data-svelte-h":!0}),y(a)!=="svelte-i6fpq7"&&(a.innerHTML=w),i=r(e),h(s.$$.fragment,e)},m(e,g){u(e,a,g),u(e,i,g),_(s,e,g),d=!0},p:k,i(e){d||(b(s.$$.fragment,e),d=!0)},o(e){$(s.$$.fragment,e),d=!1},d(e){e&&(l(a),l(i)),x(s,e)}}}function _r(v){let a,w="Download and prepare the dataset as sharded Parquet files locally:",i,s,d;return s=new U({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0X2J1aWxkZXIlMEFidWlsZGVyJTIwJTNEJTIwbG9hZF9kYXRhc2V0X2J1aWxkZXIoJTIyY29ybmVsbC1tb3ZpZS1yZXZpZXctZGF0YSUyRnJvdHRlbl90b21hdG9lcyUyMiklMEFidWlsZGVyLmRvd25sb2FkX2FuZF9wcmVwYXJlKCUyMi4lMkZvdXRwdXRfZGlyJTIyJTJDJTIwZmlsZV9mb3JtYXQlM0QlMjJwYXJxdWV0JTIyKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset_builder | |
| <span class="hljs-meta">>>> </span>builder = load_dataset_builder(<span class="hljs-string">"cornell-movie-review-data/rotten_tomatoes"</span>) | |
| <span class="hljs-meta">>>> </span>builder.download_and_prepare(<span class="hljs-string">"./output_dir"</span>, file_format=<span class="hljs-string">"parquet"</span>)`,wrap:!1}}),{c(){a=c("p"),a.textContent=w,i=o(),f(s.$$.fragment)},l(e){a=p(e,"P",{"data-svelte-h":!0}),y(a)!=="svelte-1035kd7"&&(a.textContent=w),i=r(e),h(s.$$.fragment,e)},m(e,g){u(e,a,g),u(e,i,g),_(s,e,g),d=!0},p:k,i(e){d||(b(s.$$.fragment,e),d=!0)},o(e){$(s.$$.fragment,e),d=!1},d(e){e&&(l(a),l(i)),x(s,e)}}}function br(v){let a,w="Download and prepare the dataset as sharded Parquet files in a cloud storage:",i,s,d;return s=new U({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0X2J1aWxkZXIlMEFzdG9yYWdlX29wdGlvbnMlMjAlM0QlMjAlN0IlMjJrZXklMjIlM0ElMjBhd3NfYWNjZXNzX2tleV9pZCUyQyUyMCUyMnNlY3JldCUyMiUzQSUyMGF3c19zZWNyZXRfYWNjZXNzX2tleSU3RCUwQWJ1aWxkZXIlMjAlM0QlMjBsb2FkX2RhdGFzZXRfYnVpbGRlciglMjJjb3JuZWxsLW1vdmllLXJldmlldy1kYXRhJTJGcm90dGVuX3RvbWF0b2VzJTIyKSUwQWJ1aWxkZXIuZG93bmxvYWRfYW5kX3ByZXBhcmUoJTIyczMlM0ElMkYlMkZteS1idWNrZXQlMkZteV9yb3R0ZW5fdG9tYXRvZXMlMjIlMkMlMjBzdG9yYWdlX29wdGlvbnMlM0RzdG9yYWdlX29wdGlvbnMlMkMlMjBmaWxlX2Zvcm1hdCUzRCUyMnBhcnF1ZXQlMjIp",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset_builder | |
| <span class="hljs-meta">>>> </span>storage_options = {<span class="hljs-string">"key"</span>: aws_access_key_id, <span class="hljs-string">"secret"</span>: aws_secret_access_key} | |
| <span class="hljs-meta">>>> </span>builder = load_dataset_builder(<span class="hljs-string">"cornell-movie-review-data/rotten_tomatoes"</span>) | |
| <span class="hljs-meta">>>> </span>builder.download_and_prepare(<span class="hljs-string">"s3://my-bucket/my_rotten_tomatoes"</span>, storage_options=storage_options, file_format=<span class="hljs-string">"parquet"</span>)`,wrap:!1}}),{c(){a=c("p"),a.textContent=w,i=o(),f(s.$$.fragment)},l(e){a=p(e,"P",{"data-svelte-h":!0}),y(a)!=="svelte-y7m7rz"&&(a.textContent=w),i=r(e),h(s.$$.fragment,e)},m(e,g){u(e,a,g),u(e,i,g),_(s,e,g),d=!0},p:k,i(e){d||(b(s.$$.fragment,e),d=!0)},o(e){$(s.$$.fragment,e),d=!1},d(e){e&&(l(a),l(i)),x(s,e)}}}function $r(v){let a,w="Example:",i,s,d;return s=new U({props:{code:"ZG93bmxvYWRlZF9maWxlcyUyMCUzRCUyMGRsX21hbmFnZXIuZG93bmxvYWQoJ2h0dHBzJTNBJTJGJTJGc3RvcmFnZS5nb29nbGVhcGlzLmNvbSUyRnNlbGRvbi1kYXRhc2V0cyUyRnNlbnRlbmNlX3BvbGFyaXR5X3YxJTJGcnQtcG9sYXJpdHlkYXRhLnRhci5neicp",highlighted:'<span class="hljs-meta">>>> </span>downloaded_files = dl_manager.download(<span class="hljs-string">'https://storage.googleapis.com/seldon-datasets/sentence_polarity_v1/rt-polaritydata.tar.gz'</span>)',wrap:!1}}),{c(){a=c("p"),a.textContent=w,i=o(),f(s.$$.fragment)},l(e){a=p(e,"P",{"data-svelte-h":!0}),y(a)!=="svelte-11lpom8"&&(a.textContent=w),i=r(e),h(s.$$.fragment,e)},m(e,g){u(e,a,g),u(e,i,g),_(s,e,g),d=!0},p:k,i(e){d||(b(s.$$.fragment,e),d=!0)},o(e){$(s.$$.fragment,e),d=!1},d(e){e&&(l(a),l(i)),x(s,e)}}}function xr(v){let a,w="Is roughly equivalent to:",i,s,d;return s=new U({props:{code:"ZXh0cmFjdGVkX3BhdGhzJTIwJTNEJTIwZGxfbWFuYWdlci5leHRyYWN0KGRsX21hbmFnZXIuZG93bmxvYWQodXJsX29yX3VybHMpKQ==",highlighted:'<span class="hljs-attr">extracted_paths</span> = dl_manager.extract(dl_manager.download(url_or_urls))',wrap:!1}}),{c(){a=c("p"),a.textContent=w,i=o(),f(s.$$.fragment)},l(e){a=p(e,"P",{"data-svelte-h":!0}),y(a)!=="svelte-yva38v"&&(a.textContent=w),i=r(e),h(s.$$.fragment,e)},m(e,g){u(e,a,g),u(e,i,g),_(s,e,g),d=!0},p:k,i(e){d||(b(s.$$.fragment,e),d=!0)},o(e){$(s.$$.fragment,e),d=!1},d(e){e&&(l(a),l(i)),x(s,e)}}}function yr(v){let a,w="Example:",i,s,d;return s=new U({props:{code:"ZG93bmxvYWRlZF9maWxlcyUyMCUzRCUyMGRsX21hbmFnZXIuZG93bmxvYWQoJ2h0dHBzJTNBJTJGJTJGc3RvcmFnZS5nb29nbGVhcGlzLmNvbSUyRnNlbGRvbi1kYXRhc2V0cyUyRnNlbnRlbmNlX3BvbGFyaXR5X3YxJTJGcnQtcG9sYXJpdHlkYXRhLnRhci5neicpJTBBZXh0cmFjdGVkX2ZpbGVzJTIwJTNEJTIwZGxfbWFuYWdlci5leHRyYWN0KGRvd25sb2FkZWRfZmlsZXMp",highlighted:`<span class="hljs-meta">>>> </span>downloaded_files = dl_manager.download(<span class="hljs-string">'https://storage.googleapis.com/seldon-datasets/sentence_polarity_v1/rt-polaritydata.tar.gz'</span>) | |
| <span class="hljs-meta">>>> </span>extracted_files = dl_manager.extract(downloaded_files)`,wrap:!1}}),{c(){a=c("p"),a.textContent=w,i=o(),f(s.$$.fragment)},l(e){a=p(e,"P",{"data-svelte-h":!0}),y(a)!=="svelte-11lpom8"&&(a.textContent=w),i=r(e),h(s.$$.fragment,e)},m(e,g){u(e,a,g),u(e,i,g),_(s,e,g),d=!0},p:k,i(e){d||(b(s.$$.fragment,e),d=!0)},o(e){$(s.$$.fragment,e),d=!1},d(e){e&&(l(a),l(i)),x(s,e)}}}function wr(v){let a,w="Example:",i,s,d;return s=new U({props:{code:"YXJjaGl2ZSUyMCUzRCUyMGRsX21hbmFnZXIuZG93bmxvYWQoJ2h0dHBzJTNBJTJGJTJGc3RvcmFnZS5nb29nbGVhcGlzLmNvbSUyRnNlbGRvbi1kYXRhc2V0cyUyRnNlbnRlbmNlX3BvbGFyaXR5X3YxJTJGcnQtcG9sYXJpdHlkYXRhLnRhci5neicpJTBBZmlsZXMlMjAlM0QlMjBkbF9tYW5hZ2VyLml0ZXJfYXJjaGl2ZShhcmNoaXZlKQ==",highlighted:`<span class="hljs-meta">>>> </span>archive = dl_manager.download(<span class="hljs-string">'https://storage.googleapis.com/seldon-datasets/sentence_polarity_v1/rt-polaritydata.tar.gz'</span>) | |
| <span class="hljs-meta">>>> </span>files = dl_manager.iter_archive(archive)`,wrap:!1}}),{c(){a=c("p"),a.textContent=w,i=o(),f(s.$$.fragment)},l(e){a=p(e,"P",{"data-svelte-h":!0}),y(a)!=="svelte-11lpom8"&&(a.textContent=w),i=r(e),h(s.$$.fragment,e)},m(e,g){u(e,a,g),u(e,i,g),_(s,e,g),d=!0},p:k,i(e){d||(b(s.$$.fragment,e),d=!0)},o(e){$(s.$$.fragment,e),d=!1},d(e){e&&(l(a),l(i)),x(s,e)}}}function vr(v){let a,w="Example:",i,s,d;return s=new U({props:{code:"ZmlsZXMlMjAlM0QlMjBkbF9tYW5hZ2VyLmRvd25sb2FkX2FuZF9leHRyYWN0KCdodHRwcyUzQSUyRiUyRmh1Z2dpbmdmYWNlLmNvJTJGZGF0YXNldHMlMkZBSS1MYWItTWFrZXJlcmUlMkZiZWFucyUyRnJlc29sdmUlMkZtYWluJTJGZGF0YSUyRnRyYWluLnppcCcpJTBBZmlsZXMlMjAlM0QlMjBkbF9tYW5hZ2VyLml0ZXJfZmlsZXMoZmlsZXMp",highlighted:`<span class="hljs-meta">>>> </span>files = dl_manager.download_and_extract(<span class="hljs-string">'https://huggingface.co/datasets/AI-Lab-Makerere/beans/resolve/main/data/train.zip'</span>) | |
| <span class="hljs-meta">>>> </span>files = dl_manager.iter_files(files)`,wrap:!1}}),{c(){a=c("p"),a.textContent=w,i=o(),f(s.$$.fragment)},l(e){a=p(e,"P",{"data-svelte-h":!0}),y(a)!=="svelte-11lpom8"&&(a.textContent=w),i=r(e),h(s.$$.fragment,e)},m(e,g){u(e,a,g),u(e,i,g),_(s,e,g),d=!0},p:k,i(e){d||(b(s.$$.fragment,e),d=!0)},o(e){$(s.$$.fragment,e),d=!1},d(e){e&&(l(a),l(i)),x(s,e)}}}function Mr(v){let a,w="Example:",i,s,d;return s=new U({props:{code:"ZG93bmxvYWRlZF9maWxlcyUyMCUzRCUyMGRsX21hbmFnZXIuZG93bmxvYWQoJ2h0dHBzJTNBJTJGJTJGc3RvcmFnZS5nb29nbGVhcGlzLmNvbSUyRnNlbGRvbi1kYXRhc2V0cyUyRnNlbnRlbmNlX3BvbGFyaXR5X3YxJTJGcnQtcG9sYXJpdHlkYXRhLnRhci5neicp",highlighted:'<span class="hljs-meta">>>> </span>downloaded_files = dl_manager.download(<span class="hljs-string">'https://storage.googleapis.com/seldon-datasets/sentence_polarity_v1/rt-polaritydata.tar.gz'</span>)',wrap:!1}}),{c(){a=c("p"),a.textContent=w,i=o(),f(s.$$.fragment)},l(e){a=p(e,"P",{"data-svelte-h":!0}),y(a)!=="svelte-11lpom8"&&(a.textContent=w),i=r(e),h(s.$$.fragment,e)},m(e,g){u(e,a,g),u(e,i,g),_(s,e,g),d=!0},p:k,i(e){d||(b(s.$$.fragment,e),d=!0)},o(e){$(s.$$.fragment,e),d=!1},d(e){e&&(l(a),l(i)),x(s,e)}}}function Tr(v){let a,w="Is equivalent to:",i,s,d;return s=new U({props:{code:"dXJscyUyMCUzRCUyMGRsX21hbmFnZXIuZXh0cmFjdChkbF9tYW5hZ2VyLmRvd25sb2FkKHVybF9vcl91cmxzKSk=",highlighted:'<span class="hljs-attribute">urls</span> <span class="hljs-operator">=</span> dl_manager.extract(dl_manager.download(url_or_urls))',wrap:!1}}),{c(){a=c("p"),a.textContent=w,i=o(),f(s.$$.fragment)},l(e){a=p(e,"P",{"data-svelte-h":!0}),y(a)!=="svelte-b0lbw9"&&(a.textContent=w),i=r(e),h(s.$$.fragment,e)},m(e,g){u(e,a,g),u(e,i,g),_(s,e,g),d=!0},p:k,i(e){d||(b(s.$$.fragment,e),d=!0)},o(e){$(s.$$.fragment,e),d=!1},d(e){e&&(l(a),l(i)),x(s,e)}}}function Jr(v){let a,w="Example:",i,s,d;return s=new U({props:{code:"ZG93bmxvYWRlZF9maWxlcyUyMCUzRCUyMGRsX21hbmFnZXIuZG93bmxvYWQoJ2h0dHBzJTNBJTJGJTJGc3RvcmFnZS5nb29nbGVhcGlzLmNvbSUyRnNlbGRvbi1kYXRhc2V0cyUyRnNlbnRlbmNlX3BvbGFyaXR5X3YxJTJGcnQtcG9sYXJpdHlkYXRhLnRhci5neicpJTBBZXh0cmFjdGVkX2ZpbGVzJTIwJTNEJTIwZGxfbWFuYWdlci5leHRyYWN0KGRvd25sb2FkZWRfZmlsZXMp",highlighted:`<span class="hljs-meta">>>> </span>downloaded_files = dl_manager.download(<span class="hljs-string">'https://storage.googleapis.com/seldon-datasets/sentence_polarity_v1/rt-polaritydata.tar.gz'</span>) | |
| <span class="hljs-meta">>>> </span>extracted_files = dl_manager.extract(downloaded_files)`,wrap:!1}}),{c(){a=c("p"),a.textContent=w,i=o(),f(s.$$.fragment)},l(e){a=p(e,"P",{"data-svelte-h":!0}),y(a)!=="svelte-11lpom8"&&(a.textContent=w),i=r(e),h(s.$$.fragment,e)},m(e,g){u(e,a,g),u(e,i,g),_(s,e,g),d=!0},p:k,i(e){d||(b(s.$$.fragment,e),d=!0)},o(e){$(s.$$.fragment,e),d=!1},d(e){e&&(l(a),l(i)),x(s,e)}}}function Cr(v){let a,w="Example:",i,s,d;return s=new U({props:{code:"YXJjaGl2ZSUyMCUzRCUyMGRsX21hbmFnZXIuZG93bmxvYWQoJ2h0dHBzJTNBJTJGJTJGc3RvcmFnZS5nb29nbGVhcGlzLmNvbSUyRnNlbGRvbi1kYXRhc2V0cyUyRnNlbnRlbmNlX3BvbGFyaXR5X3YxJTJGcnQtcG9sYXJpdHlkYXRhLnRhci5neicpJTBBZmlsZXMlMjAlM0QlMjBkbF9tYW5hZ2VyLml0ZXJfYXJjaGl2ZShhcmNoaXZlKQ==",highlighted:`<span class="hljs-meta">>>> </span>archive = dl_manager.download(<span class="hljs-string">'https://storage.googleapis.com/seldon-datasets/sentence_polarity_v1/rt-polaritydata.tar.gz'</span>) | |
| <span class="hljs-meta">>>> </span>files = dl_manager.iter_archive(archive)`,wrap:!1}}),{c(){a=c("p"),a.textContent=w,i=o(),f(s.$$.fragment)},l(e){a=p(e,"P",{"data-svelte-h":!0}),y(a)!=="svelte-11lpom8"&&(a.textContent=w),i=r(e),h(s.$$.fragment,e)},m(e,g){u(e,a,g),u(e,i,g),_(s,e,g),d=!0},p:k,i(e){d||(b(s.$$.fragment,e),d=!0)},o(e){$(s.$$.fragment,e),d=!1},d(e){e&&(l(a),l(i)),x(s,e)}}}function kr(v){let a,w="Example:",i,s,d;return s=new U({props:{code:"ZmlsZXMlMjAlM0QlMjBkbF9tYW5hZ2VyLmRvd25sb2FkX2FuZF9leHRyYWN0KCdodHRwcyUzQSUyRiUyRmh1Z2dpbmdmYWNlLmNvJTJGZGF0YXNldHMlMkZBSS1MYWItTWFrZXJlcmUlMkZiZWFucyUyRnJlc29sdmUlMkZtYWluJTJGZGF0YSUyRnRyYWluLnppcCcpJTBBZmlsZXMlMjAlM0QlMjBkbF9tYW5hZ2VyLml0ZXJfZmlsZXMoZmlsZXMp",highlighted:`<span class="hljs-meta">>>> </span>files = dl_manager.download_and_extract(<span class="hljs-string">'https://huggingface.co/datasets/AI-Lab-Makerere/beans/resolve/main/data/train.zip'</span>) | |
| <span class="hljs-meta">>>> </span>files = dl_manager.iter_files(files)`,wrap:!1}}),{c(){a=c("p"),a.textContent=w,i=o(),f(s.$$.fragment)},l(e){a=p(e,"P",{"data-svelte-h":!0}),y(a)!=="svelte-11lpom8"&&(a.textContent=w),i=r(e),h(s.$$.fragment,e)},m(e,g){u(e,a,g),u(e,i,g),_(s,e,g),d=!0},p:k,i(e){d||(b(s.$$.fragment,e),d=!0)},o(e){$(s.$$.fragment,e),d=!1},d(e){e&&(l(a),l(i)),x(s,e)}}}function Ur(v){let a,w="Example:",i,s,d;return s=new U({props:{code:"ZGF0YXNldHMuU3BsaXRHZW5lcmF0b3IoJTBBJTIwJTIwJTIwJTIwbmFtZSUzRGRhdGFzZXRzLlNwbGl0LlRSQUlOJTJDJTBBJTIwJTIwJTIwJTIwZ2VuX2t3YXJncyUzRCU3QiUyMnNwbGl0X2tleSUyMiUzQSUyMCUyMnRyYWluJTIyJTJDJTIwJTIyZmlsZXMlMjIlM0ElMjBkbF9tYW5hZ2VyLmRvd25sb2FkX2FuZF9leHRyYWN0KHVybCklN0QlMkMlMEEp",highlighted:`<span class="hljs-meta">>>> </span>datasets.SplitGenerator( | |
| <span class="hljs-meta">... </span> name=datasets.Split.TRAIN, | |
| <span class="hljs-meta">... </span> gen_kwargs={<span class="hljs-string">"split_key"</span>: <span class="hljs-string">"train"</span>, <span class="hljs-string">"files"</span>: dl_manager.download_and_extract(url)}, | |
| <span class="hljs-meta">... </span>)`,wrap:!1}}),{c(){a=c("p"),a.textContent=w,i=o(),f(s.$$.fragment)},l(e){a=p(e,"P",{"data-svelte-h":!0}),y(a)!=="svelte-11lpom8"&&(a.textContent=w),i=r(e),h(s.$$.fragment,e)},m(e,g){u(e,a,g),u(e,i,g),_(s,e,g),d=!0},p:k,i(e){d||(b(s.$$.fragment,e),d=!0)},o(e){$(s.$$.fragment,e),d=!1},d(e){e&&(l(a),l(i)),x(s,e)}}}function Rr(v){let a,w="Example:",i,s,d;return s=new U({props:{code:"ZGF0YXNldHMuU3BsaXRHZW5lcmF0b3IoJTBBJTIwJTIwJTIwJTIwbmFtZSUzRGRhdGFzZXRzLlNwbGl0LlRSQUlOJTJDJTBBJTIwJTIwJTIwJTIwZ2VuX2t3YXJncyUzRCU3QiUyMnNwbGl0X2tleSUyMiUzQSUyMCUyMnRyYWluJTIyJTJDJTIwJTIyZmlsZXMlMjIlM0ElMjBkbF9tYW5hZ2VyLmRvd25sb2FkX2FuZCUyMGV4dHJhY3QodXJsKSU3RCUyQyUwQSklMkMlMEFkYXRhc2V0cy5TcGxpdEdlbmVyYXRvciglMEElMjAlMjAlMjAlMjBuYW1lJTNEZGF0YXNldHMuU3BsaXQuVkFMSURBVElPTiUyQyUwQSUyMCUyMCUyMCUyMGdlbl9rd2FyZ3MlM0QlN0IlMjJzcGxpdF9rZXklMjIlM0ElMjAlMjJ2YWxpZGF0aW9uJTIyJTJDJTIwJTIyZmlsZXMlMjIlM0ElMjBkbF9tYW5hZ2VyLmRvd25sb2FkX2FuZCUyMGV4dHJhY3QodXJsKSU3RCUyQyUwQSklMkMlMEFkYXRhc2V0cy5TcGxpdEdlbmVyYXRvciglMEElMjAlMjAlMjAlMjBuYW1lJTNEZGF0YXNldHMuU3BsaXQuVEVTVCUyQyUwQSUyMCUyMCUyMCUyMGdlbl9rd2FyZ3MlM0QlN0IlMjJzcGxpdF9rZXklMjIlM0ElMjAlMjJ0ZXN0JTIyJTJDJTIwJTIyZmlsZXMlMjIlM0ElMjBkbF9tYW5hZ2VyLmRvd25sb2FkX2FuZCUyMGV4dHJhY3QodXJsKSU3RCUyQyUwQSk=",highlighted:`<span class="hljs-meta">>>> </span>datasets.SplitGenerator( | |
| <span class="hljs-meta">... </span> name=datasets.Split.TRAIN, | |
| <span class="hljs-meta">... </span> gen_kwargs={<span class="hljs-string">"split_key"</span>: <span class="hljs-string">"train"</span>, <span class="hljs-string">"files"</span>: dl_manager.download_and extract(url)}, | |
| <span class="hljs-meta">... </span>), | |
| <span class="hljs-meta">... </span>datasets.SplitGenerator( | |
| <span class="hljs-meta">... </span> name=datasets.Split.VALIDATION, | |
| <span class="hljs-meta">... </span> gen_kwargs={<span class="hljs-string">"split_key"</span>: <span class="hljs-string">"validation"</span>, <span class="hljs-string">"files"</span>: dl_manager.download_and extract(url)}, | |
| <span class="hljs-meta">... </span>), | |
| <span class="hljs-meta">... </span>datasets.SplitGenerator( | |
| <span class="hljs-meta">... </span> name=datasets.Split.TEST, | |
| <span class="hljs-meta">... </span> gen_kwargs={<span class="hljs-string">"split_key"</span>: <span class="hljs-string">"test"</span>, <span class="hljs-string">"files"</span>: dl_manager.download_and extract(url)}, | |
| <span class="hljs-meta">... </span>)`,wrap:!1}}),{c(){a=c("p"),a.textContent=w,i=o(),f(s.$$.fragment)},l(e){a=p(e,"P",{"data-svelte-h":!0}),y(a)!=="svelte-11lpom8"&&(a.textContent=w),i=r(e),h(s.$$.fragment,e)},m(e,g){u(e,a,g),u(e,i,g),_(s,e,g),d=!0},p:k,i(e){d||(b(s.$$.fragment,e),d=!0)},o(e){$(s.$$.fragment,e),d=!1},d(e){e&&(l(a),l(i)),x(s,e)}}}function jr(v){let a,w="Each descriptor can be composed with other using addition or slice:",i,s,d;return s=new U({props:{code:"c3BsaXQlMjAlM0QlMjBkYXRhc2V0cy5TcGxpdC5UUkFJTi5zdWJzcGxpdChkYXRhc2V0cy5wZXJjZW50JTVCMCUzQTI1JTVEKSUyMCUyQiUyMGRhdGFzZXRzLlNwbGl0LlRFU1Q=",highlighted:'split = datasets.Split.TRAIN.subsplit(datasets.percent[<span class="hljs-number">0</span>:<span class="hljs-number">25</span>]) + datasets.Split.TEST',wrap:!1}}),{c(){a=c("p"),a.textContent=w,i=o(),f(s.$$.fragment)},l(e){a=p(e,"P",{"data-svelte-h":!0}),y(a)!=="svelte-in376m"&&(a.textContent=w),i=r(e),h(s.$$.fragment,e)},m(e,g){u(e,a,g),u(e,i,g),_(s,e,g),d=!0},p:k,i(e){d||(b(s.$$.fragment,e),d=!0)},o(e){$(s.$$.fragment,e),d=!1},d(e){e&&(l(a),l(i)),x(s,e)}}}function Nr(v){let a,w="A split cannot be added twice, so the following will fail:",i,s,d;return s=new U({props:{code:"c3BsaXQlMjAlM0QlMjAoJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZGF0YXNldHMuU3BsaXQuVFJBSU4uc3Vic3BsaXQoZGF0YXNldHMucGVyY2VudCU1QiUzQTI1JTVEKSUyMCUyQiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGRhdGFzZXRzLlNwbGl0LlRSQUlOLnN1YnNwbGl0KGRhdGFzZXRzLnBlcmNlbnQlNUI3NSUzQSU1RCklMEEpJTIwJTIwJTIzJTIwRXJyb3IlMEFzcGxpdCUyMCUzRCUyMGRhdGFzZXRzLlNwbGl0LlRFU1QlMjAlMkIlMjBkYXRhc2V0cy5TcGxpdC5BTEwlMjAlMjAlMjMlMjBFcnJvcg==",highlighted:`split = ( | |
| datasets.Split.TRAIN.subsplit(datasets.percent[:<span class="hljs-number">25</span>]) + | |
| datasets.Split.TRAIN.subsplit(datasets.percent[<span class="hljs-number">75</span>:]) | |
| ) <span class="hljs-comment"># Error</span> | |
| split = datasets.Split.TEST + datasets.Split.ALL <span class="hljs-comment"># Error</span>`,wrap:!1}}),{c(){a=c("p"),a.textContent=w,i=o(),f(s.$$.fragment)},l(e){a=p(e,"P",{"data-svelte-h":!0}),y(a)!=="svelte-1dn84z5"&&(a.textContent=w),i=r(e),h(s.$$.fragment,e)},m(e,g){u(e,a,g),u(e,i,g),_(s,e,g),d=!0},p:k,i(e){d||(b(s.$$.fragment,e),d=!0)},o(e){$(s.$$.fragment,e),d=!1},d(e){e&&(l(a),l(i)),x(s,e)}}}function Gr(v){let a,w="The slices can be applied only one time. So the following are valid:",i,s,d;return s=new U({props:{code:"c3BsaXQlMjAlM0QlMjAoJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZGF0YXNldHMuU3BsaXQuVFJBSU4uc3Vic3BsaXQoZGF0YXNldHMucGVyY2VudCU1QiUzQTI1JTVEKSUyMCUyQiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGRhdGFzZXRzLlNwbGl0LlRFU1Quc3Vic3BsaXQoZGF0YXNldHMucGVyY2VudCU1QiUzQTUwJTVEKSUwQSklMEFzcGxpdCUyMCUzRCUyMChkYXRhc2V0cy5TcGxpdC5UUkFJTiUyMCUyQiUyMGRhdGFzZXRzLlNwbGl0LlRFU1QpLnN1YnNwbGl0KGRhdGFzZXRzLnBlcmNlbnQlNUIlM0E1MCU1RCk=",highlighted:`split = ( | |
| datasets.Split.TRAIN.subsplit(datasets.percent[:<span class="hljs-number">25</span>]) + | |
| datasets.Split.TEST.subsplit(datasets.percent[:<span class="hljs-number">50</span>]) | |
| ) | |
| split = (datasets.Split.TRAIN + datasets.Split.TEST).subsplit(datasets.percent[:<span class="hljs-number">50</span>])`,wrap:!1}}),{c(){a=c("p"),a.textContent=w,i=o(),f(s.$$.fragment)},l(e){a=p(e,"P",{"data-svelte-h":!0}),y(a)!=="svelte-f377rx"&&(a.textContent=w),i=r(e),h(s.$$.fragment,e)},m(e,g){u(e,a,g),u(e,i,g),_(s,e,g),d=!0},p:k,i(e){d||(b(s.$$.fragment,e),d=!0)},o(e){$(s.$$.fragment,e),d=!1},d(e){e&&(l(a),l(i)),x(s,e)}}}function Zr(v){let a,w="But this is not valid:",i,s,d;return s=new U({props:{code:"dHJhaW4lMjAlM0QlMjBkYXRhc2V0cy5TcGxpdC5UUkFJTiUwQXRlc3QlMjAlM0QlMjBkYXRhc2V0cy5TcGxpdC5URVNUJTBBc3BsaXQlMjAlM0QlMjB0cmFpbi5zdWJzcGxpdChkYXRhc2V0cy5wZXJjZW50JTVCJTNBMjUlNUQpLnN1YnNwbGl0KGRhdGFzZXRzLnBlcmNlbnQlNUIlM0EyNSU1RCklMEFzcGxpdCUyMCUzRCUyMCh0cmFpbi5zdWJzcGxpdChkYXRhc2V0cy5wZXJjZW50JTVCJTNBMjUlNUQpJTIwJTJCJTIwdGVzdCkuc3Vic3BsaXQoZGF0YXNldHMucGVyY2VudCU1QiUzQTUwJTVEKQ==",highlighted:`train = datasets.Split.TRAIN | |
| test = datasets.Split.TEST | |
| split = train.subsplit(datasets.percent[:<span class="hljs-number">25</span>]).subsplit(datasets.percent[:<span class="hljs-number">25</span>]) | |
| split = (train.subsplit(datasets.percent[:<span class="hljs-number">25</span>]) + test).subsplit(datasets.percent[:<span class="hljs-number">50</span>])`,wrap:!1}}),{c(){a=c("p"),a.textContent=w,i=o(),f(s.$$.fragment)},l(e){a=p(e,"P",{"data-svelte-h":!0}),y(a)!=="svelte-1jf0xuu"&&(a.textContent=w),i=r(e),h(s.$$.fragment,e)},m(e,g){u(e,a,g),u(e,i,g),_(s,e,g),d=!0},p:k,i(e){d||(b(s.$$.fragment,e),d=!0)},o(e){$(s.$$.fragment,e),d=!1},d(e){e&&(l(a),l(i)),x(s,e)}}}function Xr(v){let a,w="Examples:",i,s,d;return s=new U({props:{code:"JTIzJTIwVGhlJTIwZm9sbG93aW5nJTIwbGluZXMlMjBhcmUlMjBlcXVpdmFsZW50JTNBJTBBZHMlMjAlM0QlMjBkYXRhc2V0cy5sb2FkX2RhdGFzZXQoJ3lsZWN1biUyRm1uaXN0JyUyQyUyMHNwbGl0JTNEJ3Rlc3QlNUIlM0EzMyUyNSU1RCcpJTBBZHMlMjAlM0QlMjBkYXRhc2V0cy5sb2FkX2RhdGFzZXQoJ3lsZWN1biUyRm1uaXN0JyUyQyUyMHNwbGl0JTNEZGF0YXNldHMuUmVhZEluc3RydWN0aW9uLmZyb21fc3BlYygndGVzdCU1QiUzQTMzJTI1JTVEJykpJTBBZHMlMjAlM0QlMjBkYXRhc2V0cy5sb2FkX2RhdGFzZXQoJ3lsZWN1biUyRm1uaXN0JyUyQyUyMHNwbGl0JTNEZGF0YXNldHMuUmVhZEluc3RydWN0aW9uKCd0ZXN0JyUyQyUyMHRvJTNEMzMlMkMlMjB1bml0JTNEJyUyNScpKSUwQWRzJTIwJTNEJTIwZGF0YXNldHMubG9hZF9kYXRhc2V0KCd5bGVjdW4lMkZtbmlzdCclMkMlMjBzcGxpdCUzRGRhdGFzZXRzLlJlYWRJbnN0cnVjdGlvbiglMEEndGVzdCclMkMlMjBmcm9tXyUzRDAlMkMlMjB0byUzRDMzJTJDJTIwdW5pdCUzRCclMjUnKSklMEElMEElMjMlMjBUaGUlMjBmb2xsb3dpbmclMjBsaW5lcyUyMGFyZSUyMGVxdWl2YWxlbnQlM0ElMEFkcyUyMCUzRCUyMGRhdGFzZXRzLmxvYWRfZGF0YXNldCgneWxlY3VuJTJGbW5pc3QnJTJDJTIwc3BsaXQlM0QndGVzdCU1QiUzQTMzJTI1JTVEJTJCdHJhaW4lNUIxJTNBLTElNUQnKSUwQWRzJTIwJTNEJTIwZGF0YXNldHMubG9hZF9kYXRhc2V0KCd5bGVjdW4lMkZtbmlzdCclMkMlMjBzcGxpdCUzRGRhdGFzZXRzLlJlYWRJbnN0cnVjdGlvbi5mcm9tX3NwZWMoJTBBJ3Rlc3QlNUIlM0EzMyUyNSU1RCUyQnRyYWluJTVCMSUzQS0xJTVEJykpJTBBZHMlMjAlM0QlMjBkYXRhc2V0cy5sb2FkX2RhdGFzZXQoJ3lsZWN1biUyRm1uaXN0JyUyQyUyMHNwbGl0JTNEKCUwQWRhdGFzZXRzLlJlYWRJbnN0cnVjdGlvbigndGVzdCclMkMlMjB0byUzRDMzJTJDJTIwdW5pdCUzRCclMjUnKSUyMCUyQiUwQWRhdGFzZXRzLlJlYWRJbnN0cnVjdGlvbigndHJhaW4nJTJDJTIwZnJvbV8lM0QxJTJDJTIwdG8lM0QtMSUyQyUyMHVuaXQlM0QnYWJzJykpKSUwQSUwQSUyMyUyMFRoZSUyMGZvbGxvd2luZyUyMGxpbmVzJTIwYXJlJTIwZXF1aXZhbGVudCUzQSUwQWRzJTIwJTNEJTIwZGF0YXNldHMubG9hZF9kYXRhc2V0KCd5bGVjdW4lMkZtbmlzdCclMkMlMjBzcGxpdCUzRCd0ZXN0JTVCJTNBMzMlMjUlNUQocGN0MV9kcm9wcmVtYWluZGVyKScpJTBBZHMlMjAlM0QlMjBkYXRhc2V0cy5sb2FkX2RhdGFzZXQoJ3lsZWN1biUyRm1uaXN0JyUyQyUyMHNwbGl0JTNEZGF0YXNldHMuUmVhZEluc3RydWN0aW9uLmZyb21fc3BlYyglMEEndGVzdCU1QiUzQTMzJTI1JTVEKHBjdDFfZHJvcHJlbWFpbmRlciknKSklMEFkcyUyMCUzRCUyMGRhdGFzZXRzLmxvYWRfZGF0YXNldCgneWxlY3VuJTJGbW5pc3QnJTJDJTIwc3BsaXQlM0RkYXRhc2V0cy5SZWFkSW5zdHJ1Y3Rpb24oJTBBJ3Rlc3QnJTJDJTIwZnJvbV8lM0QwJTJDJTIwdG8lM0QzMyUyQyUyMHVuaXQlM0QnJTI1JyUyQyUyMHJvdW5kaW5nJTNEJTIycGN0MV9kcm9wcmVtYWluZGVyJTIyKSklMEElMEElMjMlMjAxMC1mb2xkJTIwdmFsaWRhdGlvbiUzQSUwQXRlc3RzJTIwJTNEJTIwZGF0YXNldHMubG9hZF9kYXRhc2V0KCUwQSd5bGVjdW4lMkZtbmlzdCclMkMlMEElNUJkYXRhc2V0cy5SZWFkSW5zdHJ1Y3Rpb24oJ3RyYWluJyUyQyUyMGZyb21fJTNEayUyQyUyMHRvJTNEayUyQjEwJTJDJTIwdW5pdCUzRCclMjUnKSUwQWZvciUyMGslMjBpbiUyMHJhbmdlKDAlMkMlMjAxMDAlMkMlMjAxMCklNUQpJTBBdHJhaW5zJTIwJTNEJTIwZGF0YXNldHMubG9hZF9kYXRhc2V0KCUwQSd5bGVjdW4lMkZtbmlzdCclMkMlMEElNUJkYXRhc2V0cy5SZWFkSW5zdHJ1Y3Rpb24oJ3RyYWluJyUyQyUyMHRvJTNEayUyQyUyMHVuaXQlM0QnJTI1JyklMjAlMkIlMjBkYXRhc2V0cy5SZWFkSW5zdHJ1Y3Rpb24oJ3RyYWluJyUyQyUyMGZyb21fJTNEayUyQjEwJTJDJTIwdW5pdCUzRCclMjUnKSUwQWZvciUyMGslMjBpbiUyMHJhbmdlKDAlMkMlMjAxMDAlMkMlMjAxMCklNUQp",highlighted:`<span class="hljs-comment"># The following lines are equivalent:</span> | |
| ds = datasets.load_dataset(<span class="hljs-string">'ylecun/mnist'</span>, split=<span class="hljs-string">'test[:33%]'</span>) | |
| ds = datasets.load_dataset(<span class="hljs-string">'ylecun/mnist'</span>, split=datasets.ReadInstruction.from_spec(<span class="hljs-string">'test[:33%]'</span>)) | |
| ds = datasets.load_dataset(<span class="hljs-string">'ylecun/mnist'</span>, split=datasets.ReadInstruction(<span class="hljs-string">'test'</span>, to=<span class="hljs-number">33</span>, unit=<span class="hljs-string">'%'</span>)) | |
| ds = datasets.load_dataset(<span class="hljs-string">'ylecun/mnist'</span>, split=datasets.ReadInstruction( | |
| <span class="hljs-string">'test'</span>, from_=<span class="hljs-number">0</span>, to=<span class="hljs-number">33</span>, unit=<span class="hljs-string">'%'</span>)) | |
| <span class="hljs-comment"># The following lines are equivalent:</span> | |
| ds = datasets.load_dataset(<span class="hljs-string">'ylecun/mnist'</span>, split=<span class="hljs-string">'test[:33%]+train[1:-1]'</span>) | |
| ds = datasets.load_dataset(<span class="hljs-string">'ylecun/mnist'</span>, split=datasets.ReadInstruction.from_spec( | |
| <span class="hljs-string">'test[:33%]+train[1:-1]'</span>)) | |
| ds = datasets.load_dataset(<span class="hljs-string">'ylecun/mnist'</span>, split=( | |
| datasets.ReadInstruction(<span class="hljs-string">'test'</span>, to=<span class="hljs-number">33</span>, unit=<span class="hljs-string">'%'</span>) + | |
| datasets.ReadInstruction(<span class="hljs-string">'train'</span>, from_=<span class="hljs-number">1</span>, to=-<span class="hljs-number">1</span>, unit=<span class="hljs-string">'abs'</span>))) | |
| <span class="hljs-comment"># The following lines are equivalent:</span> | |
| ds = datasets.load_dataset(<span class="hljs-string">'ylecun/mnist'</span>, split=<span class="hljs-string">'test[:33%](pct1_dropremainder)'</span>) | |
| ds = datasets.load_dataset(<span class="hljs-string">'ylecun/mnist'</span>, split=datasets.ReadInstruction.from_spec( | |
| <span class="hljs-string">'test[:33%](pct1_dropremainder)'</span>)) | |
| ds = datasets.load_dataset(<span class="hljs-string">'ylecun/mnist'</span>, split=datasets.ReadInstruction( | |
| <span class="hljs-string">'test'</span>, from_=<span class="hljs-number">0</span>, to=<span class="hljs-number">33</span>, unit=<span class="hljs-string">'%'</span>, rounding=<span class="hljs-string">"pct1_dropremainder"</span>)) | |
| <span class="hljs-comment"># 10-fold validation:</span> | |
| tests = datasets.load_dataset( | |
| <span class="hljs-string">'ylecun/mnist'</span>, | |
| [datasets.ReadInstruction(<span class="hljs-string">'train'</span>, from_=k, to=k+<span class="hljs-number">10</span>, unit=<span class="hljs-string">'%'</span>) | |
| <span class="hljs-keyword">for</span> k <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-number">0</span>, <span class="hljs-number">100</span>, <span class="hljs-number">10</span>)]) | |
| trains = datasets.load_dataset( | |
| <span class="hljs-string">'ylecun/mnist'</span>, | |
| [datasets.ReadInstruction(<span class="hljs-string">'train'</span>, to=k, unit=<span class="hljs-string">'%'</span>) + datasets.ReadInstruction(<span class="hljs-string">'train'</span>, from_=k+<span class="hljs-number">10</span>, unit=<span class="hljs-string">'%'</span>) | |
| <span class="hljs-keyword">for</span> k <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-number">0</span>, <span class="hljs-number">100</span>, <span class="hljs-number">10</span>)])`,wrap:!1}}),{c(){a=c("p"),a.textContent=w,i=o(),f(s.$$.fragment)},l(e){a=p(e,"P",{"data-svelte-h":!0}),y(a)!=="svelte-kvfsh7"&&(a.textContent=w),i=r(e),h(s.$$.fragment,e)},m(e,g){u(e,a,g),u(e,i,g),_(s,e,g),d=!0},p:k,i(e){d||(b(s.$$.fragment,e),d=!0)},o(e){$(s.$$.fragment,e),d=!1},d(e){e&&(l(a),l(i)),x(s,e)}}}function Ir(v){let a,w="Examples:",i,s,d;return s=new U({props:{code:"dGVzdCUzQSUyMHRlc3QlMjBzcGxpdC4lMEF0ZXN0JTIwJTJCJTIwdmFsaWRhdGlvbiUzQSUyMHRlc3QlMjBzcGxpdCUyMCUyQiUyMHZhbGlkYXRpb24lMjBzcGxpdC4lMEF0ZXN0JTVCMTAlM0ElNUQlM0ElMjB0ZXN0JTIwc3BsaXQlMkMlMjBtaW51cyUyMGl0cyUyMGZpcnN0JTIwMTAlMjByZWNvcmRzLiUwQXRlc3QlNUIlM0ExMCUyNSU1RCUzQSUyMGZpcnN0JTIwMTAlMjUlMjByZWNvcmRzJTIwb2YlMjB0ZXN0JTIwc3BsaXQuJTBBdGVzdCU1QiUzQTIwJTI1JTVEKHBjdDFfZHJvcHJlbWFpbmRlciklM0ElMjBmaXJzdCUyMDEwJTI1JTIwcmVjb3JkcyUyQyUyMHJvdW5kZWQlMjB3aXRoJTIwdGhlJTIwcGN0MV9kcm9wcmVtYWluZGVyJTIwcm91bmRpbmcuJTBBdGVzdCU1QiUzQS01JTI1JTVEJTJCdHJhaW4lNUI0MCUyNSUzQTYwJTI1JTVEJTNBJTIwZmlyc3QlMjA5NSUyNSUyMG9mJTIwdGVzdCUyMCUyQiUyMG1pZGRsZSUyMDIwJTI1JTIwb2YlMjB0cmFpbi4=",highlighted:`<span class="hljs-keyword">test: test</span> split. | |
| <span class="hljs-keyword">test </span>+ validation: test split + validation split. | |
| test[10:]: test split, minus its first 10 records. | |
| test[:10%]: first 10% records of test split. | |
| test[:20%](pct1_dropremainder): first 10% records, rounded with the pct1_dropremainder rounding. | |
| test[:<span class="hljs-string">-5</span>%]+train[40%:60%]: first 95% of test + middle 20% of train.`,wrap:!1}}),{c(){a=c("p"),a.textContent=w,i=o(),f(s.$$.fragment)},l(e){a=p(e,"P",{"data-svelte-h":!0}),y(a)!=="svelte-kvfsh7"&&(a.textContent=w),i=r(e),h(s.$$.fragment,e)},m(e,g){u(e,a,g),u(e,i,g),_(s,e,g),d=!0},p:k,i(e){d||(b(s.$$.fragment,e),d=!0)},o(e){$(s.$$.fragment,e),d=!1},d(e){e&&(l(a),l(i)),x(s,e)}}}function Dr(v){let a,w="Example:",i,s,d;return s=new U({props:{code:"VkVSU0lPTiUyMCUzRCUyMGRhdGFzZXRzLlZlcnNpb24oJTIyMS4wLjAlMjIp",highlighted:'<span class="hljs-meta">>>> </span>VERSION = datasets.Version(<span class="hljs-string">"1.0.0"</span>)',wrap:!1}}),{c(){a=c("p"),a.textContent=w,i=o(),f(s.$$.fragment)},l(e){a=p(e,"P",{"data-svelte-h":!0}),y(a)!=="svelte-11lpom8"&&(a.textContent=w),i=r(e),h(s.$$.fragment,e)},m(e,g){u(e,a,g),u(e,i,g),_(s,e,g),d=!0},p:k,i(e){d||(b(s.$$.fragment,e),d=!0)},o(e){$(s.$$.fragment,e),d=!1},d(e){e&&(l(a),l(i)),x(s,e)}}}function Br(v){let a,w,i,s,d,e,g,La,et,Ha,tt,oo='🤗 Datasets relies on two main classes during the dataset building process: <a href="/docs/datasets/pr_8113/en/package_reference/builder_classes#datasets.DatasetBuilder">DatasetBuilder</a> and <a href="/docs/datasets/pr_8113/en/package_reference/builder_classes#datasets.BuilderConfig">BuilderConfig</a>.',Aa,R,at,bs,St,ro="Abstract base class for all datasets.",$s,zt,lo="<code>DatasetBuilder</code> has 3 key methods:",xs,Et,io=`<li><code>DatasetBuilder.info</code>: Documents the dataset, including feature | |
| names, types, shapes, version, splits, citation, etc.</li> <li><a href="/docs/datasets/pr_8113/en/package_reference/builder_classes#datasets.DatasetBuilder.download_and_prepare">DatasetBuilder.download_and_prepare()</a>: Downloads the source data | |
| and writes it to disk.</li> <li><a href="/docs/datasets/pr_8113/en/package_reference/builder_classes#datasets.DatasetBuilder.as_dataset">DatasetBuilder.as_dataset()</a>: Generates a <a href="/docs/datasets/pr_8113/en/package_reference/main_classes#datasets.Dataset">Dataset</a>.</li>`,ys,Qt,co=`Some <code>DatasetBuilder</code>s expose multiple variants of the | |
| dataset by defining a <a href="/docs/datasets/pr_8113/en/package_reference/builder_classes#datasets.BuilderConfig">BuilderConfig</a> subclass and accepting a | |
| config object (or name) on construction. Configurable datasets expose a | |
| pre-defined set of configurations in <code>DatasetBuilder.builder_configs()</code>.`,ws,re,st,vs,Yt,po="Return a Dataset for the specified split.",Ms,Ue,Ts,D,nt,Js,Wt,mo="Downloads and prepares dataset for reading.",Cs,Lt,go="Example:",ks,Re,Us,je,Rs,Ne,js,Ge,ot,Ns,Ht,uo="Return the path of the module of this class or subclass.",Pa,K,rt,Gs,At,fo="Base class for datasets with data generation based on dict generators.",Zs,Pt,ho=`<code>GeneratorBasedBuilder</code> is a convenience class that abstracts away much | |
| of the data writing and reading of <code>DatasetBuilder</code>. It expects subclasses to | |
| implement generators of feature dictionaries across the dataset splits | |
| (<code>_split_generators</code>). See the method docstrings for details.`,qa,_e,lt,Xs,qt,_o="Base class for datasets with data generation based on Arrow loading functions (CSV/JSON/Parquet).",Oa,Y,dt,Is,Ot,bo="Base class for <code>DatasetBuilder</code> data configuration.",Ds,Kt,$o=`<code>DatasetBuilder</code> subclasses with data configuration options should subclass | |
| <code>BuilderConfig</code> and add their own properties.`,Bs,A,it,Fs,ea,xo=`The config id is used to build the cache directory. | |
| By default it is equal to the config name. | |
| However the name of a config is not sufficient to have a unique identifier for the dataset being generated | |
| since it doesn’t take into account:`,Vs,ta,yo="<li>the config kwargs that can be used to overwrite attributes</li> <li>the custom features used to write the dataset</li> <li>the data_files for json/text/csv/pandas datasets</li>",Ss,aa,wo="Therefore the config id is just the config name with an optional suffix based on these.",Ka,ct,es,X,pt,zs,P,mt,Es,sa,vo="Download given URL(s).",Qs,na,Mo="By default, only one process is used for download. Pass customized <code>download_config.num_proc</code> to change this behavior.",Ys,Ze,Ws,le,gt,Ls,oa,To="Download and extract given <code>url_or_urls</code>.",Hs,Xe,As,de,ut,Ps,ra,Jo="Extract given path(s).",qs,Ie,Os,ie,ft,Ks,la,Co="Iterate over files within an archive.",en,De,tn,ce,ht,an,da,ko="Iterate over file paths.",sn,Be,ts,N,_t,nn,ia,Uo=`Download manager that uses the ”::” separator to navigate through (possibly remote) compressed archives. | |
| Contrary to the regular <code>DownloadManager</code>, the <code>download</code> and <code>extract</code> methods don’t actually download nor extract | |
| data, but they rather return the path or url that could be opened using the <code>xopen</code> function which extends the | |
| built-in <code>open</code> function to stream data from remote files.`,on,pe,bt,rn,ca,Ro=`Normalize URL(s) of files to stream data from. | |
| This is the lazy version of <code>DownloadManager.download</code> for streaming.`,ln,Fe,dn,q,$t,cn,pa,jo="Prepare given <code>url_or_urls</code> for streaming (add extraction protocol).",pn,ma,No="This is the lazy version of <code>DownloadManager.download_and_extract</code> for streaming.",mn,Ve,gn,O,xt,un,ga,Go="Add extraction protocol for given url(s) for streaming.",fn,ua,Zo="This is the lazy version of <code>DownloadManager.extract</code> for streaming.",hn,Se,_n,me,yt,bn,fa,Xo="Iterate over files within an archive.",$n,ze,xn,ge,wt,yn,ha,Io="Iterate over files.",wn,Ee,as,be,vt,vn,_a,Do="Configuration for our cached path manager.",ss,F,Mt,Mn,ba,Bo="<code>Enum</code> for how to treat pre-existing downloads and data.",Tn,$a,Fo=`The default mode is <code>REUSE_DATASET_IF_EXISTS</code>, which will reuse both | |
| raw downloads and the prepared dataset if they exist.`,Jn,xa,Vo="The generations modes:",Cn,ya,So="<thead><tr><th></th> <th>Downloads</th> <th>Dataset</th></tr></thead> <tbody><tr><td><code>REUSE_DATASET_IF_EXISTS</code> (default)</td> <td>Reuse</td> <td>Reuse</td></tr> <tr><td><code>REUSE_CACHE_IF_EXISTS</code></td> <td>Reuse</td> <td>Fresh</td></tr> <tr><td><code>FORCE_REDOWNLOAD</code></td> <td>Fresh</td> <td>Fresh</td></tr></tbody>",ns,Tt,os,V,Jt,kn,wa,zo="<code>Enum</code> that specifies which verification checks to run.",Un,va,Eo=`The default mode is <code>BASIC_CHECKS</code>, which will perform only rudimentary checks to avoid slowdowns | |
| when generating/downloading a dataset for the first time.`,Rn,Ma,Qo="The verification modes:",jn,Ta,Yo="<thead><tr><th></th> <th>Verification checks</th></tr></thead> <tbody><tr><td><code>ALL_CHECKS</code></td> <td>Split checks and validity (number of files, checksums) of downloaded files</td></tr> <tr><td><code>BASIC_CHECKS</code> (default)</td> <td>Same as <code>ALL_CHECKS</code> but without checking downloaded files</td></tr> <tr><td><code>NO_CHECKS</code></td> <td>None</td></tr></tbody>",rs,Ct,ls,W,kt,Nn,Ja,Wo="Defines the split information for the generator.",Gn,Ca,Lo=`This should be used as returned value of | |
| <code>GeneratorBasedBuilder._split_generators</code>. | |
| See <code>GeneratorBasedBuilder._split_generators</code> for more info and example | |
| of usage.`,Zn,Qe,ds,G,Ut,Xn,ka,Ho="<code>Enum</code> for dataset splits.",In,Ua,Ao=`Datasets are typically split into different subsets to be used at various | |
| stages of training and evaluation.`,Dn,Ra,Po=`<li><code>TRAIN</code>: the training data.</li> <li><code>VALIDATION</code>: the validation data. If present, this is typically used as | |
| evaluation data while iterating on a model (e.g. changing hyperparameters, | |
| model architecture, etc.).</li> <li><code>TEST</code>: the testing data. This is the data to report metrics on. Typically | |
| you do not want to use this during model iteration as you may overfit to it.</li> <li><code>ALL</code>: the union of all defined dataset splits.</li>`,Bn,ja,qo="All splits, including compositions inherit from <code>datasets.SplitBase</code>.",Fn,Na,Oo='See the <a href="../load_hub#splits">guide</a> on splits for more information.',Vn,Ye,is,j,Rt,Sn,Ga,Ko="Descriptor corresponding to a named split (train, test, …).",zn,Za,er="Example:",En,We,Qn,Xa,tr=`The resulting split will correspond to 25% of the train split merged with | |
| 100% of the test split.`,Yn,Le,Wn,He,Ln,Ae,cs,$e,jt,Hn,Ia,ar="Split corresponding to the union of all defined dataset splits.",ps,S,Nt,An,Da,sr="Reading instruction for a dataset.",Pn,Pe,qn,ue,Gt,On,Ba,nr="Creates a <code>ReadInstruction</code> instance out of a string spec.",Kn,qe,eo,fe,Zt,to,Fa,or="Translate instruction into a list of absolute instructions.",ao,Va,rr="Those absolute instructions are then to be added together.",ms,Xt,gs,ee,It,so,Sa,lr="Dataset version <code>MAJOR.MINOR.PATCH</code>.",no,Oe,us,Dt,fs,Wa,hs;return d=new gr({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),g=new Ya({props:{title:"Builder classes",local:"builder-classes",headingTag:"h1"}}),et=new Ya({props:{title:"Builders",local:"datasets.DatasetBuilder",headingTag:"h2"}}),at=new J({props:{name:"class datasets.DatasetBuilder",anchor:"datasets.DatasetBuilder",parameters:[{name:"cache_dir",val:": typing.Optional[str] = None"},{name:"dataset_name",val:": typing.Optional[str] = None"},{name:"config_name",val:": typing.Optional[str] = None"},{name:"hash",val:": typing.Optional[str] = None"},{name:"base_path",val:": typing.Optional[str] = None"},{name:"info",val:": typing.Optional[datasets.info.DatasetInfo] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"token",val:": typing.Union[bool, str, NoneType] = None"},{name:"repo_id",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[str, list, dict, datasets.data_files.DataFilesDict, NoneType] = None"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"storage_options",val:": typing.Optional[dict] = None"},{name:"writer_batch_size",val:": typing.Optional[int] = None"},{name:"config_id",val:": typing.Optional[str] = None"},{name:"**config_kwargs",val:""}],parametersDescription:[{anchor:"datasets.DatasetBuilder.cache_dir",description:`<strong>cache_dir</strong> (<code>str</code>, <em>optional</em>) — | |
| Directory to cache data. Defaults to <code>"~/.cache/huggingface/datasets"</code>.`,name:"cache_dir"},{anchor:"datasets.DatasetBuilder.dataset_name",description:`<strong>dataset_name</strong> (<code>str</code>, <em>optional</em>) — | |
| Name of the dataset, if different from the builder name. Useful for packaged builders | |
| like csv, imagefolder, audiofolder, etc. to reflect the difference between datasets | |
| that use the same packaged builder.`,name:"dataset_name"},{anchor:"datasets.DatasetBuilder.config_name",description:`<strong>config_name</strong> (<code>str</code>, <em>optional</em>) — | |
| Name of the dataset configuration. | |
| It affects the data generated on disk. Different configurations will have their own subdirectories and | |
| versions. | |
| If not provided, the default configuration is used (if it exists).</p> | |
| <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"> | |
| <p class="font-medium">Added in 2.3.0</p> | |
| <p>Parameter <code>name</code> was renamed to <code>config_name</code>.</p> | |
| </div>`,name:"config_name"},{anchor:"datasets.DatasetBuilder.hash",description:`<strong>hash</strong> (<code>str</code>, <em>optional</em>) — | |
| Hash specific to the dataset builder code. Used to update the caching directory when the | |
| dataset builder code is updated (to avoid reusing old data). | |
| The typical caching directory (defined in <code>self._relative_data_dir</code>) is <code>name/version/hash/</code>.`,name:"hash"},{anchor:"datasets.DatasetBuilder.base_path",description:`<strong>base_path</strong> (<code>str</code>, <em>optional</em>) — | |
| Base path for relative paths that are used to download files. | |
| This can be a remote URL.`,name:"base_path"},{anchor:"datasets.DatasetBuilder.features",description:`<strong>features</strong> (<a href="/docs/datasets/pr_8113/en/package_reference/main_classes#datasets.Features">Features</a>, <em>optional</em>) — | |
| Features types to use with this dataset. | |
| It can be used to change the <a href="/docs/datasets/pr_8113/en/package_reference/main_classes#datasets.Features">Features</a> types of a dataset, for example.`,name:"features"},{anchor:"datasets.DatasetBuilder.token",description:`<strong>token</strong> (<code>str</code> or <code>bool</code>, <em>optional</em>) — | |
| String or boolean to use as Bearer token for remote files on the | |
| Datasets Hub. If <code>True</code>, will get token from <code>"~/.huggingface"</code>.`,name:"token"},{anchor:"datasets.DatasetBuilder.repo_id",description:`<strong>repo_id</strong> (<code>str</code>, <em>optional</em>) — | |
| ID of the dataset repository. | |
| Used to distinguish builders with the same name but not coming from the same namespace, for example “rajpurkar/squad” | |
| and “lhoestq/squad” repo IDs. In the latter, the builder name would be “lhoestq___squad”.`,name:"repo_id"},{anchor:"datasets.DatasetBuilder.data_files",description:`<strong>data_files</strong> (<code>str</code> or <code>Sequence</code> or <code>Mapping</code>, <em>optional</em>) — | |
| Path(s) to source data file(s). | |
| For builders like “csv” or “json” that need the user to specify data files. They can be either | |
| local or remote files. For convenience, you can use a <code>DataFilesDict</code>.`,name:"data_files"},{anchor:"datasets.DatasetBuilder.data_dir",description:`<strong>data_dir</strong> (<code>str</code>, <em>optional</em>) — | |
| Path to directory containing source data file(s). | |
| Use only if <code>data_files</code> is not passed, in which case it is equivalent to passing | |
| <code>os.path.join(data_dir, "**")</code> as <code>data_files</code>. | |
| For builders that require manual download, it must be the path to the local directory containing the | |
| manually downloaded data.`,name:"data_dir"},{anchor:"datasets.DatasetBuilder.storage_options",description:`<strong>storage_options</strong> (<code>dict</code>, <em>optional</em>) — | |
| Key/value pairs to be passed on to the dataset file-system backend, if any.`,name:"storage_options"},{anchor:"datasets.DatasetBuilder.writer_batch_size",description:`<strong>writer_batch_size</strong> (<code>int</code>, <em>optional</em>) — | |
| Batch size used by the ArrowWriter. | |
| It defines the number of samples that are kept in memory before writing them | |
| and also the length of the arrow chunks. | |
| None means that the ArrowWriter will use its default value.`,name:"writer_batch_size"},{anchor:"datasets.DatasetBuilder.*config_kwargs",description:`*<strong>*config_kwargs</strong> (additional keyword arguments) — Keyword arguments to be passed to the corresponding builder | |
| configuration class, set on the class attribute <a href="/docs/datasets/pr_8113/en/package_reference/builder_classes#datasets.BuilderConfig">DatasetBuilder.BUILDER_CONFIG_CLASS</a>. The builder | |
| configuration class is <a href="/docs/datasets/pr_8113/en/package_reference/builder_classes#datasets.BuilderConfig">BuilderConfig</a> or a subclass of it.`,name:"*config_kwargs"}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/builder.py#L209"}}),st=new J({props:{name:"as_dataset",anchor:"datasets.DatasetBuilder.as_dataset",parameters:[{name:"split",val:": typing.Union[str, datasets.splits.Split, list[str], list[datasets.splits.Split], NoneType] = None"},{name:"run_post_process",val:" = True"},{name:"verification_mode",val:": typing.Union[datasets.utils.info_utils.VerificationMode, str, NoneType] = None"},{name:"in_memory",val:" = False"}],parametersDescription:[{anchor:"datasets.DatasetBuilder.as_dataset.split",description:`<strong>split</strong> (<code>datasets.Split</code>) — | |
| Which subset of the data to return.`,name:"split"},{anchor:"datasets.DatasetBuilder.as_dataset.run_post_process",description:`<strong>run_post_process</strong> (<code>bool</code>, defaults to <code>True</code>) — | |
| Whether to run post-processing dataset transforms and/or add | |
| indexes.`,name:"run_post_process"},{anchor:"datasets.DatasetBuilder.as_dataset.verification_mode",description:`<strong>verification_mode</strong> (<a href="/docs/datasets/pr_8113/en/package_reference/builder_classes#datasets.VerificationMode">VerificationMode</a> or <code>str</code>, defaults to <code>BASIC_CHECKS</code>) — | |
| Verification mode determining the checks to run on the | |
| downloaded/processed dataset information (checksums/size/splits/…).</p> | |
| <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"> | |
| <p class="font-medium">Added in 2.9.1</p> | |
| </div>`,name:"verification_mode"},{anchor:"datasets.DatasetBuilder.as_dataset.in_memory",description:`<strong>in_memory</strong> (<code>bool</code>, defaults to <code>False</code>) — | |
| Whether to copy the data in-memory.`,name:"in_memory"}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/builder.py#L992",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>datasets.Dataset</p> | |
| `}}),Ue=new C({props:{anchor:"datasets.DatasetBuilder.as_dataset.example",$$slots:{default:[fr]},$$scope:{ctx:v}}}),nt=new J({props:{name:"download_and_prepare",anchor:"datasets.DatasetBuilder.download_and_prepare",parameters:[{name:"output_dir",val:": typing.Optional[str] = None"},{name:"download_config",val:": typing.Optional[datasets.download.download_config.DownloadConfig] = None"},{name:"download_mode",val:": typing.Union[datasets.download.download_manager.DownloadMode, str, NoneType] = None"},{name:"verification_mode",val:": typing.Union[datasets.utils.info_utils.VerificationMode, str, NoneType] = None"},{name:"dl_manager",val:": typing.Optional[datasets.download.download_manager.DownloadManager] = None"},{name:"base_path",val:": typing.Optional[str] = None"},{name:"file_format",val:": str = 'arrow'"},{name:"max_shard_size",val:": typing.Union[str, int, NoneType] = None"},{name:"num_proc",val:": typing.Optional[int] = None"},{name:"storage_options",val:": typing.Optional[dict] = None"},{name:"**download_and_prepare_kwargs",val:""}],parametersDescription:[{anchor:"datasets.DatasetBuilder.download_and_prepare.output_dir",description:`<strong>output_dir</strong> (<code>str</code>, <em>optional</em>) — | |
| Output directory for the dataset. | |
| Default to this builder’s <code>cache_dir</code>, which is inside <code>~/.cache/huggingface/datasets</code> by default.</p> | |
| <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"> | |
| <p class="font-medium">Added in 2.5.0</p> | |
| </div>`,name:"output_dir"},{anchor:"datasets.DatasetBuilder.download_and_prepare.download_config",description:`<strong>download_config</strong> (<code>DownloadConfig</code>, <em>optional</em>) — | |
| Specific download configuration parameters.`,name:"download_config"},{anchor:"datasets.DatasetBuilder.download_and_prepare.download_mode",description:`<strong>download_mode</strong> (<a href="/docs/datasets/pr_8113/en/package_reference/builder_classes#datasets.DownloadMode">DownloadMode</a> or <code>str</code>, <em>optional</em>) — | |
| Select the download/generate mode, default to <code>REUSE_DATASET_IF_EXISTS</code>.`,name:"download_mode"},{anchor:"datasets.DatasetBuilder.download_and_prepare.verification_mode",description:`<strong>verification_mode</strong> (<a href="/docs/datasets/pr_8113/en/package_reference/builder_classes#datasets.VerificationMode">VerificationMode</a> or <code>str</code>, defaults to <code>BASIC_CHECKS</code>) — | |
| Verification mode determining the checks to run on the downloaded/processed dataset information (checksums/size/splits/…).</p> | |
| <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"> | |
| <p class="font-medium">Added in 2.9.1</p> | |
| </div>`,name:"verification_mode"},{anchor:"datasets.DatasetBuilder.download_and_prepare.dl_manager",description:`<strong>dl_manager</strong> (<code>DownloadManager</code>, <em>optional</em>) — | |
| Specific <code>DownloadManger</code> to use.`,name:"dl_manager"},{anchor:"datasets.DatasetBuilder.download_and_prepare.base_path",description:`<strong>base_path</strong> (<code>str</code>, <em>optional</em>) — | |
| Base path for relative paths that are used to download files. This can be a remote url. | |
| If not specified, the value of the <code>base_path</code> attribute (<code>self.base_path</code>) will be used instead.`,name:"base_path"},{anchor:"datasets.DatasetBuilder.download_and_prepare.file_format",description:`<strong>file_format</strong> (<code>str</code>, <em>optional</em>) — | |
| Format of the data files in which the dataset will be written. | |
| Supported formats: “arrow”, “parquet”. Default to “arrow” format. | |
| If the format is “parquet”, then image and audio data are embedded into the Parquet files instead of pointing to local files.</p> | |
| <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"> | |
| <p class="font-medium">Added in 2.5.0</p> | |
| </div>`,name:"file_format"},{anchor:"datasets.DatasetBuilder.download_and_prepare.max_shard_size",description:`<strong>max_shard_size</strong> (<code>Union[str, int]</code>, <em>optional</em>) — | |
| Maximum number of bytes written per shard, default is “500MB”. | |
| The size is based on uncompressed data size, so in practice your shard files may be smaller than | |
| <code>max_shard_size</code> thanks to Parquet compression for example.</p> | |
| <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"> | |
| <p class="font-medium">Added in 2.5.0</p> | |
| </div>`,name:"max_shard_size"},{anchor:"datasets.DatasetBuilder.download_and_prepare.num_proc",description:`<strong>num_proc</strong> (<code>int</code>, <em>optional</em>, defaults to <code>None</code>) — | |
| Number of processes when downloading and generating the dataset locally. | |
| Multiprocessing is disabled by default.</p> | |
| <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"> | |
| <p class="font-medium">Added in 2.7.0</p> | |
| </div>`,name:"num_proc"},{anchor:"datasets.DatasetBuilder.download_and_prepare.storage_options",description:`<strong>storage_options</strong> (<code>dict</code>, <em>optional</em>) — | |
| Key/value pairs to be passed on to the caching file-system backend, if any.</p> | |
| <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"> | |
| <p class="font-medium">Added in 2.5.0</p> | |
| </div>`,name:"storage_options"},{anchor:"datasets.DatasetBuilder.download_and_prepare.*download_and_prepare_kwargs",description:"*<strong>*download_and_prepare_kwargs</strong> (additional keyword arguments) — Keyword arguments.",name:"*download_and_prepare_kwargs"}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/builder.py#L683"}}),Re=new C({props:{anchor:"datasets.DatasetBuilder.download_and_prepare.example",$$slots:{default:[hr]},$$scope:{ctx:v}}}),je=new C({props:{anchor:"datasets.DatasetBuilder.download_and_prepare.example-2",$$slots:{default:[_r]},$$scope:{ctx:v}}}),Ne=new C({props:{anchor:"datasets.DatasetBuilder.download_and_prepare.example-3",$$slots:{default:[br]},$$scope:{ctx:v}}}),ot=new J({props:{name:"get_imported_module_dir",anchor:"datasets.DatasetBuilder.get_imported_module_dir",parameters:[],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/builder.py#L675"}}),rt=new J({props:{name:"class datasets.GeneratorBasedBuilder",anchor:"datasets.GeneratorBasedBuilder",parameters:[{name:"cache_dir",val:": typing.Optional[str] = None"},{name:"dataset_name",val:": typing.Optional[str] = None"},{name:"config_name",val:": typing.Optional[str] = None"},{name:"hash",val:": typing.Optional[str] = None"},{name:"base_path",val:": typing.Optional[str] = None"},{name:"info",val:": typing.Optional[datasets.info.DatasetInfo] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"token",val:": typing.Union[bool, str, NoneType] = None"},{name:"repo_id",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[str, list, dict, datasets.data_files.DataFilesDict, NoneType] = None"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"storage_options",val:": typing.Optional[dict] = None"},{name:"writer_batch_size",val:": typing.Optional[int] = None"},{name:"config_id",val:": typing.Optional[str] = None"},{name:"**config_kwargs",val:""}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/builder.py#L1329"}}),lt=new J({props:{name:"class datasets.ArrowBasedBuilder",anchor:"datasets.ArrowBasedBuilder",parameters:[{name:"cache_dir",val:": typing.Optional[str] = None"},{name:"dataset_name",val:": typing.Optional[str] = None"},{name:"config_name",val:": typing.Optional[str] = None"},{name:"hash",val:": typing.Optional[str] = None"},{name:"base_path",val:": typing.Optional[str] = None"},{name:"info",val:": typing.Optional[datasets.info.DatasetInfo] = None"},{name:"features",val:": typing.Optional[datasets.features.features.Features] = None"},{name:"token",val:": typing.Union[bool, str, NoneType] = None"},{name:"repo_id",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[str, list, dict, datasets.data_files.DataFilesDict, NoneType] = None"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"storage_options",val:": typing.Optional[dict] = None"},{name:"writer_batch_size",val:": typing.Optional[int] = None"},{name:"config_id",val:": typing.Optional[str] = None"},{name:"**config_kwargs",val:""}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/builder.py#L1647"}}),dt=new J({props:{name:"class datasets.BuilderConfig",anchor:"datasets.BuilderConfig",parameters:[{name:"name",val:": str = 'default'"},{name:"version",val:": typing.Union[datasets.utils.version.Version, str, NoneType] = 0.0.0"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"data_files",val:": typing.Union[datasets.data_files.DataFilesDict, datasets.data_files.DataFilesPatternsDict, NoneType] = None"},{name:"description",val:": typing.Optional[str] = None"}],parametersDescription:[{anchor:"datasets.BuilderConfig.name",description:`<strong>name</strong> (<code>str</code>, defaults to <code>default</code>) — | |
| The name of the configuration.`,name:"name"},{anchor:"datasets.BuilderConfig.version",description:`<strong>version</strong> (<code>Version</code> or <code>str</code>, defaults to <code>0.0.0</code>) — | |
| The version of the configuration.`,name:"version"},{anchor:"datasets.BuilderConfig.data_dir",description:`<strong>data_dir</strong> (<code>str</code>, <em>optional</em>) — | |
| Path to the directory containing the source data.`,name:"data_dir"},{anchor:"datasets.BuilderConfig.data_files",description:`<strong>data_files</strong> (<code>str</code> or <code>Sequence</code> or <code>Mapping</code>, <em>optional</em>) — | |
| Path(s) to source data file(s).`,name:"data_files"},{anchor:"datasets.BuilderConfig.description",description:`<strong>description</strong> (<code>str</code>, <em>optional</em>) — | |
| A human description of the configuration.`,name:"description"}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/builder.py#L96"}}),it=new J({props:{name:"create_config_id",anchor:"datasets.BuilderConfig.create_config_id",parameters:[{name:"config_kwargs",val:": dict"},{name:"custom_features",val:": typing.Optional[datasets.features.features.Features] = None"}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/builder.py#L139"}}),ct=new Ya({props:{title:"Download",local:"datasets.DownloadManager",headingTag:"h2"}}),pt=new J({props:{name:"class datasets.DownloadManager",anchor:"datasets.DownloadManager",parameters:[{name:"dataset_name",val:": typing.Optional[str] = None"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"download_config",val:": typing.Optional[datasets.download.download_config.DownloadConfig] = None"},{name:"base_path",val:": typing.Optional[str] = None"},{name:"record_checksums",val:" = True"}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/download/download_manager.py#L71"}}),mt=new J({props:{name:"download",anchor:"datasets.DownloadManager.download",parameters:[{name:"url_or_urls",val:""}],parametersDescription:[{anchor:"datasets.DownloadManager.download.url_or_urls",description:`<strong>url_or_urls</strong> (<code>str</code> or <code>list</code> or <code>dict</code>) — | |
| URL or <code>list</code> or <code>dict</code> of URLs to download. Each URL is a <code>str</code>.`,name:"url_or_urls"}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/download/download_manager.py#L131",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>The downloaded paths matching the given input <code>url_or_urls</code>.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>str</code> or <code>list</code> or <code>dict</code></p> | |
| `}}),Ze=new C({props:{anchor:"datasets.DownloadManager.download.example",$$slots:{default:[$r]},$$scope:{ctx:v}}}),gt=new J({props:{name:"download_and_extract",anchor:"datasets.DownloadManager.download_and_extract",parameters:[{name:"url_or_urls",val:""}],parametersDescription:[{anchor:"datasets.DownloadManager.download_and_extract.url_or_urls",description:`<strong>url_or_urls</strong> (<code>str</code> or <code>list</code> or <code>dict</code>) — | |
| URL or <code>list</code> or <code>dict</code> of URLs to download and extract. Each URL is a <code>str</code>.`,name:"url_or_urls"}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/download/download_manager.py#L310",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>str</code>, extracted paths of given URL(s).</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>extracted_path(s)</p> | |
| `}}),Xe=new C({props:{anchor:"datasets.DownloadManager.download_and_extract.example",$$slots:{default:[xr]},$$scope:{ctx:v}}}),ut=new J({props:{name:"extract",anchor:"datasets.DownloadManager.extract",parameters:[{name:"path_or_paths",val:""}],parametersDescription:[{anchor:"datasets.DownloadManager.extract.path_or_paths",description:`<strong>path_or_paths</strong> (path or <code>list</code> or <code>dict</code>) — | |
| Path of file to extract. Each path is a <code>str</code>.`,name:"path_or_paths"}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/download/download_manager.py#L278",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>str</code>, The extracted paths matching the given input | |
| path_or_paths.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>extracted_path(s)</p> | |
| `}}),Ie=new C({props:{anchor:"datasets.DownloadManager.extract.example",$$slots:{default:[yr]},$$scope:{ctx:v}}}),ft=new J({props:{name:"iter_archive",anchor:"datasets.DownloadManager.iter_archive",parameters:[{name:"path_or_buf",val:": typing.Union[str, _io.BufferedReader]"}],parametersDescription:[{anchor:"datasets.DownloadManager.iter_archive.path_or_buf",description:`<strong>path_or_buf</strong> (<code>str</code> or <code>io.BufferedReader</code>) — | |
| Archive path or archive binary file object.`,name:"path_or_buf"}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/download/download_manager.py#L234",returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>tuple[str, io.BufferedReader]</code></p> | |
| `,isYield:!0}}),De=new C({props:{anchor:"datasets.DownloadManager.iter_archive.example",$$slots:{default:[wr]},$$scope:{ctx:v}}}),ht=new J({props:{name:"iter_files",anchor:"datasets.DownloadManager.iter_files",parameters:[{name:"paths",val:": typing.Union[str, list[str]]"}],parametersDescription:[{anchor:"datasets.DownloadManager.iter_files.paths",description:`<strong>paths</strong> (<code>str</code> or <code>list</code> of <code>str</code>) — | |
| Root paths.`,name:"paths"}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/download/download_manager.py#L259",returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>str</code></p> | |
| `,isYield:!0}}),Be=new C({props:{anchor:"datasets.DownloadManager.iter_files.example",$$slots:{default:[vr]},$$scope:{ctx:v}}}),_t=new J({props:{name:"class datasets.StreamingDownloadManager",anchor:"datasets.StreamingDownloadManager",parameters:[{name:"dataset_name",val:": typing.Optional[str] = None"},{name:"data_dir",val:": typing.Optional[str] = None"},{name:"download_config",val:": typing.Optional[datasets.download.download_config.DownloadConfig] = None"},{name:"base_path",val:": typing.Optional[str] = None"}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/download/streaming_download_manager.py#L47"}}),bt=new J({props:{name:"download",anchor:"datasets.StreamingDownloadManager.download",parameters:[{name:"url_or_urls",val:""}],parametersDescription:[{anchor:"datasets.StreamingDownloadManager.download.url_or_urls",description:`<strong>url_or_urls</strong> (<code>str</code> or <code>list</code> or <code>dict</code>) — | |
| URL(s) of files to stream data from. Each url is a <code>str</code>.`,name:"url_or_urls"}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/download/streaming_download_manager.py#L75",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>(<code>str</code> or <code>list</code> or <code>dict</code>), URL(s) to stream data from matching the given input url_or_urls.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>url(s)</p> | |
| `}}),Fe=new C({props:{anchor:"datasets.StreamingDownloadManager.download.example",$$slots:{default:[Mr]},$$scope:{ctx:v}}}),$t=new J({props:{name:"download_and_extract",anchor:"datasets.StreamingDownloadManager.download_and_extract",parameters:[{name:"url_or_urls",val:""}],parametersDescription:[{anchor:"datasets.StreamingDownloadManager.download_and_extract.url_or_urls",description:`<strong>url_or_urls</strong> (<code>str</code> or <code>list</code> or <code>dict</code>) — | |
| URL(s) to stream from data from. Each url is a <code>str</code>.`,name:"url_or_urls"}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/download/streaming_download_manager.py#L151",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>(<code>str</code> or <code>list</code> or <code>dict</code>), URL(s) to stream data from matching the given input <code>url_or_urls</code>.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>url(s)</p> | |
| `}}),Ve=new C({props:{anchor:"datasets.StreamingDownloadManager.download_and_extract.example",$$slots:{default:[Tr]},$$scope:{ctx:v}}}),xt=new J({props:{name:"extract",anchor:"datasets.StreamingDownloadManager.extract",parameters:[{name:"url_or_urls",val:""}],parametersDescription:[{anchor:"datasets.StreamingDownloadManager.extract.url_or_urls",description:`<strong>url_or_urls</strong> (<code>str</code> or <code>list</code> or <code>dict</code>) — | |
| URL(s) of files to stream data from. Each url is a <code>str</code>.`,name:"url_or_urls"}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/download/streaming_download_manager.py#L102",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>(<code>str</code> or <code>list</code> or <code>dict</code>), URL(s) to stream data from matching the given input <code>url_or_urls</code>.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>url(s)</p> | |
| `}}),Se=new C({props:{anchor:"datasets.StreamingDownloadManager.extract.example",$$slots:{default:[Jr]},$$scope:{ctx:v}}}),yt=new J({props:{name:"iter_archive",anchor:"datasets.StreamingDownloadManager.iter_archive",parameters:[{name:"urlpath_or_buf",val:": typing.Union[str, _io.BufferedReader]"}],parametersDescription:[{anchor:"datasets.StreamingDownloadManager.iter_archive.urlpath_or_buf",description:`<strong>urlpath_or_buf</strong> (<code>str</code> or <code>io.BufferedReader</code>) — | |
| Archive path or archive binary file object.`,name:"urlpath_or_buf"}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/download/streaming_download_manager.py#L171",returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>tuple[str, io.BufferedReader]</code></p> | |
| `,isYield:!0}}),ze=new C({props:{anchor:"datasets.StreamingDownloadManager.iter_archive.example",$$slots:{default:[Cr]},$$scope:{ctx:v}}}),wt=new J({props:{name:"iter_files",anchor:"datasets.StreamingDownloadManager.iter_files",parameters:[{name:"urlpaths",val:": typing.Union[str, list[str]]"}],parametersDescription:[{anchor:"datasets.StreamingDownloadManager.iter_files.urlpaths",description:`<strong>urlpaths</strong> (<code>str</code> or <code>list</code> of <code>str</code>) — | |
| Root paths.`,name:"urlpaths"}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/download/streaming_download_manager.py#L196",returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>str</p> | |
| `,isYield:!0}}),Ee=new C({props:{anchor:"datasets.StreamingDownloadManager.iter_files.example",$$slots:{default:[kr]},$$scope:{ctx:v}}}),vt=new J({props:{name:"class datasets.DownloadConfig",anchor:"datasets.DownloadConfig",parameters:[{name:"cache_dir",val:": typing.Union[str, pathlib.Path, NoneType] = None"},{name:"force_download",val:": bool = False"},{name:"resume_download",val:": bool = False"},{name:"local_files_only",val:": bool = False"},{name:"proxies",val:": typing.Optional[dict] = None"},{name:"user_agent",val:": typing.Optional[str] = None"},{name:"extract_compressed_file",val:": bool = False"},{name:"force_extract",val:": bool = False"},{name:"delete_extracted",val:": bool = False"},{name:"extract_on_the_fly",val:": bool = False"},{name:"use_etag",val:": bool = True"},{name:"num_proc",val:": typing.Optional[int] = None"},{name:"max_retries",val:": int = 1"},{name:"token",val:": typing.Union[str, bool, NoneType] = None"},{name:"storage_options",val:": dict = <factory>"},{name:"download_desc",val:": typing.Optional[str] = None"},{name:"disable_tqdm",val:": bool = False"}],parametersDescription:[{anchor:"datasets.DownloadConfig.cache_dir",description:`<strong>cache_dir</strong> (<code>str</code> or <code>Path</code>, <em>optional</em>) — | |
| Specify a cache directory to save the file to (overwrite the | |
| default cache dir).`,name:"cache_dir"},{anchor:"datasets.DownloadConfig.force_download",description:`<strong>force_download</strong> (<code>bool</code>, defaults to <code>False</code>) — | |
| If <code>True</code>, re-download the file even if it’s already cached in | |
| the cache dir.`,name:"force_download"},{anchor:"datasets.DownloadConfig.resume_download",description:`<strong>resume_download</strong> (<code>bool</code>, defaults to <code>False</code>) — | |
| If <code>True</code>, resume the download if an incompletely received file is | |
| found.`,name:"resume_download"},{anchor:"datasets.DownloadConfig.proxies",description:"<strong>proxies</strong> (<code>dict</code>, <em>optional</em>) —",name:"proxies"},{anchor:"datasets.DownloadConfig.user_agent",description:`<strong>user_agent</strong> (<code>str</code>, <em>optional</em>) — | |
| Optional string or dict that will be appended to the user-agent on remote | |
| requests.`,name:"user_agent"},{anchor:"datasets.DownloadConfig.extract_compressed_file",description:`<strong>extract_compressed_file</strong> (<code>bool</code>, defaults to <code>False</code>) — | |
| If <code>True</code> and the path point to a zip or tar file, | |
| extract the compressed file in a folder along the archive.`,name:"extract_compressed_file"},{anchor:"datasets.DownloadConfig.force_extract",description:`<strong>force_extract</strong> (<code>bool</code>, defaults to <code>False</code>) — | |
| If <code>True</code> when <code>extract_compressed_file</code> is <code>True</code> and the archive | |
| was already extracted, re-extract the archive and override the folder where it was extracted.`,name:"force_extract"},{anchor:"datasets.DownloadConfig.delete_extracted",description:`<strong>delete_extracted</strong> (<code>bool</code>, defaults to <code>False</code>) — | |
| Whether to delete (or keep) the extracted files.`,name:"delete_extracted"},{anchor:"datasets.DownloadConfig.extract_on_the_fly",description:`<strong>extract_on_the_fly</strong> (<code>bool</code>, defaults to <code>False</code>) — | |
| If <code>True</code>, extract compressed files while they are being read.`,name:"extract_on_the_fly"},{anchor:"datasets.DownloadConfig.use_etag",description:`<strong>use_etag</strong> (<code>bool</code>, defaults to <code>True</code>) — | |
| Whether to use the ETag HTTP response header to validate the cached files.`,name:"use_etag"},{anchor:"datasets.DownloadConfig.num_proc",description:`<strong>num_proc</strong> (<code>int</code>, <em>optional</em>) — | |
| The number of processes to launch to download the files in parallel.`,name:"num_proc"},{anchor:"datasets.DownloadConfig.max_retries",description:`<strong>max_retries</strong> (<code>int</code>, default to <code>1</code>) — | |
| The number of times to retry an HTTP request if it fails.`,name:"max_retries"},{anchor:"datasets.DownloadConfig.token",description:`<strong>token</strong> (<code>str</code> or <code>bool</code>, <em>optional</em>) — | |
| Optional string or boolean to use as Bearer token | |
| for remote files on the Datasets Hub. If <code>True</code>, or not specified, will get token from <code>~/.huggingface</code>.`,name:"token"},{anchor:"datasets.DownloadConfig.storage_options",description:`<strong>storage_options</strong> (<code>dict</code>, <em>optional</em>) — | |
| Key/value pairs to be passed on to the dataset file-system backend, if any.`,name:"storage_options"},{anchor:"datasets.DownloadConfig.download_desc",description:`<strong>download_desc</strong> (<code>str</code>, <em>optional</em>) — | |
| A description to be displayed alongside with the progress bar while downloading the files.`,name:"download_desc"},{anchor:"datasets.DownloadConfig.disable_tqdm",description:`<strong>disable_tqdm</strong> (<code>bool</code>, defaults to <code>False</code>) — | |
| Whether to disable the individual files download progress bar`,name:"disable_tqdm"}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/download/download_config.py#L10"}}),Mt=new J({props:{name:"class datasets.DownloadMode",anchor:"datasets.DownloadMode",parameters:[{name:"value",val:""},{name:"names",val:" = None"},{name:"module",val:" = None"},{name:"qualname",val:" = None"},{name:"type",val:" = None"},{name:"start",val:" = 1"}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/download/download_manager.py#L50"}}),Tt=new Ya({props:{title:"Verification",local:"datasets.VerificationMode",headingTag:"h2"}}),Jt=new J({props:{name:"class datasets.VerificationMode",anchor:"datasets.VerificationMode",parameters:[{name:"value",val:""},{name:"names",val:" = None"},{name:"module",val:" = None"},{name:"qualname",val:" = None"},{name:"type",val:" = None"},{name:"start",val:" = 1"}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/utils/info_utils.py#L22"}}),Ct=new Ya({props:{title:"Splits",local:"datasets.SplitGenerator",headingTag:"h2"}}),kt=new J({props:{name:"class datasets.SplitGenerator",anchor:"datasets.SplitGenerator",parameters:[{name:"name",val:": str"},{name:"gen_kwargs",val:": dict = <factory>"}],parametersDescription:[{anchor:"datasets.SplitGenerator.name",description:`<strong>name</strong> (<code>str</code>) — | |
| Name of the <code>Split</code> for which the generator will | |
| create the examples.`,name:"name"},{anchor:"datasets.SplitGenerator.*gen_kwargs",description:`*<strong>*gen_kwargs</strong> (additional keyword arguments) — | |
| Keyword arguments to forward to the <code>DatasetBuilder._generate_examples</code> method | |
| of the builder.`,name:"*gen_kwargs"}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/splits.py#L604"}}),Qe=new C({props:{anchor:"datasets.SplitGenerator.example",$$slots:{default:[Ur]},$$scope:{ctx:v}}}),Ut=new J({props:{name:"class datasets.Split",anchor:"datasets.Split",parameters:[{name:"name",val:""}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/splits.py#L407"}}),Ye=new C({props:{anchor:"datasets.Split.example",$$slots:{default:[Rr]},$$scope:{ctx:v}}}),Rt=new J({props:{name:"class datasets.NamedSplit",anchor:"datasets.NamedSplit",parameters:[{name:"name",val:": str"}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/splits.py#L315"}}),We=new C({props:{anchor:"datasets.NamedSplit.example",$$slots:{default:[jr]},$$scope:{ctx:v}}}),Le=new C({props:{anchor:"datasets.NamedSplit.example-2",$$slots:{default:[Nr]},$$scope:{ctx:v}}}),He=new C({props:{anchor:"datasets.NamedSplit.example-3",$$slots:{default:[Gr]},$$scope:{ctx:v}}}),Ae=new C({props:{anchor:"datasets.NamedSplit.example-4",$$slots:{default:[Zr]},$$scope:{ctx:v}}}),jt=new J({props:{name:"class datasets.NamedSplitAll",anchor:"datasets.NamedSplitAll",parameters:[],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/splits.py#L392"}}),Nt=new J({props:{name:"class datasets.ReadInstruction",anchor:"datasets.ReadInstruction",parameters:[{name:"split_name",val:""},{name:"rounding",val:" = None"},{name:"from_",val:" = None"},{name:"to",val:" = None"},{name:"unit",val:" = None"}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/arrow_reader.py#L456"}}),Pe=new C({props:{anchor:"datasets.ReadInstruction.example",$$slots:{default:[Xr]},$$scope:{ctx:v}}}),Gt=new J({props:{name:"from_spec",anchor:"datasets.ReadInstruction.from_spec",parameters:[{name:"spec",val:""}],parametersDescription:[{anchor:"datasets.ReadInstruction.from_spec.spec",description:`<strong>spec</strong> (<code>str</code>) — | |
| Split(s) + optional slice(s) to read + optional rounding | |
| if percents are used as the slicing unit. A slice can be specified, | |
| using absolute numbers (<code>int</code>) or percentages (<code>int</code>).`,name:"spec"}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/arrow_reader.py#L536",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>ReadInstruction instance.</p> | |
| `}}),qe=new C({props:{anchor:"datasets.ReadInstruction.from_spec.example",$$slots:{default:[Ir]},$$scope:{ctx:v}}}),Zt=new J({props:{name:"to_absolute",anchor:"datasets.ReadInstruction.to_absolute",parameters:[{name:"name2len",val:""}],parametersDescription:[{anchor:"datasets.ReadInstruction.to_absolute.name2len",description:`<strong>name2len</strong> (<code>dict</code>) — | |
| Associating split names to number of examples.`,name:"name2len"}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/arrow_reader.py#L608",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>list of _AbsoluteInstruction instances (corresponds to the + in spec).</p> | |
| `}}),Xt=new Ya({props:{title:"Version",local:"datasets.Version",headingTag:"h2"}}),It=new J({props:{name:"class datasets.Version",anchor:"datasets.Version",parameters:[{name:"version_str",val:": str"},{name:"description",val:": typing.Optional[str] = None"},{name:"major",val:": typing.Union[str, int, NoneType] = None"},{name:"minor",val:": typing.Union[str, int, NoneType] = None"},{name:"patch",val:": typing.Union[str, int, NoneType] = None"}],parametersDescription:[{anchor:"datasets.Version.version_str",description:`<strong>version_str</strong> (<code>str</code>) — | |
| The dataset version.`,name:"version_str"},{anchor:"datasets.Version.description",description:`<strong>description</strong> (<code>str</code>) — | |
| A description of what is new in this version.`,name:"description"},{anchor:"datasets.Version.major",description:"<strong>major</strong> (<code>str</code>) —",name:"major"},{anchor:"datasets.Version.minor",description:"<strong>minor</strong> (<code>str</code>) —",name:"minor"},{anchor:"datasets.Version.patch",description:"<strong>patch</strong> (<code>str</code>) —",name:"patch"}],source:"https://github.com/huggingface/datasets/blob/r_8113/src/datasets/utils/version.py#L30"}}),Oe=new C({props:{anchor:"datasets.Version.example",$$slots:{default:[Dr]},$$scope:{ctx:v}}}),Dt=new ur({props:{source:"https://github.com/huggingface/datasets/blob/main/docs/source/package_reference/builder_classes.mdx"}}),{c(){a=c("meta"),w=o(),i=c("p"),s=o(),f(d.$$.fragment),e=o(),f(g.$$.fragment),La=o(),f(et.$$.fragment),Ha=o(),tt=c("p"),tt.innerHTML=oo,Aa=o(),R=c("div"),f(at.$$.fragment),bs=o(),St=c("p"),St.textContent=ro,$s=o(),zt=c("p"),zt.innerHTML=lo,xs=o(),Et=c("ul"),Et.innerHTML=io,ys=o(),Qt=c("p"),Qt.innerHTML=co,ws=o(),re=c("div"),f(st.$$.fragment),vs=o(),Yt=c("p"),Yt.textContent=po,Ms=o(),f(Ue.$$.fragment),Ts=o(),D=c("div"),f(nt.$$.fragment),Js=o(),Wt=c("p"),Wt.textContent=mo,Cs=o(),Lt=c("p"),Lt.textContent=go,ks=o(),f(Re.$$.fragment),Us=o(),f(je.$$.fragment),Rs=o(),f(Ne.$$.fragment),js=o(),Ge=c("div"),f(ot.$$.fragment),Ns=o(),Ht=c("p"),Ht.textContent=uo,Pa=o(),K=c("div"),f(rt.$$.fragment),Gs=o(),At=c("p"),At.textContent=fo,Zs=o(),Pt=c("p"),Pt.innerHTML=ho,qa=o(),_e=c("div"),f(lt.$$.fragment),Xs=o(),qt=c("p"),qt.textContent=_o,Oa=o(),Y=c("div"),f(dt.$$.fragment),Is=o(),Ot=c("p"),Ot.innerHTML=bo,Ds=o(),Kt=c("p"),Kt.innerHTML=$o,Bs=o(),A=c("div"),f(it.$$.fragment),Fs=o(),ea=c("p"),ea.textContent=xo,Vs=o(),ta=c("ul"),ta.innerHTML=yo,Ss=o(),aa=c("p"),aa.textContent=wo,Ka=o(),f(ct.$$.fragment),es=o(),X=c("div"),f(pt.$$.fragment),zs=o(),P=c("div"),f(mt.$$.fragment),Es=o(),sa=c("p"),sa.textContent=vo,Qs=o(),na=c("p"),na.innerHTML=Mo,Ys=o(),f(Ze.$$.fragment),Ws=o(),le=c("div"),f(gt.$$.fragment),Ls=o(),oa=c("p"),oa.innerHTML=To,Hs=o(),f(Xe.$$.fragment),As=o(),de=c("div"),f(ut.$$.fragment),Ps=o(),ra=c("p"),ra.textContent=Jo,qs=o(),f(Ie.$$.fragment),Os=o(),ie=c("div"),f(ft.$$.fragment),Ks=o(),la=c("p"),la.textContent=Co,en=o(),f(De.$$.fragment),tn=o(),ce=c("div"),f(ht.$$.fragment),an=o(),da=c("p"),da.textContent=ko,sn=o(),f(Be.$$.fragment),ts=o(),N=c("div"),f(_t.$$.fragment),nn=o(),ia=c("p"),ia.innerHTML=Uo,on=o(),pe=c("div"),f(bt.$$.fragment),rn=o(),ca=c("p"),ca.innerHTML=Ro,ln=o(),f(Fe.$$.fragment),dn=o(),q=c("div"),f($t.$$.fragment),cn=o(),pa=c("p"),pa.innerHTML=jo,pn=o(),ma=c("p"),ma.innerHTML=No,mn=o(),f(Ve.$$.fragment),gn=o(),O=c("div"),f(xt.$$.fragment),un=o(),ga=c("p"),ga.textContent=Go,fn=o(),ua=c("p"),ua.innerHTML=Zo,hn=o(),f(Se.$$.fragment),_n=o(),me=c("div"),f(yt.$$.fragment),bn=o(),fa=c("p"),fa.textContent=Xo,$n=o(),f(ze.$$.fragment),xn=o(),ge=c("div"),f(wt.$$.fragment),yn=o(),ha=c("p"),ha.textContent=Io,wn=o(),f(Ee.$$.fragment),as=o(),be=c("div"),f(vt.$$.fragment),vn=o(),_a=c("p"),_a.textContent=Do,ss=o(),F=c("div"),f(Mt.$$.fragment),Mn=o(),ba=c("p"),ba.innerHTML=Bo,Tn=o(),$a=c("p"),$a.innerHTML=Fo,Jn=o(),xa=c("p"),xa.textContent=Vo,Cn=o(),ya=c("table"),ya.innerHTML=So,ns=o(),f(Tt.$$.fragment),os=o(),V=c("div"),f(Jt.$$.fragment),kn=o(),wa=c("p"),wa.innerHTML=zo,Un=o(),va=c("p"),va.innerHTML=Eo,Rn=o(),Ma=c("p"),Ma.textContent=Qo,jn=o(),Ta=c("table"),Ta.innerHTML=Yo,rs=o(),f(Ct.$$.fragment),ls=o(),W=c("div"),f(kt.$$.fragment),Nn=o(),Ja=c("p"),Ja.textContent=Wo,Gn=o(),Ca=c("p"),Ca.innerHTML=Lo,Zn=o(),f(Qe.$$.fragment),ds=o(),G=c("div"),f(Ut.$$.fragment),Xn=o(),ka=c("p"),ka.innerHTML=Ho,In=o(),Ua=c("p"),Ua.textContent=Ao,Dn=o(),Ra=c("ul"),Ra.innerHTML=Po,Bn=o(),ja=c("p"),ja.innerHTML=qo,Fn=o(),Na=c("p"),Na.innerHTML=Oo,Vn=o(),f(Ye.$$.fragment),is=o(),j=c("div"),f(Rt.$$.fragment),Sn=o(),Ga=c("p"),Ga.textContent=Ko,zn=o(),Za=c("p"),Za.textContent=er,En=o(),f(We.$$.fragment),Qn=o(),Xa=c("p"),Xa.textContent=tr,Yn=o(),f(Le.$$.fragment),Wn=o(),f(He.$$.fragment),Ln=o(),f(Ae.$$.fragment),cs=o(),$e=c("div"),f(jt.$$.fragment),Hn=o(),Ia=c("p"),Ia.textContent=ar,ps=o(),S=c("div"),f(Nt.$$.fragment),An=o(),Da=c("p"),Da.textContent=sr,Pn=o(),f(Pe.$$.fragment),qn=o(),ue=c("div"),f(Gt.$$.fragment),On=o(),Ba=c("p"),Ba.innerHTML=nr,Kn=o(),f(qe.$$.fragment),eo=o(),fe=c("div"),f(Zt.$$.fragment),to=o(),Fa=c("p"),Fa.textContent=or,ao=o(),Va=c("p"),Va.textContent=rr,ms=o(),f(Xt.$$.fragment),gs=o(),ee=c("div"),f(It.$$.fragment),so=o(),Sa=c("p"),Sa.innerHTML=lr,no=o(),f(Oe.$$.fragment),us=o(),f(Dt.$$.fragment),fs=o(),Wa=c("p"),this.h()},l(t){const m=mr("svelte-u9bgzb",document.head);a=p(m,"META",{name:!0,content:!0}),m.forEach(l),w=r(t),i=p(t,"P",{}),M(i).forEach(l),s=r(t),h(d.$$.fragment,t),e=r(t),h(g.$$.fragment,t),La=r(t),h(et.$$.fragment,t),Ha=r(t),tt=p(t,"P",{"data-svelte-h":!0}),y(tt)!=="svelte-1uyxb2k"&&(tt.innerHTML=oo),Aa=r(t),R=p(t,"DIV",{class:!0});var Z=M(R);h(at.$$.fragment,Z),bs=r(Z),St=p(Z,"P",{"data-svelte-h":!0}),y(St)!=="svelte-krqj9a"&&(St.textContent=ro),$s=r(Z),zt=p(Z,"P",{"data-svelte-h":!0}),y(zt)!=="svelte-apl31e"&&(zt.innerHTML=lo),xs=r(Z),Et=p(Z,"UL",{"data-svelte-h":!0}),y(Et)!=="svelte-141wb0u"&&(Et.innerHTML=io),ys=r(Z),Qt=p(Z,"P",{"data-svelte-h":!0}),y(Qt)!=="svelte-h46v2c"&&(Qt.innerHTML=co),ws=r(Z),re=p(Z,"DIV",{class:!0});var xe=M(re);h(st.$$.fragment,xe),vs=r(xe),Yt=p(xe,"P",{"data-svelte-h":!0}),y(Yt)!=="svelte-1q1652n"&&(Yt.textContent=po),Ms=r(xe),h(Ue.$$.fragment,xe),xe.forEach(l),Ts=r(Z),D=p(Z,"DIV",{class:!0});var z=M(D);h(nt.$$.fragment,z),Js=r(z),Wt=p(z,"P",{"data-svelte-h":!0}),y(Wt)!=="svelte-9mag6f"&&(Wt.textContent=mo),Cs=r(z),Lt=p(z,"P",{"data-svelte-h":!0}),y(Lt)!=="svelte-11lpom8"&&(Lt.textContent=go),ks=r(z),h(Re.$$.fragment,z),Us=r(z),h(je.$$.fragment,z),Rs=r(z),h(Ne.$$.fragment,z),z.forEach(l),js=r(Z),Ge=p(Z,"DIV",{class:!0});var Bt=M(Ge);h(ot.$$.fragment,Bt),Ns=r(Bt),Ht=p(Bt,"P",{"data-svelte-h":!0}),y(Ht)!=="svelte-1jq5ljq"&&(Ht.textContent=uo),Bt.forEach(l),Z.forEach(l),Pa=r(t),K=p(t,"DIV",{class:!0});var ye=M(K);h(rt.$$.fragment,ye),Gs=r(ye),At=p(ye,"P",{"data-svelte-h":!0}),y(At)!=="svelte-5e48ll"&&(At.textContent=fo),Zs=r(ye),Pt=p(ye,"P",{"data-svelte-h":!0}),y(Pt)!=="svelte-1b18j1y"&&(Pt.innerHTML=ho),ye.forEach(l),qa=r(t),_e=p(t,"DIV",{class:!0});var Ft=M(_e);h(lt.$$.fragment,Ft),Xs=r(Ft),qt=p(Ft,"P",{"data-svelte-h":!0}),y(qt)!=="svelte-17aolem"&&(qt.textContent=_o),Ft.forEach(l),Oa=r(t),Y=p(t,"DIV",{class:!0});var te=M(Y);h(dt.$$.fragment,te),Is=r(te),Ot=p(te,"P",{"data-svelte-h":!0}),y(Ot)!=="svelte-15gz2dy"&&(Ot.innerHTML=bo),Ds=r(te),Kt=p(te,"P",{"data-svelte-h":!0}),y(Kt)!=="svelte-1m2o9um"&&(Kt.innerHTML=$o),Bs=r(te),A=p(te,"DIV",{class:!0});var ae=M(A);h(it.$$.fragment,ae),Fs=r(ae),ea=p(ae,"P",{"data-svelte-h":!0}),y(ea)!=="svelte-1h03lp6"&&(ea.textContent=xo),Vs=r(ae),ta=p(ae,"UL",{"data-svelte-h":!0}),y(ta)!=="svelte-y69nf2"&&(ta.innerHTML=yo),Ss=r(ae),aa=p(ae,"P",{"data-svelte-h":!0}),y(aa)!=="svelte-d2y9u1"&&(aa.textContent=wo),ae.forEach(l),te.forEach(l),Ka=r(t),h(ct.$$.fragment,t),es=r(t),X=p(t,"DIV",{class:!0});var E=M(X);h(pt.$$.fragment,E),zs=r(E),P=p(E,"DIV",{class:!0});var se=M(P);h(mt.$$.fragment,se),Es=r(se),sa=p(se,"P",{"data-svelte-h":!0}),y(sa)!=="svelte-19i8z0e"&&(sa.textContent=vo),Qs=r(se),na=p(se,"P",{"data-svelte-h":!0}),y(na)!=="svelte-a30fyv"&&(na.innerHTML=Mo),Ys=r(se),h(Ze.$$.fragment,se),se.forEach(l),Ws=r(E),le=p(E,"DIV",{class:!0});var we=M(le);h(gt.$$.fragment,we),Ls=r(we),oa=p(we,"P",{"data-svelte-h":!0}),y(oa)!=="svelte-xkawo0"&&(oa.innerHTML=To),Hs=r(we),h(Xe.$$.fragment,we),we.forEach(l),As=r(E),de=p(E,"DIV",{class:!0});var ve=M(de);h(ut.$$.fragment,ve),Ps=r(ve),ra=p(ve,"P",{"data-svelte-h":!0}),y(ra)!=="svelte-1vlembv"&&(ra.textContent=Jo),qs=r(ve),h(Ie.$$.fragment,ve),ve.forEach(l),Os=r(E),ie=p(E,"DIV",{class:!0});var Me=M(ie);h(ft.$$.fragment,Me),Ks=r(Me),la=p(Me,"P",{"data-svelte-h":!0}),y(la)!=="svelte-1ephcm7"&&(la.textContent=Co),en=r(Me),h(De.$$.fragment,Me),Me.forEach(l),tn=r(E),ce=p(E,"DIV",{class:!0});var Te=M(ce);h(ht.$$.fragment,Te),an=r(Te),da=p(Te,"P",{"data-svelte-h":!0}),y(da)!=="svelte-1sadrg0"&&(da.textContent=ko),sn=r(Te),h(Be.$$.fragment,Te),Te.forEach(l),E.forEach(l),ts=r(t),N=p(t,"DIV",{class:!0});var I=M(N);h(_t.$$.fragment,I),nn=r(I),ia=p(I,"P",{"data-svelte-h":!0}),y(ia)!=="svelte-2x5qds"&&(ia.innerHTML=Uo),on=r(I),pe=p(I,"DIV",{class:!0});var Je=M(pe);h(bt.$$.fragment,Je),rn=r(Je),ca=p(Je,"P",{"data-svelte-h":!0}),y(ca)!=="svelte-z89fig"&&(ca.innerHTML=Ro),ln=r(Je),h(Fe.$$.fragment,Je),Je.forEach(l),dn=r(I),q=p(I,"DIV",{class:!0});var ne=M(q);h($t.$$.fragment,ne),cn=r(ne),pa=p(ne,"P",{"data-svelte-h":!0}),y(pa)!=="svelte-vb22br"&&(pa.innerHTML=jo),pn=r(ne),ma=p(ne,"P",{"data-svelte-h":!0}),y(ma)!=="svelte-1hkl2pt"&&(ma.innerHTML=No),mn=r(ne),h(Ve.$$.fragment,ne),ne.forEach(l),gn=r(I),O=p(I,"DIV",{class:!0});var oe=M(O);h(xt.$$.fragment,oe),un=r(oe),ga=p(oe,"P",{"data-svelte-h":!0}),y(ga)!=="svelte-sgtbuc"&&(ga.textContent=Go),fn=r(oe),ua=p(oe,"P",{"data-svelte-h":!0}),y(ua)!=="svelte-5qr3om"&&(ua.innerHTML=Zo),hn=r(oe),h(Se.$$.fragment,oe),oe.forEach(l),_n=r(I),me=p(I,"DIV",{class:!0});var Ce=M(me);h(yt.$$.fragment,Ce),bn=r(Ce),fa=p(Ce,"P",{"data-svelte-h":!0}),y(fa)!=="svelte-1ephcm7"&&(fa.textContent=Xo),$n=r(Ce),h(ze.$$.fragment,Ce),Ce.forEach(l),xn=r(I),ge=p(I,"DIV",{class:!0});var ke=M(ge);h(wt.$$.fragment,ke),yn=r(ke),ha=p(ke,"P",{"data-svelte-h":!0}),y(ha)!=="svelte-1kdi5en"&&(ha.textContent=Io),wn=r(ke),h(Ee.$$.fragment,ke),ke.forEach(l),I.forEach(l),as=r(t),be=p(t,"DIV",{class:!0});var Vt=M(be);h(vt.$$.fragment,Vt),vn=r(Vt),_a=p(Vt,"P",{"data-svelte-h":!0}),y(_a)!=="svelte-1equdtb"&&(_a.textContent=Do),Vt.forEach(l),ss=r(t),F=p(t,"DIV",{class:!0});var L=M(F);h(Mt.$$.fragment,L),Mn=r(L),ba=p(L,"P",{"data-svelte-h":!0}),y(ba)!=="svelte-nmj2uv"&&(ba.innerHTML=Bo),Tn=r(L),$a=p(L,"P",{"data-svelte-h":!0}),y($a)!=="svelte-xtdw9q"&&($a.innerHTML=Fo),Jn=r(L),xa=p(L,"P",{"data-svelte-h":!0}),y(xa)!=="svelte-a4zsdi"&&(xa.textContent=Vo),Cn=r(L),ya=p(L,"TABLE",{"data-svelte-h":!0}),y(ya)!=="svelte-pv0x4r"&&(ya.innerHTML=So),L.forEach(l),ns=r(t),h(Tt.$$.fragment,t),os=r(t),V=p(t,"DIV",{class:!0});var H=M(V);h(Jt.$$.fragment,H),kn=r(H),wa=p(H,"P",{"data-svelte-h":!0}),y(wa)!=="svelte-1eqexd"&&(wa.innerHTML=zo),Un=r(H),va=p(H,"P",{"data-svelte-h":!0}),y(va)!=="svelte-13iynk4"&&(va.innerHTML=Eo),Rn=r(H),Ma=p(H,"P",{"data-svelte-h":!0}),y(Ma)!=="svelte-1luabaw"&&(Ma.textContent=Qo),jn=r(H),Ta=p(H,"TABLE",{"data-svelte-h":!0}),y(Ta)!=="svelte-c0njzf"&&(Ta.innerHTML=Yo),H.forEach(l),rs=r(t),h(Ct.$$.fragment,t),ls=r(t),W=p(t,"DIV",{class:!0});var Ke=M(W);h(kt.$$.fragment,Ke),Nn=r(Ke),Ja=p(Ke,"P",{"data-svelte-h":!0}),y(Ja)!=="svelte-18qkgha"&&(Ja.textContent=Wo),Gn=r(Ke),Ca=p(Ke,"P",{"data-svelte-h":!0}),y(Ca)!=="svelte-fn7w0y"&&(Ca.innerHTML=Lo),Zn=r(Ke),h(Qe.$$.fragment,Ke),Ke.forEach(l),ds=r(t),G=p(t,"DIV",{class:!0});var Q=M(G);h(Ut.$$.fragment,Q),Xn=r(Q),ka=p(Q,"P",{"data-svelte-h":!0}),y(ka)!=="svelte-ewaslg"&&(ka.innerHTML=Ho),In=r(Q),Ua=p(Q,"P",{"data-svelte-h":!0}),y(Ua)!=="svelte-l59ai2"&&(Ua.textContent=Ao),Dn=r(Q),Ra=p(Q,"UL",{"data-svelte-h":!0}),y(Ra)!=="svelte-sbyekb"&&(Ra.innerHTML=Po),Bn=r(Q),ja=p(Q,"P",{"data-svelte-h":!0}),y(ja)!=="svelte-20xdby"&&(ja.innerHTML=qo),Fn=r(Q),Na=p(Q,"P",{"data-svelte-h":!0}),y(Na)!=="svelte-1cn2wk0"&&(Na.innerHTML=Oo),Vn=r(Q),h(Ye.$$.fragment,Q),Q.forEach(l),is=r(t),j=p(t,"DIV",{class:!0});var B=M(j);h(Rt.$$.fragment,B),Sn=r(B),Ga=p(B,"P",{"data-svelte-h":!0}),y(Ga)!=="svelte-124iux6"&&(Ga.textContent=Ko),zn=r(B),Za=p(B,"P",{"data-svelte-h":!0}),y(Za)!=="svelte-11lpom8"&&(Za.textContent=er),En=r(B),h(We.$$.fragment,B),Qn=r(B),Xa=p(B,"P",{"data-svelte-h":!0}),y(Xa)!=="svelte-5jjbkq"&&(Xa.textContent=tr),Yn=r(B),h(Le.$$.fragment,B),Wn=r(B),h(He.$$.fragment,B),Ln=r(B),h(Ae.$$.fragment,B),B.forEach(l),cs=r(t),$e=p(t,"DIV",{class:!0});var _s=M($e);h(jt.$$.fragment,_s),Hn=r(_s),Ia=p(_s,"P",{"data-svelte-h":!0}),y(Ia)!=="svelte-11ie63y"&&(Ia.textContent=ar),_s.forEach(l),ps=r(t),S=p(t,"DIV",{class:!0});var he=M(S);h(Nt.$$.fragment,he),An=r(he),Da=p(he,"P",{"data-svelte-h":!0}),y(Da)!=="svelte-3dhs2m"&&(Da.textContent=sr),Pn=r(he),h(Pe.$$.fragment,he),qn=r(he),ue=p(he,"DIV",{class:!0});var za=M(ue);h(Gt.$$.fragment,za),On=r(za),Ba=p(za,"P",{"data-svelte-h":!0}),y(Ba)!=="svelte-1g1y80g"&&(Ba.innerHTML=nr),Kn=r(za),h(qe.$$.fragment,za),za.forEach(l),eo=r(he),fe=p(he,"DIV",{class:!0});var Ea=M(fe);h(Zt.$$.fragment,Ea),to=r(Ea),Fa=p(Ea,"P",{"data-svelte-h":!0}),y(Fa)!=="svelte-14tg07e"&&(Fa.textContent=or),ao=r(Ea),Va=p(Ea,"P",{"data-svelte-h":!0}),y(Va)!=="svelte-l773xk"&&(Va.textContent=rr),Ea.forEach(l),he.forEach(l),ms=r(t),h(Xt.$$.fragment,t),gs=r(t),ee=p(t,"DIV",{class:!0});var Qa=M(ee);h(It.$$.fragment,Qa),so=r(Qa),Sa=p(Qa,"P",{"data-svelte-h":!0}),y(Sa)!=="svelte-14lq7j5"&&(Sa.innerHTML=lr),no=r(Qa),h(Oe.$$.fragment,Qa),Qa.forEach(l),us=r(t),h(Dt.$$.fragment,t),fs=r(t),Wa=p(t,"P",{}),M(Wa).forEach(l),this.h()},h(){T(a,"name","hf:doc:metadata"),T(a,"content",Fr),T(re,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(D,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(Ge,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(R,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(K,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(_e,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(A,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(Y,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(P,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(le,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(de,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(ie,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(ce,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(X,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(pe,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(q,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(O,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(me,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(ge,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(N,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(be,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(F,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(V,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(W,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(G,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(j,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T($e,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(ue,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(fe,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(S,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),T(ee,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(t,m){n(document.head,a),u(t,w,m),u(t,i,m),u(t,s,m),_(d,t,m),u(t,e,m),_(g,t,m),u(t,La,m),_(et,t,m),u(t,Ha,m),u(t,tt,m),u(t,Aa,m),u(t,R,m),_(at,R,null),n(R,bs),n(R,St),n(R,$s),n(R,zt),n(R,xs),n(R,Et),n(R,ys),n(R,Qt),n(R,ws),n(R,re),_(st,re,null),n(re,vs),n(re,Yt),n(re,Ms),_(Ue,re,null),n(R,Ts),n(R,D),_(nt,D,null),n(D,Js),n(D,Wt),n(D,Cs),n(D,Lt),n(D,ks),_(Re,D,null),n(D,Us),_(je,D,null),n(D,Rs),_(Ne,D,null),n(R,js),n(R,Ge),_(ot,Ge,null),n(Ge,Ns),n(Ge,Ht),u(t,Pa,m),u(t,K,m),_(rt,K,null),n(K,Gs),n(K,At),n(K,Zs),n(K,Pt),u(t,qa,m),u(t,_e,m),_(lt,_e,null),n(_e,Xs),n(_e,qt),u(t,Oa,m),u(t,Y,m),_(dt,Y,null),n(Y,Is),n(Y,Ot),n(Y,Ds),n(Y,Kt),n(Y,Bs),n(Y,A),_(it,A,null),n(A,Fs),n(A,ea),n(A,Vs),n(A,ta),n(A,Ss),n(A,aa),u(t,Ka,m),_(ct,t,m),u(t,es,m),u(t,X,m),_(pt,X,null),n(X,zs),n(X,P),_(mt,P,null),n(P,Es),n(P,sa),n(P,Qs),n(P,na),n(P,Ys),_(Ze,P,null),n(X,Ws),n(X,le),_(gt,le,null),n(le,Ls),n(le,oa),n(le,Hs),_(Xe,le,null),n(X,As),n(X,de),_(ut,de,null),n(de,Ps),n(de,ra),n(de,qs),_(Ie,de,null),n(X,Os),n(X,ie),_(ft,ie,null),n(ie,Ks),n(ie,la),n(ie,en),_(De,ie,null),n(X,tn),n(X,ce),_(ht,ce,null),n(ce,an),n(ce,da),n(ce,sn),_(Be,ce,null),u(t,ts,m),u(t,N,m),_(_t,N,null),n(N,nn),n(N,ia),n(N,on),n(N,pe),_(bt,pe,null),n(pe,rn),n(pe,ca),n(pe,ln),_(Fe,pe,null),n(N,dn),n(N,q),_($t,q,null),n(q,cn),n(q,pa),n(q,pn),n(q,ma),n(q,mn),_(Ve,q,null),n(N,gn),n(N,O),_(xt,O,null),n(O,un),n(O,ga),n(O,fn),n(O,ua),n(O,hn),_(Se,O,null),n(N,_n),n(N,me),_(yt,me,null),n(me,bn),n(me,fa),n(me,$n),_(ze,me,null),n(N,xn),n(N,ge),_(wt,ge,null),n(ge,yn),n(ge,ha),n(ge,wn),_(Ee,ge,null),u(t,as,m),u(t,be,m),_(vt,be,null),n(be,vn),n(be,_a),u(t,ss,m),u(t,F,m),_(Mt,F,null),n(F,Mn),n(F,ba),n(F,Tn),n(F,$a),n(F,Jn),n(F,xa),n(F,Cn),n(F,ya),u(t,ns,m),_(Tt,t,m),u(t,os,m),u(t,V,m),_(Jt,V,null),n(V,kn),n(V,wa),n(V,Un),n(V,va),n(V,Rn),n(V,Ma),n(V,jn),n(V,Ta),u(t,rs,m),_(Ct,t,m),u(t,ls,m),u(t,W,m),_(kt,W,null),n(W,Nn),n(W,Ja),n(W,Gn),n(W,Ca),n(W,Zn),_(Qe,W,null),u(t,ds,m),u(t,G,m),_(Ut,G,null),n(G,Xn),n(G,ka),n(G,In),n(G,Ua),n(G,Dn),n(G,Ra),n(G,Bn),n(G,ja),n(G,Fn),n(G,Na),n(G,Vn),_(Ye,G,null),u(t,is,m),u(t,j,m),_(Rt,j,null),n(j,Sn),n(j,Ga),n(j,zn),n(j,Za),n(j,En),_(We,j,null),n(j,Qn),n(j,Xa),n(j,Yn),_(Le,j,null),n(j,Wn),_(He,j,null),n(j,Ln),_(Ae,j,null),u(t,cs,m),u(t,$e,m),_(jt,$e,null),n($e,Hn),n($e,Ia),u(t,ps,m),u(t,S,m),_(Nt,S,null),n(S,An),n(S,Da),n(S,Pn),_(Pe,S,null),n(S,qn),n(S,ue),_(Gt,ue,null),n(ue,On),n(ue,Ba),n(ue,Kn),_(qe,ue,null),n(S,eo),n(S,fe),_(Zt,fe,null),n(fe,to),n(fe,Fa),n(fe,ao),n(fe,Va),u(t,ms,m),_(Xt,t,m),u(t,gs,m),u(t,ee,m),_(It,ee,null),n(ee,so),n(ee,Sa),n(ee,no),_(Oe,ee,null),u(t,us,m),_(Dt,t,m),u(t,fs,m),u(t,Wa,m),hs=!0},p(t,[m]){const Z={};m&2&&(Z.$$scope={dirty:m,ctx:t}),Ue.$set(Z);const xe={};m&2&&(xe.$$scope={dirty:m,ctx:t}),Re.$set(xe);const z={};m&2&&(z.$$scope={dirty:m,ctx:t}),je.$set(z);const Bt={};m&2&&(Bt.$$scope={dirty:m,ctx:t}),Ne.$set(Bt);const ye={};m&2&&(ye.$$scope={dirty:m,ctx:t}),Ze.$set(ye);const Ft={};m&2&&(Ft.$$scope={dirty:m,ctx:t}),Xe.$set(Ft);const te={};m&2&&(te.$$scope={dirty:m,ctx:t}),Ie.$set(te);const ae={};m&2&&(ae.$$scope={dirty:m,ctx:t}),De.$set(ae);const E={};m&2&&(E.$$scope={dirty:m,ctx:t}),Be.$set(E);const se={};m&2&&(se.$$scope={dirty:m,ctx:t}),Fe.$set(se);const we={};m&2&&(we.$$scope={dirty:m,ctx:t}),Ve.$set(we);const ve={};m&2&&(ve.$$scope={dirty:m,ctx:t}),Se.$set(ve);const Me={};m&2&&(Me.$$scope={dirty:m,ctx:t}),ze.$set(Me);const Te={};m&2&&(Te.$$scope={dirty:m,ctx:t}),Ee.$set(Te);const I={};m&2&&(I.$$scope={dirty:m,ctx:t}),Qe.$set(I);const Je={};m&2&&(Je.$$scope={dirty:m,ctx:t}),Ye.$set(Je);const ne={};m&2&&(ne.$$scope={dirty:m,ctx:t}),We.$set(ne);const oe={};m&2&&(oe.$$scope={dirty:m,ctx:t}),Le.$set(oe);const Ce={};m&2&&(Ce.$$scope={dirty:m,ctx:t}),He.$set(Ce);const ke={};m&2&&(ke.$$scope={dirty:m,ctx:t}),Ae.$set(ke);const Vt={};m&2&&(Vt.$$scope={dirty:m,ctx:t}),Pe.$set(Vt);const L={};m&2&&(L.$$scope={dirty:m,ctx:t}),qe.$set(L);const H={};m&2&&(H.$$scope={dirty:m,ctx:t}),Oe.$set(H)},i(t){hs||(b(d.$$.fragment,t),b(g.$$.fragment,t),b(et.$$.fragment,t),b(at.$$.fragment,t),b(st.$$.fragment,t),b(Ue.$$.fragment,t),b(nt.$$.fragment,t),b(Re.$$.fragment,t),b(je.$$.fragment,t),b(Ne.$$.fragment,t),b(ot.$$.fragment,t),b(rt.$$.fragment,t),b(lt.$$.fragment,t),b(dt.$$.fragment,t),b(it.$$.fragment,t),b(ct.$$.fragment,t),b(pt.$$.fragment,t),b(mt.$$.fragment,t),b(Ze.$$.fragment,t),b(gt.$$.fragment,t),b(Xe.$$.fragment,t),b(ut.$$.fragment,t),b(Ie.$$.fragment,t),b(ft.$$.fragment,t),b(De.$$.fragment,t),b(ht.$$.fragment,t),b(Be.$$.fragment,t),b(_t.$$.fragment,t),b(bt.$$.fragment,t),b(Fe.$$.fragment,t),b($t.$$.fragment,t),b(Ve.$$.fragment,t),b(xt.$$.fragment,t),b(Se.$$.fragment,t),b(yt.$$.fragment,t),b(ze.$$.fragment,t),b(wt.$$.fragment,t),b(Ee.$$.fragment,t),b(vt.$$.fragment,t),b(Mt.$$.fragment,t),b(Tt.$$.fragment,t),b(Jt.$$.fragment,t),b(Ct.$$.fragment,t),b(kt.$$.fragment,t),b(Qe.$$.fragment,t),b(Ut.$$.fragment,t),b(Ye.$$.fragment,t),b(Rt.$$.fragment,t),b(We.$$.fragment,t),b(Le.$$.fragment,t),b(He.$$.fragment,t),b(Ae.$$.fragment,t),b(jt.$$.fragment,t),b(Nt.$$.fragment,t),b(Pe.$$.fragment,t),b(Gt.$$.fragment,t),b(qe.$$.fragment,t),b(Zt.$$.fragment,t),b(Xt.$$.fragment,t),b(It.$$.fragment,t),b(Oe.$$.fragment,t),b(Dt.$$.fragment,t),hs=!0)},o(t){$(d.$$.fragment,t),$(g.$$.fragment,t),$(et.$$.fragment,t),$(at.$$.fragment,t),$(st.$$.fragment,t),$(Ue.$$.fragment,t),$(nt.$$.fragment,t),$(Re.$$.fragment,t),$(je.$$.fragment,t),$(Ne.$$.fragment,t),$(ot.$$.fragment,t),$(rt.$$.fragment,t),$(lt.$$.fragment,t),$(dt.$$.fragment,t),$(it.$$.fragment,t),$(ct.$$.fragment,t),$(pt.$$.fragment,t),$(mt.$$.fragment,t),$(Ze.$$.fragment,t),$(gt.$$.fragment,t),$(Xe.$$.fragment,t),$(ut.$$.fragment,t),$(Ie.$$.fragment,t),$(ft.$$.fragment,t),$(De.$$.fragment,t),$(ht.$$.fragment,t),$(Be.$$.fragment,t),$(_t.$$.fragment,t),$(bt.$$.fragment,t),$(Fe.$$.fragment,t),$($t.$$.fragment,t),$(Ve.$$.fragment,t),$(xt.$$.fragment,t),$(Se.$$.fragment,t),$(yt.$$.fragment,t),$(ze.$$.fragment,t),$(wt.$$.fragment,t),$(Ee.$$.fragment,t),$(vt.$$.fragment,t),$(Mt.$$.fragment,t),$(Tt.$$.fragment,t),$(Jt.$$.fragment,t),$(Ct.$$.fragment,t),$(kt.$$.fragment,t),$(Qe.$$.fragment,t),$(Ut.$$.fragment,t),$(Ye.$$.fragment,t),$(Rt.$$.fragment,t),$(We.$$.fragment,t),$(Le.$$.fragment,t),$(He.$$.fragment,t),$(Ae.$$.fragment,t),$(jt.$$.fragment,t),$(Nt.$$.fragment,t),$(Pe.$$.fragment,t),$(Gt.$$.fragment,t),$(qe.$$.fragment,t),$(Zt.$$.fragment,t),$(Xt.$$.fragment,t),$(It.$$.fragment,t),$(Oe.$$.fragment,t),$(Dt.$$.fragment,t),hs=!1},d(t){t&&(l(w),l(i),l(s),l(e),l(La),l(Ha),l(tt),l(Aa),l(R),l(Pa),l(K),l(qa),l(_e),l(Oa),l(Y),l(Ka),l(es),l(X),l(ts),l(N),l(as),l(be),l(ss),l(F),l(ns),l(os),l(V),l(rs),l(ls),l(W),l(ds),l(G),l(is),l(j),l(cs),l($e),l(ps),l(S),l(ms),l(gs),l(ee),l(us),l(fs),l(Wa)),l(a),x(d,t),x(g,t),x(et,t),x(at),x(st),x(Ue),x(nt),x(Re),x(je),x(Ne),x(ot),x(rt),x(lt),x(dt),x(it),x(ct,t),x(pt),x(mt),x(Ze),x(gt),x(Xe),x(ut),x(Ie),x(ft),x(De),x(ht),x(Be),x(_t),x(bt),x(Fe),x($t),x(Ve),x(xt),x(Se),x(yt),x(ze),x(wt),x(Ee),x(vt),x(Mt),x(Tt,t),x(Jt),x(Ct,t),x(kt),x(Qe),x(Ut),x(Ye),x(Rt),x(We),x(Le),x(He),x(Ae),x(jt),x(Nt),x(Pe),x(Gt),x(qe),x(Zt),x(Xt,t),x(It),x(Oe),x(Dt,t)}}}const Fr='{"title":"Builder classes","local":"builder-classes","sections":[{"title":"Builders","local":"datasets.DatasetBuilder","sections":[],"depth":2},{"title":"Download","local":"datasets.DownloadManager","sections":[],"depth":2},{"title":"Verification","local":"datasets.VerificationMode","sections":[],"depth":2},{"title":"Splits","local":"datasets.SplitGenerator","sections":[],"depth":2},{"title":"Version","local":"datasets.Version","sections":[],"depth":2}],"depth":1}';function Vr(v){return ir(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Lr extends cr{constructor(a){super(),pr(this,a,Vr,Br,dr,{})}}export{Lr as component}; | |
Xet Storage Details
- Size:
- 104 kB
- Xet hash:
- 4cdb5598947ba82e7033d59968c87cc2b242f1d3a1c13fe0db4cb321559f938e
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.