Buckets:

hf-doc-build/doc-dev / hub /main /en /datasets-viewer.html
rtrm's picture
download
raw
25.9 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Dataset viewer&quot;,&quot;local&quot;:&quot;dataset-viewer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Inspect data distributions&quot;,&quot;local&quot;:&quot;inspect-data-distributions&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Filter by value&quot;,&quot;local&quot;:&quot;filter-by-value&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Search a word in the dataset&quot;,&quot;local&quot;:&quot;search-a-word-in-the-dataset&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Share a specific row&quot;,&quot;local&quot;:&quot;share-a-specific-row&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Large scale datasets&quot;,&quot;local&quot;:&quot;large-scale-datasets&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Access the parquet files&quot;,&quot;local&quot;:&quot;access-the-parquet-files&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Conversion bot&quot;,&quot;local&quot;:&quot;conversion-bot&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Programmatic access&quot;,&quot;local&quot;:&quot;programmatic-access&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Dataset preview&quot;,&quot;local&quot;:&quot;dataset-preview&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Embed the Dataset Viewer in a webpage&quot;,&quot;local&quot;:&quot;embed-the-dataset-viewer-in-a-webpage&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Configure the Dataset Viewer&quot;,&quot;local&quot;:&quot;configure-the-dataset-viewer&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/hub/main/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/hub/main/en/_app/immutable/entry/start.d0cd5065.js">
<link rel="modulepreload" href="/docs/hub/main/en/_app/immutable/chunks/scheduler.d6170356.js">
<link rel="modulepreload" href="/docs/hub/main/en/_app/immutable/chunks/singletons.d032f1eb.js">
<link rel="modulepreload" href="/docs/hub/main/en/_app/immutable/chunks/paths.752f1c6b.js">
<link rel="modulepreload" href="/docs/hub/main/en/_app/immutable/entry/app.b6abe3c1.js">
<link rel="modulepreload" href="/docs/hub/main/en/_app/immutable/chunks/index.fcd4cc08.js">
<link rel="modulepreload" href="/docs/hub/main/en/_app/immutable/nodes/0.f045427f.js">
<link rel="modulepreload" href="/docs/hub/main/en/_app/immutable/nodes/42.a4bdfe22.js">
<link rel="modulepreload" href="/docs/hub/main/en/_app/immutable/chunks/Tip.b09c67cf.js">
<link rel="modulepreload" href="/docs/hub/main/en/_app/immutable/chunks/EditOnGithub.da2b595c.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Dataset viewer&quot;,&quot;local&quot;:&quot;dataset-viewer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Inspect data distributions&quot;,&quot;local&quot;:&quot;inspect-data-distributions&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Filter by value&quot;,&quot;local&quot;:&quot;filter-by-value&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Search a word in the dataset&quot;,&quot;local&quot;:&quot;search-a-word-in-the-dataset&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Share a specific row&quot;,&quot;local&quot;:&quot;share-a-specific-row&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Large scale datasets&quot;,&quot;local&quot;:&quot;large-scale-datasets&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Access the parquet files&quot;,&quot;local&quot;:&quot;access-the-parquet-files&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Conversion bot&quot;,&quot;local&quot;:&quot;conversion-bot&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Programmatic access&quot;,&quot;local&quot;:&quot;programmatic-access&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Dataset preview&quot;,&quot;local&quot;:&quot;dataset-preview&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Embed the Dataset Viewer in a webpage&quot;,&quot;local&quot;:&quot;embed-the-dataset-viewer-in-a-webpage&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Configure the Dataset Viewer&quot;,&quot;local&quot;:&quot;configure-the-dataset-viewer&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="dataset-viewer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dataset-viewer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Dataset viewer</span></h1> <p data-svelte-h="svelte-hp9ih6">The dataset page includes a table with the contents of the dataset, arranged by pages of 100 rows. You can navigate between pages using the buttons at the bottom of the table.</p> <div class="flex justify-center" data-svelte-h="svelte-1ml06pn"><img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hub/dataset-viewer.png"> <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hub/dataset-viewer-dark.png"></div> <h2 class="relative group"><a id="inspect-data-distributions" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inspect-data-distributions"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inspect data distributions</span></h2> <p data-svelte-h="svelte-15na6b0">At the top of the columns you can see the graphs representing the distribution of their data. This gives you a quick insight on how balanced your classes are, what are the range and distribution of numerical data and lengths of texts, and what portion of the column data is missing.</p> <h2 class="relative group"><a id="filter-by-value" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#filter-by-value"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Filter by value</span></h2> <p data-svelte-h="svelte-1b9l4a1">If you click on a bar of a histogram from a numerical column, the dataset viewer will filter the data and show only the rows with values that fall in the selected range.
Similarly, if you select one class from a categorical column, it will show only the rows from the selected category.</p> <h2 class="relative group"><a id="search-a-word-in-the-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#search-a-word-in-the-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Search a word in the dataset</span></h2> <p data-svelte-h="svelte-1o314dq">You can search for a word in the dataset by typing it in the search bar at the top of the table. The search is case-insensitive and will match any row containing the word. The text is searched in the columns of <code>string</code>, even if the values are nested in a dictionary or a list.</p> <h2 class="relative group"><a id="share-a-specific-row" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#share-a-specific-row"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Share a specific row</span></h2> <p data-svelte-h="svelte-q0hltc">You can share a specific row by clicking on it, and then copying the URL in the address bar of your browser. For example <a href="https://huggingface.co/datasets/nyu-mll/glue/viewer/mrpc/test?p=2&row=241" rel="nofollow">https://huggingface.co/datasets/nyu-mll/glue/viewer/mrpc/test?p=2&amp;row=241</a> will open the dataset viewer on the MRPC dataset, on the test split, and on the 241st row.</p> <h2 class="relative group"><a id="large-scale-datasets" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#large-scale-datasets"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Large scale datasets</span></h2> <p data-svelte-h="svelte-1o4zkf5">The Dataset Viewer supports large scale datasets, but depending on the data format it may only show the first 5GB of the dataset:</p> <ul data-svelte-h="svelte-179pmm9"><li>For Parquet datasets: the Dataset Viewer shows the full dataset, but filtering and search are only enabled on the first 5GB.</li> <li>For datasets &gt;5GB in other formats (e.g. <a href="https://github.com/webdataset/webdataset" rel="nofollow">WebDataset</a> or JSON Lines): the Dataset Viewer only shows the first 5GB, and filtering and search are enabled on these first 5GB.</li></ul> <p data-svelte-h="svelte-1b89tmk">In this case, an informational message lets you know that the Viewer is partial. This should be a large enough sample to represent the full dataset accurately, let us know if you need a bigger sample.</p> <h2 class="relative group"><a id="access-the-parquet-files" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#access-the-parquet-files"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Access the parquet files</span></h2> <p data-svelte-h="svelte-4gncyv">To power the dataset viewer, the first 5GB of every dataset are auto-converted to the Parquet format (unless it was already a Parquet dataset). In the dataset viewer (for example, see <a href="https://huggingface.co/datasets/nyu-mll/glue" rel="nofollow">GLUE</a>), you can click on <a href="https://huggingface.co/datasets/nyu-mll/glue/tree/refs%2Fconvert%2Fparquet/cola" rel="nofollow"><em>“Auto-converted to Parquet”</em></a> to access the Parquet files. Please, refer to the <a href="/docs/datasets-server/parquet_process">dataset viewer docs</a> to learn how to query the dataset parquet files with libraries such as Polars, Pandas or DuckDB.</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-f0ovyw">Parquet is a columnar storage format optimized for querying and processing large datasets. Parquet is a popular choice for big data processing and analytics and is widely used for data processing and machine learning. You can learn more about the advantages associated with this format in the <a href="https://huggingface.co/docs/datasets-server/parquet">documentation</a>.</p></div> <h3 class="relative group"><a id="conversion-bot" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#conversion-bot"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Conversion bot</span></h3> <p data-svelte-h="svelte-1aqnzxc">When you create a new dataset, the <a href="https://huggingface.co/parquet-converter" rel="nofollow"><code>parquet-converter</code> bot</a> notifies you once it converts the dataset to Parquet. The <a href="./repositories-pull-requests-discussions">discussion</a> it opens in the repository provides details about the Parquet format and links to the Parquet files.</p> <div class="flex justify-center" data-svelte-h="svelte-p1ei2c"><img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hub/parquet-converter-profile-light.png"> <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hub/parquet-converter-profile-dark.png"></div> <h3 class="relative group"><a id="programmatic-access" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#programmatic-access"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Programmatic access</span></h3> <p data-svelte-h="svelte-1tp9sa">You can also access the list of Parquet files programmatically using the <a href="./api#get-apidatasetsrepoidparquet">Hub API</a>; for example, endpoint <a href="https://huggingface.co/api/datasets/nyu-mll/glue/parquet" rel="nofollow"><code>https://huggingface.co/api/datasets/nyu-mll/glue/parquet</code></a> lists the parquet files of the <code>nyu-mll/glue</code> dataset.</p> <h2 class="relative group"><a id="dataset-preview" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dataset-preview"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Dataset preview</span></h2> <p data-svelte-h="svelte-147t9qz">For the biggest datasets, the page shows a preview of the first 100 rows instead of a full-featured viewer. This restriction only applies for datasets over 5GB that are not natively in Parquet format or that have not been auto-converted to Parquet.</p> <div class="flex justify-center" data-svelte-h="svelte-1o5nkpr"><img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hub/dataset-preview.png"> <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hub/dataset-preview-dark.png"></div> <h2 class="relative group"><a id="embed-the-dataset-viewer-in-a-webpage" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#embed-the-dataset-viewer-in-a-webpage"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Embed the Dataset Viewer in a webpage</span></h2> <p data-svelte-h="svelte-a1bgxj">You can embed the Dataset Viewer in your own webpage using an iframe. The URL to use is <code>https://huggingface.co/datasets/&lt;namespace&gt;/&lt;dataset-name&gt;/embed/viewer</code>, where <code>&lt;namespace&gt;</code> is the owner of the dataset and <code>&lt;dataset-name&gt;</code> is the name of the dataset. You can also pass other parameters like the subset, split, filter, search or selected row.</p> <p data-svelte-h="svelte-mz2ymo">For more information see our guide on <a href="./datasets-viewer-embed">How to embed the Dataset Viewer in a webpage</a>.</p> <h2 class="relative group"><a id="configure-the-dataset-viewer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#configure-the-dataset-viewer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Configure the Dataset Viewer</span></h2> <p data-svelte-h="svelte-kgq8x5">To have a properly working Dataset Viewer for your dataset, make sure your dataset is in a supported format and structure.
There is also an option to configure your dataset using YAML.</p> <p data-svelte-h="svelte-1l3qyl4">For <strong>private</strong> datasets, the Dataset Viewer is enabled for <a href="https://huggingface.co/pricing" rel="nofollow">PRO users</a> and <a href="https://huggingface.co/enterprise" rel="nofollow">Enterprise Hub organizations</a>.</p> <p data-svelte-h="svelte-1upjktn">For more information see our guide on <a href="./datasets-viewer-configure">How to configure the Dataset Viewer</a>.</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/hub-docs/blob/main/docs/hub/datasets-viewer.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
__sveltekit_1vatp3t = {
assets: "/docs/hub/main/en",
base: "/docs/hub/main/en",
env: {}
};
const element = document.currentScript.parentElement;
const data = [null,null];
Promise.all([
import("/docs/hub/main/en/_app/immutable/entry/start.d0cd5065.js"),
import("/docs/hub/main/en/_app/immutable/entry/app.b6abe3c1.js")
]).then(([kit, app]) => {
kit.start(app, element, {
node_ids: [0, 42],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
25.9 kB
·
Xet hash:
d02a45e5ed90b15975e12ebed25b7885ef1f2737f4d4e1c908268342aae042bf

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.