Buckets:

rtrm's picture
download
raw
22.5 kB
/**
 * Compiled (minified) Svelte page module for the "Create a document dataset"
 * guide. The `source` prop passed to `Tt` below points at
 * docs/source/document_dataset.mdx in huggingface/datasets, so this file is
 * build output: prefer changing the .mdx source and rebuilding over
 * hand-editing.
 *
 * Layout:
 *   - wt(Ls): builds the page fragment (static HTML strings plus heading,
 *             code-block and edit-link components) and returns an object
 *             with the methods c, l, h, m, p, i, o, d.
 *   - jt:     JSON string with the page's table of contents; written to the
 *             `content` attribute of the <meta name="hf:doc:metadata"> element.
 *   - Zt(Ls): instance function handed to `ht`; returns an empty array.
 *   - Ut:     the exported component class.
 *
 * NOTE(review): the single-letter helpers (p, l, d, o, n, u, i, a, r, c, m,
 * f, e, ...) come from ../chunks/index.*.js and ../chunks/scheduler.*.js;
 * their exact semantics are not visible from this file.
 */
import{s as rt,n as ct,o as mt}from"../chunks/scheduler.d75c11ed.js";
import{S as ft,i as ht,e as p,s as l,c as d,h as yt,a as o,d as e,b as n,f as ut,g as u,j as i,k as es,l as Mt,m as a,n as r,t as c,o as m,p as f}from"../chunks/index.4ec9dfe9.js";
import{C as Jt,H as Ds,E as Tt}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.ef04c636.js";
import{C as h}from"../chunks/CodeBlock.2bffaee9.js";

/**
 * Fragment factory for the page.
 *
 * Declares one local per DOM node / component / static HTML string, creates
 * the child components, and returns the fragment object. `Ls` is unused.
 * Multi-line text inside double-quoted strings must use the `\n` escape; a
 * raw line break there is a syntax error (template literals are exempt).
 */
function wt(Ls){let y,as,ss,ls,w,ns,j,ps,Z,As="This guide will show you how to create a document dataset with <code>PdfFolder</code> and some metadata. This is a no-code solution for quickly creating a document dataset with several thousand PDFs.",
os,M,Ss='<p>You can control access to your dataset by requiring users to share their contact information first. Check out the <a href="https://huggingface.co/docs/hub/datasets-gated" rel="nofollow">Gated datasets</a> guide for more information about how to enable this feature on the Hub.</p>',
is,b,ds,I,Es="The <code>PdfFolder</code> is a dataset builder designed to quickly load a document dataset with several thousand PDFs without requiring you to write any code.",
us,J,Ps='<p>💡 Take a look at the <a href="repository_structure#split-pattern-hierarchy">Split pattern hierarchy</a> to learn more about how <code>PdfFolder</code> creates dataset splits based on your dataset repository structure.</p>',
rs,g,Ns="<code>PdfFolder</code> automatically infers the class labels of your dataset based on the directory name. \nStore your dataset in a directory structure like:",
cs,$,ms,U,Qs='If the dataset follows the <code>PdfFolder</code> structure, then you can load it directly with <a href="/docs/datasets/pr_8121/en/package_reference/loading_methods#datasets.load_dataset">load_dataset()</a>:',
fs,G,hs,k,zs='This is equivalent to passing <code>pdffolder</code> manually in <a href="/docs/datasets/pr_8121/en/package_reference/loading_methods#datasets.load_dataset">load_dataset()</a> and the directory in <code>data_dir</code>:',
ys,q,Ms,_,Os="You can also use <code>pdffolder</code> to load datasets involving multiple splits. To do so, your dataset directory should have the following structure:",
Js,v,Ts,T,Ks="<p>If all PDF files are contained in a single directory or if they are not on the same level of directory structure, <code>label</code> column won’t be added automatically. If you need it, set <code>drop_labels=False</code> explicitly.</p>",
ws,R,st="If there is additional information you’d like to include about your dataset, like text captions or bounding boxes, add it as a <code>metadata.csv</code> file in your folder. This lets you quickly create datasets for different computer vision tasks like text captioning or object detection. You can also use a JSONL file <code>metadata.jsonl</code> or a Parquet file <code>metadata.parquet</code>.",
js,B,Zs,x,tt="Your <code>metadata.csv</code> file must have a <code>file_name</code> or <code>*_file_name</code> field which links PDF files with their metadata:",
bs,Y,Is,X,et="or using <code>metadata.jsonl</code>:",
gs,C,$s,W,at="Here the <code>file_name</code> must be the name of the PDF file next to the metadata file. More generally, it must be the relative path from the directory containing the metadata to the PDF file.",
Us,H,lt="It’s possible to point to more than one PDF in each row in your dataset, for example if both your input and output are pdfs:",
Gs,F,ks,V,nt="You can also define lists of PDFs. \nIn that case you need to name the field <code>file_names</code> or <code>*_file_names</code>. Here is an example:",
qs,D,_s,L,vs,A,pt="OCR datasets have the text contained in a PDF. An example <code>metadata.csv</code> may look like:",
Rs,S,Bs,E,ot="Load the dataset with <code>PdfFolder</code>, and it will create a <code>text</code> column for the PDF captions:",
xs,P,Ys,N,Xs,Q,it='Once you’ve created a dataset, you can share it to the Hub using <code>huggingface_hub</code> for example. Make sure you have the <a href="https://huggingface.co/docs/huggingface_hub/index" rel="nofollow">huggingface_hub</a> library installed and you’re logged in to your Hugging Face account (see the <a href="upload_dataset#upload-with-python">Upload with Python tutorial</a> for more details).',
Cs,z,dt="Upload your dataset with <code>huggingface_hub.HfApi.upload_folder</code>:",
Ws,O,Hs,K,Fs,ts,Vs;
/* Child components. For each code block, `code` is a base64 payload and `highlighted` is pre-rendered HTML markup. */
return w=new Jt({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),
j=new Ds({props:{title:"Create a document dataset",local:"create-a-document-dataset",headingTag:"h1"}}),
b=new Ds({props:{title:"PdfFolder",local:"pdffolder",headingTag:"h2"}}),
$=new h({props:{code:"Zm9sZGVyJTJGdHJhaW4lMkZyZXN1bWUlMkYwMDAxLnBkZiUwQWZvbGRlciUyRnRyYWluJTJGcmVzdW1lJTJGMDAwMi5wZGYlMEFmb2xkZXIlMkZ0cmFpbiUyRnJlc3VtZSUyRjAwMDMucGRmJTBBJTBBZm9sZGVyJTJGdHJhaW4lMkZpbnZvaWNlJTJGMDAwMS5wZGYlMEFmb2xkZXIlMkZ0cmFpbiUyRmludm9pY2UlMkYwMDAyLnBkZiUwQWZvbGRlciUyRnRyYWluJTJGaW52b2ljZSUyRjAwMDMucGRm",highlighted:`folder<span class="hljs-regexp">/train/</span>resume/<span class="hljs-number">0001</span>.pdf
folder<span class="hljs-regexp">/train/</span>resume/<span class="hljs-number">0002</span>.pdf
folder<span class="hljs-regexp">/train/</span>resume/<span class="hljs-number">0003</span>.pdf
folder<span class="hljs-regexp">/train/i</span>nvoice/<span class="hljs-number">0001</span>.pdf
folder<span class="hljs-regexp">/train/i</span>nvoice/<span class="hljs-number">0002</span>.pdf
folder<span class="hljs-regexp">/train/i</span>nvoice/<span class="hljs-number">0003</span>.pdf`,wrap:!1}}),
G=new h({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJwYXRoJTJGdG8lMkZmb2xkZXIlMjIp",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = load_dataset(<span class="hljs-string">&quot;path/to/folder&quot;</span>)`,wrap:!1}}),
q=new h({props:{code:"ZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJwZGZmb2xkZXIlMjIlMkMlMjBkYXRhX2RpciUzRCUyMiUyRnBhdGglMkZ0byUyRmZvbGRlciUyMik=",highlighted:'<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = load_dataset(<span class="hljs-string">&quot;pdffolder&quot;</span>, data_dir=<span class="hljs-string">&quot;/path/to/folder&quot;</span>)',wrap:!1}}),
v=new h({props:{code:"Zm9sZGVyJTJGdHJhaW4lMkZyZXN1bWUlMkYwMDAxLnBkZiUwQWZvbGRlciUyRnRyYWluJTJGcmVzdW1lJTJGMDAwMi5wZGYlMEFmb2xkZXIlMkZ0ZXN0JTJGaW52b2ljZSUyRjAwMDEucGRmJTBBZm9sZGVyJTJGdGVzdCUyRmludm9pY2UlMkYwMDAyLnBkZg==",highlighted:`folder<span class="hljs-regexp">/train/</span>resume/<span class="hljs-number">0001</span>.pdf
folder<span class="hljs-regexp">/train/</span>resume/<span class="hljs-number">0002</span>.pdf
folder<span class="hljs-regexp">/test/i</span>nvoice/<span class="hljs-number">0001</span>.pdf
folder<span class="hljs-regexp">/test/i</span>nvoice/<span class="hljs-number">0002</span>.pdf`,wrap:!1}}),
B=new h({props:{code:"Zm9sZGVyJTJGdHJhaW4lMkZtZXRhZGF0YS5jc3YlMEFmb2xkZXIlMkZ0cmFpbiUyRjAwMDEucGRmJTBBZm9sZGVyJTJGdHJhaW4lMkYwMDAyLnBkZiUwQWZvbGRlciUyRnRyYWluJTJGMDAwMy5wZGY=",highlighted:`folder<span class="hljs-regexp">/train/m</span>etadata.csv
folder<span class="hljs-regexp">/train/</span><span class="hljs-number">0001</span>.pdf
folder<span class="hljs-regexp">/train/</span><span class="hljs-number">0002</span>.pdf
folder<span class="hljs-regexp">/train/</span><span class="hljs-number">0003</span>.pdf`,wrap:!1}}),
Y=new h({props:{code:"ZmlsZV9uYW1lJTJDYWRkaXRpb25hbF9mZWF0dXJlJTBBMDAwMS5wZGYlMkNUaGlzJTIwaXMlMjBhJTIwZmlyc3QlMjB2YWx1ZSUyMG9mJTIwYSUyMHRleHQlMjBmZWF0dXJlJTIweW91JTIwYWRkZWQlMjB0byUyMHlvdXIlMjBwZGZzJTBBMDAwMi5wZGYlMkNUaGlzJTIwaXMlMjBhJTIwc2Vjb25kJTIwdmFsdWUlMjBvZiUyMGElMjB0ZXh0JTIwZmVhdHVyZSUyMHlvdSUyMGFkZGVkJTIwdG8lMjB5b3VyJTIwcGRmcyUwQTAwMDMucGRmJTJDVGhpcyUyMGlzJTIwYSUyMHRoaXJkJTIwdmFsdWUlMjBvZiUyMGElMjB0ZXh0JTIwZmVhdHVyZSUyMHlvdSUyMGFkZGVkJTIwdG8lMjB5b3VyJTIwcGRmcw==",highlighted:`file_name,additional_feature
<span class="hljs-number">0001.</span>pdf,This is <span class="hljs-keyword">a</span> <span class="hljs-keyword">first</span> <span class="hljs-built_in">value</span> <span class="hljs-keyword">of</span> <span class="hljs-keyword">a</span> <span class="hljs-keyword">text</span> feature you added <span class="hljs-built_in">to</span> your pdfs
<span class="hljs-number">0002.</span>pdf,This is <span class="hljs-keyword">a</span> <span class="hljs-keyword">second</span> <span class="hljs-built_in">value</span> <span class="hljs-keyword">of</span> <span class="hljs-keyword">a</span> <span class="hljs-keyword">text</span> feature you added <span class="hljs-built_in">to</span> your pdfs
<span class="hljs-number">0003.</span>pdf,This is <span class="hljs-keyword">a</span> <span class="hljs-keyword">third</span> <span class="hljs-built_in">value</span> <span class="hljs-keyword">of</span> <span class="hljs-keyword">a</span> <span class="hljs-keyword">text</span> feature you added <span class="hljs-built_in">to</span> your pdfs`,wrap:!1}}),
C=new h({props:{code:"JTdCJTIyZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMS5wZGYlMjIlMkMlMjAlMjJhZGRpdGlvbmFsX2ZlYXR1cmUlMjIlM0ElMjAlMjJUaGlzJTIwaXMlMjBhJTIwZmlyc3QlMjB2YWx1ZSUyMG9mJTIwYSUyMHRleHQlMjBmZWF0dXJlJTIweW91JTIwYWRkZWQlMjB0byUyMHlvdXIlMjBQREZzJTIyJTdEJTBBJTdCJTIyZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMi5wZGYlMjIlMkMlMjAlMjJhZGRpdGlvbmFsX2ZlYXR1cmUlMjIlM0ElMjAlMjJUaGlzJTIwaXMlMjBhJTIwc2Vjb25kJTIwdmFsdWUlMjBvZiUyMGElMjB0ZXh0JTIwZmVhdHVyZSUyMHlvdSUyMGFkZGVkJTIwdG8lMjB5b3VyJTIwUERGcyUyMiU3RCUwQSU3QiUyMmZpbGVfbmFtZSUyMiUzQSUyMCUyMjAwMDMucGRmJTIyJTJDJTIwJTIyYWRkaXRpb25hbF9mZWF0dXJlJTIyJTNBJTIwJTIyVGhpcyUyMGlzJTIwYSUyMHRoaXJkJTIwdmFsdWUlMjBvZiUyMGElMjB0ZXh0JTIwZmVhdHVyZSUyMHlvdSUyMGFkZGVkJTIwdG8lMjB5b3VyJTIwUERGcyUyMiU3RA==",highlighted:`{<span class="hljs-comment">&quot;file_name&quot;</span>: <span class="hljs-comment">&quot;0001.pdf&quot;</span>, <span class="hljs-comment">&quot;additional_feature&quot;</span>: <span class="hljs-comment">&quot;This is a first value of a text feature you added to your PDFs&quot;</span>}
{<span class="hljs-comment">&quot;file_name&quot;</span>: <span class="hljs-comment">&quot;0002.pdf&quot;</span>, <span class="hljs-comment">&quot;additional_feature&quot;</span>: <span class="hljs-comment">&quot;This is a second value of a text feature you added to your PDFs&quot;</span>}
{<span class="hljs-comment">&quot;file_name&quot;</span>: <span class="hljs-comment">&quot;0003.pdf&quot;</span>, <span class="hljs-comment">&quot;additional_feature&quot;</span>: <span class="hljs-comment">&quot;This is a third value of a text feature you added to your PDFs&quot;</span>}`,wrap:!1}}),
F=new h({props:{code:"JTdCJTIyaW5wdXRfZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMS5wZGYlMjIlMkMlMjAlMjJvdXRwdXRfZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMV9vdXRwdXQucGRmJTIyJTdEJTBBJTdCJTIyaW5wdXRfZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMi5wZGYlMjIlMkMlMjAlMjJvdXRwdXRfZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMl9vdXRwdXQucGRmJTIyJTdEJTBBJTdCJTIyaW5wdXRfZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwMy5wZGYlMjIlMkMlMjAlMjJvdXRwdXRfZmlsZV9uYW1lJTIyJTNBJTIwJTIyMDAwM19vdXRwdXQucGRmJTIyJTdE",highlighted:`<span class="hljs-punctuation">{</span><span class="hljs-attr">&quot;input_file_name&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;0001.pdf&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-attr">&quot;output_file_name&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;0001_output.pdf&quot;</span><span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">{</span><span class="hljs-attr">&quot;input_file_name&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;0002.pdf&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-attr">&quot;output_file_name&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;0002_output.pdf&quot;</span><span class="hljs-punctuation">}</span>
<span class="hljs-punctuation">{</span><span class="hljs-attr">&quot;input_file_name&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;0003.pdf&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-attr">&quot;output_file_name&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;0003_output.pdf&quot;</span><span class="hljs-punctuation">}</span>`,wrap:!1}}),
D=new h({props:{code:"JTdCJTIycGRmc19maWxlX25hbWVzJTIyJTNBJTIwJTVCJTIyMDAwMV9wYXJ0MS5wZGYlMjIlMkMlMjAlMjIwMDAxX3BhcnQyLnBkZiUyMiU1RCUyQyUyMCUyMmxhYmVsJTIyJTNBJTIwJTIydXJnZW50JTIyJTdEJTBBJTdCJTIycGRmc19maWxlX25hbWVzJTIyJTNBJTIwJTVCJTIyMDAwMl9wYXJ0MS5wZGYlMjIlMkMlMjAlMjIwMDAyX3BhcnQyLnBkZiUyMiU1RCUyQyUyMCUyMmxhYmVsJTIyJTNBJTIwJTIydXJnZW50JTIyJTdEJTBBJTdCJTIycGRmc19maWxlX25hbWVzJTIyJTNBJTIwJTVCJTIyMDAwM19wYXJ0MS5wZGYlMjIlMkMlMjAlMjIwMDAyX3BhcnQyLnBkZiUyMiU1RCUyQyUyMCUyMmxhYmVsJTIyJTNBJTIwJTIybm9ybWFsJTIyJTdE",highlighted:`{<span class="hljs-string">&quot;pdfs_file_names&quot;</span>: [<span class="hljs-string">&quot;0001_part1.pdf&quot;</span>, <span class="hljs-string">&quot;0001_part2.pdf&quot;</span>], <span class="hljs-string">&quot;label&quot;</span>: <span class="hljs-string">&quot;urgent&quot;</span>}
{<span class="hljs-string">&quot;pdfs_file_names&quot;</span>: [<span class="hljs-string">&quot;0002_part1.pdf&quot;</span>, <span class="hljs-string">&quot;0002_part2.pdf&quot;</span>], <span class="hljs-string">&quot;label&quot;</span>: <span class="hljs-string">&quot;urgent&quot;</span>}
{<span class="hljs-string">&quot;pdfs_file_names&quot;</span>: [<span class="hljs-string">&quot;0003_part1.pdf&quot;</span>, <span class="hljs-string">&quot;0002_part2.pdf&quot;</span>], <span class="hljs-string">&quot;label&quot;</span>: <span class="hljs-string">&quot;normal&quot;</span>}`,wrap:!1}}),
L=new Ds({props:{title:"OCR (Optical Character Recognition)",local:"ocr-optical-character-recognition",headingTag:"h3"}}),
S=new h({props:{code:"ZmlsZV9uYW1lJTJDdGV4dCUwQTAwMDEucGRmJTJDSW52b2ljZSUyMDEyMzQlMjBmcm9tJTIwMDElMkYwMSUyRjE5NzAuLi4lMEEwMDAyLnBkZiUyQ1NvZnR3YXJlJTIwRW5naW5lZXIlMjBSZXN1bWUuJTIwRWR1Y2F0aW9uJTNBJTIwLi4uJTBBMDAwMy5wZGYlMkNBdHRlbnRpb24lMjBpcyUyMGFsbCUyMHlvdSUyMG5lZWQuJTIwQWJzdHJhY3QuJTIwVGhlJTIwLi4u",highlighted:`<span class="hljs-attribute">file_name</span>,text
<span class="hljs-attribute">0001</span>.pdf,Invoice <span class="hljs-number">1234</span> from <span class="hljs-number">01</span>/<span class="hljs-number">01</span>/<span class="hljs-number">1970</span>...
<span class="hljs-attribute">0002</span>.pdf,Software Engineer Resume. Education: ...
<span class="hljs-attribute">0003</span>.pdf,Attention is <span class="hljs-literal">all</span> you need. Abstract. The ...`,wrap:!1}}),
P=new h({props:{code:"ZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJwZGZmb2xkZXIlMjIlMkMlMjBkYXRhX2RpciUzRCUyMiUyRnBhdGglMkZ0byUyRmZvbGRlciUyMiUyQyUyMHNwbGl0JTNEJTIydHJhaW4lMjIpJTBBZGF0YXNldCU1QjAlNUQlNUIlMjJ0ZXh0JTIyJTVE",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = load_dataset(<span class="hljs-string">&quot;pdffolder&quot;</span>, data_dir=<span class="hljs-string">&quot;/path/to/folder&quot;</span>, split=<span class="hljs-string">&quot;train&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;text&quot;</span>]
<span class="hljs-string">&quot;Invoice 1234 from 01/01/1970...&quot;</span>`,wrap:!1}}),
N=new Ds({props:{title:"Upload dataset to the Hub",local:"upload-dataset-to-the-hub",headingTag:"h3"}}),
O=new h({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMEhmQXBpJTBBYXBpJTIwJTNEJTIwSGZBcGkoKSUwQSUwQWFwaS51cGxvYWRfZm9sZGVyKCUwQSUyMCUyMCUyMCUyMGZvbGRlcl9wYXRoJTNEJTIyJTJGcGF0aCUyRnRvJTJGbG9jYWwlMkZkYXRhc2V0JTIyJTJDJTBBJTIwJTIwJTIwJTIwcmVwb19pZCUzRCUyMnVzZXJuYW1lJTJGbXktY29vbC1kYXRhc2V0JTIyJTJDJTBBJTIwJTIwJTIwJTIwcmVwb190eXBlJTNEJTIyZGF0YXNldCUyMiUyQyUwQSk=",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> HfApi
api = HfApi()
api.upload_folder(
folder_path=<span class="hljs-string">&quot;/path/to/local/dataset&quot;</span>,
repo_id=<span class="hljs-string">&quot;username/my-cool-dataset&quot;</span>,
repo_type=<span class="hljs-string">&quot;dataset&quot;</span>,
)`,wrap:!1}}),
K=new Tt({props:{source:"https://github.com/huggingface/datasets/blob/main/docs/source/document_dataset.mdx"}}),
{
/* c: creates every element/component fragment from scratch, then calls h(). */
c(){y=p("meta"),as=l(),ss=p("p"),ls=l(),d(w.$$.fragment),ns=l(),d(j.$$.fragment),ps=l(),Z=p("p"),Z.innerHTML=As,os=l(),M=p("blockquote"),M.innerHTML=Ss,is=l(),d(b.$$.fragment),ds=l(),I=p("p"),I.innerHTML=Es,us=l(),J=p("blockquote"),J.innerHTML=Ps,rs=l(),g=p("p"),g.innerHTML=Ns,cs=l(),d($.$$.fragment),ms=l(),U=p("p"),U.innerHTML=Qs,fs=l(),d(G.$$.fragment),hs=l(),k=p("p"),k.innerHTML=zs,ys=l(),d(q.$$.fragment),Ms=l(),_=p("p"),_.innerHTML=Os,Js=l(),d(v.$$.fragment),Ts=l(),T=p("blockquote"),T.innerHTML=Ks,ws=l(),R=p("p"),R.innerHTML=st,js=l(),d(B.$$.fragment),Zs=l(),x=p("p"),x.innerHTML=tt,bs=l(),d(Y.$$.fragment),Is=l(),X=p("p"),X.innerHTML=et,gs=l(),d(C.$$.fragment),$s=l(),W=p("p"),W.innerHTML=at,Us=l(),H=p("p"),H.textContent=lt,Gs=l(),d(F.$$.fragment),ks=l(),V=p("p"),V.innerHTML=nt,qs=l(),d(D.$$.fragment),_s=l(),d(L.$$.fragment),vs=l(),A=p("p"),A.innerHTML=pt,Rs=l(),d(S.$$.fragment),Bs=l(),E=p("p"),E.innerHTML=ot,xs=l(),d(P.$$.fragment),Ys=l(),d(N.$$.fragment),Xs=l(),Q=p("p"),Q.innerHTML=it,Cs=l(),z=p("p"),z.innerHTML=dt,Ws=l(),d(O.$$.fragment),Hs=l(),d(K.$$.fragment),Fs=l(),ts=p("p"),this.h()},
/* l: same node sequence as c(), but built from the existing nodes in `s`; a static string is re-assigned only when the value returned by i(node) differs from the expected "svelte-…" hash. Presumably the hydration path — confirm against the Svelte runtime. */
l(s){const t=yt("svelte-u9bgzb",document.head);y=o(t,"META",{name:!0,content:!0}),t.forEach(e),as=n(s),ss=o(s,"P",{}),ut(ss).forEach(e),ls=n(s),u(w.$$.fragment,s),ns=n(s),u(j.$$.fragment,s),ps=n(s),Z=o(s,"P",{"data-svelte-h":!0}),i(Z)!=="svelte-19nddia"&&(Z.innerHTML=As),os=n(s),M=o(s,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),i(M)!=="svelte-diqosn"&&(M.innerHTML=Ss),is=n(s),u(b.$$.fragment,s),ds=n(s),I=o(s,"P",{"data-svelte-h":!0}),i(I)!=="svelte-1f1cbsv"&&(I.innerHTML=Es),us=n(s),J=o(s,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),i(J)!=="svelte-1ew7ms2"&&(J.innerHTML=Ps),rs=n(s),g=o(s,"P",{"data-svelte-h":!0}),i(g)!=="svelte-1relwut"&&(g.innerHTML=Ns),cs=n(s),u($.$$.fragment,s),ms=n(s),U=o(s,"P",{"data-svelte-h":!0}),i(U)!=="svelte-hgxnrx"&&(U.innerHTML=Qs),fs=n(s),u(G.$$.fragment,s),hs=n(s),k=o(s,"P",{"data-svelte-h":!0}),i(k)!=="svelte-9qg94t"&&(k.innerHTML=zs),ys=n(s),u(q.$$.fragment,s),Ms=n(s),_=o(s,"P",{"data-svelte-h":!0}),i(_)!=="svelte-1d19187"&&(_.innerHTML=Os),Js=n(s),u(v.$$.fragment,s),Ts=n(s),T=o(s,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),i(T)!=="svelte-1xmcaan"&&(T.innerHTML=Ks),ws=n(s),R=o(s,"P",{"data-svelte-h":!0}),i(R)!=="svelte-69vkpo"&&(R.innerHTML=st),js=n(s),u(B.$$.fragment,s),Zs=n(s),x=o(s,"P",{"data-svelte-h":!0}),i(x)!=="svelte-ac9qof"&&(x.innerHTML=tt),bs=n(s),u(Y.$$.fragment,s),Is=n(s),X=o(s,"P",{"data-svelte-h":!0}),i(X)!=="svelte-16ywdcf"&&(X.innerHTML=et),gs=n(s),u(C.$$.fragment,s),$s=n(s),W=o(s,"P",{"data-svelte-h":!0}),i(W)!=="svelte-8800e3"&&(W.innerHTML=at),Us=n(s),H=o(s,"P",{"data-svelte-h":!0}),i(H)!=="svelte-bvympb"&&(H.textContent=lt),Gs=n(s),u(F.$$.fragment,s),ks=n(s),V=o(s,"P",{"data-svelte-h":!0}),i(V)!=="svelte-1xrviqt"&&(V.innerHTML=nt),qs=n(s),u(D.$$.fragment,s),_s=n(s),u(L.$$.fragment,s),vs=n(s),A=o(s,"P",{"data-svelte-h":!0}),i(A)!=="svelte-6yorao"&&(A.innerHTML=pt),Rs=n(s),u(S.$$.fragment,s),Bs=n(s),E=o(s,"P",{"data-svelte-h":!0}),i(E)!=="svelte-ff2l4e"&&(E.innerHTML=ot),xs=n(s),u(P.$$.fragment,s),Ys=n(s),u(N.$$.fragment,s),Xs=n(s),Q=o(s,"P",{"data-svelte-h":!0}),i(Q)!=="svelte-lvrx1l"&&(Q.innerHTML=it),Cs=n(s),z=o(s,"P",{"data-svelte-h":!0}),i(z)!=="svelte-1y2guln"&&(z.innerHTML=dt),Ws=n(s),u(O.$$.fragment,s),Hs=n(s),u(K.$$.fragment,s),Fs=n(s),ts=o(s,"P",{}),ut(ts).forEach(e),this.h()},
/* h: sets attributes on the <meta> element and the tip/warning blockquotes. */
h(){es(y,"name","hf:doc:metadata"),es(y,"content",jt),es(M,"class","tip"),es(J,"class","tip"),es(T,"class","warning")},
/* m: attaches the <meta> to document.head and inserts every node/component into target `s` at anchor `t`. */
m(s,t){Mt(document.head,y),a(s,as,t),a(s,ss,t),a(s,ls,t),r(w,s,t),a(s,ns,t),r(j,s,t),a(s,ps,t),a(s,Z,t),a(s,os,t),a(s,M,t),a(s,is,t),r(b,s,t),a(s,ds,t),a(s,I,t),a(s,us,t),a(s,J,t),a(s,rs,t),a(s,g,t),a(s,cs,t),r($,s,t),a(s,ms,t),a(s,U,t),a(s,fs,t),r(G,s,t),a(s,hs,t),a(s,k,t),a(s,ys,t),r(q,s,t),a(s,Ms,t),a(s,_,t),a(s,Js,t),r(v,s,t),a(s,Ts,t),a(s,T,t),a(s,ws,t),a(s,R,t),a(s,js,t),r(B,s,t),a(s,Zs,t),a(s,x,t),a(s,bs,t),r(Y,s,t),a(s,Is,t),a(s,X,t),a(s,gs,t),r(C,s,t),a(s,$s,t),a(s,W,t),a(s,Us,t),a(s,H,t),a(s,Gs,t),r(F,s,t),a(s,ks,t),a(s,V,t),a(s,qs,t),r(D,s,t),a(s,_s,t),r(L,s,t),a(s,vs,t),a(s,A,t),a(s,Rs,t),r(S,s,t),a(s,Bs,t),a(s,E,t),a(s,xs,t),r(P,s,t),a(s,Ys,t),r(N,s,t),a(s,Xs,t),a(s,Q,t),a(s,Cs,t),a(s,z,t),a(s,Ws,t),r(O,s,t),a(s,Hs,t),r(K,s,t),a(s,Fs,t),a(s,ts,t),Vs=!0},
/* p: no-op imported from the scheduler chunk (the page has no reactive state of its own). */
p:ct,
/* i / o: forward to every child component via c(...) / m(...), guarded by the Vs flag. */
i(s){Vs||(c(w.$$.fragment,s),c(j.$$.fragment,s),c(b.$$.fragment,s),c($.$$.fragment,s),c(G.$$.fragment,s),c(q.$$.fragment,s),c(v.$$.fragment,s),c(B.$$.fragment,s),c(Y.$$.fragment,s),c(C.$$.fragment,s),c(F.$$.fragment,s),c(D.$$.fragment,s),c(L.$$.fragment,s),c(S.$$.fragment,s),c(P.$$.fragment,s),c(N.$$.fragment,s),c(O.$$.fragment,s),c(K.$$.fragment,s),Vs=!0)},
o(s){m(w.$$.fragment,s),m(j.$$.fragment,s),m(b.$$.fragment,s),m($.$$.fragment,s),m(G.$$.fragment,s),m(q.$$.fragment,s),m(v.$$.fragment,s),m(B.$$.fragment,s),m(Y.$$.fragment,s),m(C.$$.fragment,s),m(F.$$.fragment,s),m(D.$$.fragment,s),m(L.$$.fragment,s),m(S.$$.fragment,s),m(P.$$.fragment,s),m(N.$$.fragment,s),m(O.$$.fragment,s),m(K.$$.fragment,s),Vs=!1},
/* d: when `s` is truthy, calls e(...) on the top-level nodes; always calls e(y) on the <meta> and f(...) on each child component. */
d(s){s&&(e(as),e(ss),e(ls),e(ns),e(ps),e(Z),e(os),e(M),e(is),e(ds),e(I),e(us),e(J),e(rs),e(g),e(cs),e(ms),e(U),e(fs),e(hs),e(k),e(ys),e(Ms),e(_),e(Js),e(Ts),e(T),e(ws),e(R),e(js),e(Zs),e(x),e(bs),e(Is),e(X),e(gs),e($s),e(W),e(Us),e(H),e(Gs),e(ks),e(V),e(qs),e(_s),e(vs),e(A),e(Rs),e(Bs),e(E),e(xs),e(Ys),e(Xs),e(Q),e(Cs),e(z),e(Ws),e(Hs),e(Fs),e(ts)),e(y),f(w,s),f(j,s),f(b,s),f($,s),f(G,s),f(q,s),f(v,s),f(B,s),f(Y,s),f(C,s),f(F,s),f(D,s),f(L,s),f(S,s),f(P,s),f(N,s),f(O,s),f(K,s)}}}

/* Table of contents for the page, serialised as JSON; used as the `content` of the hf:doc:metadata <meta> element. */
const jt='{"title":"Create a document dataset","local":"create-a-document-dataset","sections":[{"title":"PdfFolder","local":"pdffolder","sections":[{"title":"OCR (Optical Character Recognition)","local":"ocr-optical-character-recognition","sections":[],"depth":3},{"title":"Upload dataset to the Hub","local":"upload-dataset-to-the-hub","sections":[],"depth":3}],"depth":2}],"depth":1}';

/* Instance function: registers a callback through `mt` that reads the "fw" query parameter and discards the result, then returns an empty array. NOTE(review): `mt` looks like an on-mount hook — confirm in the scheduler chunk. */
function Zt(Ls){return mt(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}

/* Exported page component: wires Zt (instance) and wt (fragment) into the base class via ht. */
class Ut extends ft{constructor(y){super(),ht(this,y,Zt,wt,rt,{})}}
export{Ut as component};

Xet Storage Details

Size:
22.5 kB
·
Xet hash:
6d2c45cc57f360d85fd963cdb7c70f45fc42cf2e896a88e5c0958cdb125b43f8

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.