Buckets:

rtrm's picture
download
raw
16.8 kB
/*
 * Minified, compiler-generated page chunk for the "Troubleshooting" documentation page.
 * Prefer fixing content in the upstream source referenced below
 * (docs/source/troubleshoot.mdx) instead of hand-editing this file.
 *
 * NOTE(review): the imported helpers are minified aliases; they look like the Svelte
 * runtime (element/space/claim/insert/detach, component create/mount/destroy) but their
 * real names are not visible from this file — confirm against ../chunks/index.*.js.
 */
import{s as bt,n as wt,o as vt}from"../chunks/scheduler.bdbef820.js";
import{S as _t,i as Tt,g as l,s as i,r as u,A as Mt,h as o,f as s,c as n,j as $t,u as p,x as r,k as yt,y as kt,a,v as c,d as h,t as m,w as f}from"../chunks/index.c0aea24a.js";
import{C as We}from"../chunks/CodeBlock.6ccca92e.js";
import{H as d,E as Ht}from"../chunks/EditOnGithub.725ee0c1.js";

/**
 * Builds the page fragment: declares every node/component slot plus the static HTML/text
 * snippets, instantiates the heading (`d`), code-block (`We`) and edit-link (`Ht`)
 * components, and returns an object with the methods `c`, `l`, `h`, `m`, `p`, `i`, `o`, `d`.
 *
 * @param {*} ze - Not referenced anywhere in the body.
 * @returns {{c: Function, l: Function, h: Function, m: Function, p: Function, i: Function, o: Function, d: Function}}
 */
function xt(ze){let g,ne,ae,le,$,oe,y,Ne=`This guide aims to provide you the tools and knowledge required to navigate some common issues. If the suggestions listed
in this guide do not cover your such situation, please refer to the <a href="#asking-for-help">Asking for Help</a> section to learn where to
find help with your specific issue.`,re,b,ue,w,pe,v,Ve=`If you are experiencing authentication issues when sharing a dataset on 🤗 Hub using <a href="/docs/datasets/pr_7227/en/package_reference/main_classes#datasets.Dataset.push_to_hub">Dataset.push_to_hub()</a> and a Hugging Face
access token:`,ce,_,Oe="<li>Make sure that the Hugging Face token you’re using to authenticate yourself is a token with <strong>write</strong> permission.</li> <li>On OSX, it may help to clean up all the huggingface.co passwords on your keychain access, as well as reconfigure <code>git config --global credential.helper osxkeychain</code>, before using <code>huggingface-cli login</code>.</li>",he,T,Ke='Alternatively, you can use SSH keys to authenticate yourself - read more in the <a href="https://huggingface.co/docs/hub/security-git-ssh" rel="nofollow">🤗 Hub documentation</a>.',me,M,fe,k,et=`When uploading large datasets to Hub, if the number of dataset shards is large, it can create too many commits for the Hub in a
short period. This will result in a connection error.
The connection error can also be caused by a HTTP 500 error returned by AWS S3 bucket that Hub uses internally.
In either situation, you can re-run <a href="/docs/datasets/pr_7227/en/package_reference/main_classes#datasets.Dataset.push_to_hub">Dataset.push_to_hub()</a> to proceed with the dataset upload. Hub will check the SHAs
of already uploaded shards to avoid reuploading them.
We are working on making upload process more robust to transient errors, so updating to the latest library version is
always a good idea.`,de,H,ge,x,tt="Uploading large datasets via <code>push_to_hub()</code> can result in an error:",$e,C,ye,U,st="If you encounter this issue, you need to upgrade the <code>datasets</code> library to the latest version (or at least <code>2.15.0</code>).",be,L,we,I,ve,j,at=`When creating a dataset from a folder, one of the most common issues is that the file structure does not follow the
expected format, or there’s an issue with the metadata file.`,_e,P,it="Learn more about required folder structure in corresponding documentation pages:",Te,J,nt='<li><a href="https://huggingface.co/docs/datasets/audio_dataset#audiofolder" rel="nofollow">AudioFolder</a></li> <li><a href="https://huggingface.co/docs/datasets/image_dataset#imagefolder" rel="nofollow">ImageFolder</a></li>',Me,D,ke,G,He,S,lt=`When creating a dataset, <a href="/docs/datasets/pr_7227/en/package_reference/main_classes#datasets.IterableDataset.from_generator">IterableDataset.from_generator()</a> and <a href="/docs/datasets/pr_7227/en/package_reference/main_classes#datasets.Dataset.from_generator">Dataset.from_generator()</a> expect a “picklable” generator function.
This is required to hash the function using <a href="https://docs.python.org/3/library/pickle.html" rel="nofollow"><code>pickle</code></a> to be able to cache the dataset on disk.`,xe,E,ot=`While generator functions are generally “picklable”, note that generator objects are not. So if you’re using a generator object,
you will encounter a <code>TypeError</code> like this:`,Ce,Z,Ue,B,rt=`This error can also occur when using a generator function that uses a global object that is not “picklable”, such as a
DB connection, for example. If that’s the case, you can initialize such object directly inside the generator function to
avoid this error.`,Le,F,Ie,q,ut=`Pickling errors can also happen in the multiprocess <a href="/docs/datasets/pr_7227/en/package_reference/main_classes#datasets.Dataset.map">Dataset.map()</a> - objects are pickled to be passed to child processes.
If the objects used in the transformation are not picklable, it’s not possible to cache the result of <code>map</code>, which leads to an error being raised.`,je,R,pt="Here are some ways to address this issue:",Pe,X,ct='<li>A universal solution to pickle issues is to make sure the objects (or generator classes) are pickable manually by implementing <code>__getstate__</code> / <code>__setstate__</code> / <code>__reduce__</code>.</li> <li>You can also provide your own unique hash in <code>map</code> with the <code>new_fingerprint</code> argument.</li> <li>You can also disable caching by calling <code>datasets.disable_caching()</code>, however, this is undesirable - <a href="cache">read more about importance of cache</a></li>',Je,Y,De,A,ht="If the above troubleshooting advice did not help you resolve your issue, reach out for help to the community and the team.",Ge,Q,Se,W,mt=`Ask for help on the Hugging Face forums - post your question in the <a href="https://discuss.huggingface.co/c/datasets/10" rel="nofollow">🤗Datasets category</a>
Make sure to write a descriptive post with relevant context about your setup and reproducible code to maximize the likelihood that your problem is solved!`,Ee,z,Ze,N,ft='Post a question on <a href="http://hf.co/join/discord" rel="nofollow">Discord</a>, and let the team and the community help you.',Be,V,Fe,O,dt=`If you are facing issues creating a custom dataset with a script on Hub, you can ask the Hugging Face team for help by opening
a discussion in the Community tab of your dataset with this message:`,qe,K,Re,ee,Xe,te,gt=`Finally, if you suspect to have found a bug related to the library itself, create an Issue on the 🤗 Datasets
<a href="https://github.com/huggingface/datasets/issues" rel="nofollow">GitHub repository</a>. Include context regarding the bug: code snippet to reproduce,
details about your environment and data, etc. to help us figure out what’s wrong and how we can fix it.`,Ye,se,Ae,ie,Qe;
// Instantiate the child components (section headings, code samples, edit link), then
// return the fragment object. Everything up to the object literal is one comma expression.
return $=new d({props:{title:"Troubleshooting",local:"troubleshooting",headingTag:"h1"}}),b=new d({props:{title:"Issues when uploading datasets with push_to_hub",local:"issues-when-uploading-datasets-with-pushtohub",headingTag:"h2"}}),w=new d({props:{title:"Authentication issues",local:"authentication-issues",headingTag:"h3"}}),M=new d({props:{title:"Lost connection on large dataset upload",local:"lost-connection-on-large-dataset-upload",headingTag:"h3"}}),H=new d({props:{title:"Too Many Requests",local:"too-many-requests",headingTag:"h3"}}),C=new We({props:{code:"SGZIdWJIVFRQRXJyb3IlM0ElMjA0MjklMjBDbGllbnQlMjBFcnJvciUzQSUyMFRvbyUyME1hbnklMjBSZXF1ZXN0cyUyMGZvciUyMHVybCUzQSUyMC4uLiUwQVlvdSUyMGhhdmUlMjBleGNlZWRlZCUyMG91ciUyMGhvdXJseSUyMHF1b3RhcyUyMGZvciUyMGFjdGlvbiUzQSUyMGNvbW1pdC4lMjBXZSUyMGludml0ZSUyMHlvdSUyMHRvJTIwcmV0cnklMjBsYXRlci4=",highlighted:`HfHubHTTPError: 429 Client Error: Too Many Requests <span class="hljs-keyword">for</span> url: ...
You have exceeded our hourly quotas <span class="hljs-keyword">for</span> action: commit. We invite you to retry later.`,wrap:!1}}),L=new d({props:{title:"Issues when creating datasets from custom data",local:"issues-when-creating-datasets-from-custom-data",headingTag:"h2"}}),I=new d({props:{title:"Loading images and audio from a folder",local:"loading-images-and-audio-from-a-folder",headingTag:"h3"}}),D=new d({props:{title:"Pickling issues",local:"pickling-issues",headingTag:"h3"}}),G=new d({props:{title:"Pickling issues when using Dataset.from_generator",local:"pickling-issues-when-using-datasetfromgenerator",headingTag:"h4"}}),Z=new We({props:{code:"VHlwZUVycm9yJTNBJTIwY2Fubm90JTIwcGlja2xlJTIwJ2dlbmVyYXRvciclMjBvYmplY3Q=",highlighted:'TypeError: cannot pickle <span class="hljs-string">&#x27;generator&#x27;</span> object',wrap:!1}}),F=new d({props:{title:"Pickling issues with Dataset.map",local:"pickling-issues-with-datasetmap",headingTag:"h4"}}),Y=new d({props:{title:"Asking for help",local:"asking-for-help",headingTag:"h2"}}),Q=new d({props:{title:"Forums",local:"forums",headingTag:"h3"}}),z=new d({props:{title:"Discord",local:"discord",headingTag:"h3"}}),V=new d({props:{title:"Community Discussions on 🤗 Hub",local:"community-discussions-on--hub",headingTag:"h3"}}),K=new We({props:{code:"JTIzJTIwRGF0YXNldCUyMHJld2lldyUyMHJlcXVlc3QlMjBmb3IlMjAlM0NEYXRhc2V0JTIwbmFtZSUzRSUwQSUwQSUyMyUyMyUyMERlc2NyaXB0aW9uJTBBJTBBJTNDYnJpZWYlMjBkZXNjcmlwdGlvbiUyMG9mJTIwdGhlJTIwZGF0YXNldCUzRSUwQSUwQSUyMyUyMyUyMEZpbGVzJTIwdG8lMjByZXZpZXclMEElMEEtJTIwZmlsZTElMEEtJTIwZmlsZTIlMEEtJTIwLi4uJTBBJTBBY2MlMjAlNDBsaG9lc3RxJTIwJTQwcG9saW5hZXRlcm5hJTIwJTQwbWFyaW9zYXNrbyUyMCU0MGFsYmVydHZpbGxhbm92YQ==",highlighted:`# Dataset rewiew request for &lt;Dataset name&gt;
## Description
&lt;brief description of the dataset&gt;
## Files to review
- file1
- file2
- ...
cc @lhoestq @polinaeterna @mariosasko @albertvillanova`,wrap:!1}}),ee=new d({props:{title:"GitHub Issues",local:"github-issues",headingTag:"h3"}}),se=new Ht({props:{source:"https://github.com/huggingface/datasets/blob/main/docs/source/troubleshoot.mdx"}}),
{
// c: build fresh nodes — <meta>, <p>, <ul> elements and separators from i() — assign the
// static snippets via innerHTML/textContent, call u() on each child component's fragment,
// then finish with h().
c(){g=l("meta"),ne=i(),ae=l("p"),le=i(),u($.$$.fragment),oe=i(),y=l("p"),y.innerHTML=Ne,re=i(),u(b.$$.fragment),ue=i(),u(w.$$.fragment),pe=i(),v=l("p"),v.innerHTML=Ve,ce=i(),_=l("ul"),_.innerHTML=Oe,he=i(),T=l("p"),T.innerHTML=Ke,me=i(),u(M.$$.fragment),fe=i(),k=l("p"),k.innerHTML=et,de=i(),u(H.$$.fragment),ge=i(),x=l("p"),x.innerHTML=tt,$e=i(),u(C.$$.fragment),ye=i(),U=l("p"),U.innerHTML=st,be=i(),u(L.$$.fragment),we=i(),u(I.$$.fragment),ve=i(),j=l("p"),j.textContent=at,_e=i(),P=l("p"),P.textContent=it,Te=i(),J=l("ul"),J.innerHTML=nt,Me=i(),u(D.$$.fragment),ke=i(),u(G.$$.fragment),He=i(),S=l("p"),S.innerHTML=lt,xe=i(),E=l("p"),E.innerHTML=ot,Ce=i(),u(Z.$$.fragment),Ue=i(),B=l("p"),B.textContent=rt,Le=i(),u(F.$$.fragment),Ie=i(),q=l("p"),q.innerHTML=ut,je=i(),R=l("p"),R.textContent=pt,Pe=i(),X=l("ul"),X.innerHTML=ct,Je=i(),u(Y.$$.fragment),De=i(),A=l("p"),A.textContent=ht,Ge=i(),u(Q.$$.fragment),Se=i(),W=l("p"),W.innerHTML=mt,Ee=i(),u(z.$$.fragment),Ze=i(),N=l("p"),N.innerHTML=ft,Be=i(),u(V.$$.fragment),Fe=i(),O=l("p"),O.textContent=dt,qe=i(),u(K.$$.fragment),Re=i(),u(ee.$$.fragment),Xe=i(),te=l("p"),te.innerHTML=gt,Ye=i(),u(se.$$.fragment),Ae=i(),ie=l("p"),this.h()},
// l: same node sequence as c(), but obtained from the existing nodes in `e` via o()/n()/p().
// A snippet is only re-assigned when r(node) differs from the expected "svelte-…" hash.
l(e){const t=Mt("svelte-u9bgzb",document.head);g=o(t,"META",{name:!0,content:!0}),t.forEach(s),ne=n(e),ae=o(e,"P",{}),$t(ae).forEach(s),le=n(e),p($.$$.fragment,e),oe=n(e),y=o(e,"P",{"data-svelte-h":!0}),r(y)!=="svelte-wtzssa"&&(y.innerHTML=Ne),re=n(e),p(b.$$.fragment,e),ue=n(e),p(w.$$.fragment,e),pe=n(e),v=o(e,"P",{"data-svelte-h":!0}),r(v)!=="svelte-1ljw89s"&&(v.innerHTML=Ve),ce=n(e),_=o(e,"UL",{"data-svelte-h":!0}),r(_)!=="svelte-1rjun7q"&&(_.innerHTML=Oe),he=n(e),T=o(e,"P",{"data-svelte-h":!0}),r(T)!=="svelte-lnijwm"&&(T.innerHTML=Ke),me=n(e),p(M.$$.fragment,e),fe=n(e),k=o(e,"P",{"data-svelte-h":!0}),r(k)!=="svelte-1s0y4qr"&&(k.innerHTML=et),de=n(e),p(H.$$.fragment,e),ge=n(e),x=o(e,"P",{"data-svelte-h":!0}),r(x)!=="svelte-18w8g2a"&&(x.innerHTML=tt),$e=n(e),p(C.$$.fragment,e),ye=n(e),U=o(e,"P",{"data-svelte-h":!0}),r(U)!=="svelte-okquay"&&(U.innerHTML=st),be=n(e),p(L.$$.fragment,e),we=n(e),p(I.$$.fragment,e),ve=n(e),j=o(e,"P",{"data-svelte-h":!0}),r(j)!=="svelte-1nemcci"&&(j.textContent=at),_e=n(e),P=o(e,"P",{"data-svelte-h":!0}),r(P)!=="svelte-1bjwb5o"&&(P.textContent=it),Te=n(e),J=o(e,"UL",{"data-svelte-h":!0}),r(J)!=="svelte-1upvcmt"&&(J.innerHTML=nt),Me=n(e),p(D.$$.fragment,e),ke=n(e),p(G.$$.fragment,e),He=n(e),S=o(e,"P",{"data-svelte-h":!0}),r(S)!=="svelte-vnn5lh"&&(S.innerHTML=lt),xe=n(e),E=o(e,"P",{"data-svelte-h":!0}),r(E)!=="svelte-1drlc3v"&&(E.innerHTML=ot),Ce=n(e),p(Z.$$.fragment,e),Ue=n(e),B=o(e,"P",{"data-svelte-h":!0}),r(B)!=="svelte-18tjciu"&&(B.textContent=rt),Le=n(e),p(F.$$.fragment,e),Ie=n(e),q=o(e,"P",{"data-svelte-h":!0}),r(q)!=="svelte-ap4xs5"&&(q.innerHTML=ut),je=n(e),R=o(e,"P",{"data-svelte-h":!0}),r(R)!=="svelte-18m3iu6"&&(R.textContent=pt),Pe=n(e),X=o(e,"UL",{"data-svelte-h":!0}),r(X)!=="svelte-1d6loxh"&&(X.innerHTML=ct),Je=n(e),p(Y.$$.fragment,e),De=n(e),A=o(e,"P",{"data-svelte-h":!0}),r(A)!=="svelte-kruaya"&&(A.textContent=ht),Ge=n(e),p(Q.$$.fragment,e),Se=n(e),W=o(e,"P",{"data-svelte-h":!0}),r(W)!=="svelte-vitubn"&&(W.innerHTML=mt),Ee=n(e),p(z.$$.fragment,e),Ze=n(e),N=o(e,"P",{"data-svelte-h":!0}),r(N)!=="svelte-qmbfjm"&&(N.innerHTML=ft),Be=n(e),p(V.$$.fragment,e),Fe=n(e),O=o(e,"P",{"data-svelte-h":!0}),r(O)!=="svelte-1d58zg"&&(O.textContent=dt),qe=n(e),p(K.$$.fragment,e),Re=n(e),p(ee.$$.fragment,e),Xe=n(e),te=o(e,"P",{"data-svelte-h":!0}),r(te)!=="svelte-2r68yz"&&(te.innerHTML=gt),Ye=n(e),p(se.$$.fragment,e),Ae=n(e),ie=o(e,"P",{}),$t(ie).forEach(s),this.h()},
// h: set the <meta> element's name to "hf:doc:metadata" and its content to the Ct JSON string.
h(){yt(g,"name","hf:doc:metadata"),yt(g,"content",Ct)},
// m: put <meta> into document.head, then insert every node/component into `e` before anchor `t`,
// in document order; Qe records that the child components have been transitioned in.
m(e,t){kt(document.head,g),a(e,ne,t),a(e,ae,t),a(e,le,t),c($,e,t),a(e,oe,t),a(e,y,t),a(e,re,t),c(b,e,t),a(e,ue,t),c(w,e,t),a(e,pe,t),a(e,v,t),a(e,ce,t),a(e,_,t),a(e,he,t),a(e,T,t),a(e,me,t),c(M,e,t),a(e,fe,t),a(e,k,t),a(e,de,t),c(H,e,t),a(e,ge,t),a(e,x,t),a(e,$e,t),c(C,e,t),a(e,ye,t),a(e,U,t),a(e,be,t),c(L,e,t),a(e,we,t),c(I,e,t),a(e,ve,t),a(e,j,t),a(e,_e,t),a(e,P,t),a(e,Te,t),a(e,J,t),a(e,Me,t),c(D,e,t),a(e,ke,t),c(G,e,t),a(e,He,t),a(e,S,t),a(e,xe,t),a(e,E,t),a(e,Ce,t),c(Z,e,t),a(e,Ue,t),a(e,B,t),a(e,Le,t),c(F,e,t),a(e,Ie,t),a(e,q,t),a(e,je,t),a(e,R,t),a(e,Pe,t),a(e,X,t),a(e,Je,t),c(Y,e,t),a(e,De,t),a(e,A,t),a(e,Ge,t),c(Q,e,t),a(e,Se,t),a(e,W,t),a(e,Ee,t),c(z,e,t),a(e,Ze,t),a(e,N,t),a(e,Be,t),c(V,e,t),a(e,Fe,t),a(e,O,t),a(e,qe,t),c(K,e,t),a(e,Re,t),c(ee,e,t),a(e,Xe,t),a(e,te,t),a(e,Ye,t),c(se,e,t),a(e,Ae,t),a(e,ie,t),Qe=!0},
// p: the imported wt is used as-is — this fragment defines no update logic of its own.
p:wt,
// i: call h() on every child component fragment once, guarded by the Qe flag.
i(e){Qe||(h($.$$.fragment,e),h(b.$$.fragment,e),h(w.$$.fragment,e),h(M.$$.fragment,e),h(H.$$.fragment,e),h(C.$$.fragment,e),h(L.$$.fragment,e),h(I.$$.fragment,e),h(D.$$.fragment,e),h(G.$$.fragment,e),h(Z.$$.fragment,e),h(F.$$.fragment,e),h(Y.$$.fragment,e),h(Q.$$.fragment,e),h(z.$$.fragment,e),h(V.$$.fragment,e),h(K.$$.fragment,e),h(ee.$$.fragment,e),h(se.$$.fragment,e),Qe=!0)},
// o: call m() on every child component fragment and clear the Qe flag.
o(e){m($.$$.fragment,e),m(b.$$.fragment,e),m(w.$$.fragment,e),m(M.$$.fragment,e),m(H.$$.fragment,e),m(C.$$.fragment,e),m(L.$$.fragment,e),m(I.$$.fragment,e),m(D.$$.fragment,e),m(G.$$.fragment,e),m(Z.$$.fragment,e),m(F.$$.fragment,e),m(Y.$$.fragment,e),m(Q.$$.fragment,e),m(z.$$.fragment,e),m(V.$$.fragment,e),m(K.$$.fragment,e),m(ee.$$.fragment,e),m(se.$$.fragment,e),Qe=!1},
// d: when `e` is truthy, pass every body node to s(); the <meta> node and all child
// components are always torn down via s(g) and f(component, e).
d(e){e&&(s(ne),s(ae),s(le),s(oe),s(y),s(re),s(ue),s(pe),s(v),s(ce),s(_),s(he),s(T),s(me),s(fe),s(k),s(de),s(ge),s(x),s($e),s(ye),s(U),s(be),s(we),s(ve),s(j),s(_e),s(P),s(Te),s(J),s(Me),s(ke),s(He),s(S),s(xe),s(E),s(Ce),s(Ue),s(B),s(Le),s(Ie),s(q),s(je),s(R),s(Pe),s(X),s(Je),s(De),s(A),s(Ge),s(Se),s(W),s(Ee),s(Ze),s(N),s(Be),s(Fe),s(O),s(qe),s(Re),s(Xe),s(te),s(Ye),s(Ae),s(ie)),s(g),f($,e),f(b,e),f(w,e),f(M,e),f(H,e),f(C,e),f(L,e),f(I,e),f(D,e),f(G,e),f(Z,e),f(F,e),f(Y,e),f(Q,e),f(z,e),f(V,e),f(K,e),f(ee,e),f(se,e)}}}

// JSON table of contents (title / anchor / nested sections / heading depth) that h() writes
// into the <meta name="hf:doc:metadata"> tag. Must stay on ONE line: a single-quoted string
// literal cannot contain a raw line break.
const Ct='{"title":"Troubleshooting","local":"troubleshooting","sections":[{"title":"Issues when uploading datasets with push_to_hub","local":"issues-when-uploading-datasets-with-pushtohub","sections":[{"title":"Authentication issues","local":"authentication-issues","sections":[],"depth":3},{"title":"Lost connection on large dataset upload","local":"lost-connection-on-large-dataset-upload","sections":[],"depth":3},{"title":"Too Many Requests","local":"too-many-requests","sections":[],"depth":3}],"depth":2},{"title":"Issues when creating datasets from custom data","local":"issues-when-creating-datasets-from-custom-data","sections":[{"title":"Loading images and audio from a folder","local":"loading-images-and-audio-from-a-folder","sections":[],"depth":3},{"title":"Pickling issues","local":"pickling-issues","sections":[{"title":"Pickling issues when using Dataset.from_generator","local":"pickling-issues-when-using-datasetfromgenerator","sections":[],"depth":4},{"title":"Pickling issues with Dataset.map","local":"pickling-issues-with-datasetmap","sections":[],"depth":4}],"depth":3}],"depth":2},{"title":"Asking for help","local":"asking-for-help","sections":[{"title":"Forums","local":"forums","sections":[],"depth":3},{"title":"Discord","local":"discord","sections":[],"depth":3},{"title":"Community Discussions on 🤗 Hub","local":"community-discussions-on--hub","sections":[],"depth":3},{"title":"GitHub Issues","local":"github-issues","sections":[],"depth":3}],"depth":2}],"depth":1}';

/**
 * Instance function handed to `Tt` by the component class below.
 *
 * Registers a callback through `vt` that reads the `fw` query parameter from the current
 * URL and discards the value (the read has no visible effect in this file), then returns
 * an empty array.
 *
 * @param {*} ze - Not referenced in the body.
 * @returns {Array} Always a new empty array.
 */
function Ut(ze) {
  vt(() => {
    new URLSearchParams(window.location.search).get("fw");
  });
  return [];
}

/**
 * Page component: wires the instance function (`Ut`) and the fragment factory (`xt`)
 * together by calling `Tt(this, options, Ut, xt, bt, {})` right after `super()`.
 */
class Jt extends _t {
  constructor(g) {
    super();
    Tt(this, g, Ut, xt, bt, {});
  }
}

export{Jt as component};

Xet Storage Details

Size:
16.8 kB
·
Xet hash:
6a5cd082f3935fa397b37876380b91a083b2f971478f2664a82b4bbde5d02ffc

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.