Buckets:
| import{s as bt,n as wt,o as vt}from"../chunks/scheduler.bdbef820.js";import{S as _t,i as Tt,g as l,s as i,r as u,A as Mt,h as o,f as s,c as n,j as $t,u as p,x as r,k as yt,y as kt,a,v as c,d as h,t as m,w as f}from"../chunks/index.c0aea24a.js";import{C as We}from"../chunks/CodeBlock.6ccca92e.js";import{H as d,E as Ht}from"../chunks/EditOnGithub.725ee0c1.js";function xt(ze){let g,ne,ae,le,$,oe,y,Ne=`This guide aims to provide you the tools and knowledge required to navigate some common issues. If the suggestions listed | |
| in this guide do not cover your such situation, please refer to the <a href="#asking-for-help">Asking for Help</a> section to learn where to | |
| find help with your specific issue.`,re,b,ue,w,pe,v,Ve=`If you are experiencing authentication issues when sharing a dataset on 🤗 Hub using <a href="/docs/datasets/pr_7227/en/package_reference/main_classes#datasets.Dataset.push_to_hub">Dataset.push_to_hub()</a> and a Hugging Face | |
| access token:`,ce,_,Oe="<li>Make sure that the Hugging Face token you’re using to authenticate yourself is a token with <strong>write</strong> permission.</li> <li>On OSX, it may help to clean up all the huggingface.co passwords on your keychain access, as well as reconfigure <code>git config --global credential.helper osxkeychain</code>, before using <code>huggingface-cli login</code>.</li>",he,T,Ke='Alternatively, you can use SSH keys to authenticate yourself - read more in the <a href="https://huggingface.co/docs/hub/security-git-ssh" rel="nofollow">🤗 Hub documentation</a>.',me,M,fe,k,et=`When uploading large datasets to Hub, if the number of dataset shards is large, it can create too many commits for the Hub in a | |
| short period. This will result in a connection error. | |
| The connection error can also be caused by a HTTP 500 error returned by AWS S3 bucket that Hub uses internally. | |
| In either situation, you can re-run <a href="/docs/datasets/pr_7227/en/package_reference/main_classes#datasets.Dataset.push_to_hub">Dataset.push_to_hub()</a> to proceed with the dataset upload. Hub will check the SHAs | |
| of already uploaded shards to avoid reuploading them. | |
| We are working on making upload process more robust to transient errors, so updating to the latest library version is | |
| always a good idea.`,de,H,ge,x,tt="Uploading large datasets via <code>push_to_hub()</code> can result in an error:",$e,C,ye,U,st="If you encounter this issue, you need to upgrade the <code>datasets</code> library to the latest version (or at least <code>2.15.0</code>).",be,L,we,I,ve,j,at=`When creating a dataset from a folder, one of the most common issues is that the file structure does not follow the | |
| expected format, or there’s an issue with the metadata file.`,_e,P,it="Learn more about required folder structure in corresponding documentation pages:",Te,J,nt='<li><a href="https://huggingface.co/docs/datasets/audio_dataset#audiofolder" rel="nofollow">AudioFolder</a></li> <li><a href="https://huggingface.co/docs/datasets/image_dataset#imagefolder" rel="nofollow">ImageFolder</a></li>',Me,D,ke,G,He,S,lt=`When creating a dataset, <a href="/docs/datasets/pr_7227/en/package_reference/main_classes#datasets.IterableDataset.from_generator">IterableDataset.from_generator()</a> and <a href="/docs/datasets/pr_7227/en/package_reference/main_classes#datasets.Dataset.from_generator">Dataset.from_generator()</a> expect a “picklable” generator function. | |
| This is required to hash the function using <a href="https://docs.python.org/3/library/pickle.html" rel="nofollow"><code>pickle</code></a> to be able to cache the dataset on disk.`,xe,E,ot=`While generator functions are generally “picklable”, note that generator objects are not. So if you’re using a generator object, | |
| you will encounter a <code>TypeError</code> like this:`,Ce,Z,Ue,B,rt=`This error can also occur when using a generator function that uses a global object that is not “picklable”, such as a | |
| DB connection, for example. If that’s the case, you can initialize such object directly inside the generator function to | |
| avoid this error.`,Le,F,Ie,q,ut=`Pickling errors can also happen in the multiprocess <a href="/docs/datasets/pr_7227/en/package_reference/main_classes#datasets.Dataset.map">Dataset.map()</a> - objects are pickled to be passed to child processes. | |
| If the objects used in the transformation are not picklable, it’s not possible to cache the result of <code>map</code>, which leads to an error being raised.`,je,R,pt="Here are some ways to address this issue:",Pe,X,ct='<li>A universal solution to pickle issues is to make sure the objects (or generator classes) are pickable manually by implementing <code>__getstate__</code> / <code>__setstate__</code> / <code>__reduce__</code>.</li> <li>You can also provide your own unique hash in <code>map</code> with the <code>new_fingerprint</code> argument.</li> <li>You can also disable caching by calling <code>datasets.disable_caching()</code>, however, this is undesirable - <a href="cache">read more about importance of cache</a></li>',Je,Y,De,A,ht="If the above troubleshooting advice did not help you resolve your issue, reach out for help to the community and the team.",Ge,Q,Se,W,mt=`Ask for help on the Hugging Face forums - post your question in the <a href="https://discuss.huggingface.co/c/datasets/10" rel="nofollow">🤗Datasets category</a> | |
| Make sure to write a descriptive post with relevant context about your setup and reproducible code to maximize the likelihood that your problem is solved!`,Ee,z,Ze,N,ft='Post a question on <a href="http://hf.co/join/discord" rel="nofollow">Discord</a>, and let the team and the community help you.',Be,V,Fe,O,dt=`If you are facing issues creating a custom dataset with a script on Hub, you can ask the Hugging Face team for help by opening | |
| a discussion in the Community tab of your dataset with this message:`,qe,K,Re,ee,Xe,te,gt=`Finally, if you suspect to have found a bug related to the library itself, create an Issue on the 🤗 Datasets | |
| <a href="https://github.com/huggingface/datasets/issues" rel="nofollow">GitHub repository</a>. Include context regarding the bug: code snippet to reproduce, | |
| details about your environment and data, etc. to help us figure out what’s wrong and how we can fix it.`,Ye,se,Ae,ie,Qe;return $=new d({props:{title:"Troubleshooting",local:"troubleshooting",headingTag:"h1"}}),b=new d({props:{title:"Issues when uploading datasets with push_to_hub",local:"issues-when-uploading-datasets-with-pushtohub",headingTag:"h2"}}),w=new d({props:{title:"Authentication issues",local:"authentication-issues",headingTag:"h3"}}),M=new d({props:{title:"Lost connection on large dataset upload",local:"lost-connection-on-large-dataset-upload",headingTag:"h3"}}),H=new d({props:{title:"Too Many Requests",local:"too-many-requests",headingTag:"h3"}}),C=new We({props:{code:"SGZIdWJIVFRQRXJyb3IlM0ElMjA0MjklMjBDbGllbnQlMjBFcnJvciUzQSUyMFRvbyUyME1hbnklMjBSZXF1ZXN0cyUyMGZvciUyMHVybCUzQSUyMC4uLiUwQVlvdSUyMGhhdmUlMjBleGNlZWRlZCUyMG91ciUyMGhvdXJseSUyMHF1b3RhcyUyMGZvciUyMGFjdGlvbiUzQSUyMGNvbW1pdC4lMjBXZSUyMGludml0ZSUyMHlvdSUyMHRvJTIwcmV0cnklMjBsYXRlci4=",highlighted:`HfHubHTTPError: 429 Client Error: Too Many Requests <span class="hljs-keyword">for</span> url: ... | |
| You have exceeded our hourly quotas <span class="hljs-keyword">for</span> action: commit. We invite you to retry later.`,wrap:!1}}),L=new d({props:{title:"Issues when creating datasets from custom data",local:"issues-when-creating-datasets-from-custom-data",headingTag:"h2"}}),I=new d({props:{title:"Loading images and audio from a folder",local:"loading-images-and-audio-from-a-folder",headingTag:"h3"}}),D=new d({props:{title:"Pickling issues",local:"pickling-issues",headingTag:"h3"}}),G=new d({props:{title:"Pickling issues when using Dataset.from_generator",local:"pickling-issues-when-using-datasetfromgenerator",headingTag:"h4"}}),Z=new We({props:{code:"VHlwZUVycm9yJTNBJTIwY2Fubm90JTIwcGlja2xlJTIwJ2dlbmVyYXRvciclMjBvYmplY3Q=",highlighted:'TypeError: cannot pickle <span class="hljs-string">'generator'</span> object',wrap:!1}}),F=new d({props:{title:"Pickling issues with Dataset.map",local:"pickling-issues-with-datasetmap",headingTag:"h4"}}),Y=new d({props:{title:"Asking for help",local:"asking-for-help",headingTag:"h2"}}),Q=new d({props:{title:"Forums",local:"forums",headingTag:"h3"}}),z=new d({props:{title:"Discord",local:"discord",headingTag:"h3"}}),V=new d({props:{title:"Community Discussions on 🤗 Hub",local:"community-discussions-on--hub",headingTag:"h3"}}),K=new We({props:{code:"JTIzJTIwRGF0YXNldCUyMHJld2lldyUyMHJlcXVlc3QlMjBmb3IlMjAlM0NEYXRhc2V0JTIwbmFtZSUzRSUwQSUwQSUyMyUyMyUyMERlc2NyaXB0aW9uJTBBJTBBJTNDYnJpZWYlMjBkZXNjcmlwdGlvbiUyMG9mJTIwdGhlJTIwZGF0YXNldCUzRSUwQSUwQSUyMyUyMyUyMEZpbGVzJTIwdG8lMjByZXZpZXclMEElMEEtJTIwZmlsZTElMEEtJTIwZmlsZTIlMEEtJTIwLi4uJTBBJTBBY2MlMjAlNDBsaG9lc3RxJTIwJTQwcG9saW5hZXRlcm5hJTIwJTQwbWFyaW9zYXNrbyUyMCU0MGFsYmVydHZpbGxhbm92YQ==",highlighted:`# Dataset rewiew request for <Dataset name> | |
| ## Description | |
| <brief description of the dataset> | |
| ## Files to review | |
| - file1 | |
| - file2 | |
| - ... | |
| cc @lhoestq @polinaeterna @mariosasko @albertvillanova`,wrap:!1}}),ee=new d({props:{title:"GitHub Issues",local:"github-issues",headingTag:"h3"}}),se=new Ht({props:{source:"https://github.com/huggingface/datasets/blob/main/docs/source/troubleshoot.mdx"}}),{c(){g=l("meta"),ne=i(),ae=l("p"),le=i(),u($.$$.fragment),oe=i(),y=l("p"),y.innerHTML=Ne,re=i(),u(b.$$.fragment),ue=i(),u(w.$$.fragment),pe=i(),v=l("p"),v.innerHTML=Ve,ce=i(),_=l("ul"),_.innerHTML=Oe,he=i(),T=l("p"),T.innerHTML=Ke,me=i(),u(M.$$.fragment),fe=i(),k=l("p"),k.innerHTML=et,de=i(),u(H.$$.fragment),ge=i(),x=l("p"),x.innerHTML=tt,$e=i(),u(C.$$.fragment),ye=i(),U=l("p"),U.innerHTML=st,be=i(),u(L.$$.fragment),we=i(),u(I.$$.fragment),ve=i(),j=l("p"),j.textContent=at,_e=i(),P=l("p"),P.textContent=it,Te=i(),J=l("ul"),J.innerHTML=nt,Me=i(),u(D.$$.fragment),ke=i(),u(G.$$.fragment),He=i(),S=l("p"),S.innerHTML=lt,xe=i(),E=l("p"),E.innerHTML=ot,Ce=i(),u(Z.$$.fragment),Ue=i(),B=l("p"),B.textContent=rt,Le=i(),u(F.$$.fragment),Ie=i(),q=l("p"),q.innerHTML=ut,je=i(),R=l("p"),R.textContent=pt,Pe=i(),X=l("ul"),X.innerHTML=ct,Je=i(),u(Y.$$.fragment),De=i(),A=l("p"),A.textContent=ht,Ge=i(),u(Q.$$.fragment),Se=i(),W=l("p"),W.innerHTML=mt,Ee=i(),u(z.$$.fragment),Ze=i(),N=l("p"),N.innerHTML=ft,Be=i(),u(V.$$.fragment),Fe=i(),O=l("p"),O.textContent=dt,qe=i(),u(K.$$.fragment),Re=i(),u(ee.$$.fragment),Xe=i(),te=l("p"),te.innerHTML=gt,Ye=i(),u(se.$$.fragment),Ae=i(),ie=l("p"),this.h()},l(e){const t=Mt("svelte-u9bgzb",document.head);g=o(t,"META",{name:!0,content:!0}),t.forEach(s),ne=n(e),ae=o(e,"P",{}),$t(ae).forEach(s),le=n(e),p($.$$.fragment,e),oe=n(e),y=o(e,"P",{"data-svelte-h":!0}),r(y)!=="svelte-wtzssa"&&(y.innerHTML=Ne),re=n(e),p(b.$$.fragment,e),ue=n(e),p(w.$$.fragment,e),pe=n(e),v=o(e,"P",{"data-svelte-h":!0}),r(v)!=="svelte-1ljw89s"&&(v.innerHTML=Ve),ce=n(e),_=o(e,"UL",{"data-svelte-h":!0}),r(_)!=="svelte-1rjun7q"&&(_.innerHTML=Oe),he=n(e),T=o(e,"P",{"data-svelte-h":!0}),r(T)!=="svelte-lnijwm"&&(T.innerHTML=Ke),me=n(e),p(M.$$.fragment,e),fe=n(e),k=o(e,"P",{"data-svelte-h":!0}),r(k)!=="svelte-1s0y4qr"&&(k.innerHTML=et),de=n(e),p(H.$$.fragment,e),ge=n(e),x=o(e,"P",{"data-svelte-h":!0}),r(x)!=="svelte-18w8g2a"&&(x.innerHTML=tt),$e=n(e),p(C.$$.fragment,e),ye=n(e),U=o(e,"P",{"data-svelte-h":!0}),r(U)!=="svelte-okquay"&&(U.innerHTML=st),be=n(e),p(L.$$.fragment,e),we=n(e),p(I.$$.fragment,e),ve=n(e),j=o(e,"P",{"data-svelte-h":!0}),r(j)!=="svelte-1nemcci"&&(j.textContent=at),_e=n(e),P=o(e,"P",{"data-svelte-h":!0}),r(P)!=="svelte-1bjwb5o"&&(P.textContent=it),Te=n(e),J=o(e,"UL",{"data-svelte-h":!0}),r(J)!=="svelte-1upvcmt"&&(J.innerHTML=nt),Me=n(e),p(D.$$.fragment,e),ke=n(e),p(G.$$.fragment,e),He=n(e),S=o(e,"P",{"data-svelte-h":!0}),r(S)!=="svelte-vnn5lh"&&(S.innerHTML=lt),xe=n(e),E=o(e,"P",{"data-svelte-h":!0}),r(E)!=="svelte-1drlc3v"&&(E.innerHTML=ot),Ce=n(e),p(Z.$$.fragment,e),Ue=n(e),B=o(e,"P",{"data-svelte-h":!0}),r(B)!=="svelte-18tjciu"&&(B.textContent=rt),Le=n(e),p(F.$$.fragment,e),Ie=n(e),q=o(e,"P",{"data-svelte-h":!0}),r(q)!=="svelte-ap4xs5"&&(q.innerHTML=ut),je=n(e),R=o(e,"P",{"data-svelte-h":!0}),r(R)!=="svelte-18m3iu6"&&(R.textContent=pt),Pe=n(e),X=o(e,"UL",{"data-svelte-h":!0}),r(X)!=="svelte-1d6loxh"&&(X.innerHTML=ct),Je=n(e),p(Y.$$.fragment,e),De=n(e),A=o(e,"P",{"data-svelte-h":!0}),r(A)!=="svelte-kruaya"&&(A.textContent=ht),Ge=n(e),p(Q.$$.fragment,e),Se=n(e),W=o(e,"P",{"data-svelte-h":!0}),r(W)!=="svelte-vitubn"&&(W.innerHTML=mt),Ee=n(e),p(z.$$.fragment,e),Ze=n(e),N=o(e,"P",{"data-svelte-h":!0}),r(N)!=="svelte-qmbfjm"&&(N.innerHTML=ft),Be=n(e),p(V.$$.fragment,e),Fe=n(e),O=o(e,"P",{"data-svelte-h":!0}),r(O)!=="svelte-1d58zg"&&(O.textContent=dt),qe=n(e),p(K.$$.fragment,e),Re=n(e),p(ee.$$.fragment,e),Xe=n(e),te=o(e,"P",{"data-svelte-h":!0}),r(te)!=="svelte-2r68yz"&&(te.innerHTML=gt),Ye=n(e),p(se.$$.fragment,e),Ae=n(e),ie=o(e,"P",{}),$t(ie).forEach(s),this.h()},h(){yt(g,"name","hf:doc:metadata"),yt(g,"content",Ct)},m(e,t){kt(document.head,g),a(e,ne,t),a(e,ae,t),a(e,le,t),c($,e,t),a(e,oe,t),a(e,y,t),a(e,re,t),c(b,e,t),a(e,ue,t),c(w,e,t),a(e,pe,t),a(e,v,t),a(e,ce,t),a(e,_,t),a(e,he,t),a(e,T,t),a(e,me,t),c(M,e,t),a(e,fe,t),a(e,k,t),a(e,de,t),c(H,e,t),a(e,ge,t),a(e,x,t),a(e,$e,t),c(C,e,t),a(e,ye,t),a(e,U,t),a(e,be,t),c(L,e,t),a(e,we,t),c(I,e,t),a(e,ve,t),a(e,j,t),a(e,_e,t),a(e,P,t),a(e,Te,t),a(e,J,t),a(e,Me,t),c(D,e,t),a(e,ke,t),c(G,e,t),a(e,He,t),a(e,S,t),a(e,xe,t),a(e,E,t),a(e,Ce,t),c(Z,e,t),a(e,Ue,t),a(e,B,t),a(e,Le,t),c(F,e,t),a(e,Ie,t),a(e,q,t),a(e,je,t),a(e,R,t),a(e,Pe,t),a(e,X,t),a(e,Je,t),c(Y,e,t),a(e,De,t),a(e,A,t),a(e,Ge,t),c(Q,e,t),a(e,Se,t),a(e,W,t),a(e,Ee,t),c(z,e,t),a(e,Ze,t),a(e,N,t),a(e,Be,t),c(V,e,t),a(e,Fe,t),a(e,O,t),a(e,qe,t),c(K,e,t),a(e,Re,t),c(ee,e,t),a(e,Xe,t),a(e,te,t),a(e,Ye,t),c(se,e,t),a(e,Ae,t),a(e,ie,t),Qe=!0},p:wt,i(e){Qe||(h($.$$.fragment,e),h(b.$$.fragment,e),h(w.$$.fragment,e),h(M.$$.fragment,e),h(H.$$.fragment,e),h(C.$$.fragment,e),h(L.$$.fragment,e),h(I.$$.fragment,e),h(D.$$.fragment,e),h(G.$$.fragment,e),h(Z.$$.fragment,e),h(F.$$.fragment,e),h(Y.$$.fragment,e),h(Q.$$.fragment,e),h(z.$$.fragment,e),h(V.$$.fragment,e),h(K.$$.fragment,e),h(ee.$$.fragment,e),h(se.$$.fragment,e),Qe=!0)},o(e){m($.$$.fragment,e),m(b.$$.fragment,e),m(w.$$.fragment,e),m(M.$$.fragment,e),m(H.$$.fragment,e),m(C.$$.fragment,e),m(L.$$.fragment,e),m(I.$$.fragment,e),m(D.$$.fragment,e),m(G.$$.fragment,e),m(Z.$$.fragment,e),m(F.$$.fragment,e),m(Y.$$.fragment,e),m(Q.$$.fragment,e),m(z.$$.fragment,e),m(V.$$.fragment,e),m(K.$$.fragment,e),m(ee.$$.fragment,e),m(se.$$.fragment,e),Qe=!1},d(e){e&&(s(ne),s(ae),s(le),s(oe),s(y),s(re),s(ue),s(pe),s(v),s(ce),s(_),s(he),s(T),s(me),s(fe),s(k),s(de),s(ge),s(x),s($e),s(ye),s(U),s(be),s(we),s(ve),s(j),s(_e),s(P),s(Te),s(J),s(Me),s(ke),s(He),s(S),s(xe),s(E),s(Ce),s(Ue),s(B),s(Le),s(Ie),s(q),s(je),s(R),s(Pe),s(X),s(Je),s(De),s(A),s(Ge),s(Se),s(W),s(Ee),s(Ze),s(N),s(Be),s(Fe),s(O),s(qe),s(Re),s(Xe),s(te),s(Ye),s(Ae),s(ie)),s(g),f($,e),f(b,e),f(w,e),f(M,e),f(H,e),f(C,e),f(L,e),f(I,e),f(D,e),f(G,e),f(Z,e),f(F,e),f(Y,e),f(Q,e),f(z,e),f(V,e),f(K,e),f(ee,e),f(se,e)}}}const Ct='{"title":"Troubleshooting","local":"troubleshooting","sections":[{"title":"Issues when uploading datasets with push_to_hub","local":"issues-when-uploading-datasets-with-pushtohub","sections":[{"title":"Authentication issues","local":"authentication-issues","sections":[],"depth":3},{"title":"Lost connection on large dataset upload","local":"lost-connection-on-large-dataset-upload","sections":[],"depth":3},{"title":"Too Many Requests","local":"too-many-requests","sections":[],"depth":3}],"depth":2},{"title":"Issues when creating datasets from custom data","local":"issues-when-creating-datasets-from-custom-data","sections":[{"title":"Loading images and audio from a folder","local":"loading-images-and-audio-from-a-folder","sections":[],"depth":3},{"title":"Pickling issues","local":"pickling-issues","sections":[{"title":"Pickling issues when using Dataset.from_generator","local":"pickling-issues-when-using-datasetfromgenerator","sections":[],"depth":4},{"title":"Pickling issues with Dataset.map","local":"pickling-issues-with-datasetmap","sections":[],"depth":4}],"depth":3}],"depth":2},{"title":"Asking for help","local":"asking-for-help","sections":[{"title":"Forums","local":"forums","sections":[],"depth":3},{"title":"Discord","local":"discord","sections":[],"depth":3},{"title":"Community Discussions on 🤗 Hub","local":"community-discussions-on--hub","sections":[],"depth":3},{"title":"GitHub Issues","local":"github-issues","sections":[],"depth":3}],"depth":2}],"depth":1}';function Ut(ze){return vt(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Jt extends _t{constructor(g){super(),Tt(this,g,Ut,xt,bt,{})}}export{Jt as component}; | |
Xet Storage Details
- Size:
- 16.8 kB
- Xet hash:
- 6a5cd082f3935fa397b37876380b91a083b2f971478f2664a82b4bbde5d02ffc
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.