Buckets:
| import{s as vt,n as _t,o as Tt}from"../chunks/scheduler.d75c11ed.js";import{S as Mt,i as kt,e as l,s as i,c as u,h as Ht,a as o,d as s,b as n,f as bt,g as p,j as r,k as wt,l as xt,m as a,n as m,t as f,o as c,p as h}from"../chunks/index.4ec9dfe9.js";import{C as Ct,H as d,E as Lt}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.6e4d9034.js";import{C as Ne}from"../chunks/CodeBlock.5b8abc23.js";function Ut(Ve){let g,le,ie,oe,$,re,y,ue,b,Oe=`This guide aims to provide you the tools and knowledge required to navigate some common issues. If the suggestions listed | |
| in this guide do not cover your such situation, please refer to the <a href="#asking-for-help">Asking for Help</a> section to learn where to | |
| find help with your specific issue.`,pe,w,me,v,fe,_,Ke=`If you are experiencing authentication issues when sharing a dataset on 🤗 Hub using <a href="/docs/datasets/pr_8154/en/package_reference/main_classes#datasets.Dataset.push_to_hub">Dataset.push_to_hub()</a> and a Hugging Face | |
| access token:`,ce,T,et="<li>Make sure that the Hugging Face token you’re using to authenticate yourself is a token with <strong>write</strong> permission.</li> <li>On OSX, it may help to clean up all the huggingface.co passwords on your keychain access, as well as reconfigure <code>git config --global credential.helper osxkeychain</code>, before using <code>huggingface-cli login</code>.</li>",he,M,tt='Alternatively, you can use SSH keys to authenticate yourself - read more in the <a href="https://huggingface.co/docs/hub/security-git-ssh" rel="nofollow">🤗 Hub documentation</a>.',de,k,ge,H,st=`When uploading large datasets to Hub, if the number of dataset shards is large, it can create too many commits for the Hub in a | |
| short period. This will result in a connection error. | |
| The connection error can also be caused by a HTTP 500 error returned by AWS S3 bucket that Hub uses internally. | |
| In either situation, you can re-run <a href="/docs/datasets/pr_8154/en/package_reference/main_classes#datasets.Dataset.push_to_hub">Dataset.push_to_hub()</a> to proceed with the dataset upload. Hub will check the SHAs | |
| of already uploaded shards to avoid reuploading them. | |
| We are working on making upload process more robust to transient errors, so updating to the latest library version is | |
| always a good idea.`,$e,x,ye,C,at="Uploading large datasets via <code>push_to_hub()</code> can result in an error:",be,L,we,U,it="If you encounter this issue, you need to upgrade the <code>datasets</code> library to the latest version (or at least <code>2.15.0</code>).",ve,I,_e,j,Te,P,nt=`When creating a dataset from a folder, one of the most common issues is that the file structure does not follow the | |
| expected format, or there’s an issue with the metadata file.`,Me,J,lt="Learn more about required folder structure in corresponding documentation pages:",ke,D,ot='<li><a href="https://huggingface.co/docs/datasets/audio_dataset#audiofolder" rel="nofollow">AudioFolder</a></li> <li><a href="https://huggingface.co/docs/datasets/image_dataset#imagefolder" rel="nofollow">ImageFolder</a></li>',He,S,xe,G,Ce,E,rt=`When creating a dataset, <a href="/docs/datasets/pr_8154/en/package_reference/main_classes#datasets.IterableDataset.from_generator">IterableDataset.from_generator()</a> and <a href="/docs/datasets/pr_8154/en/package_reference/main_classes#datasets.Dataset.from_generator">Dataset.from_generator()</a> expect a “picklable” generator function. | |
| This is required to hash the function using <a href="https://docs.python.org/3/library/pickle.html" rel="nofollow"><code>pickle</code></a> to be able to cache the dataset on disk.`,Le,Z,ut=`While generator functions are generally “picklable”, note that generator objects are not. So if you’re using a generator object, | |
| you will encounter a <code>TypeError</code> like this:`,Ue,B,Ie,F,pt=`This error can also occur when using a generator function that uses a global object that is not “picklable”, such as a | |
| DB connection, for example. If that’s the case, you can initialize such object directly inside the generator function to | |
| avoid this error.`,je,q,Pe,R,mt=`Pickling errors can also happen in the multiprocess <a href="/docs/datasets/pr_8154/en/package_reference/main_classes#datasets.Dataset.map">Dataset.map()</a> - objects are pickled to be passed to child processes. | |
| If the objects used in the transformation are not picklable, it’s not possible to cache the result of <code>map</code>, which leads to an error being raised.`,Je,X,ft="Here are some ways to address this issue:",De,A,ct='<li>A universal solution to pickle issues is to make sure the objects (or generator classes) are pickable manually by implementing <code>__getstate__</code> / <code>__setstate__</code> / <code>__reduce__</code>.</li> <li>You can also provide your own unique hash in <code>map</code> with the <code>new_fingerprint</code> argument.</li> <li>You can also disable caching by calling <code>datasets.disable_caching()</code>, however, this is undesirable - <a href="cache">read more about importance of cache</a></li>',Se,Y,Ge,z,ht="If the above troubleshooting advice did not help you resolve your issue, reach out for help to the community and the team.",Ee,Q,Ze,W,dt=`Ask for help on the Hugging Face forums - post your question in the <a href="https://discuss.huggingface.co/c/datasets/10" rel="nofollow">🤗Datasets category</a> | |
| Make sure to write a descriptive post with relevant context about your setup and reproducible code to maximize the likelihood that your problem is solved!`,Be,N,Fe,V,gt='Post a question on <a href="http://hf.co/join/discord" rel="nofollow">Discord</a>, and let the team and the community help you.',qe,O,Re,K,$t="If you are facing issues creating a custom dataset on Hub, you can ask the Hugging Face team for help by opening a discussion in the Community tab of your dataset with this message:",Xe,ee,Ae,te,Ye,se,yt=`Finally, if you suspect to have found a bug related to the library itself, create an Issue on the 🤗 Datasets | |
| <a href="https://github.com/huggingface/datasets/issues" rel="nofollow">GitHub repository</a>. Include context regarding the bug: code snippet to reproduce, | |
| details about your environment and data, etc. to help us figure out what’s wrong and how we can fix it.`,ze,ae,Qe,ne,We;return $=new Ct({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),y=new d({props:{title:"Troubleshooting",local:"troubleshooting",headingTag:"h1"}}),w=new d({props:{title:"Issues when uploading datasets with push_to_hub",local:"issues-when-uploading-datasets-with-pushtohub",headingTag:"h2"}}),v=new d({props:{title:"Authentication issues",local:"authentication-issues",headingTag:"h3"}}),k=new d({props:{title:"Lost connection on large dataset upload",local:"lost-connection-on-large-dataset-upload",headingTag:"h3"}}),x=new d({props:{title:"Too Many Requests",local:"too-many-requests",headingTag:"h3"}}),L=new Ne({props:{code:"SGZIdWJIVFRQRXJyb3IlM0ElMjA0MjklMjBDbGllbnQlMjBFcnJvciUzQSUyMFRvbyUyME1hbnklMjBSZXF1ZXN0cyUyMGZvciUyMHVybCUzQSUyMC4uLiUwQVlvdSUyMGhhdmUlMjBleGNlZWRlZCUyMG91ciUyMGhvdXJseSUyMHF1b3RhcyUyMGZvciUyMGFjdGlvbiUzQSUyMGNvbW1pdC4lMjBXZSUyMGludml0ZSUyMHlvdSUyMHRvJTIwcmV0cnklMjBsYXRlci4=",highlighted:`HfHubHTTPError: 429 Client Error: Too Many Requests <span class="hljs-keyword">for</span> url: ... | |
| You have exceeded our hourly quotas <span class="hljs-keyword">for</span> action: commit. We invite you to retry later.`,wrap:!1}}),I=new d({props:{title:"Issues when creating datasets from custom data",local:"issues-when-creating-datasets-from-custom-data",headingTag:"h2"}}),j=new d({props:{title:"Loading images and audio from a folder",local:"loading-images-and-audio-from-a-folder",headingTag:"h3"}}),S=new d({props:{title:"Pickling issues",local:"pickling-issues",headingTag:"h3"}}),G=new d({props:{title:"Pickling issues when using Dataset.from_generator",local:"pickling-issues-when-using-datasetfromgenerator",headingTag:"h4"}}),B=new Ne({props:{code:"VHlwZUVycm9yJTNBJTIwY2Fubm90JTIwcGlja2xlJTIwJ2dlbmVyYXRvciclMjBvYmplY3Q=",highlighted:'TypeError: cannot pickle <span class="hljs-string">'generator'</span> object',wrap:!1}}),q=new d({props:{title:"Pickling issues with Dataset.map",local:"pickling-issues-with-datasetmap",headingTag:"h4"}}),Y=new d({props:{title:"Asking for help",local:"asking-for-help",headingTag:"h2"}}),Q=new d({props:{title:"Forums",local:"forums",headingTag:"h3"}}),N=new d({props:{title:"Discord",local:"discord",headingTag:"h3"}}),O=new d({props:{title:"Community Discussions on 🤗 Hub",local:"community-discussions-on--hub",headingTag:"h3"}}),ee=new Ne({props:{code:"JTIzJTIwRGF0YXNldCUyMHJld2lldyUyMHJlcXVlc3QlMjBmb3IlMjAlM0NEYXRhc2V0JTIwbmFtZSUzRSUwQSUwQSUyMyUyMyUyMERlc2NyaXB0aW9uJTBBJTBBJTNDYnJpZWYlMjBkZXNjcmlwdGlvbiUyMG9mJTIwdGhlJTIwZGF0YXNldCUzRSUwQSUwQSUyMyUyMyUyMEZpbGVzJTIwdG8lMjByZXZpZXclMEElMEEtJTIwZmlsZTElMEEtJTIwZmlsZTIlMEEtJTIwLi4uJTBBJTBBY2MlMjAlNDBsaG9lc3RxJTIwJTQwYWxiZXJ0dmlsbGFub3Zh",highlighted:`# Dataset rewiew request for <Dataset name> | |
| ## Description | |
| <brief description of the dataset> | |
| ## Files to review | |
| - file1 | |
| - file2 | |
| - ... | |
| cc @lhoestq @albertvillanova`,wrap:!1}}),te=new d({props:{title:"GitHub Issues",local:"github-issues",headingTag:"h3"}}),ae=new Lt({props:{source:"https://github.com/huggingface/datasets/blob/main/docs/source/troubleshoot.mdx"}}),{c(){g=l("meta"),le=i(),ie=l("p"),oe=i(),u($.$$.fragment),re=i(),u(y.$$.fragment),ue=i(),b=l("p"),b.innerHTML=Oe,pe=i(),u(w.$$.fragment),me=i(),u(v.$$.fragment),fe=i(),_=l("p"),_.innerHTML=Ke,ce=i(),T=l("ul"),T.innerHTML=et,he=i(),M=l("p"),M.innerHTML=tt,de=i(),u(k.$$.fragment),ge=i(),H=l("p"),H.innerHTML=st,$e=i(),u(x.$$.fragment),ye=i(),C=l("p"),C.innerHTML=at,be=i(),u(L.$$.fragment),we=i(),U=l("p"),U.innerHTML=it,ve=i(),u(I.$$.fragment),_e=i(),u(j.$$.fragment),Te=i(),P=l("p"),P.textContent=nt,Me=i(),J=l("p"),J.textContent=lt,ke=i(),D=l("ul"),D.innerHTML=ot,He=i(),u(S.$$.fragment),xe=i(),u(G.$$.fragment),Ce=i(),E=l("p"),E.innerHTML=rt,Le=i(),Z=l("p"),Z.innerHTML=ut,Ue=i(),u(B.$$.fragment),Ie=i(),F=l("p"),F.textContent=pt,je=i(),u(q.$$.fragment),Pe=i(),R=l("p"),R.innerHTML=mt,Je=i(),X=l("p"),X.textContent=ft,De=i(),A=l("ul"),A.innerHTML=ct,Se=i(),u(Y.$$.fragment),Ge=i(),z=l("p"),z.textContent=ht,Ee=i(),u(Q.$$.fragment),Ze=i(),W=l("p"),W.innerHTML=dt,Be=i(),u(N.$$.fragment),Fe=i(),V=l("p"),V.innerHTML=gt,qe=i(),u(O.$$.fragment),Re=i(),K=l("p"),K.textContent=$t,Xe=i(),u(ee.$$.fragment),Ae=i(),u(te.$$.fragment),Ye=i(),se=l("p"),se.innerHTML=yt,ze=i(),u(ae.$$.fragment),Qe=i(),ne=l("p"),this.h()},l(e){const t=Ht("svelte-u9bgzb",document.head);g=o(t,"META",{name:!0,content:!0}),t.forEach(s),le=n(e),ie=o(e,"P",{}),bt(ie).forEach(s),oe=n(e),p($.$$.fragment,e),re=n(e),p(y.$$.fragment,e),ue=n(e),b=o(e,"P",{"data-svelte-h":!0}),r(b)!=="svelte-wtzssa"&&(b.innerHTML=Oe),pe=n(e),p(w.$$.fragment,e),me=n(e),p(v.$$.fragment,e),fe=n(e),_=o(e,"P",{"data-svelte-h":!0}),r(_)!=="svelte-r307qs"&&(_.innerHTML=Ke),ce=n(e),T=o(e,"UL",{"data-svelte-h":!0}),r(T)!=="svelte-1rjun7q"&&(T.innerHTML=et),he=n(e),M=o(e,"P",{"data-svelte-h":!0}),r(M)!=="svelte-lnijwm"&&(M.innerHTML=tt),de=n(e),p(k.$$.fragment,e),ge=n(e),H=o(e,"P",{"data-svelte-h":!0}),r(H)!=="svelte-1kr2tvb"&&(H.innerHTML=st),$e=n(e),p(x.$$.fragment,e),ye=n(e),C=o(e,"P",{"data-svelte-h":!0}),r(C)!=="svelte-18w8g2a"&&(C.innerHTML=at),be=n(e),p(L.$$.fragment,e),we=n(e),U=o(e,"P",{"data-svelte-h":!0}),r(U)!=="svelte-okquay"&&(U.innerHTML=it),ve=n(e),p(I.$$.fragment,e),_e=n(e),p(j.$$.fragment,e),Te=n(e),P=o(e,"P",{"data-svelte-h":!0}),r(P)!=="svelte-1nemcci"&&(P.textContent=nt),Me=n(e),J=o(e,"P",{"data-svelte-h":!0}),r(J)!=="svelte-1bjwb5o"&&(J.textContent=lt),ke=n(e),D=o(e,"UL",{"data-svelte-h":!0}),r(D)!=="svelte-1upvcmt"&&(D.innerHTML=ot),He=n(e),p(S.$$.fragment,e),xe=n(e),p(G.$$.fragment,e),Ce=n(e),E=o(e,"P",{"data-svelte-h":!0}),r(E)!=="svelte-nw2m8l"&&(E.innerHTML=rt),Le=n(e),Z=o(e,"P",{"data-svelte-h":!0}),r(Z)!=="svelte-1drlc3v"&&(Z.innerHTML=ut),Ue=n(e),p(B.$$.fragment,e),Ie=n(e),F=o(e,"P",{"data-svelte-h":!0}),r(F)!=="svelte-18tjciu"&&(F.textContent=pt),je=n(e),p(q.$$.fragment,e),Pe=n(e),R=o(e,"P",{"data-svelte-h":!0}),r(R)!=="svelte-anhv0h"&&(R.innerHTML=mt),Je=n(e),X=o(e,"P",{"data-svelte-h":!0}),r(X)!=="svelte-18m3iu6"&&(X.textContent=ft),De=n(e),A=o(e,"UL",{"data-svelte-h":!0}),r(A)!=="svelte-1d6loxh"&&(A.innerHTML=ct),Se=n(e),p(Y.$$.fragment,e),Ge=n(e),z=o(e,"P",{"data-svelte-h":!0}),r(z)!=="svelte-kruaya"&&(z.textContent=ht),Ee=n(e),p(Q.$$.fragment,e),Ze=n(e),W=o(e,"P",{"data-svelte-h":!0}),r(W)!=="svelte-vitubn"&&(W.innerHTML=dt),Be=n(e),p(N.$$.fragment,e),Fe=n(e),V=o(e,"P",{"data-svelte-h":!0}),r(V)!=="svelte-qmbfjm"&&(V.innerHTML=gt),qe=n(e),p(O.$$.fragment,e),Re=n(e),K=o(e,"P",{"data-svelte-h":!0}),r(K)!=="svelte-kisz70"&&(K.textContent=$t),Xe=n(e),p(ee.$$.fragment,e),Ae=n(e),p(te.$$.fragment,e),Ye=n(e),se=o(e,"P",{"data-svelte-h":!0}),r(se)!=="svelte-2r68yz"&&(se.innerHTML=yt),ze=n(e),p(ae.$$.fragment,e),Qe=n(e),ne=o(e,"P",{}),bt(ne).forEach(s),this.h()},h(){wt(g,"name","hf:doc:metadata"),wt(g,"content",It)},m(e,t){xt(document.head,g),a(e,le,t),a(e,ie,t),a(e,oe,t),m($,e,t),a(e,re,t),m(y,e,t),a(e,ue,t),a(e,b,t),a(e,pe,t),m(w,e,t),a(e,me,t),m(v,e,t),a(e,fe,t),a(e,_,t),a(e,ce,t),a(e,T,t),a(e,he,t),a(e,M,t),a(e,de,t),m(k,e,t),a(e,ge,t),a(e,H,t),a(e,$e,t),m(x,e,t),a(e,ye,t),a(e,C,t),a(e,be,t),m(L,e,t),a(e,we,t),a(e,U,t),a(e,ve,t),m(I,e,t),a(e,_e,t),m(j,e,t),a(e,Te,t),a(e,P,t),a(e,Me,t),a(e,J,t),a(e,ke,t),a(e,D,t),a(e,He,t),m(S,e,t),a(e,xe,t),m(G,e,t),a(e,Ce,t),a(e,E,t),a(e,Le,t),a(e,Z,t),a(e,Ue,t),m(B,e,t),a(e,Ie,t),a(e,F,t),a(e,je,t),m(q,e,t),a(e,Pe,t),a(e,R,t),a(e,Je,t),a(e,X,t),a(e,De,t),a(e,A,t),a(e,Se,t),m(Y,e,t),a(e,Ge,t),a(e,z,t),a(e,Ee,t),m(Q,e,t),a(e,Ze,t),a(e,W,t),a(e,Be,t),m(N,e,t),a(e,Fe,t),a(e,V,t),a(e,qe,t),m(O,e,t),a(e,Re,t),a(e,K,t),a(e,Xe,t),m(ee,e,t),a(e,Ae,t),m(te,e,t),a(e,Ye,t),a(e,se,t),a(e,ze,t),m(ae,e,t),a(e,Qe,t),a(e,ne,t),We=!0},p:_t,i(e){We||(f($.$$.fragment,e),f(y.$$.fragment,e),f(w.$$.fragment,e),f(v.$$.fragment,e),f(k.$$.fragment,e),f(x.$$.fragment,e),f(L.$$.fragment,e),f(I.$$.fragment,e),f(j.$$.fragment,e),f(S.$$.fragment,e),f(G.$$.fragment,e),f(B.$$.fragment,e),f(q.$$.fragment,e),f(Y.$$.fragment,e),f(Q.$$.fragment,e),f(N.$$.fragment,e),f(O.$$.fragment,e),f(ee.$$.fragment,e),f(te.$$.fragment,e),f(ae.$$.fragment,e),We=!0)},o(e){c($.$$.fragment,e),c(y.$$.fragment,e),c(w.$$.fragment,e),c(v.$$.fragment,e),c(k.$$.fragment,e),c(x.$$.fragment,e),c(L.$$.fragment,e),c(I.$$.fragment,e),c(j.$$.fragment,e),c(S.$$.fragment,e),c(G.$$.fragment,e),c(B.$$.fragment,e),c(q.$$.fragment,e),c(Y.$$.fragment,e),c(Q.$$.fragment,e),c(N.$$.fragment,e),c(O.$$.fragment,e),c(ee.$$.fragment,e),c(te.$$.fragment,e),c(ae.$$.fragment,e),We=!1},d(e){e&&(s(le),s(ie),s(oe),s(re),s(ue),s(b),s(pe),s(me),s(fe),s(_),s(ce),s(T),s(he),s(M),s(de),s(ge),s(H),s($e),s(ye),s(C),s(be),s(we),s(U),s(ve),s(_e),s(Te),s(P),s(Me),s(J),s(ke),s(D),s(He),s(xe),s(Ce),s(E),s(Le),s(Z),s(Ue),s(Ie),s(F),s(je),s(Pe),s(R),s(Je),s(X),s(De),s(A),s(Se),s(Ge),s(z),s(Ee),s(Ze),s(W),s(Be),s(Fe),s(V),s(qe),s(Re),s(K),s(Xe),s(Ae),s(Ye),s(se),s(ze),s(Qe),s(ne)),s(g),h($,e),h(y,e),h(w,e),h(v,e),h(k,e),h(x,e),h(L,e),h(I,e),h(j,e),h(S,e),h(G,e),h(B,e),h(q,e),h(Y,e),h(Q,e),h(N,e),h(O,e),h(ee,e),h(te,e),h(ae,e)}}}const It='{"title":"Troubleshooting","local":"troubleshooting","sections":[{"title":"Issues when uploading datasets with push_to_hub","local":"issues-when-uploading-datasets-with-pushtohub","sections":[{"title":"Authentication issues","local":"authentication-issues","sections":[],"depth":3},{"title":"Lost connection on large dataset upload","local":"lost-connection-on-large-dataset-upload","sections":[],"depth":3},{"title":"Too Many Requests","local":"too-many-requests","sections":[],"depth":3}],"depth":2},{"title":"Issues when creating datasets from custom data","local":"issues-when-creating-datasets-from-custom-data","sections":[{"title":"Loading images and audio from a folder","local":"loading-images-and-audio-from-a-folder","sections":[],"depth":3},{"title":"Pickling issues","local":"pickling-issues","sections":[{"title":"Pickling issues when using Dataset.from_generator","local":"pickling-issues-when-using-datasetfromgenerator","sections":[],"depth":4},{"title":"Pickling issues with Dataset.map","local":"pickling-issues-with-datasetmap","sections":[],"depth":4}],"depth":3}],"depth":2},{"title":"Asking for help","local":"asking-for-help","sections":[{"title":"Forums","local":"forums","sections":[],"depth":3},{"title":"Discord","local":"discord","sections":[],"depth":3},{"title":"Community Discussions on 🤗 Hub","local":"community-discussions-on--hub","sections":[],"depth":3},{"title":"GitHub Issues","local":"github-issues","sections":[],"depth":3}],"depth":2}],"depth":1}';function jt(Ve){return Tt(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Gt extends Mt{constructor(g){super(),kt(this,g,jt,Ut,vt,{})}}export{Gt as component}; | |
Xet Storage Details
- Size:
- 17 kB
- Xet hash:
- e650fb3fa0d061da24c4800ccfd89963864a3cf22f05f36fb29bb23ae3a5c36c
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.