Buckets:

rtrm's picture
download
raw
76.6 kB
import{s as xt,o as Zt,n as Os}from"../chunks/scheduler.37c15a92.js";import{S as Gt,i as It,g as p,s as a,r as M,A as Rt,h as c,f as l,c as n,j as wt,u as d,x as r,k as N,y as Ct,a as t,v as J,d as m,t as o,w as y}from"../chunks/index.2bf4358c.js";import{T as qs}from"../chunks/Tip.363c041f.js";import{Y as Xt}from"../chunks/Youtube.1e50a667.js";import{C as h}from"../chunks/CodeBlock.4e987730.js";import{C as zt}from"../chunks/CourseFloatingBanner.6add7356.js";import{H as Ps,E as Nt}from"../chunks/getInferenceSnippets.24b50994.js";function _t(j){let i,T="✏️ <strong>Încercați!</strong> Faceți clic pe câteva dintre URL-urile din payload-ul JSON de mai sus pentru a vă familiariza cu tipul de informații către care se face referire pentru fiecare GitHub issue.";return{c(){i=p("p"),i.innerHTML=T},l(u){i=c(u,"P",{"data-svelte-h":!0}),r(i)!=="svelte-sjr1zc"&&(i.innerHTML=T)},m(u,U){t(u,i,U)},p:Os,d(u){u&&l(i)}}}function kt(j){let i,T="⚠️ Nu oferiți nimănui un notebook cu <code>GITHUB_TOKEN</code> în el . Vă recomandăm să ștergeți ultima celulă odată ce ați executat-o pentru a evita scurgerea accidentală a acestor informații. Chiar mai bine, stocați tokenul într-un fișier <em>.env</em> și utilizați biblioteca <code>python-dotenv</code> pentru a îl încărca automat ca variabilă de mediu.";return{c(){i=p("p"),i.innerHTML=T},l(u){i=c(u,"P",{"data-svelte-h":!0}),r(i)!=="svelte-18ilrx3"&&(i.innerHTML=T)},m(u,U){t(u,i,U)},p:Os,d(u){u&&l(i)}}}function vt(j){let i,T="✏️ <strong>Încercați!</strong> Calculați timpul mediu necesar pentru închiderea issue-urilor în Datasets. Vă poate fi utilă funcția <code>Dataset.filter()</code> pentru a filtra pull requesturile și issue-urile deschise, și puteți utiliza funcția <code>Dataset.set_format()</code> pentru a converti datasetul într-un <code>DataFrame</code> astfel încât să puteți manipula cu ușurință timestampurile <code>created_at</code> și <code>closed_at</code>. Pentru puncte bonus, calculați timpul mediu necesar pentru închiderea pull requesturilor.";return{c(){i=p("p"),i.innerHTML=T},l(u){i=c(u,"P",{"data-svelte-h":!0}),r(i)!=="svelte-1l6kipy"&&(i.innerHTML=T)},m(u,U){t(u,i,U)},p:Os,d(u){u&&l(i)}}}function Vt(j){let i,T='💡 De asemenea, puteți încărca un dataset pe Hugging Face Hub direct din terminal utilizând <code>huggingface-cli</code> și puțină magie Git. Consultați <a href="https://huggingface.co/docs/datasets/share#share-a-dataset-using-the-cli" rel="nofollow">ghidul 🤗 Datasets</a> pentru detalii despre cum puteți face asta.';return{c(){i=p("p"),i.innerHTML=T},l(u){i=c(u,"P",{"data-svelte-h":!0}),r(i)!=="svelte-vijdga"&&(i.innerHTML=T)},m(u,U){t(u,i,U)},p:Os,d(u){u&&l(i)}}}function Bt(j){let i,T='✏️ <strong>Încercați!</strong> Utilizați aplicația <code>dataset-tagging</code> și <a href="https://github.com/huggingface/datasets/blob/master/templates/README_guide.md" rel="nofollow">ghidul 🤗 Datasets</a> pentru a completa fișierul <em>README.md</em> pentru datasetul de probleme GitHub.';return{c(){i=p("p"),i.innerHTML=T},l(u){i=c(u,"P",{"data-svelte-h":!0}),r(i)!=="svelte-17ir9t2"&&(i.innerHTML=T)},m(u,U){t(u,i,U)},p:Os,d(u){u&&l(i)}}}function $t(j){let i,T="✏️ <strong>Încercați!</strong> Treceți prin pașii pe care i-am făcut în această secțiune pentru a crea un dataset de issues GitHub pentru o biblioteca open source care îți place(alegeți altceva înafară de 🤗 Datasets, desigur!). Pentru puncte bonus, faceți fine-tune unui multilabel classifier pentru a prezice tagurile prezente în câmpul <code>labels</code>.";return{c(){i=p("p"),i.innerHTML=T},l(u){i=c(u,"P",{"data-svelte-h":!0}),r(i)!=="svelte-1cvtjan"&&(i.innerHTML=T)},m(u,U){t(u,i,U)},p:Os,d(u){u&&l(i)}}}function Wt(j){let i,T,u,U,_,se,k,ee,v,Rl='Uneori, datasetul necesar pentru a construi o aplicație NLP nu există, astfel încât veți trebui să-l creați singuri. În această secțiune vom arăta cum să creați un corpus de <a href="https://github.com/features/issues/" rel="nofollow">GitHub issues</a>, care sunt utilizate în mod obișnuit pentru a urmări erorile sau feature-urile din repositoriile GitHub. Acest corpus poate fi folosit pentru diverse scopuri, inclusiv:',le,V,Cl="<li>Explorarea timpului necesar pentru închiderea unor issues deschise sau pull requesturi</li> <li>Antrenarea unui <em>multilabel classifier</em> care poate eticheta issue-urile cu metadate pe baza descrierii issue-urilor (de exemplu, “bug”, “enhancement” sau “question”)</li> <li>Crearea unui motor de căutare semantică pentru a găsi care issues se potrivesc query-ului utilizatorului</li>",te,B,Xl="În această secțiune ne vom focusa pe crearea corpusului, și în următoarea vom aborda aplicația motorului de căutare semantic. Pentru a păstra lucrurile meta, vom folosi issue-urile GitHub asociate cu un proiect open source popular: 🤗 Datasets! Să vedem cum să obținem datele și să explorăm informațiile conținute în aceste issue-uri.",ae,$,ne,W,zl='Puteți găsi toate issue-urile din 🤗 Datasets navigând către tabul <a href="https://github.com/huggingface/datasets/issues" rel="nofollow">Issues</a> al repositorului. Așa cum arată următorul screenshot, la momentul scrierii acestui text existau 331 de issues deschise și 668 închise.',ie,g,Nl='<img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter5/datasets-issues.png" alt="Issue-urile GitHub asociate cu 🤗 Datasets." width="80%"/>',pe,H,_l="Dacă ați da clic pe una dintre aceste issue-uri veți găsi că aceasta conține un titlu, o descriere și un set de labeluri care caracterizează issue-ul. Un exemplu este prezentat în screenshotul următor.",ce,b,kl='<img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter5/datasets-issues-single.png" alt="Un issue tipic în GitHub din repositoriul 🤗 Datasets." width="80%"/>',re,Q,vl='Pentru a descărca toate issue-urile din repositoriu, vom folosi <a href="https://docs.github.com/en/rest" rel="nofollow">GitHub REST API</a> pentru a enumera <a href="https://docs.github.com/en/rest/reference/issues#list-repository-issues" rel="nofollow"><code>Issues</code> endpoint</a>. Aceast endpoint returnează o listă de obiecte JSON, cu fiecare obiect conținând un număr mare de câmpuri care includ titlul și descrierea precum și metadata despre starea issue-ului și așa mai departe.',ue,F,Vl="Un mod convenabil de descărcare a issue-urilor este prin utilizarea librăriei <code>requests</code>, care este modalitatea standard pentru a face cereri HTTP în Python. Puteți instala libraria rulând comanda:",Me,Y,de,E,Bl="Odată cu instalarea librariei, puteți face cereri GET la <code>Issues</code> endpoint prin invocarea funcției <code>requests.get()</code>. De exemplu, puteți rula următorul cod pentru a obține primul issue din prima pagină:",Je,A,me,S,$l="Obiectul <code>response</code> conține o cantitate mare de informații utile despre requestul efectuat, inclusiv HTTP status code:",oe,D,ye,L,he,q,Wl='unde statusul <code>200</code> înseamnă că cererea a fost reușită (puteți găsi o listă completă de status coduri <a href="https://en.wikipedia.org/wiki/List_of_HTTP_status_codes" rel="nofollow">aici</a>). De ceea ce suntem însă interesați este <em>payload</em>, care poate fi accesat în diverse formaturi precum bytes, string sau JSON. Deoarece știm că issue-urile noastre sunt în format JSON, să inspectăm payload-ul astfel:',Te,P,Ue,O,je,K,Hl="Uau, aceasta e o cantitate mare de informație! Putem vedea câmpuri utile cum ar fi <code>title</code>, <code>body</code> și <code>number</code> care descriu problema, precum și informații despre utilizatorul GitHub care a deschis issue-ul.",ge,f,be,ss,Ql='După cum este descris în <a href="https://docs.github.com/en/rest/overview/resources-in-the-rest-api#rate-limiting" rel="nofollow">documentația</a> GitHub, solicitările neautentificate sunt limitate la 60 de solicitări pe oră. Deși puteți crește <code>per_page</code> query parameter pentru a reduce numărul de solicitări pe care le faceți, oricum veți atinge limita pentru orice repository care are mai mult de câteva mii de issues. Prin urmare, ar trebui să urmați <a href="https://docs.github.com/en/github/authenticating-to-github/creating-a-personal-access-token" rel="nofollow">instrucțiunile</a> GitHub pentru crearea unui <em>personal access token</em> astfel încât să puteți crește limita la 5.000 de solicitări pe oră. Odată ce aveți tokenul, îl puteți include ca parte a request header:',fe,es,we,w,xe,ls,Fl="Acum că avem tokenul de acces, hai să creăm o funcție care să poată descărca toate issue-urile dintr-un repositoriu GitHub:",Ze,ts,Ge,as,Yl="Acum când apelăm <code>fetch_issues()</code> va descărca toate problemele în batch-uri pentru a evita depășirea limitei GitHub pe numărul de solicitări pe oră; rezultatul va fi stocat într-un fișier <code>_repository_name-issues.jsonl</code>, unde fiecare linie este un obiect JSON care reprezintă un issue. Mai jos folosim această funcție pentru a obține toate issue-urile de la 🤗 Datasets:",Ie,ns,Re,is,El='Odată ce issue-urile sunt descărcate, le putem încărca local utilizând abilitățile noastre dobândite în <a href="/course/chapter5/2">secțiunea 2</a>:',Ce,ps,Xe,cs,ze,rs,Al='Great, am creat primul nostru dataset de la zero! Dar de ce sunt mai mult de câteva mii de issue-uri atunci când tabul de issue-uri al repositoriului 🤗 Datasets afișează doar aproximativ 1.000 de issue-uri în total 🤔? Conform descris în <a href="https://docs.github.com/en/rest/reference/issues#list-issues-assigned-to-the-authenticated-user" rel="nofollow">documentația</a> GitHub, acest lucru s-a întâmplat pentru că am descărcat și toate pull requesturile:',Ne,us,Sl="<p>GitHub’s REST API v3 considers every pull request an issue, but not every issue is a pull request. For this reason, “Issues” endpoints may return both issues and pull requests in the response. You can identify pull requests by the <code>pull_request</code> key. Be aware that the <code>id</code> of a pull request returned from “Issues” endpoints will be an issue id.</p>",_e,Ms,Dl="Deoarece conținutul issue-urilor și pull requesturilor este destul de diferit, hai să preprocesăm puțin datele pentru a ne permite să le diferențiem între ele.",ke,ds,ve,Js,Ll='Fragmentul de mai sus din documentația GitHub ne spune că coloana <code>pull_request</code> poate fi utilizată pentru a diferenția între issues și pull requests. Să analizăm un sampple aleatoriu pentru a vedea care este diferența. Așa cum am făcut în <a href="/course/chapter5/3">secțiunea 3</a>, vom înlănțui <code>Dataset.shuffle()</code> și <code>Dataset.select()</code> pentru a crea un sample aleatoriu și apoi vom împerechea coloanele <code>html_url</code> și <code>pull_request</code> pentru a putea compara diversele URL-uri:',Ve,ms,Be,os,$e,ys,ql="Aici putem vedea că fiecare pull request este asociat cu diverse URL-uri, în timp ce issue-urile obișnuite au o intrare <code>None</code>. Putem utiliza această distincție pentru a crea o nouă coloană <code>is_pull_request</code> care verifică dacă câmpul <code>pull_request</code> este <code>None</code> sau nu:",We,hs,He,x,Qe,Ts,Pl="Deși am putea continua să curățăm datasetul prin eliminarea sau redenumirea unor coloane, este, în general, o practică bună să păstrăm datasetul cât mai “raw” posibil la acest stadiu, astfel încât să poată fi utilizat ușor în multiple aplicații.",Fe,Us,Ol="Înainte de a încărca datasetul în Hugging Face Hub, trebuie să rezolvăm chestie care lipsește din el: comentariile asociate fiecărui issue și pull request. Le vom adăuga în continuare cu— ați ghicit — GitHub REST API!",Ye,js,Ee,gs,Kl="După cum se vede în următorul screenshot, comentariile asociate unui issue sau pull request oferă o sursă bogată de informații, în special dacă suntem interesați să construim un motor de căutare pentru a răspunde la întrebările utilizatorilor despre bibliotecă.",Ae,Z,st='<img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter5/datasets-issues-comment.png" alt="Comentariile asociate unei probleme despre 🤗 Datasets." width="80%"/>',Se,bs,et='GitHub REST API oferă un endpoint <a href="https://docs.github.com/en/rest/reference/issues#list-issue-comments" rel="nofollow"><code>Comments</code></a> care returnează toate comentariile asociate numărului problemei. Să testăm endpointul pentru a vedea ce returnează:',De,fs,Le,ws,qe,xs,lt="Putem vedea că comentariul este stocat în câmpul <code>body</code>, așa că putem scrie o funcție simplă care returnează toate comentariile asociate unei probleme prin extragerea conținutului <code>body</code> pentru fiecare element în <code>response.json()</code>:",Pe,Zs,Oe,Gs,Ke,Is,tt="Arată bine. Acum hai să folosim <code>Dataset.map()</code> pentru a adăuga noi coloane <code>comments</code> fiecărui issue în datasetul nostru:",sl,Rs,el,Cs,at="Ultimul pas este să facem push datasetului nostru pe Hub. Să vedem cum putem face asta.",ll,Xs,tl,zs,al,Ns,nt="Acum că avem datasetul nostru augmentat, este timpul să îi facem push pe Hub pentru a-l oferi comunității! Încărcarea unui dataset este foarte simplu: la fel ca modelele și tokenizerrii din 🤗 Transformers, putem utiliza o metodă <code>push_to_hub()</code> pentru a face push unui dataset. Pentru a face asta, avem nevoie de un token de autentificare, care poate fi obținut prin autentificarea pe Hugging Face Hub cu funcția <code>notebook_login()</code>:",nl,_s,il,ks,it=`Acest lucru va crea un widget unde poți să scrii usernameul și parola ta, iar un API token va fi salvat în <em>~/.huggingface/token</em>. Dacă rulezi codeul într-un terminal, te poți loga cu ajutor CLI:
This will create a widget where you can enter your username and password, and an API token will be saved in <em>~/.huggingface/token</em>. If you’re running the code in a terminal, you can log in via the CLI instead:`,pl,vs,cl,Vs,pt="O dată ce ai făcut asta, putem încărca datasetul rulând:",rl,Bs,ul,$s,ct="De acum, orice poate să descarce datasetul, utilizând <code>load_dataset()</code> cu ID-ul repositoriului ca <code>path</code> argument:",Ml,Ws,dl,Hs,Jl,Qs,rt="Cool, am încărcat datasetul nostru pe Hub și acum este disponibil pentru alții să îl utilizeze! Mai este doar un lucru important de făcut: adăugarea unui <em>dataset card</em> care explică cum a fost creat corpusul și oferă alte informații utile pentru comunitate.",ml,G,ol,Fs,yl,Ys,ut="Datasetiroșe bine documentate sunt mai probabil să fie utile altora (inclusiv ție din viitor!), deoarece furnizează contextul pentru a permite utilizatorilor să decidă dacă datasetul este relevant pentru taskul lor și să evalueze eventualele biasuri sau riscurile asociate cu utilizarea datasetului.",hl,Es,Mt="Pe Hugging Face Hub, această informație este stocată în fișierul <em>README.md</em> al fiecărui dataset repository. Sunt doi pași principali pe care trebuie să îi efectuați înainte de a crea acest fișier:",Tl,As,dt='<li>Utilizați aplicația <a href="https://huggingface.co/datasets/tagging/" rel="nofollow"><code>datasets-tagging</code></a> pentru a crea etichete de metadate în format YAML. Aceste taguri sunt utilizate pentru o varietate de funcționalități de căutare pe Hugging Face Hub și asigură că datasetul poate fi găsit ușor de membrii comunității. Deoarece am creat un dataset custom aici, veți fi nevoiți să clonați repositoriul <code>datasets-tagging</code> și să rulați aplicația local. Iată cum arată interfața:</li>',Ul,I,Jt='<img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter5/datasets-tagger.png" alt="Interfața &#39;datasets-tagging&#39;." width="80%"/>',jl,R,mt='<li>Citiți <a href="https://github.com/huggingface/datasets/blob/master/templates/README_guide.md" rel="nofollow">ghidul 🤗 Datasets</a> despre crearea de dataset cards informative și utilizați-l ca șablon.</li>',gl,Ss,ot="Puteți crea fișierul <em>README.md</em> direct pe Hub și puteți găsi un template pentru dataset card în repositoriul <code>lewtun/github-issues</code>. Un screenshot a dataset card completată este afișată mai jos.",bl,C,yt='<img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter5/dataset-card.png" alt="Dataset card." width="80%"/>',fl,X,wl,Ds,ht="Astfel, am văzut în această secțiune că crearea unui dataset bun poate fi destul de complicată, dar, spre norocul nsotru, încărcarea și oferirea acestuia comunității nu sunt. În secțiunea următoare, vom utiliza datasetul nou pentru a crea un motor de căutare semantic cu 🤗 Datasets care poate să asocieze întrebări cu cele mai relevante issues și comentarii.",xl,z,Zl,Ls,Gl,Ks,Il;return _=new Ps({props:{title:"Crearea propriului tău dataset",local:"creating-your-own-dataset",headingTag:"h1"}}),k=new zt({props:{chapter:5,classNames:"absolute z-10 right-0 top-0",notebooks:[{label:"Google Colab",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/en/chapter5/section5.ipynb"},{label:"Aws Studio",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/en/chapter5/section5.ipynb"}]}}),$=new Ps({props:{title:"Obținerea datelor",local:"getting-the-data",headingTag:"h2"}}),Y=new h({props:{code:"IXBpcCUyMGluc3RhbGwlMjByZXF1ZXN0cw==",highlighted:"!pip install requests",wrap:!1}}),A=new h({props:{code:"aW1wb3J0JTIwcmVxdWVzdHMlMEElMEF1cmwlMjAlM0QlMjAlMjJodHRwcyUzQSUyRiUyRmFwaS5naXRodWIuY29tJTJGcmVwb3MlMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJTJGaXNzdWVzJTNGcGFnZSUzRDElMjZwZXJfcGFnZSUzRDElMjIlMEFyZXNwb25zZSUyMCUzRCUyMHJlcXVlc3RzLmdldCh1cmwp",highlighted:`<span class="hljs-keyword">import</span> requests
url = <span class="hljs-string">&quot;https://api.github.com/repos/huggingface/datasets/issues?page=1&amp;per_page=1&quot;</span>
response = requests.get(url)`,wrap:!1}}),D=new h({props:{code:"cmVzcG9uc2Uuc3RhdHVzX2NvZGU=",highlighted:"response.status_code",wrap:!1}}),L=new h({props:{code:"MjAw",highlighted:'<span class="hljs-number">200</span>',wrap:!1}}),P=new h({props:{code:"cmVzcG9uc2UuanNvbigp",highlighted:"response.json()",wrap:!1}}),O=new h({props:{code:"JTVCJTdCJ3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZhcGkuZ2l0aHViLmNvbSUyRnJlcG9zJTJGaHVnZ2luZ2ZhY2UlMkZkYXRhc2V0cyUyRmlzc3VlcyUyRjI3OTInJTJDJTBBJTIwJTIwJ3JlcG9zaXRvcnlfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmFwaS5naXRodWIuY29tJTJGcmVwb3MlMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJyUyQyUwQSUyMCUyMCdsYWJlbHNfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmFwaS5naXRodWIuY29tJTJGcmVwb3MlMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJTJGaXNzdWVzJTJGMjc5MiUyRmxhYmVscyU3QiUyRm5hbWUlN0QnJTJDJTBBJTIwJTIwJ2NvbW1lbnRzX3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZhcGkuZ2l0aHViLmNvbSUyRnJlcG9zJTJGaHVnZ2luZ2ZhY2UlMkZkYXRhc2V0cyUyRmlzc3VlcyUyRjI3OTIlMkZjb21tZW50cyclMkMlMEElMjAlMjAnZXZlbnRzX3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZhcGkuZ2l0aHViLmNvbSUyRnJlcG9zJTJGaHVnZ2luZ2ZhY2UlMkZkYXRhc2V0cyUyRmlzc3VlcyUyRjI3OTIlMkZldmVudHMnJTJDJTBBJTIwJTIwJ2h0bWxfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmdpdGh1Yi5jb20lMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJTJGcHVsbCUyRjI3OTInJTJDJTBBJTIwJTIwJ2lkJyUzQSUyMDk2ODY1MDI3NCUyQyUwQSUyMCUyMCdub2RlX2lkJyUzQSUyMCdNREV4T2xCMWJHeFNaWEYxWlhOME56RXdOelV5TWpjMCclMkMlMEElMjAlMjAnbnVtYmVyJyUzQSUyMDI3OTIlMkMlMEElMjAlMjAndGl0bGUnJTNBJTIwJ1VwZGF0ZSUyMEdvb0FRJyUyQyUwQSUyMCUyMCd1c2VyJyUzQSUyMCU3Qidsb2dpbiclM0ElMjAnYmhhdml0dnlhbWFsaWsnJTJDJTBBJTIwJTIwJTIwJ2lkJyUzQSUyMDE5NzE4ODE4JTJDJTBBJTIwJTIwJTIwJ25vZGVfaWQnJTNBJTIwJ01EUTZWWE5sY2pFNU56RTRPREU0JyUyQyUwQSUyMCUyMCUyMCdhdmF0YXJfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmF2YXRhcnMuZ2l0aHVidXNlcmNvbnRlbnQuY29tJTJGdSUyRjE5NzE4ODE4JTNGdiUzRDQnJTJDJTBBJTIwJTIwJTIwJ2dyYXZhdGFyX2lkJyUzQSUyMCcnJTJDJTBBJTIwJTIwJTIwJ3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZhcGkuZ2l0aHViLmNvbSUyRnVzZXJzJTJGYmhhdml0dnlhbWFsaWsnJTJDJTBBJTIwJTIwJTIwJ2h0bWxfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmdpdGh1Yi5jb20lMkZiaGF2aXR2eWFtYWxpayclMkMlMEElMjAlMjAlMjAnZm9sbG93ZXJzX3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZhcGkuZ2l0aHViLmNvbSUyRnVzZXJzJTJGYmhhdml0dnlhbWFsaWslMkZmb2xsb3dlcnMnJTJDJTBBJTIwJTIwJTIwJ2ZvbGxvd2luZ191cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGYXBpLmdpdGh1Yi5jb20lMkZ1c2VycyUyRmJoYXZpdHZ5YW1hbGlrJTJGZm9sbG93aW5nJTdCJTJGb3RoZXJfdXNlciU3RCclMkMlMEElMjAlMjAlMjAnZ2lzdHNfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmFwaS5naXRodWIuY29tJTJGdXNlcnMlMkZiaGF2aXR2eWFtYWxpayUyRmdpc3RzJTdCJTJGZ2lzdF9pZCU3RCclMkMlMEElMjAlMjAlMjAnc3RhcnJlZF91cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGYXBpLmdpdGh1Yi5jb20lMkZ1c2VycyUyRmJoYXZpdHZ5YW1hbGlrJTJGc3RhcnJlZCU3QiUyRm93bmVyJTdEJTdCJTJGcmVwbyU3RCclMkMlMEElMjAlMjAlMjAnc3Vic2NyaXB0aW9uc191cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGYXBpLmdpdGh1Yi5jb20lMkZ1c2VycyUyRmJoYXZpdHZ5YW1hbGlrJTJGc3Vic2NyaXB0aW9ucyclMkMlMEElMjAlMjAlMjAnb3JnYW5pemF0aW9uc191cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGYXBpLmdpdGh1Yi5jb20lMkZ1c2VycyUyRmJoYXZpdHZ5YW1hbGlrJTJGb3JncyclMkMlMEElMjAlMjAlMjAncmVwb3NfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmFwaS5naXRodWIuY29tJTJGdXNlcnMlMkZiaGF2aXR2eWFtYWxpayUyRnJlcG9zJyUyQyUwQSUyMCUyMCUyMCdldmVudHNfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmFwaS5naXRodWIuY29tJTJGdXNlcnMlMkZiaGF2aXR2eWFtYWxpayUyRmV2ZW50cyU3QiUyRnByaXZhY3klN0QnJTJDJTBBJTIwJTIwJTIwJ3JlY2VpdmVkX2V2ZW50c191cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGYXBpLmdpdGh1Yi5jb20lMkZ1c2VycyUyRmJoYXZpdHZ5YW1hbGlrJTJGcmVjZWl2ZWRfZXZlbnRzJyUyQyUwQSUyMCUyMCUyMCd0eXBlJyUzQSUyMCdVc2VyJyUyQyUwQSUyMCUyMCUyMCdzaXRlX2FkbWluJyUzQSUyMEZhbHNlJTdEJTJDJTBBJTIwJTIwJ2xhYmVscyclM0ElMjAlNUIlNUQlMkMlMEElMjAlMjAnc3RhdGUnJTNBJTIwJ29wZW4nJTJDJTBBJTIwJTIwJ2xvY2tlZCclM0ElMjBGYWxzZSUyQyUwQSUyMCUyMCdhc3NpZ25lZSclM0ElMjBOb25lJTJDJTBBJTIwJTIwJ2Fzc2lnbmVlcyclM0ElMjAlNUIlNUQlMkMlMEElMjAlMjAnbWlsZXN0b25lJyUzQSUyME5vbmUlMkMlMEElMjAlMjAnY29tbWVudHMnJTNBJTIwMSUyQyUwQSUyMCUyMCdjcmVhdGVkX2F0JyUzQSUyMCcyMDIxLTA4LTEyVDExJTNBNDAlM0ExOFonJTJDJTBBJTIwJTIwJ3VwZGF0ZWRfYXQnJTNBJTIwJzIwMjEtMDgtMTJUMTIlM0EzMSUzQTE3WiclMkMlMEElMjAlMjAnY2xvc2VkX2F0JyUzQSUyME5vbmUlMkMlMEElMjAlMjAnYXV0aG9yX2Fzc29jaWF0aW9uJyUzQSUyMCdDT05UUklCVVRPUiclMkMlMEElMjAlMjAnYWN0aXZlX2xvY2tfcmVhc29uJyUzQSUyME5vbmUlMkMlMEElMjAlMjAncHVsbF9yZXF1ZXN0JyUzQSUyMCU3Qid1cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGYXBpLmdpdGh1Yi5jb20lMkZyZXBvcyUyRmh1Z2dpbmdmYWNlJTJGZGF0YXNldHMlMkZwdWxscyUyRjI3OTInJTJDJTBBJTIwJTIwJTIwJ2h0bWxfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmdpdGh1Yi5jb20lMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJTJGcHVsbCUyRjI3OTInJTJDJTBBJTIwJTIwJTIwJ2RpZmZfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmdpdGh1Yi5jb20lMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJTJGcHVsbCUyRjI3OTIuZGlmZiclMkMlMEElMjAlMjAlMjAncGF0Y2hfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmdpdGh1Yi5jb20lMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJTJGcHVsbCUyRjI3OTIucGF0Y2gnJTdEJTJDJTBBJTIwJTIwJ2JvZHknJTNBJTIwJyU1Qkdvb0FRJTVEKGh0dHBzJTNBJTJGJTJGZ2l0aHViLmNvbSUyRmFsbGVuYWklMkZnb29hcSklMjBkYXRhc2V0JTIwd2FzJTIwcmVjZW50bHklMjB1cGRhdGVkJTIwYWZ0ZXIlMjBzcGxpdHMlMjB3ZXJlJTIwYWRkZWQlMjBmb3IlMjB0aGUlMjBzYW1lLiUyMFRoaXMlMjBQUiUyMGNvbnRhaW5zJTIwbmV3JTIwdXBkYXRlZCUyMEdvb0FRJTIwd2l0aCUyMHRyYWluJTJGdmFsJTJGdGVzdCUyMHNwbGl0cyUyMGFuZCUyMHVwZGF0ZWQlMjBSRUFETUUlMjBhcyUyMHdlbGwuJyUyQyUwQSUyMCUyMCdwZXJmb3JtZWRfdmlhX2dpdGh1Yl9hcHAnJTNBJTIwTm9uZSU3RCU1RA==",highlighted:`[{<span class="hljs-string">&#x27;url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/repos/huggingface/datasets/issues/2792&#x27;</span>,
<span class="hljs-string">&#x27;repository_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/repos/huggingface/datasets&#x27;</span>,
<span class="hljs-string">&#x27;labels_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/repos/huggingface/datasets/issues/2792/labels{/name}&#x27;</span>,
<span class="hljs-string">&#x27;comments_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/repos/huggingface/datasets/issues/2792/comments&#x27;</span>,
<span class="hljs-string">&#x27;events_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/repos/huggingface/datasets/issues/2792/events&#x27;</span>,
<span class="hljs-string">&#x27;html_url&#x27;</span>: <span class="hljs-string">&#x27;https://github.com/huggingface/datasets/pull/2792&#x27;</span>,
<span class="hljs-string">&#x27;id&#x27;</span>: <span class="hljs-number">968650274</span>,
<span class="hljs-string">&#x27;node_id&#x27;</span>: <span class="hljs-string">&#x27;MDExOlB1bGxSZXF1ZXN0NzEwNzUyMjc0&#x27;</span>,
<span class="hljs-string">&#x27;number&#x27;</span>: <span class="hljs-number">2792</span>,
<span class="hljs-string">&#x27;title&#x27;</span>: <span class="hljs-string">&#x27;Update GooAQ&#x27;</span>,
<span class="hljs-string">&#x27;user&#x27;</span>: {<span class="hljs-string">&#x27;login&#x27;</span>: <span class="hljs-string">&#x27;bhavitvyamalik&#x27;</span>,
<span class="hljs-string">&#x27;id&#x27;</span>: <span class="hljs-number">19718818</span>,
<span class="hljs-string">&#x27;node_id&#x27;</span>: <span class="hljs-string">&#x27;MDQ6VXNlcjE5NzE4ODE4&#x27;</span>,
<span class="hljs-string">&#x27;avatar_url&#x27;</span>: <span class="hljs-string">&#x27;https://avatars.githubusercontent.com/u/19718818?v=4&#x27;</span>,
<span class="hljs-string">&#x27;gravatar_id&#x27;</span>: <span class="hljs-string">&#x27;&#x27;</span>,
<span class="hljs-string">&#x27;url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik&#x27;</span>,
<span class="hljs-string">&#x27;html_url&#x27;</span>: <span class="hljs-string">&#x27;https://github.com/bhavitvyamalik&#x27;</span>,
<span class="hljs-string">&#x27;followers_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/followers&#x27;</span>,
<span class="hljs-string">&#x27;following_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/following{/other_user}&#x27;</span>,
<span class="hljs-string">&#x27;gists_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/gists{/gist_id}&#x27;</span>,
<span class="hljs-string">&#x27;starred_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/starred{/owner}{/repo}&#x27;</span>,
<span class="hljs-string">&#x27;subscriptions_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/subscriptions&#x27;</span>,
<span class="hljs-string">&#x27;organizations_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/orgs&#x27;</span>,
<span class="hljs-string">&#x27;repos_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/repos&#x27;</span>,
<span class="hljs-string">&#x27;events_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/events{/privacy}&#x27;</span>,
<span class="hljs-string">&#x27;received_events_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/received_events&#x27;</span>,
<span class="hljs-string">&#x27;type&#x27;</span>: <span class="hljs-string">&#x27;User&#x27;</span>,
<span class="hljs-string">&#x27;site_admin&#x27;</span>: <span class="hljs-literal">False</span>},
<span class="hljs-string">&#x27;labels&#x27;</span>: [],
<span class="hljs-string">&#x27;state&#x27;</span>: <span class="hljs-string">&#x27;open&#x27;</span>,
<span class="hljs-string">&#x27;locked&#x27;</span>: <span class="hljs-literal">False</span>,
<span class="hljs-string">&#x27;assignee&#x27;</span>: <span class="hljs-literal">None</span>,
<span class="hljs-string">&#x27;assignees&#x27;</span>: [],
<span class="hljs-string">&#x27;milestone&#x27;</span>: <span class="hljs-literal">None</span>,
<span class="hljs-string">&#x27;comments&#x27;</span>: <span class="hljs-number">1</span>,
<span class="hljs-string">&#x27;created_at&#x27;</span>: <span class="hljs-string">&#x27;2021-08-12T11:40:18Z&#x27;</span>,
<span class="hljs-string">&#x27;updated_at&#x27;</span>: <span class="hljs-string">&#x27;2021-08-12T12:31:17Z&#x27;</span>,
<span class="hljs-string">&#x27;closed_at&#x27;</span>: <span class="hljs-literal">None</span>,
<span class="hljs-string">&#x27;author_association&#x27;</span>: <span class="hljs-string">&#x27;CONTRIBUTOR&#x27;</span>,
<span class="hljs-string">&#x27;active_lock_reason&#x27;</span>: <span class="hljs-literal">None</span>,
<span class="hljs-string">&#x27;pull_request&#x27;</span>: {<span class="hljs-string">&#x27;url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/repos/huggingface/datasets/pulls/2792&#x27;</span>,
<span class="hljs-string">&#x27;html_url&#x27;</span>: <span class="hljs-string">&#x27;https://github.com/huggingface/datasets/pull/2792&#x27;</span>,
<span class="hljs-string">&#x27;diff_url&#x27;</span>: <span class="hljs-string">&#x27;https://github.com/huggingface/datasets/pull/2792.diff&#x27;</span>,
<span class="hljs-string">&#x27;patch_url&#x27;</span>: <span class="hljs-string">&#x27;https://github.com/huggingface/datasets/pull/2792.patch&#x27;</span>},
<span class="hljs-string">&#x27;body&#x27;</span>: <span class="hljs-string">&#x27;[GooAQ](https://github.com/allenai/gooaq) dataset was recently updated after splits were added for the same. This PR contains new updated GooAQ with train/val/test splits and updated README as well.&#x27;</span>,
<span class="hljs-string">&#x27;performed_via_github_app&#x27;</span>: <span class="hljs-literal">None</span>}]`,wrap:!1}}),f=new qs({props:{$$slots:{default:[_t]},$$scope:{ctx:j}}}),es=new h({props:{code:"R0lUSFVCX1RPS0VOJTIwJTNEJTIweHh4JTIwJTIwJTIzJTIwQ29weSUyMHlvdXIlMjBHaXRIdWIlMjB0b2tlbiUyMGhlcmUlMEFoZWFkZXJzJTIwJTNEJTIwJTdCJTIyQXV0aG9yaXphdGlvbiUyMiUzQSUyMGYlMjJ0b2tlbiUyMCU3QkdJVEhVQl9UT0tFTiU3RCUyMiU3RA==",highlighted:`GITHUB_TOKEN = xxx <span class="hljs-comment"># Copy your GitHub token here</span>
headers = {<span class="hljs-string">&quot;Authorization&quot;</span>: <span class="hljs-string">f&quot;token <span class="hljs-subst">{GITHUB_TOKEN}</span>&quot;</span>}`,wrap:!1}}),w=new qs({props:{warning:!0,$$slots:{default:[kt]},$$scope:{ctx:j}}}),ts=new h({props:{code:"aW1wb3J0JTIwdGltZSUwQWltcG9ydCUyMG1hdGglMEFmcm9tJTIwcGF0aGxpYiUyMGltcG9ydCUyMFBhdGglMEFpbXBvcnQlMjBwYW5kYXMlMjBhcyUyMHBkJTBBZnJvbSUyMHRxZG0ubm90ZWJvb2slMjBpbXBvcnQlMjB0cWRtJTBBJTBBJTBBZGVmJTIwZmV0Y2hfaXNzdWVzKCUwQSUyMCUyMCUyMCUyMG93bmVyJTNEJTIyaHVnZ2luZ2ZhY2UlMjIlMkMlMEElMjAlMjAlMjAlMjByZXBvJTNEJTIyZGF0YXNldHMlMjIlMkMlMEElMjAlMjAlMjAlMjBudW1faXNzdWVzJTNEMTBfMDAwJTJDJTBBJTIwJTIwJTIwJTIwcmF0ZV9saW1pdCUzRDVfMDAwJTJDJTBBJTIwJTIwJTIwJTIwaXNzdWVzX3BhdGglM0RQYXRoKCUyMi4lMjIpJTJDJTBBKSUzQSUwQSUyMCUyMCUyMCUyMGlmJTIwbm90JTIwaXNzdWVzX3BhdGguaXNfZGlyKCklM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBpc3N1ZXNfcGF0aC5ta2RpcihleGlzdF9vayUzRFRydWUpJTBBJTBBJTIwJTIwJTIwJTIwYmF0Y2glMjAlM0QlMjAlNUIlNUQlMEElMjAlMjAlMjAlMjBhbGxfaXNzdWVzJTIwJTNEJTIwJTVCJTVEJTBBJTIwJTIwJTIwJTIwcGVyX3BhZ2UlMjAlM0QlMjAxMDAlMjAlMjAlMjMlMjBOdW1iZXIlMjBvZiUyMGlzc3VlcyUyMHRvJTIwcmV0dXJuJTIwcGVyJTIwcGFnZSUwQSUyMCUyMCUyMCUyMG51bV9wYWdlcyUyMCUzRCUyMG1hdGguY2VpbChudW1faXNzdWVzJTIwJTJGJTIwcGVyX3BhZ2UpJTBBJTIwJTIwJTIwJTIwYmFzZV91cmwlMjAlM0QlMjAlMjJodHRwcyUzQSUyRiUyRmFwaS5naXRodWIuY29tJTJGcmVwb3MlMjIlMEElMEElMjAlMjAlMjAlMjBmb3IlMjBwYWdlJTIwaW4lMjB0cWRtKHJhbmdlKG51bV9wYWdlcykpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIzJTIwUXVlcnklMjB3aXRoJTIwc3RhdGUlM0RhbGwlMjB0byUyMGdldCUyMGJvdGglMjBvcGVuJTIwYW5kJTIwY2xvc2VkJTIwaXNzdWVzJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcXVlcnklMjAlM0QlMjBmJTIyaXNzdWVzJTNGcGFnZSUzRCU3QnBhZ2UlN0QlMjZwZXJfcGFnZSUzRCU3QnBlcl9wYWdlJTdEJTI2c3RhdGUlM0RhbGwlMjIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBpc3N1ZXMlMjAlM0QlMjByZXF1ZXN0cy5nZXQoZiUyMiU3QmJhc2VfdXJsJTdEJTJGJTdCb3duZXIlN0QlMkYlN0JyZXBvJTdEJTJGJTdCcXVlcnklN0QlMjIlMkMlMjBoZWFkZXJzJTNEaGVhZGVycyklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBiYXRjaC5leHRlbmQoaXNzdWVzLmpzb24oKSklMEElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBpZiUyMGxlbihiYXRjaCklMjAlM0UlMjByYXRlX2xpbWl0JTIwYW5kJTIwbGVuKGFsbF9pc3N1ZXMpJTIwJTNDJTIwbnVtX2lzc3VlcyUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGFsbF9pc3N1ZXMuZXh0ZW5kKGJhdGNoKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGJhdGNoJTIwJTNEJTIwJTVCJTVEJTIwJTIwJTIzJTIwRmx1c2glMjBiYXRjaCUyMGZvciUyMG5leHQlMjB0aW1lJTIwcGVyaW9kJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcHJpbnQoZiUyMlJlYWNoZWQlMjBHaXRIdWIlMjByYXRlJTIwbGltaXQuJTIwU2xlZXBpbmclMjBmb3IlMjBvbmUlMjBob3VyJTIwLi4uJTIyKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHRpbWUuc2xlZXAoNjAlMjAqJTIwNjAlMjAlMkIlMjAxKSUwQSUwQSUyMCUyMCUyMCUyMGFsbF9pc3N1ZXMuZXh0ZW5kKGJhdGNoKSUwQSUyMCUyMCUyMCUyMGRmJTIwJTNEJTIwcGQuRGF0YUZyYW1lLmZyb21fcmVjb3JkcyhhbGxfaXNzdWVzKSUwQSUyMCUyMCUyMCUyMGRmLnRvX2pzb24oZiUyMiU3Qmlzc3Vlc19wYXRoJTdEJTJGJTdCcmVwbyU3RC1pc3N1ZXMuanNvbmwlMjIlMkMlMjBvcmllbnQlM0QlMjJyZWNvcmRzJTIyJTJDJTIwbGluZXMlM0RUcnVlKSUwQSUyMCUyMCUyMCUyMHByaW50KCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGYlMjJEb3dubG9hZGVkJTIwYWxsJTIwdGhlJTIwaXNzdWVzJTIwZm9yJTIwJTdCcmVwbyU3RCElMjBEYXRhc2V0JTIwc3RvcmVkJTIwYXQlMjAlN0Jpc3N1ZXNfcGF0aCU3RCUyRiU3QnJlcG8lN0QtaXNzdWVzLmpzb25sJTIyJTBBJTIwJTIwJTIwJTIwKQ==",highlighted:`<span class="hljs-keyword">import</span> time
<span class="hljs-keyword">import</span> math
<span class="hljs-keyword">from</span> pathlib <span class="hljs-keyword">import</span> Path
<span class="hljs-keyword">import</span> pandas <span class="hljs-keyword">as</span> pd
<span class="hljs-keyword">from</span> tqdm.notebook <span class="hljs-keyword">import</span> tqdm
<span class="hljs-keyword">def</span> <span class="hljs-title function_">fetch_issues</span>(<span class="hljs-params">
owner=<span class="hljs-string">&quot;huggingface&quot;</span>,
repo=<span class="hljs-string">&quot;datasets&quot;</span>,
num_issues=<span class="hljs-number">10_000</span>,
rate_limit=<span class="hljs-number">5_000</span>,
issues_path=Path(<span class="hljs-params"><span class="hljs-string">&quot;.&quot;</span></span>),
</span>):
<span class="hljs-keyword">if</span> <span class="hljs-keyword">not</span> issues_path.is_dir():
issues_path.mkdir(exist_ok=<span class="hljs-literal">True</span>)
batch = []
all_issues = []
per_page = <span class="hljs-number">100</span> <span class="hljs-comment"># Number of issues to return per page</span>
num_pages = math.ceil(num_issues / per_page)
base_url = <span class="hljs-string">&quot;https://api.github.com/repos&quot;</span>
<span class="hljs-keyword">for</span> page <span class="hljs-keyword">in</span> tqdm(<span class="hljs-built_in">range</span>(num_pages)):
<span class="hljs-comment"># Query with state=all to get both open and closed issues</span>
query = <span class="hljs-string">f&quot;issues?page=<span class="hljs-subst">{page}</span>&amp;per_page=<span class="hljs-subst">{per_page}</span>&amp;state=all&quot;</span>
issues = requests.get(<span class="hljs-string">f&quot;<span class="hljs-subst">{base_url}</span>/<span class="hljs-subst">{owner}</span>/<span class="hljs-subst">{repo}</span>/<span class="hljs-subst">{query}</span>&quot;</span>, headers=headers)
batch.extend(issues.json())
<span class="hljs-keyword">if</span> <span class="hljs-built_in">len</span>(batch) &gt; rate_limit <span class="hljs-keyword">and</span> <span class="hljs-built_in">len</span>(all_issues) &lt; num_issues:
all_issues.extend(batch)
batch = [] <span class="hljs-comment"># Flush batch for next time period</span>
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;Reached GitHub rate limit. Sleeping for one hour ...&quot;</span>)
time.sleep(<span class="hljs-number">60</span> * <span class="hljs-number">60</span> + <span class="hljs-number">1</span>)
all_issues.extend(batch)
df = pd.DataFrame.from_records(all_issues)
df.to_json(<span class="hljs-string">f&quot;<span class="hljs-subst">{issues_path}</span>/<span class="hljs-subst">{repo}</span>-issues.jsonl&quot;</span>, orient=<span class="hljs-string">&quot;records&quot;</span>, lines=<span class="hljs-literal">True</span>)
<span class="hljs-built_in">print</span>(
<span class="hljs-string">f&quot;Downloaded all the issues for <span class="hljs-subst">{repo}</span>! Dataset stored at <span class="hljs-subst">{issues_path}</span>/<span class="hljs-subst">{repo}</span>-issues.jsonl&quot;</span>
)`,wrap:!1}}),ns=new h({props:{code:"JTIzJTIwJUMzJThFbiUyMGRlcGVuZGVuJUM4JTlCJUM0JTgzJTIwZGUlMjBjb25leGl1bmVhJTIwdGElMjBsYSUyMGludGVybmV0JTJDJTIwYWNlc3QlMjBsdWNydSUyMHBvYXRlJTIwZHVyYSUyMGMlQzMlQTJ0ZXZhJTIwbWludXRlLi4uJTBBZmV0Y2hfaXNzdWVzKCk=",highlighted:`<span class="hljs-comment"># În dependență de conexiunea ta la internet, acest lucru poate dura câteva minute...</span>
fetch_issues()`,wrap:!1}}),ps=new h({props:{code:"aXNzdWVzX2RhdGFzZXQlMjAlM0QlMjBsb2FkX2RhdGFzZXQoJTIyanNvbiUyMiUyQyUyMGRhdGFfZmlsZXMlM0QlMjJkYXRhc2V0cy1pc3N1ZXMuanNvbmwlMjIlMkMlMjBzcGxpdCUzRCUyMnRyYWluJTIyKSUwQWlzc3Vlc19kYXRhc2V0",highlighted:`issues_dataset = load_dataset(<span class="hljs-string">&quot;json&quot;</span>, data_files=<span class="hljs-string">&quot;datasets-issues.jsonl&quot;</span>, split=<span class="hljs-string">&quot;train&quot;</span>)
issues_dataset`,wrap:!1}}),cs=new h({props:{code:"RGF0YXNldCglN0IlMEElMjAlMjAlMjAlMjBmZWF0dXJlcyUzQSUyMCU1Qid1cmwnJTJDJTIwJ3JlcG9zaXRvcnlfdXJsJyUyQyUyMCdsYWJlbHNfdXJsJyUyQyUyMCdjb21tZW50c191cmwnJTJDJTIwJ2V2ZW50c191cmwnJTJDJTIwJ2h0bWxfdXJsJyUyQyUyMCdpZCclMkMlMjAnbm9kZV9pZCclMkMlMjAnbnVtYmVyJyUyQyUyMCd0aXRsZSclMkMlMjAndXNlciclMkMlMjAnbGFiZWxzJyUyQyUyMCdzdGF0ZSclMkMlMjAnbG9ja2VkJyUyQyUyMCdhc3NpZ25lZSclMkMlMjAnYXNzaWduZWVzJyUyQyUyMCdtaWxlc3RvbmUnJTJDJTIwJ2NvbW1lbnRzJyUyQyUyMCdjcmVhdGVkX2F0JyUyQyUyMCd1cGRhdGVkX2F0JyUyQyUyMCdjbG9zZWRfYXQnJTJDJTIwJ2F1dGhvcl9hc3NvY2lhdGlvbiclMkMlMjAnYWN0aXZlX2xvY2tfcmVhc29uJyUyQyUyMCdwdWxsX3JlcXVlc3QnJTJDJTIwJ2JvZHknJTJDJTIwJ3RpbWVsaW5lX3VybCclMkMlMjAncGVyZm9ybWVkX3ZpYV9naXRodWJfYXBwJyU1RCUyQyUwQSUyMCUyMCUyMCUyMG51bV9yb3dzJTNBJTIwMzAxOSUwQSU3RCk=",highlighted:`Dataset({
features: [<span class="hljs-string">&#x27;url&#x27;</span>, <span class="hljs-string">&#x27;repository_url&#x27;</span>, <span class="hljs-string">&#x27;labels_url&#x27;</span>, <span class="hljs-string">&#x27;comments_url&#x27;</span>, <span class="hljs-string">&#x27;events_url&#x27;</span>, <span class="hljs-string">&#x27;html_url&#x27;</span>, <span class="hljs-string">&#x27;id&#x27;</span>, <span class="hljs-string">&#x27;node_id&#x27;</span>, <span class="hljs-string">&#x27;number&#x27;</span>, <span class="hljs-string">&#x27;title&#x27;</span>, <span class="hljs-string">&#x27;user&#x27;</span>, <span class="hljs-string">&#x27;labels&#x27;</span>, <span class="hljs-string">&#x27;state&#x27;</span>, <span class="hljs-string">&#x27;locked&#x27;</span>, <span class="hljs-string">&#x27;assignee&#x27;</span>, <span class="hljs-string">&#x27;assignees&#x27;</span>, <span class="hljs-string">&#x27;milestone&#x27;</span>, <span class="hljs-string">&#x27;comments&#x27;</span>, <span class="hljs-string">&#x27;created_at&#x27;</span>, <span class="hljs-string">&#x27;updated_at&#x27;</span>, <span class="hljs-string">&#x27;closed_at&#x27;</span>, <span class="hljs-string">&#x27;author_association&#x27;</span>, <span class="hljs-string">&#x27;active_lock_reason&#x27;</span>, <span class="hljs-string">&#x27;pull_request&#x27;</span>, <span class="hljs-string">&#x27;body&#x27;</span>, <span class="hljs-string">&#x27;timeline_url&#x27;</span>, <span class="hljs-string">&#x27;performed_via_github_app&#x27;</span>],
num_rows: <span class="hljs-number">3019</span>
})`,wrap:!1}}),ds=new Ps({props:{title:"Curățarea datelor",local:"cleaning-up-the-data",headingTag:"h2"}}),ms=new h({props:{code:"c2FtcGxlJTIwJTNEJTIwaXNzdWVzX2RhdGFzZXQuc2h1ZmZsZShzZWVkJTNENjY2KS5zZWxlY3QocmFuZ2UoMykpJTBBJTBBJTIzJTIwUHJpbnQlMjBvdXQlMjB0aGUlMjBVUkwlMjBhbmQlMjBwdWxsJTIwcmVxdWVzdCUyMGVudHJpZXMlMEFmb3IlMjB1cmwlMkMlMjBwciUyMGluJTIwemlwKHNhbXBsZSU1QiUyMmh0bWxfdXJsJTIyJTVEJTJDJTIwc2FtcGxlJTVCJTIycHVsbF9yZXF1ZXN0JTIyJTVEKSUzQSUwQSUyMCUyMCUyMCUyMHByaW50KGYlMjIlM0UlM0UlMjBVUkwlM0ElMjAlN0J1cmwlN0QlMjIpJTBBJTIwJTIwJTIwJTIwcHJpbnQoZiUyMiUzRSUzRSUyMFB1bGwlMjByZXF1ZXN0JTNBJTIwJTdCcHIlN0QlNUNuJTIyKQ==",highlighted:`sample = issues_dataset.shuffle(seed=<span class="hljs-number">666</span>).select(<span class="hljs-built_in">range</span>(<span class="hljs-number">3</span>))
<span class="hljs-comment"># Print out the URL and pull request entries</span>
<span class="hljs-keyword">for</span> url, pr <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(sample[<span class="hljs-string">&quot;html_url&quot;</span>], sample[<span class="hljs-string">&quot;pull_request&quot;</span>]):
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;&gt;&gt; URL: <span class="hljs-subst">{url}</span>&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;&gt;&gt; Pull request: <span class="hljs-subst">{pr}</span>\\n&quot;</span>)`,wrap:!1}}),os=new h({props:{code:"JTNFJTNFJTIwVVJMJTNBJTIwaHR0cHMlM0ElMkYlMkZnaXRodWIuY29tJTJGaHVnZ2luZ2ZhY2UlMkZkYXRhc2V0cyUyRnB1bGwlMkY4NTAlMEElM0UlM0UlMjBQdWxsJTIwcmVxdWVzdCUzQSUyMCU3Qid1cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGYXBpLmdpdGh1Yi5jb20lMkZyZXBvcyUyRmh1Z2dpbmdmYWNlJTJGZGF0YXNldHMlMkZwdWxscyUyRjg1MCclMkMlMjAnaHRtbF91cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGZ2l0aHViLmNvbSUyRmh1Z2dpbmdmYWNlJTJGZGF0YXNldHMlMkZwdWxsJTJGODUwJyUyQyUyMCdkaWZmX3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZnaXRodWIuY29tJTJGaHVnZ2luZ2ZhY2UlMkZkYXRhc2V0cyUyRnB1bGwlMkY4NTAuZGlmZiclMkMlMjAncGF0Y2hfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmdpdGh1Yi5jb20lMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJTJGcHVsbCUyRjg1MC5wYXRjaCclN0QlMEElMEElM0UlM0UlMjBVUkwlM0ElMjBodHRwcyUzQSUyRiUyRmdpdGh1Yi5jb20lMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJTJGaXNzdWVzJTJGMjc3MyUwQSUzRSUzRSUyMFB1bGwlMjByZXF1ZXN0JTNBJTIwTm9uZSUwQSUwQSUzRSUzRSUyMFVSTCUzQSUyMGh0dHBzJTNBJTJGJTJGZ2l0aHViLmNvbSUyRmh1Z2dpbmdmYWNlJTJGZGF0YXNldHMlMkZwdWxsJTJGNzgzJTBBJTNFJTNFJTIwUHVsbCUyMHJlcXVlc3QlM0ElMjAlN0IndXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmFwaS5naXRodWIuY29tJTJGcmVwb3MlMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJTJGcHVsbHMlMkY3ODMnJTJDJTIwJ2h0bWxfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmdpdGh1Yi5jb20lMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJTJGcHVsbCUyRjc4MyclMkMlMjAnZGlmZl91cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGZ2l0aHViLmNvbSUyRmh1Z2dpbmdmYWNlJTJGZGF0YXNldHMlMkZwdWxsJTJGNzgzLmRpZmYnJTJDJTIwJ3BhdGNoX3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZnaXRodWIuY29tJTJGaHVnZ2luZ2ZhY2UlMkZkYXRhc2V0cyUyRnB1bGwlMkY3ODMucGF0Y2gnJTdE",highlighted:`&gt;&gt; URL: https://github.com/huggingface/datasets/pull/<span class="hljs-number">850</span>
&gt;&gt; Pull request: {<span class="hljs-string">&#x27;url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/repos/huggingface/datasets/pulls/850&#x27;</span>, <span class="hljs-string">&#x27;html_url&#x27;</span>: <span class="hljs-string">&#x27;https://github.com/huggingface/datasets/pull/850&#x27;</span>, <span class="hljs-string">&#x27;diff_url&#x27;</span>: <span class="hljs-string">&#x27;https://github.com/huggingface/datasets/pull/850.diff&#x27;</span>, <span class="hljs-string">&#x27;patch_url&#x27;</span>: <span class="hljs-string">&#x27;https://github.com/huggingface/datasets/pull/850.patch&#x27;</span>}
&gt;&gt; URL: https://github.com/huggingface/datasets/issues/<span class="hljs-number">2773</span>
&gt;&gt; Pull request: <span class="hljs-literal">None</span>
&gt;&gt; URL: https://github.com/huggingface/datasets/pull/<span class="hljs-number">783</span>
&gt;&gt; Pull request: {<span class="hljs-string">&#x27;url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/repos/huggingface/datasets/pulls/783&#x27;</span>, <span class="hljs-string">&#x27;html_url&#x27;</span>: <span class="hljs-string">&#x27;https://github.com/huggingface/datasets/pull/783&#x27;</span>, <span class="hljs-string">&#x27;diff_url&#x27;</span>: <span class="hljs-string">&#x27;https://github.com/huggingface/datasets/pull/783.diff&#x27;</span>, <span class="hljs-string">&#x27;patch_url&#x27;</span>: <span class="hljs-string">&#x27;https://github.com/huggingface/datasets/pull/783.patch&#x27;</span>}`,wrap:!1}}),hs=new h({props:{code:"aXNzdWVzX2RhdGFzZXQlMjAlM0QlMjBpc3N1ZXNfZGF0YXNldC5tYXAoJTBBJTIwJTIwJTIwJTIwbGFtYmRhJTIweCUzQSUyMCU3QiUyMmlzX3B1bGxfcmVxdWVzdCUyMiUzQSUyMEZhbHNlJTIwaWYlMjB4JTVCJTIycHVsbF9yZXF1ZXN0JTIyJTVEJTIwaXMlMjBOb25lJTIwZWxzZSUyMFRydWUlN0QlMEEp",highlighted:`issues_dataset = issues_dataset.<span class="hljs-built_in">map</span>(
<span class="hljs-keyword">lambda</span> x: {<span class="hljs-string">&quot;is_pull_request&quot;</span>: <span class="hljs-literal">False</span> <span class="hljs-keyword">if</span> x[<span class="hljs-string">&quot;pull_request&quot;</span>] <span class="hljs-keyword">is</span> <span class="hljs-literal">None</span> <span class="hljs-keyword">else</span> <span class="hljs-literal">True</span>}
)`,wrap:!1}}),x=new qs({props:{$$slots:{default:[vt]},$$scope:{ctx:j}}}),js=new Ps({props:{title:"Îmbunătățirea datasetului",local:"augmenting-the-dataset",headingTag:"h2"}}),fs=new h({props:{code:"aXNzdWVfbnVtYmVyJTIwJTNEJTIwMjc5MiUwQXVybCUyMCUzRCUyMGYlMjJodHRwcyUzQSUyRiUyRmFwaS5naXRodWIuY29tJTJGcmVwb3MlMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJTJGaXNzdWVzJTJGJTdCaXNzdWVfbnVtYmVyJTdEJTJGY29tbWVudHMlMjIlMEFyZXNwb25zZSUyMCUzRCUyMHJlcXVlc3RzLmdldCh1cmwlMkMlMjBoZWFkZXJzJTNEaGVhZGVycyklMEFyZXNwb25zZS5qc29uKCk=",highlighted:`issue_number = <span class="hljs-number">2792</span>
url = <span class="hljs-string">f&quot;https://api.github.com/repos/huggingface/datasets/issues/<span class="hljs-subst">{issue_number}</span>/comments&quot;</span>
response = requests.get(url, headers=headers)
response.json()`,wrap:!1}}),ws=new h({props:{code:"JTVCJTdCJ3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZhcGkuZ2l0aHViLmNvbSUyRnJlcG9zJTJGaHVnZ2luZ2ZhY2UlMkZkYXRhc2V0cyUyRmlzc3VlcyUyRmNvbW1lbnRzJTJGODk3NTk0MTI4JyUyQyUwQSUyMCUyMCdodG1sX3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZnaXRodWIuY29tJTJGaHVnZ2luZ2ZhY2UlMkZkYXRhc2V0cyUyRnB1bGwlMkYyNzkyJTIzaXNzdWVjb21tZW50LTg5NzU5NDEyOCclMkMlMEElMjAlMjAnaXNzdWVfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmFwaS5naXRodWIuY29tJTJGcmVwb3MlMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJTJGaXNzdWVzJTJGMjc5MiclMkMlMEElMjAlMjAnaWQnJTNBJTIwODk3NTk0MTI4JTJDJTBBJTIwJTIwJ25vZGVfaWQnJTNBJTIwJ0lDX2t3RE9EdW56cHM0MWdETVEnJTJDJTBBJTIwJTIwJ3VzZXInJTNBJTIwJTdCJ2xvZ2luJyUzQSUyMCdiaGF2aXR2eWFtYWxpayclMkMlMEElMjAlMjAlMjAnaWQnJTNBJTIwMTk3MTg4MTglMkMlMEElMjAlMjAlMjAnbm9kZV9pZCclM0ElMjAnTURRNlZYTmxjakU1TnpFNE9ERTQnJTJDJTBBJTIwJTIwJTIwJ2F2YXRhcl91cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGYXZhdGFycy5naXRodWJ1c2VyY29udGVudC5jb20lMkZ1JTJGMTk3MTg4MTglM0Z2JTNENCclMkMlMEElMjAlMjAlMjAnZ3JhdmF0YXJfaWQnJTNBJTIwJyclMkMlMEElMjAlMjAlMjAndXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmFwaS5naXRodWIuY29tJTJGdXNlcnMlMkZiaGF2aXR2eWFtYWxpayclMkMlMEElMjAlMjAlMjAnaHRtbF91cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGZ2l0aHViLmNvbSUyRmJoYXZpdHZ5YW1hbGlrJyUyQyUwQSUyMCUyMCUyMCdmb2xsb3dlcnNfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmFwaS5naXRodWIuY29tJTJGdXNlcnMlMkZiaGF2aXR2eWFtYWxpayUyRmZvbGxvd2VycyclMkMlMEElMjAlMjAlMjAnZm9sbG93aW5nX3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZhcGkuZ2l0aHViLmNvbSUyRnVzZXJzJTJGYmhhdml0dnlhbWFsaWslMkZmb2xsb3dpbmclN0IlMkZvdGhlcl91c2VyJTdEJyUyQyUwQSUyMCUyMCUyMCdnaXN0c191cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGYXBpLmdpdGh1Yi5jb20lMkZ1c2VycyUyRmJoYXZpdHZ5YW1hbGlrJTJGZ2lzdHMlN0IlMkZnaXN0X2lkJTdEJyUyQyUwQSUyMCUyMCUyMCdzdGFycmVkX3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZhcGkuZ2l0aHViLmNvbSUyRnVzZXJzJTJGYmhhdml0dnlhbWFsaWslMkZzdGFycmVkJTdCJTJGb3duZXIlN0QlN0IlMkZyZXBvJTdEJyUyQyUwQSUyMCUyMCUyMCdzdWJzY3JpcHRpb25zX3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZhcGkuZ2l0aHViLmNvbSUyRnVzZXJzJTJGYmhhdml0dnlhbWFsaWslMkZzdWJzY3JpcHRpb25zJyUyQyUwQSUyMCUyMCUyMCdvcmdhbml6YXRpb25zX3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZhcGkuZ2l0aHViLmNvbSUyRnVzZXJzJTJGYmhhdml0dnlhbWFsaWslMkZvcmdzJyUyQyUwQSUyMCUyMCUyMCdyZXBvc191cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGYXBpLmdpdGh1Yi5jb20lMkZ1c2VycyUyRmJoYXZpdHZ5YW1hbGlrJTJGcmVwb3MnJTJDJTBBJTIwJTIwJTIwJ2V2ZW50c191cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGYXBpLmdpdGh1Yi5jb20lMkZ1c2VycyUyRmJoYXZpdHZ5YW1hbGlrJTJGZXZlbnRzJTdCJTJGcHJpdmFjeSU3RCclMkMlMEElMjAlMjAlMjAncmVjZWl2ZWRfZXZlbnRzX3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZhcGkuZ2l0aHViLmNvbSUyRnVzZXJzJTJGYmhhdml0dnlhbWFsaWslMkZyZWNlaXZlZF9ldmVudHMnJTJDJTBBJTIwJTIwJTIwJ3R5cGUnJTNBJTIwJ1VzZXInJTJDJTBBJTIwJTIwJTIwJ3NpdGVfYWRtaW4nJTNBJTIwRmFsc2UlN0QlMkMlMEElMjAlMjAnY3JlYXRlZF9hdCclM0ElMjAnMjAyMS0wOC0xMlQxMiUzQTIxJTNBNTJaJyUyQyUwQSUyMCUyMCd1cGRhdGVkX2F0JyUzQSUyMCcyMDIxLTA4LTEyVDEyJTNBMzElM0ExN1onJTJDJTBBJTIwJTIwJ2F1dGhvcl9hc3NvY2lhdGlvbiclM0ElMjAnQ09OVFJJQlVUT1InJTJDJTBBJTIwJTIwJ2JvZHknJTNBJTIwJTIyJTQwYWxiZXJ0dmlsbGFub3ZhJTIwbXklMjB0ZXN0cyUyMGFyZSUyMGZhaWxpbmclMjBoZXJlJTNBJTVDciU1Q24lNjAlNjAlNjAlNUNyJTVDbmRhdGFzZXRfbmFtZSUyMCUzRCUyMCdnb29hcSclNUNyJTVDbiU1Q3IlNUNuJTIwJTIwJTIwJTIwZGVmJTIwdGVzdF9sb2FkX2RhdGFzZXQoc2VsZiUyQyUyMGRhdGFzZXRfbmFtZSklM0ElNUNyJTVDbiUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGNvbmZpZ3MlMjAlM0QlMjBzZWxmLmRhdGFzZXRfdGVzdGVyLmxvYWRfYWxsX2NvbmZpZ3MoZGF0YXNldF9uYW1lJTJDJTIwaXNfbG9jYWwlM0RUcnVlKSU1QiUzQTElNUQlNUNyJTVDbiUzRSUyMCUyMCUyMCUyMCUyMCUyMCUyMHNlbGYuZGF0YXNldF90ZXN0ZXIuY2hlY2tfbG9hZF9kYXRhc2V0KGRhdGFzZXRfbmFtZSUyQyUyMGNvbmZpZ3MlMkMlMjBpc19sb2NhbCUzRFRydWUlMkMlMjB1c2VfbG9jYWxfZHVtbXlfZGF0YSUzRFRydWUpJTVDciU1Q24lNUNyJTVDbnRlc3RzJTJGdGVzdF9kYXRhc2V0X2NvbW1vbi5weSUzQTIzNCUzQSUyMCU1Q3IlNUNuXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMCU1Q3IlNUNudGVzdHMlMkZ0ZXN0X2RhdGFzZXRfY29tbW9uLnB5JTNBMTg3JTNBJTIwaW4lMjBjaGVja19sb2FkX2RhdGFzZXQlNUNyJTVDbiUyMCUyMCUyMCUyMHNlbGYucGFyZW50LmFzc2VydFRydWUobGVuKGRhdGFzZXQlNUJzcGxpdCU1RCklMjAlM0UlMjAwKSU1Q3IlNUNuRSUyMCUyMCUyMEFzc2VydGlvbkVycm9yJTNBJTIwRmFsc2UlMjBpcyUyMG5vdCUyMHRydWUlNUNyJTVDbiU2MCU2MCU2MCU1Q3IlNUNuV2hlbiUyMEklMjB0cnklMjBsb2FkaW5nJTIwZGF0YXNldCUyMG9uJTIwbG9jYWwlMjBtYWNoaW5lJTIwaXQlMjB3b3JrcyUyMGZpbmUuJTIwQW55JTIwc3VnZ2VzdGlvbnMlMjBvbiUyMGhvdyUyMGNhbiUyMEklMjBhdm9pZCUyMHRoaXMlMjBlcnJvciUzRiUyMiUyQyUwQSUyMCUyMCdwZXJmb3JtZWRfdmlhX2dpdGh1Yl9hcHAnJTNBJTIwTm9uZSU3RCU1RA==",highlighted:`[{<span class="hljs-string">&#x27;url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/repos/huggingface/datasets/issues/comments/897594128&#x27;</span>,
<span class="hljs-string">&#x27;html_url&#x27;</span>: <span class="hljs-string">&#x27;https://github.com/huggingface/datasets/pull/2792#issuecomment-897594128&#x27;</span>,
<span class="hljs-string">&#x27;issue_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/repos/huggingface/datasets/issues/2792&#x27;</span>,
<span class="hljs-string">&#x27;id&#x27;</span>: <span class="hljs-number">897594128</span>,
<span class="hljs-string">&#x27;node_id&#x27;</span>: <span class="hljs-string">&#x27;IC_kwDODunzps41gDMQ&#x27;</span>,
<span class="hljs-string">&#x27;user&#x27;</span>: {<span class="hljs-string">&#x27;login&#x27;</span>: <span class="hljs-string">&#x27;bhavitvyamalik&#x27;</span>,
<span class="hljs-string">&#x27;id&#x27;</span>: <span class="hljs-number">19718818</span>,
<span class="hljs-string">&#x27;node_id&#x27;</span>: <span class="hljs-string">&#x27;MDQ6VXNlcjE5NzE4ODE4&#x27;</span>,
<span class="hljs-string">&#x27;avatar_url&#x27;</span>: <span class="hljs-string">&#x27;https://avatars.githubusercontent.com/u/19718818?v=4&#x27;</span>,
<span class="hljs-string">&#x27;gravatar_id&#x27;</span>: <span class="hljs-string">&#x27;&#x27;</span>,
<span class="hljs-string">&#x27;url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik&#x27;</span>,
<span class="hljs-string">&#x27;html_url&#x27;</span>: <span class="hljs-string">&#x27;https://github.com/bhavitvyamalik&#x27;</span>,
<span class="hljs-string">&#x27;followers_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/followers&#x27;</span>,
<span class="hljs-string">&#x27;following_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/following{/other_user}&#x27;</span>,
<span class="hljs-string">&#x27;gists_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/gists{/gist_id}&#x27;</span>,
<span class="hljs-string">&#x27;starred_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/starred{/owner}{/repo}&#x27;</span>,
<span class="hljs-string">&#x27;subscriptions_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/subscriptions&#x27;</span>,
<span class="hljs-string">&#x27;organizations_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/orgs&#x27;</span>,
<span class="hljs-string">&#x27;repos_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/repos&#x27;</span>,
<span class="hljs-string">&#x27;events_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/events{/privacy}&#x27;</span>,
<span class="hljs-string">&#x27;received_events_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/received_events&#x27;</span>,
<span class="hljs-string">&#x27;type&#x27;</span>: <span class="hljs-string">&#x27;User&#x27;</span>,
<span class="hljs-string">&#x27;site_admin&#x27;</span>: <span class="hljs-literal">False</span>},
<span class="hljs-string">&#x27;created_at&#x27;</span>: <span class="hljs-string">&#x27;2021-08-12T12:21:52Z&#x27;</span>,
<span class="hljs-string">&#x27;updated_at&#x27;</span>: <span class="hljs-string">&#x27;2021-08-12T12:31:17Z&#x27;</span>,
<span class="hljs-string">&#x27;author_association&#x27;</span>: <span class="hljs-string">&#x27;CONTRIBUTOR&#x27;</span>,
<span class="hljs-string">&#x27;body&#x27;</span>: <span class="hljs-string">&quot;@albertvillanova my tests are failing here:\\r\\n\`\`\`\\r\\ndataset_name = &#x27;gooaq&#x27;\\r\\n\\r\\n def test_load_dataset(self, dataset_name):\\r\\n configs = self.dataset_tester.load_all_configs(dataset_name, is_local=True)[:1]\\r\\n&gt; self.dataset_tester.check_load_dataset(dataset_name, configs, is_local=True, use_local_dummy_data=True)\\r\\n\\r\\ntests/test_dataset_common.py:234: \\r\\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \\r\\ntests/test_dataset_common.py:187: in check_load_dataset\\r\\n self.parent.assertTrue(len(dataset[split]) &gt; 0)\\r\\nE AssertionError: False is not true\\r\\n\`\`\`\\r\\nWhen I try loading dataset on local machine it works fine. Any suggestions on how can I avoid this error?&quot;</span>,
<span class="hljs-string">&#x27;performed_via_github_app&#x27;</span>: <span class="hljs-literal">None</span>}]`,wrap:!1}}),Zs=new h({props:{code:"ZGVmJTIwZ2V0X2NvbW1lbnRzKGlzc3VlX251bWJlciklM0ElMEElMjAlMjAlMjAlMjB1cmwlMjAlM0QlMjBmJTIyaHR0cHMlM0ElMkYlMkZhcGkuZ2l0aHViLmNvbSUyRnJlcG9zJTJGaHVnZ2luZ2ZhY2UlMkZkYXRhc2V0cyUyRmlzc3VlcyUyRiU3Qmlzc3VlX251bWJlciU3RCUyRmNvbW1lbnRzJTIyJTBBJTIwJTIwJTIwJTIwcmVzcG9uc2UlMjAlM0QlMjByZXF1ZXN0cy5nZXQodXJsJTJDJTIwaGVhZGVycyUzRGhlYWRlcnMpJTBBJTIwJTIwJTIwJTIwcmV0dXJuJTIwJTVCciU1QiUyMmJvZHklMjIlNUQlMjBmb3IlMjByJTIwaW4lMjByZXNwb25zZS5qc29uKCklNUQlMEElMEElMEElMjMlMjBUZXN0JUM0JTgzbSUyMGRhYyVDNCU4MyUyMGZ1bmMlQzglOUJpYSUyMGx1Y3JlYXolQzQlODMlMjBjdW0lMjBuZSUyMGRvcmltJTBBZ2V0X2NvbW1lbnRzKDI3OTIp",highlighted:`<span class="hljs-keyword">def</span> <span class="hljs-title function_">get_comments</span>(<span class="hljs-params">issue_number</span>):
url = <span class="hljs-string">f&quot;https://api.github.com/repos/huggingface/datasets/issues/<span class="hljs-subst">{issue_number}</span>/comments&quot;</span>
response = requests.get(url, headers=headers)
<span class="hljs-keyword">return</span> [r[<span class="hljs-string">&quot;body&quot;</span>] <span class="hljs-keyword">for</span> r <span class="hljs-keyword">in</span> response.json()]
<span class="hljs-comment"># Testăm dacă funcția lucrează cum ne dorim</span>
get_comments(<span class="hljs-number">2792</span>)`,wrap:!1}}),Gs=new h({props:{code:"JTVCJTIyJTQwYWxiZXJ0dmlsbGFub3ZhJTIwbXklMjB0ZXN0cyUyMGFyZSUyMGZhaWxpbmclMjBoZXJlJTNBJTVDciU1Q24lNjAlNjAlNjAlNUNyJTVDbmRhdGFzZXRfbmFtZSUyMCUzRCUyMCdnb29hcSclNUNyJTVDbiU1Q3IlNUNuJTIwJTIwJTIwJTIwZGVmJTIwdGVzdF9sb2FkX2RhdGFzZXQoc2VsZiUyQyUyMGRhdGFzZXRfbmFtZSklM0ElNUNyJTVDbiUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGNvbmZpZ3MlMjAlM0QlMjBzZWxmLmRhdGFzZXRfdGVzdGVyLmxvYWRfYWxsX2NvbmZpZ3MoZGF0YXNldF9uYW1lJTJDJTIwaXNfbG9jYWwlM0RUcnVlKSU1QiUzQTElNUQlNUNyJTVDbiUzRSUyMCUyMCUyMCUyMCUyMCUyMCUyMHNlbGYuZGF0YXNldF90ZXN0ZXIuY2hlY2tfbG9hZF9kYXRhc2V0KGRhdGFzZXRfbmFtZSUyQyUyMGNvbmZpZ3MlMkMlMjBpc19sb2NhbCUzRFRydWUlMkMlMjB1c2VfbG9jYWxfZHVtbXlfZGF0YSUzRFRydWUpJTVDciU1Q24lNUNyJTVDbnRlc3RzJTJGdGVzdF9kYXRhc2V0X2NvbW1vbi5weSUzQTIzNCUzQSUyMCU1Q3IlNUNuXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMCU1Q3IlNUNudGVzdHMlMkZ0ZXN0X2RhdGFzZXRfY29tbW9uLnB5JTNBMTg3JTNBJTIwaW4lMjBjaGVja19sb2FkX2RhdGFzZXQlNUNyJTVDbiUyMCUyMCUyMCUyMHNlbGYucGFyZW50LmFzc2VydFRydWUobGVuKGRhdGFzZXQlNUJzcGxpdCU1RCklMjAlM0UlMjAwKSU1Q3IlNUNuRSUyMCUyMCUyMEFzc2VydGlvbkVycm9yJTNBJTIwRmFsc2UlMjBpcyUyMG5vdCUyMHRydWUlNUNyJTVDbiU2MCU2MCU2MCU1Q3IlNUNuV2hlbiUyMEklMjB0cnklMjBsb2FkaW5nJTIwZGF0YXNldCUyMG9uJTIwbG9jYWwlMjBtYWNoaW5lJTIwaXQlMjB3b3JrcyUyMGZpbmUuJTIwQW55JTIwc3VnZ2VzdGlvbnMlMjBvbiUyMGhvdyUyMGNhbiUyMEklMjBhdm9pZCUyMHRoaXMlMjBlcnJvciUzRiUyMiU1RA==",highlighted:'[<span class="hljs-string">&quot;@albertvillanova my tests are failing here:\\r\\n```\\r\\ndataset_name = &#x27;gooaq&#x27;\\r\\n\\r\\n def test_load_dataset(self, dataset_name):\\r\\n configs = self.dataset_tester.load_all_configs(dataset_name, is_local=True)[:1]\\r\\n&gt; self.dataset_tester.check_load_dataset(dataset_name, configs, is_local=True, use_local_dummy_data=True)\\r\\n\\r\\ntests/test_dataset_common.py:234: \\r\\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \\r\\ntests/test_dataset_common.py:187: in check_load_dataset\\r\\n self.parent.assertTrue(len(dataset[split]) &gt; 0)\\r\\nE AssertionError: False is not true\\r\\n```\\r\\nWhen I try loading dataset on local machine it works fine. Any suggestions on how can I avoid this error?&quot;</span>]',wrap:!1}}),Rs=new h({props:{code:"JTIzJTIwRGVwZW5kaW5nJTIwb24lMjB5b3VyJTIwaW50ZXJuZXQlMjBjb25uZWN0aW9uJTJDJTIwdGhpcyUyMGNhbiUyMHRha2UlMjBhJTIwZmV3JTIwbWludXRlcy4uLiUwQWlzc3Vlc193aXRoX2NvbW1lbnRzX2RhdGFzZXQlMjAlM0QlMjBpc3N1ZXNfZGF0YXNldC5tYXAoJTBBJTIwJTIwJTIwJTIwbGFtYmRhJTIweCUzQSUyMCU3QiUyMmNvbW1lbnRzJTIyJTNBJTIwZ2V0X2NvbW1lbnRzKHglNUIlMjJudW1iZXIlMjIlNUQpJTdEJTBBKQ==",highlighted:`<span class="hljs-comment"># Depending on your internet connection, this can take a few minutes...</span>
issues_with_comments_dataset = issues_dataset.<span class="hljs-built_in">map</span>(
<span class="hljs-keyword">lambda</span> x: {<span class="hljs-string">&quot;comments&quot;</span>: get_comments(x[<span class="hljs-string">&quot;number&quot;</span>])}
)`,wrap:!1}}),Xs=new Ps({props:{title:"Încărcarea datasetului pe Hugging Face Hub",local:"uploading-the-dataset-to-the-hugging-face-hub",headingTag:"h2"}}),zs=new Xt({props:{id:"HaN6qCr_Afc"}}),_s=new h({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMG5vdGVib29rX2xvZ2luJTBBJTBBbm90ZWJvb2tfbG9naW4oKQ==",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> notebook_login
notebook_login()`,wrap:!1}}),vs=new h({props:{code:"aHVnZ2luZ2ZhY2UtY2xpJTIwbG9naW4=",highlighted:"huggingface-cli login",wrap:!1}}),Bs=new h({props:{code:"aXNzdWVzX3dpdGhfY29tbWVudHNfZGF0YXNldC5wdXNoX3RvX2h1YiglMjJnaXRodWItaXNzdWVzJTIyKQ==",highlighted:'issues_with_comments_dataset.push_to_hub(<span class="hljs-string">&quot;github-issues&quot;</span>)',wrap:!1}}),Ws=new h({props:{code:"cmVtb3RlX2RhdGFzZXQlMjAlM0QlMjBsb2FkX2RhdGFzZXQoJTIybGV3dHVuJTJGZ2l0aHViLWlzc3VlcyUyMiUyQyUyMHNwbGl0JTNEJTIydHJhaW4lMjIpJTBBcmVtb3RlX2RhdGFzZXQ=",highlighted:`remote_dataset = load_dataset(<span class="hljs-string">&quot;lewtun/github-issues&quot;</span>, split=<span class="hljs-string">&quot;train&quot;</span>)
remote_dataset`,wrap:!1}}),Hs=new h({props:{code:"RGF0YXNldCglN0IlMEElMjAlMjAlMjAlMjBmZWF0dXJlcyUzQSUyMCU1Qid1cmwnJTJDJTIwJ3JlcG9zaXRvcnlfdXJsJyUyQyUyMCdsYWJlbHNfdXJsJyUyQyUyMCdjb21tZW50c191cmwnJTJDJTIwJ2V2ZW50c191cmwnJTJDJTIwJ2h0bWxfdXJsJyUyQyUyMCdpZCclMkMlMjAnbm9kZV9pZCclMkMlMjAnbnVtYmVyJyUyQyUyMCd0aXRsZSclMkMlMjAndXNlciclMkMlMjAnbGFiZWxzJyUyQyUyMCdzdGF0ZSclMkMlMjAnbG9ja2VkJyUyQyUyMCdhc3NpZ25lZSclMkMlMjAnYXNzaWduZWVzJyUyQyUyMCdtaWxlc3RvbmUnJTJDJTIwJ2NvbW1lbnRzJyUyQyUyMCdjcmVhdGVkX2F0JyUyQyUyMCd1cGRhdGVkX2F0JyUyQyUyMCdjbG9zZWRfYXQnJTJDJTIwJ2F1dGhvcl9hc3NvY2lhdGlvbiclMkMlMjAnYWN0aXZlX2xvY2tfcmVhc29uJyUyQyUyMCdwdWxsX3JlcXVlc3QnJTJDJTIwJ2JvZHknJTJDJTIwJ3BlcmZvcm1lZF92aWFfZ2l0aHViX2FwcCclMkMlMjAnaXNfcHVsbF9yZXF1ZXN0JyU1RCUyQyUwQSUyMCUyMCUyMCUyMG51bV9yb3dzJTNBJTIwMjg1NSUwQSU3RCk=",highlighted:`Dataset({
features: [<span class="hljs-string">&#x27;url&#x27;</span>, <span class="hljs-string">&#x27;repository_url&#x27;</span>, <span class="hljs-string">&#x27;labels_url&#x27;</span>, <span class="hljs-string">&#x27;comments_url&#x27;</span>, <span class="hljs-string">&#x27;events_url&#x27;</span>, <span class="hljs-string">&#x27;html_url&#x27;</span>, <span class="hljs-string">&#x27;id&#x27;</span>, <span class="hljs-string">&#x27;node_id&#x27;</span>, <span class="hljs-string">&#x27;number&#x27;</span>, <span class="hljs-string">&#x27;title&#x27;</span>, <span class="hljs-string">&#x27;user&#x27;</span>, <span class="hljs-string">&#x27;labels&#x27;</span>, <span class="hljs-string">&#x27;state&#x27;</span>, <span class="hljs-string">&#x27;locked&#x27;</span>, <span class="hljs-string">&#x27;assignee&#x27;</span>, <span class="hljs-string">&#x27;assignees&#x27;</span>, <span class="hljs-string">&#x27;milestone&#x27;</span>, <span class="hljs-string">&#x27;comments&#x27;</span>, <span class="hljs-string">&#x27;created_at&#x27;</span>, <span class="hljs-string">&#x27;updated_at&#x27;</span>, <span class="hljs-string">&#x27;closed_at&#x27;</span>, <span class="hljs-string">&#x27;author_association&#x27;</span>, <span class="hljs-string">&#x27;active_lock_reason&#x27;</span>, <span class="hljs-string">&#x27;pull_request&#x27;</span>, <span class="hljs-string">&#x27;body&#x27;</span>, <span class="hljs-string">&#x27;performed_via_github_app&#x27;</span>, <span class="hljs-string">&#x27;is_pull_request&#x27;</span>],
num_rows: <span class="hljs-number">2855</span>
})`,wrap:!1}}),G=new qs({props:{$$slots:{default:[Vt]},$$scope:{ctx:j}}}),Fs=new Ps({props:{title:"Crearea unei dataset card",local:"creating-a-dataset-card",headingTag:"h2"}}),X=new qs({props:{$$slots:{default:[Bt]},$$scope:{ctx:j}}}),z=new qs({props:{$$slots:{default:[$t]},$$scope:{ctx:j}}}),Ls=new Nt({props:{source:"https://github.com/huggingface/course/blob/main/chapters/rum/chapter5/5.mdx"}}),{c(){i=p("meta"),T=a(),u=p("p"),U=a(),M(_.$$.fragment),se=a(),M(k.$$.fragment),ee=a(),v=p("p"),v.innerHTML=Rl,le=a(),V=p("ul"),V.innerHTML=Cl,te=a(),B=p("p"),B.textContent=Xl,ae=a(),M($.$$.fragment),ne=a(),W=p("p"),W.innerHTML=zl,ie=a(),g=p("div"),g.innerHTML=Nl,pe=a(),H=p("p"),H.textContent=_l,ce=a(),b=p("div"),b.innerHTML=kl,re=a(),Q=p("p"),Q.innerHTML=vl,ue=a(),F=p("p"),F.innerHTML=Vl,Me=a(),M(Y.$$.fragment),de=a(),E=p("p"),E.innerHTML=Bl,Je=a(),M(A.$$.fragment),me=a(),S=p("p"),S.innerHTML=$l,oe=a(),M(D.$$.fragment),ye=a(),M(L.$$.fragment),he=a(),q=p("p"),q.innerHTML=Wl,Te=a(),M(P.$$.fragment),Ue=a(),M(O.$$.fragment),je=a(),K=p("p"),K.innerHTML=Hl,ge=a(),M(f.$$.fragment),be=a(),ss=p("p"),ss.innerHTML=Ql,fe=a(),M(es.$$.fragment),we=a(),M(w.$$.fragment),xe=a(),ls=p("p"),ls.textContent=Fl,Ze=a(),M(ts.$$.fragment),Ge=a(),as=p("p"),as.innerHTML=Yl,Ie=a(),M(ns.$$.fragment),Re=a(),is=p("p"),is.innerHTML=El,Ce=a(),M(ps.$$.fragment),Xe=a(),M(cs.$$.fragment),ze=a(),rs=p("p"),rs.innerHTML=Al,Ne=a(),us=p("blockquote"),us.innerHTML=Sl,_e=a(),Ms=p("p"),Ms.textContent=Dl,ke=a(),M(ds.$$.fragment),ve=a(),Js=p("p"),Js.innerHTML=Ll,Ve=a(),M(ms.$$.fragment),Be=a(),M(os.$$.fragment),$e=a(),ys=p("p"),ys.innerHTML=ql,We=a(),M(hs.$$.fragment),He=a(),M(x.$$.fragment),Qe=a(),Ts=p("p"),Ts.textContent=Pl,Fe=a(),Us=p("p"),Us.textContent=Ol,Ye=a(),M(js.$$.fragment),Ee=a(),gs=p("p"),gs.textContent=Kl,Ae=a(),Z=p("div"),Z.innerHTML=st,Se=a(),bs=p("p"),bs.innerHTML=et,De=a(),M(fs.$$.fragment),Le=a(),M(ws.$$.fragment),qe=a(),xs=p("p"),xs.innerHTML=lt,Pe=a(),M(Zs.$$.fragment),Oe=a(),M(Gs.$$.fragment),Ke=a(),Is=p("p"),Is.innerHTML=tt,sl=a(),M(Rs.$$.fragment),el=a(),Cs=p("p"),Cs.textContent=at,ll=a(),M(Xs.$$.fragment),tl=a(),M(zs.$$.fragment),al=a(),Ns=p("p"),Ns.innerHTML=nt,nl=a(),M(_s.$$.fragment),il=a(),ks=p("p"),ks.innerHTML=it,pl=a(),M(vs.$$.fragment),cl=a(),Vs=p("p"),Vs.textContent=pt,rl=a(),M(Bs.$$.fragment),ul=a(),$s=p("p"),$s.innerHTML=ct,Ml=a(),M(Ws.$$.fragment),dl=a(),M(Hs.$$.fragment),Jl=a(),Qs=p("p"),Qs.innerHTML=rt,ml=a(),M(G.$$.fragment),ol=a(),M(Fs.$$.fragment),yl=a(),Ys=p("p"),Ys.textContent=ut,hl=a(),Es=p("p"),Es.innerHTML=Mt,Tl=a(),As=p("ol"),As.innerHTML=dt,Ul=a(),I=p("div"),I.innerHTML=Jt,jl=a(),R=p("ol"),R.innerHTML=mt,gl=a(),Ss=p("p"),Ss.innerHTML=ot,bl=a(),C=p("div"),C.innerHTML=yt,fl=a(),M(X.$$.fragment),wl=a(),Ds=p("p"),Ds.textContent=ht,xl=a(),M(z.$$.fragment),Zl=a(),M(Ls.$$.fragment),Gl=a(),Ks=p("p"),this.h()},l(s){const e=Rt("svelte-u9bgzb",document.head);i=c(e,"META",{name:!0,content:!0}),e.forEach(l),T=n(s),u=c(s,"P",{}),wt(u).forEach(l),U=n(s),d(_.$$.fragment,s),se=n(s),d(k.$$.fragment,s),ee=n(s),v=c(s,"P",{"data-svelte-h":!0}),r(v)!=="svelte-q9med2"&&(v.innerHTML=Rl),le=n(s),V=c(s,"UL",{"data-svelte-h":!0}),r(V)!=="svelte-1g50u3y"&&(V.innerHTML=Cl),te=n(s),B=c(s,"P",{"data-svelte-h":!0}),r(B)!=="svelte-jqj8kl"&&(B.textContent=Xl),ae=n(s),d($.$$.fragment,s),ne=n(s),W=c(s,"P",{"data-svelte-h":!0}),r(W)!=="svelte-1tslfmk"&&(W.innerHTML=zl),ie=n(s),g=c(s,"DIV",{class:!0,"data-svelte-h":!0}),r(g)!=="svelte-1nfcdyp"&&(g.innerHTML=Nl),pe=n(s),H=c(s,"P",{"data-svelte-h":!0}),r(H)!=="svelte-1v73hmj"&&(H.textContent=_l),ce=n(s),b=c(s,"DIV",{class:!0,"data-svelte-h":!0}),r(b)!=="svelte-1os1cjx"&&(b.innerHTML=kl),re=n(s),Q=c(s,"P",{"data-svelte-h":!0}),r(Q)!=="svelte-15bl581"&&(Q.innerHTML=vl),ue=n(s),F=c(s,"P",{"data-svelte-h":!0}),r(F)!=="svelte-fzac58"&&(F.innerHTML=Vl),Me=n(s),d(Y.$$.fragment,s),de=n(s),E=c(s,"P",{"data-svelte-h":!0}),r(E)!=="svelte-ot26xa"&&(E.innerHTML=Bl),Je=n(s),d(A.$$.fragment,s),me=n(s),S=c(s,"P",{"data-svelte-h":!0}),r(S)!=="svelte-7clxu8"&&(S.innerHTML=$l),oe=n(s),d(D.$$.fragment,s),ye=n(s),d(L.$$.fragment,s),he=n(s),q=c(s,"P",{"data-svelte-h":!0}),r(q)!=="svelte-s2gl5h"&&(q.innerHTML=Wl),Te=n(s),d(P.$$.fragment,s),Ue=n(s),d(O.$$.fragment,s),je=n(s),K=c(s,"P",{"data-svelte-h":!0}),r(K)!=="svelte-w28vk1"&&(K.innerHTML=Hl),ge=n(s),d(f.$$.fragment,s),be=n(s),ss=c(s,"P",{"data-svelte-h":!0}),r(ss)!=="svelte-1o5tle4"&&(ss.innerHTML=Ql),fe=n(s),d(es.$$.fragment,s),we=n(s),d(w.$$.fragment,s),xe=n(s),ls=c(s,"P",{"data-svelte-h":!0}),r(ls)!=="svelte-rumg9a"&&(ls.textContent=Fl),Ze=n(s),d(ts.$$.fragment,s),Ge=n(s),as=c(s,"P",{"data-svelte-h":!0}),r(as)!=="svelte-dvol3l"&&(as.innerHTML=Yl),Ie=n(s),d(ns.$$.fragment,s),Re=n(s),is=c(s,"P",{"data-svelte-h":!0}),r(is)!=="svelte-kikhs"&&(is.innerHTML=El),Ce=n(s),d(ps.$$.fragment,s),Xe=n(s),d(cs.$$.fragment,s),ze=n(s),rs=c(s,"P",{"data-svelte-h":!0}),r(rs)!=="svelte-1kghnkq"&&(rs.innerHTML=Al),Ne=n(s),us=c(s,"BLOCKQUOTE",{"data-svelte-h":!0}),r(us)!=="svelte-165ve2g"&&(us.innerHTML=Sl),_e=n(s),Ms=c(s,"P",{"data-svelte-h":!0}),r(Ms)!=="svelte-37i03s"&&(Ms.textContent=Dl),ke=n(s),d(ds.$$.fragment,s),ve=n(s),Js=c(s,"P",{"data-svelte-h":!0}),r(Js)!=="svelte-xtwl00"&&(Js.innerHTML=Ll),Ve=n(s),d(ms.$$.fragment,s),Be=n(s),d(os.$$.fragment,s),$e=n(s),ys=c(s,"P",{"data-svelte-h":!0}),r(ys)!=="svelte-1djz2j8"&&(ys.innerHTML=ql),We=n(s),d(hs.$$.fragment,s),He=n(s),d(x.$$.fragment,s),Qe=n(s),Ts=c(s,"P",{"data-svelte-h":!0}),r(Ts)!=="svelte-1pc596d"&&(Ts.textContent=Pl),Fe=n(s),Us=c(s,"P",{"data-svelte-h":!0}),r(Us)!=="svelte-11000t7"&&(Us.textContent=Ol),Ye=n(s),d(js.$$.fragment,s),Ee=n(s),gs=c(s,"P",{"data-svelte-h":!0}),r(gs)!=="svelte-1t6wt29"&&(gs.textContent=Kl),Ae=n(s),Z=c(s,"DIV",{class:!0,"data-svelte-h":!0}),r(Z)!=="svelte-1qll6ft"&&(Z.innerHTML=st),Se=n(s),bs=c(s,"P",{"data-svelte-h":!0}),r(bs)!=="svelte-pa5erq"&&(bs.innerHTML=et),De=n(s),d(fs.$$.fragment,s),Le=n(s),d(ws.$$.fragment,s),qe=n(s),xs=c(s,"P",{"data-svelte-h":!0}),r(xs)!=="svelte-1xjklhl"&&(xs.innerHTML=lt),Pe=n(s),d(Zs.$$.fragment,s),Oe=n(s),d(Gs.$$.fragment,s),Ke=n(s),Is=c(s,"P",{"data-svelte-h":!0}),r(Is)!=="svelte-sqy8v7"&&(Is.innerHTML=tt),sl=n(s),d(Rs.$$.fragment,s),el=n(s),Cs=c(s,"P",{"data-svelte-h":!0}),r(Cs)!=="svelte-18vozdy"&&(Cs.textContent=at),ll=n(s),d(Xs.$$.fragment,s),tl=n(s),d(zs.$$.fragment,s),al=n(s),Ns=c(s,"P",{"data-svelte-h":!0}),r(Ns)!=="svelte-19srtem"&&(Ns.innerHTML=nt),nl=n(s),d(_s.$$.fragment,s),il=n(s),ks=c(s,"P",{"data-svelte-h":!0}),r(ks)!=="svelte-1d62pml"&&(ks.innerHTML=it),pl=n(s),d(vs.$$.fragment,s),cl=n(s),Vs=c(s,"P",{"data-svelte-h":!0}),r(Vs)!=="svelte-qhk020"&&(Vs.textContent=pt),rl=n(s),d(Bs.$$.fragment,s),ul=n(s),$s=c(s,"P",{"data-svelte-h":!0}),r($s)!=="svelte-1f23lsk"&&($s.innerHTML=ct),Ml=n(s),d(Ws.$$.fragment,s),dl=n(s),d(Hs.$$.fragment,s),Jl=n(s),Qs=c(s,"P",{"data-svelte-h":!0}),r(Qs)!=="svelte-1nh3ov3"&&(Qs.innerHTML=rt),ml=n(s),d(G.$$.fragment,s),ol=n(s),d(Fs.$$.fragment,s),yl=n(s),Ys=c(s,"P",{"data-svelte-h":!0}),r(Ys)!=="svelte-1v8gyxc"&&(Ys.textContent=ut),hl=n(s),Es=c(s,"P",{"data-svelte-h":!0}),r(Es)!=="svelte-1yj0d15"&&(Es.innerHTML=Mt),Tl=n(s),As=c(s,"OL",{"data-svelte-h":!0}),r(As)!=="svelte-13pn4mo"&&(As.innerHTML=dt),Ul=n(s),I=c(s,"DIV",{class:!0,"data-svelte-h":!0}),r(I)!=="svelte-s62rok"&&(I.innerHTML=Jt),jl=n(s),R=c(s,"OL",{start:!0,"data-svelte-h":!0}),r(R)!=="svelte-17itww3"&&(R.innerHTML=mt),gl=n(s),Ss=c(s,"P",{"data-svelte-h":!0}),r(Ss)!=="svelte-c0261d"&&(Ss.innerHTML=ot),bl=n(s),C=c(s,"DIV",{class:!0,"data-svelte-h":!0}),r(C)!=="svelte-1twscot"&&(C.innerHTML=yt),fl=n(s),d(X.$$.fragment,s),wl=n(s),Ds=c(s,"P",{"data-svelte-h":!0}),r(Ds)!=="svelte-pd26ss"&&(Ds.textContent=ht),xl=n(s),d(z.$$.fragment,s),Zl=n(s),d(Ls.$$.fragment,s),Gl=n(s),Ks=c(s,"P",{}),wt(Ks).forEach(l),this.h()},h(){N(i,"name","hf:doc:metadata"),N(i,"content",Ht),N(g,"class","flex justify-center"),N(b,"class","flex justify-center"),N(Z,"class","flex justify-center"),N(I,"class","flex justify-center"),N(R,"start","2"),N(C,"class","flex justify-center")},m(s,e){Ct(document.head,i),t(s,T,e),t(s,u,e),t(s,U,e),J(_,s,e),t(s,se,e),J(k,s,e),t(s,ee,e),t(s,v,e),t(s,le,e),t(s,V,e),t(s,te,e),t(s,B,e),t(s,ae,e),J($,s,e),t(s,ne,e),t(s,W,e),t(s,ie,e),t(s,g,e),t(s,pe,e),t(s,H,e),t(s,ce,e),t(s,b,e),t(s,re,e),t(s,Q,e),t(s,ue,e),t(s,F,e),t(s,Me,e),J(Y,s,e),t(s,de,e),t(s,E,e),t(s,Je,e),J(A,s,e),t(s,me,e),t(s,S,e),t(s,oe,e),J(D,s,e),t(s,ye,e),J(L,s,e),t(s,he,e),t(s,q,e),t(s,Te,e),J(P,s,e),t(s,Ue,e),J(O,s,e),t(s,je,e),t(s,K,e),t(s,ge,e),J(f,s,e),t(s,be,e),t(s,ss,e),t(s,fe,e),J(es,s,e),t(s,we,e),J(w,s,e),t(s,xe,e),t(s,ls,e),t(s,Ze,e),J(ts,s,e),t(s,Ge,e),t(s,as,e),t(s,Ie,e),J(ns,s,e),t(s,Re,e),t(s,is,e),t(s,Ce,e),J(ps,s,e),t(s,Xe,e),J(cs,s,e),t(s,ze,e),t(s,rs,e),t(s,Ne,e),t(s,us,e),t(s,_e,e),t(s,Ms,e),t(s,ke,e),J(ds,s,e),t(s,ve,e),t(s,Js,e),t(s,Ve,e),J(ms,s,e),t(s,Be,e),J(os,s,e),t(s,$e,e),t(s,ys,e),t(s,We,e),J(hs,s,e),t(s,He,e),J(x,s,e),t(s,Qe,e),t(s,Ts,e),t(s,Fe,e),t(s,Us,e),t(s,Ye,e),J(js,s,e),t(s,Ee,e),t(s,gs,e),t(s,Ae,e),t(s,Z,e),t(s,Se,e),t(s,bs,e),t(s,De,e),J(fs,s,e),t(s,Le,e),J(ws,s,e),t(s,qe,e),t(s,xs,e),t(s,Pe,e),J(Zs,s,e),t(s,Oe,e),J(Gs,s,e),t(s,Ke,e),t(s,Is,e),t(s,sl,e),J(Rs,s,e),t(s,el,e),t(s,Cs,e),t(s,ll,e),J(Xs,s,e),t(s,tl,e),J(zs,s,e),t(s,al,e),t(s,Ns,e),t(s,nl,e),J(_s,s,e),t(s,il,e),t(s,ks,e),t(s,pl,e),J(vs,s,e),t(s,cl,e),t(s,Vs,e),t(s,rl,e),J(Bs,s,e),t(s,ul,e),t(s,$s,e),t(s,Ml,e),J(Ws,s,e),t(s,dl,e),J(Hs,s,e),t(s,Jl,e),t(s,Qs,e),t(s,ml,e),J(G,s,e),t(s,ol,e),J(Fs,s,e),t(s,yl,e),t(s,Ys,e),t(s,hl,e),t(s,Es,e),t(s,Tl,e),t(s,As,e),t(s,Ul,e),t(s,I,e),t(s,jl,e),t(s,R,e),t(s,gl,e),t(s,Ss,e),t(s,bl,e),t(s,C,e),t(s,fl,e),J(X,s,e),t(s,wl,e),t(s,Ds,e),t(s,xl,e),J(z,s,e),t(s,Zl,e),J(Ls,s,e),t(s,Gl,e),t(s,Ks,e),Il=!0},p(s,[e]){const Tt={};e&2&&(Tt.$$scope={dirty:e,ctx:s}),f.$set(Tt);const Ut={};e&2&&(Ut.$$scope={dirty:e,ctx:s}),w.$set(Ut);const jt={};e&2&&(jt.$$scope={dirty:e,ctx:s}),x.$set(jt);const gt={};e&2&&(gt.$$scope={dirty:e,ctx:s}),G.$set(gt);const bt={};e&2&&(bt.$$scope={dirty:e,ctx:s}),X.$set(bt);const ft={};e&2&&(ft.$$scope={dirty:e,ctx:s}),z.$set(ft)},i(s){Il||(m(_.$$.fragment,s),m(k.$$.fragment,s),m($.$$.fragment,s),m(Y.$$.fragment,s),m(A.$$.fragment,s),m(D.$$.fragment,s),m(L.$$.fragment,s),m(P.$$.fragment,s),m(O.$$.fragment,s),m(f.$$.fragment,s),m(es.$$.fragment,s),m(w.$$.fragment,s),m(ts.$$.fragment,s),m(ns.$$.fragment,s),m(ps.$$.fragment,s),m(cs.$$.fragment,s),m(ds.$$.fragment,s),m(ms.$$.fragment,s),m(os.$$.fragment,s),m(hs.$$.fragment,s),m(x.$$.fragment,s),m(js.$$.fragment,s),m(fs.$$.fragment,s),m(ws.$$.fragment,s),m(Zs.$$.fragment,s),m(Gs.$$.fragment,s),m(Rs.$$.fragment,s),m(Xs.$$.fragment,s),m(zs.$$.fragment,s),m(_s.$$.fragment,s),m(vs.$$.fragment,s),m(Bs.$$.fragment,s),m(Ws.$$.fragment,s),m(Hs.$$.fragment,s),m(G.$$.fragment,s),m(Fs.$$.fragment,s),m(X.$$.fragment,s),m(z.$$.fragment,s),m(Ls.$$.fragment,s),Il=!0)},o(s){o(_.$$.fragment,s),o(k.$$.fragment,s),o($.$$.fragment,s),o(Y.$$.fragment,s),o(A.$$.fragment,s),o(D.$$.fragment,s),o(L.$$.fragment,s),o(P.$$.fragment,s),o(O.$$.fragment,s),o(f.$$.fragment,s),o(es.$$.fragment,s),o(w.$$.fragment,s),o(ts.$$.fragment,s),o(ns.$$.fragment,s),o(ps.$$.fragment,s),o(cs.$$.fragment,s),o(ds.$$.fragment,s),o(ms.$$.fragment,s),o(os.$$.fragment,s),o(hs.$$.fragment,s),o(x.$$.fragment,s),o(js.$$.fragment,s),o(fs.$$.fragment,s),o(ws.$$.fragment,s),o(Zs.$$.fragment,s),o(Gs.$$.fragment,s),o(Rs.$$.fragment,s),o(Xs.$$.fragment,s),o(zs.$$.fragment,s),o(_s.$$.fragment,s),o(vs.$$.fragment,s),o(Bs.$$.fragment,s),o(Ws.$$.fragment,s),o(Hs.$$.fragment,s),o(G.$$.fragment,s),o(Fs.$$.fragment,s),o(X.$$.fragment,s),o(z.$$.fragment,s),o(Ls.$$.fragment,s),Il=!1},d(s){s&&(l(T),l(u),l(U),l(se),l(ee),l(v),l(le),l(V),l(te),l(B),l(ae),l(ne),l(W),l(ie),l(g),l(pe),l(H),l(ce),l(b),l(re),l(Q),l(ue),l(F),l(Me),l(de),l(E),l(Je),l(me),l(S),l(oe),l(ye),l(he),l(q),l(Te),l(Ue),l(je),l(K),l(ge),l(be),l(ss),l(fe),l(we),l(xe),l(ls),l(Ze),l(Ge),l(as),l(Ie),l(Re),l(is),l(Ce),l(Xe),l(ze),l(rs),l(Ne),l(us),l(_e),l(Ms),l(ke),l(ve),l(Js),l(Ve),l(Be),l($e),l(ys),l(We),l(He),l(Qe),l(Ts),l(Fe),l(Us),l(Ye),l(Ee),l(gs),l(Ae),l(Z),l(Se),l(bs),l(De),l(Le),l(qe),l(xs),l(Pe),l(Oe),l(Ke),l(Is),l(sl),l(el),l(Cs),l(ll),l(tl),l(al),l(Ns),l(nl),l(il),l(ks),l(pl),l(cl),l(Vs),l(rl),l(ul),l($s),l(Ml),l(dl),l(Jl),l(Qs),l(ml),l(ol),l(yl),l(Ys),l(hl),l(Es),l(Tl),l(As),l(Ul),l(I),l(jl),l(R),l(gl),l(Ss),l(bl),l(C),l(fl),l(wl),l(Ds),l(xl),l(Zl),l(Gl),l(Ks)),l(i),y(_,s),y(k,s),y($,s),y(Y,s),y(A,s),y(D,s),y(L,s),y(P,s),y(O,s),y(f,s),y(es,s),y(w,s),y(ts,s),y(ns,s),y(ps,s),y(cs,s),y(ds,s),y(ms,s),y(os,s),y(hs,s),y(x,s),y(js,s),y(fs,s),y(ws,s),y(Zs,s),y(Gs,s),y(Rs,s),y(Xs,s),y(zs,s),y(_s,s),y(vs,s),y(Bs,s),y(Ws,s),y(Hs,s),y(G,s),y(Fs,s),y(X,s),y(z,s),y(Ls,s)}}}const Ht='{"title":"Crearea propriului tău dataset","local":"creating-your-own-dataset","sections":[{"title":"Obținerea datelor","local":"getting-the-data","sections":[],"depth":2},{"title":"Curățarea datelor","local":"cleaning-up-the-data","sections":[],"depth":2},{"title":"Îmbunătățirea datasetului","local":"augmenting-the-dataset","sections":[],"depth":2},{"title":"Încărcarea datasetului pe Hugging Face Hub","local":"uploading-the-dataset-to-the-hugging-face-hub","sections":[],"depth":2},{"title":"Crearea unei dataset card","local":"creating-a-dataset-card","sections":[],"depth":2}],"depth":1}';function Qt(j){return Zt(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class qt extends Gt{constructor(i){super(),It(this,i,Qt,Wt,xt,{})}}export{qt as component};

Xet Storage Details

Size:
76.6 kB
·
Xet hash:
628c7218a75461dd7a42d00d3683d0fe3f9be38df0b71ada85ff6517e7c1ed71

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.