Buckets:

rtrm's picture
download
raw
84.5 kB
import{s as ut,n as mt,o as Mt}from"../chunks/scheduler.505acc25.js";import{S as Jt,i as yt,e as o,s as t,c,h as ht,a as i,d as e,b as n,f as dt,g as r,j as p,k as y,l as Tt,m as l,n as d,t as u,o as m,p as M}from"../chunks/index.1238bded.js";import{C as Ut,H as ca,E as jt}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.076cb552.js";import{Y as bt}from"../chunks/Youtube.2fb63721.js";import{C as J}from"../chunks/CodeBlock.b4575fa8.js";import{C as gt}from"../chunks/CourseFloatingBanner.2e302d0f.js";function wt(ol){let h,ua,ra,ma,X,Ma,k,Ja,N,ya,z,il='Às vezes, o conjunto de dados de que você precisa para criar um aplicativo de PLN não existe, portanto, você mesmo precisará criá-lo. Nesta seção, mostraremos como criar um corpus de <a href="https://github.com/features/issues/" rel="nofollow">issues do GitHub</a>, que são comumente usados ​​para rastrear bugs ou recursos nos repositórios do GitHub. Este corpus pode ser usado para vários fins, incluindo:',ha,V,pl="<li>Explorar quanto tempo leva para fechar as issues abertos ou pull requests</li> <li>Treinar um <em>classificador multilabel</em> que pode marcar issues com metadados com base na descrição da issue (por exemplo, “bug”, “melhoria” ou “pergunta”)</li> <li>Criando um mecanismo de pesquisa semântica para descobrir quais issues correspondem à consulta de um usuário</li>",Ta,_,cl="Aqui nos concentraremos na criação do corpus e, na próxima seção, abordaremos o aplicativo de pesquisa semântica. Para manter a meta, usaremos as issues do GitHub associados a um projeto de código aberto popular: 🤗 Datasets! Vamos dar uma olhada em como obter os dados e explorar as informações contidas nessas edições.",Ua,B,ja,H,rl='Você pode encontrar todos as issues em 🤗 Datasets navegando até a <a href="https://github.com/huggingface/datasets/issues" rel="nofollow">guia de issues</a> do repositório. Conforme mostrado na captura de tela a seguir, no momento da redação, havia 331 issues abertos e 668 fechados.',ba,T,dl='<img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter5/datasets-issues.png" alt="The GitHub issues associated with 🤗 Datasets." width="80%"/>',ga,W,ul="Se você clicar em uma dessas issues, verá que ele contém um título, uma descrição e um conjunto de rótulos que caracterizam a issue. Um exemplo é mostrado na captura de tela abaixo.",wa,U,ml='<img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter5/datasets-issues-single.png" alt="A typical GitHub issue in the 🤗 Datasets repository." width="80%"/>',fa,F,Ml='Para baixar todos as issues do repositório, usaremos a <a href="https://docs.github.com/en/rest" rel="nofollow">GitHub REST API</a> para pesquisar o [<code>Issues</code> endpoint](<a href="https://docs.github." rel="nofollow">https://docs.github.</a> com/en/rest/reference/issues#list-repository-issues). Esse endpoint retorna uma lista de objetos JSON, com cada objeto contendo um grande número de campos que incluem o título e a descrição, bem como metadados sobre o status da issue e assim por diante.',xa,Q,Jl="Uma maneira conveniente de baixar as issues é por meio da biblioteca <code>requests</code>, que é a maneira padrão de fazer solicitações HTTP em Python. Você pode instalar a biblioteca executando:",Za,Y,Ga,$,yl="Uma vez que a biblioteca esteja instalada, você pode fazer solicitações GET para o endpoint <code>Issues</code> invocando a função <code>requests.get()</code>. Por exemplo, você pode executar o seguinte comando para recuperar a primeira issue na primeira página:",Ra,E,Ia,S,hl="O objeto <code>response</code> contém muitas informações úteis sobre a solicitação, incluindo o código de status HTTP:",va,A,Ca,q,Xa,L,Tl='onde um status <code>200</code> significa que a solicitação foi bem-sucedida (você pode encontrar uma lista de possíveis códigos de status HTTP <a href="https://en.wikipedia.org/wiki/List_of_HTTP_status_codes" rel="nofollow">aqui</a>). O que realmente nos interessa, porém, é o <em>payload</em>, que pode ser acessado em vários formatos como bytes, strings ou JSON. Como sabemos que nossas issues estão no formato JSON, vamos inspecionar o payload da seguinte forma:',ka,D,Na,P,za,O,Ul="Uau, é muita informação! Podemos ver campos úteis como <code>title</code>, <code>body</code> e <code>number</code> que descrevem a issue, bem como informações sobre o usuário do GitHub que abriu a issue.",Va,j,jl="<p>✏️ <strong>Experimente!</strong> Clique em alguns dos URLs na carga JSON acima para ter uma ideia de que tipo de informação cada issue do GitHub está vinculado.</p>",_a,K,bl='Conforme descrito na [documentação] do GitHub (<a href="https://docs.github.com/en/rest/overview/resources-in-the-rest-api#rate-limiting" rel="nofollow">https://docs.github.com/en/rest/overview/resources-in-the-rest-api#rate-limiting</a>), as solicitações não autenticadas são limitadas a 60 solicitações por hora. Embora você possa aumentar o parâmetro de consulta <code>per_page</code> para reduzir o número de solicitações feitas, você ainda atingirá o limite de taxa em qualquer repositório que tenha mais do que alguns milhares de issues. Então, em vez disso, você deve seguir as [instruções] do GitHub (<a href="https://docs.github.com/en/github/authenticating-to-github/creating-a-personal-access-token" rel="nofollow">https://docs.github.com/en/github/authenticating-to-github/creating-a-personal-access-token</a>) sobre como criar um <em>token de acesso pessoal</em> para que você pode aumentar o limite de taxa para 5.000 solicitações por hora. Depois de ter seu token, você pode incluí-lo como parte do cabeçalho da solicitação:',Ba,ss,Ha,b,gl='<p>⚠️ Não compartilhe um notebook com seu <code>GITHUB_TOKEN</code> colado nele. Recomendamos que você exclua a última célula depois de executá-la para evitar o vazamento dessas informações acidentalmente. Melhor ainda, armazene o token em um arquivo <em>.env</em> e use a <a href="https://github.com/theskumar/python-dotenv" rel="nofollow"><code>python-dotenv</code> library</a> para carregá-lo automaticamente para você como uma variável de ambiente.</p>',Wa,as,wl="Agora que temos nosso token de acesso, vamos criar uma função que possa baixar todas as issues de um repositório do GitHub:",Fa,es,Qa,ls,fl="Agora, quando chamamos <code>fetch_issues()</code>, ele fará o download de todas as issues em lotes para evitar exceder o limite do GitHub no número de solicitações por hora; o resultado será armazenado em um arquivo <em>repository_name-issues.jsonl</em>, onde cada linha é um objeto JSON que representa uma issue. Vamos usar esta função para pegar todas as issues de 🤗 Datasets:",Ya,ts,$a,ns,xl='Depois que as issues forem baixadas, podemos carregá-las localmente usando nossas novas habilidades da <a href="/course/chapter5/2">seção 2</a>:',Ea,os,Sa,is,Aa,ps,Zl='Ótimo, criamos nosso primeiro conjunto de dados do zero! Mas por que existem vários milhares de issues quando a <a href="https://github.com/huggingface/datasets/issues" rel="nofollow">guia Issue</a> do repositório 🤗 Datasets mostra apenas cerca de 1.000 issues no total 🤔? Conforme descrito na [documentação] do GitHub (<a href="https://docs.github.com/en/rest/reference/issues#list-issues-assigned-to-the-authenticated-user" rel="nofollow">https://docs.github.com/en/rest/reference/issues#list-issues-assigned-to-the-authenticated-user</a>), isso ocorre porque baixamos todos os pull request também:',qa,cs,Gl="<p>A API REST v3 do GitHub considera cada pull request como uma issue, mas nem toda issue é um pull request. Por esse motivo, os endpoints de “issues” podem retornar issues e solicitações de pull na resposta. Você pode identificar solicitações de pull pela chave <code>pull_request</code>. Esteja ciente de que o <code>id</code> de uma solicitação pull retornada de endpoints “issues” será um ID de issue.</p>",La,rs,Rl="Como o conteúdo das issues e dos pull request são bem diferentes, vamos fazer um pequeno pré-processamento para nos permitir distinguir entre eles.",Da,ds,Pa,us,Il='O trecho acima da documentação do GitHub nos diz que a coluna <code>pull_request</code> pode ser usada para diferenciar entre issues e solicitações de pull request. Vamos olhar para uma amostra aleatória para ver qual é a diferença. Como fizemos na <a href="/course/chapter5/3">seção 3</a>, vamos encadear <code>Dataset.shuffle()</code> e <code>Dataset.select()</code> para criar uma amostra aleatória e então compactar o <code>html_url</code> e <code>pull_request</code> para que possamos comparar os vários URLs:',Oa,ms,Ka,Ms,se,Js,vl="Aqui podemos ver que cada pull request está associado a vários URLs, enquanto as issues comuns têm uma entrada <code>None</code>. Podemos usar essa distinção para criar uma nova coluna <code>is_pull_request</code> que verifica se o campo <code>pull_request</code> é <code>None</code> ou não:",ae,ys,ee,g,Cl="<p>✏️ <strong>Experimente!</strong> Calcule o tempo médio que leva para fechar as issues em 🤗 Datasets. Você pode achar a função <code>Dataset.filter()</code> útil para filtrar os pull requests e as issues abertas, e você pode usar a função <code>Dataset.set_format()</code> para converter o conjunto de dados em um <code>DataFrame</code> para que você possa manipular facilmente os timestamps <code>created_at</code> e <code>closed_at</code>. Para pontos de bônus, calcule o tempo médio que leva para fechar os pull requests.</p>",le,hs,Xl="Embora possamos continuar a limpar o conjunto de dados descartando ou renomeando algumas colunas, geralmente é uma boa prática manter o conjunto de dados o mais “bruto” possível neste estágio para que possa ser facilmente usado em vários aplicativos.",te,Ts,kl="Antes de enviarmos nosso conjunto de dados para o Hugging Face Hub, vamos lidar com uma coisa que está faltando: os comentários associados a cada issue e pull request. Vamos adicioná-los a seguir - você adivinhou - a API REST do GitHub!",ne,Us,oe,js,Nl="Conforme mostrado na captura de tela a seguir, os comentários associados a uma issue ou a pull request fornecem uma rica fonte de informações, especialmente se estivermos interessados ​​em criar um mecanismo de pesquisa para responder às consultas dos usuários sobre a biblioteca.",ie,w,zl='<img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter5/datasets-issues-comment.png" alt="Comments associated with an issue about 🤗 Datasets." width="80%"/>',pe,bs,Vl='A API REST do GitHub fornece um <a href="https://docs.github.com/en/rest/reference/issues#list-issue-comments" rel="nofollow">endpoint <code>Comments</code></a> que retorna todos os comentários associados a uma issue. Vamos testar o endpoint para ver o que ele retorna:',ce,gs,re,ws,de,fs,_l="Podemos ver que o comentário está armazenado no campo <code>body</code>, então vamos escrever uma função simples que retorna todos os comentários associados a uma issue selecionando o conteúdo do <code>body</code> para cada elemento em <code>response.json()</code>:",ue,xs,me,Zs,Me,Gs,Bl="Isso parece certo, então vamos usar <code>Dataset.map()</code> para adicionar uma nova coluna <code>comments</code> para cada issue em nosso conjunto de dados:",Je,Rs,ye,Is,Hl="A etapa final é salvar o conjunto de dados aumentado junto com nossos dados brutos para que possamos enviá-los para o Hub:",he,vs,Te,Cs,Ue,Xs,je,ks,Wl='Agora que temos nosso conjunto de dados aumentado, é hora de enviá-lo para o Hub para que possamos compartilhá-lo com a comunidade! Para fazer o upload do conjunto de dados, usaremos a <a href="https://github.com/huggingface/huggingface_hub" rel="nofollow">🤗 Hub library</a>, que nos permite interagir com o Hugging Face Hub por meio de uma API Python. 🤗 Hub vem pré-instalado com 🤗 Transformers, para que possamos usá-lo diretamente. Por exemplo, podemos usar a função <code>list_datasets()</code> para obter informações sobre todos os conjuntos de dados públicos atualmente hospedados no Hub:',be,Ns,ge,zs,we,Vs,Fl="Podemos ver que atualmente existem cerca de 1.500 conjuntos de dados no Hub, e a função <code>list_datasets()</code> também fornece alguns metadados básicos sobre cada repositório de conjuntos de dados.",fe,_s,Ql="Para nossos propósitos, a primeira coisa que precisamos fazer é criar um novo repositório de conjunto de dados no Hub. Para fazer isso, precisamos de um token de autenticação, que pode ser obtido primeiro entrando no Hugging Face Hub com a função <code>notebook_login()</code>:",xe,Bs,Ze,Hs,Yl="Isso criará um widget onde você poderá inserir seu nome de usuário e senha, e um token de API será salvo em <em>~/.huggingface/token</em>. Se você estiver executando o código em um terminal, poderá fazer login via CLI:",Ge,Ws,Re,Fs,$l="Feito isso, podemos criar um novo repositório de conjunto de dados com a função <code>create_repo()</code>:",Ie,Qs,ve,Ys,Ce,$s,El="Neste exemplo, criamos um repositório de conjunto de dados vazio chamado <code>github-issues</code> sob o nome de usuário <code>lewtun</code> (o nome de usuário deve ser seu nome de usuário do Hub quando você estiver executando este código!).",Xe,f,Sl="<p>✏️ <strong>Experimente!</strong> Use seu nome de usuário e senha do Hugging Face Hub para obter um token e criar um repositório vazio chamado <code>github-issues</code>. Lembre-se de <strong>nunca salvar suas credenciais</strong> no Colab ou em qualquer outro repositório, pois essas informações podem ser exploradas por agentes mal-intencionados.</p>",ke,Es,Al="Em seguida, vamos clonar o repositório do Hub para nossa máquina local e copiar nosso arquivo de conjunto de dados para ele. O 🤗 Hub fornece uma classe <code>Repository</code> útil que envolve muitos dos comandos comuns do Git, portanto, para clonar o repositório remoto, basta fornecer o URL e o caminho local para o qual desejamos clonar:",Ne,Ss,ze,As,ql="Por padrão, várias extensões de arquivo (como <em>.bin</em>, <em>.gz</em> e <em>.zip</em>) são rastreadas com o Git LFS para que arquivos grandes possam ser versionados no mesmo fluxo de trabalho do Git. Você pode encontrar uma lista de extensões de arquivos rastreados dentro do arquivo <em>.gitattributes</em> do repositório. Para incluir o formato JSON Lines na lista, podemos executar o seguinte comando:",Ve,qs,_e,Ls,Ll="Então podemos usar <code>Repository.push_to_hub()</code> para enviar o conjunto de dados para o Hub:",Be,Ds,He,Ps,Dl="Se navegarmos para a URL contida em <code>repo_url</code>, veremos agora que nosso arquivo de conjunto de dados foi carregado.",We,x,Pl='<img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter5/hub-repo.png" alt="Our dataset repository on the Hugging Face Hub." width="80%"/>',Fe,Os,Ol="A partir daqui, qualquer um pode baixar o conjunto de dados simplesmente fornecendo <code>load_dataset()</code> com o ID do repositório como o argumento <code>path</code>:",Qe,Ks,Ye,sa,$e,aa,Kl="Legal, nós enviamos nosso conjunto de dados para o Hub e está disponível para outros usarem! Há apenas uma coisa importante a fazer: adicionar um <em>cartão de conjunto de dados</em> que explica como o corpus foi criado e fornece outras informações úteis para a comunidade.",Ee,Z,st='<p>💡 Você também pode enviar um conjunto de dados para o Hugging Face Hub diretamente do terminal usando <code>huggingface-cli</code> e um pouco de magia Git. Consulte o <a href="https://huggingface.co/docs/datasets/share#share-a-dataset-using-the-cli" rel="nofollow">guia do 🤗 Datasets</a> para obter detalhes sobre como fazer isso.</p>',Se,ea,Ae,la,at="Conjuntos de dados bem documentados são mais propensos a serem úteis para outras pessoas (incluindo você mesmo no futuro!), pois fornecem o contexto para permitir que os usuários decidam se o conjunto de dados é relevante para sua tarefa e avaliem possíveis vieses ou riscos associados ao uso o conjunto de dados.",qe,ta,et="No Hugging Face Hub, essas informações são armazenadas no arquivo <em>README.md</em> de cada repositório de conjunto de dados. Há duas etapas principais que você deve seguir antes de criar este arquivo:",Le,na,lt='<li>Use a aplicação <a href="https://huggingface.co/datasets/tagging/" rel="nofollow"><code>datasets-tagging</code></a> para criar tags de metadados no formato YAML. Essas tags são usadas para uma variedade de recursos de pesquisa no Hugging Face Hub e garantem que seu conjunto de dados possa ser facilmente encontrado pelos membros da comunidade. Como criamos um conjunto de dados personalizado aqui, você precisará clonar o repositório <code>datasets-tagging</code> e executar o aplicativo localmente. Veja como é a interface:</li>',De,G,tt='<img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter5/datasets-tagger.png" alt="The &#39;datasets-tagging&#39; interface." width="80%"/>',Pe,R,nt='<li>Leia o <a href="https://github.com/huggingface/datasets/blob/master/templates/README_guide.md" rel="nofollow">guia do 🤗 datasets</a> sobre como criar cartões informativos de conjuntos de dados e use-os como modelo.</li>',Oe,oa,ot="Você pode criar o arquivo <em>README.md</em> diretamente no Hub e encontrar um cartão de conjunto de dados de modelo no repositório de conjunto de dados <code>lewtun/github-issues</code>. Uma captura de tela do cartão de conjunto de dados preenchido é mostrada abaixo.",Ke,I,it='<img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter5/dataset-card.png" alt="A dataset card." width="80%"/>',sl,v,pt='<p>✏️ <strong>Experimente!</strong> Use o aplicativo <code>dataset-tagging</code> e <a href="https://github.com/huggingface/datasets/blob/master/templates/README_guide.md" rel="nofollow">guia do 🤗 datasets</a> para concluir o <em>Arquivo README.md</em> para o conjunto de dados de issues do GitHub.</p>',al,ia,ct="É isso! Vimos nesta seção que criar um bom conjunto de dados pode ser bastante complicado, mas felizmente carregá-lo e compartilhá-lo com a comunidade não é. Na próxima seção, usaremos nosso novo conjunto de dados para criar um mecanismo de pesquisa semântica com o 🤗 datasets que podem corresponder perguntas as issues e comentários mais relevantes.",el,C,rt="<p>✏️ <strong>Experimente!</strong> Siga as etapas que seguimos nesta seção para criar um conjunto de dados de issues do GitHub para sua biblioteca de código aberto favorita (escolha algo diferente do 🤗 datasets, é claro!). Para pontos de bônus, ajuste um classificador multilabel para prever as tags presentes no campo <code>labels</code>.</p>",ll,pa,tl,da,nl;return X=new Ut({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),k=new ca({props:{title:"Criando seu próprio dataset",local:"criando-seu-próprio-dataset",headingTag:"h1"}}),N=new gt({props:{chapter:5,classNames:"absolute z-10 right-0 top-0",notebooks:[{label:"Google Colab",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/pt/chapter5/section5.ipynb"},{label:"Aws Studio",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/pt/chapter5/section5.ipynb"}]}}),B=new ca({props:{title:"Obtendo os dados",local:"obtendo-os-dados",headingTag:"h2"}}),Y=new J({props:{code:"IXBpcCUyMGluc3RhbGwlMjByZXF1ZXN0cw==",highlighted:"!pip install requests",wrap:!1}}),E=new J({props:{code:"aW1wb3J0JTIwcmVxdWVzdHMlMEElMEF1cmwlMjAlM0QlMjAlMjJodHRwcyUzQSUyRiUyRmFwaS5naXRodWIuY29tJTJGcmVwb3MlMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJTJGaXNzdWVzJTNGcGFnZSUzRDElMjZwZXJfcGFnZSUzRDElMjIlMEFyZXNwb25zZSUyMCUzRCUyMHJlcXVlc3RzLmdldCh1cmwp",highlighted:`<span class="hljs-keyword">import</span> requests
url = <span class="hljs-string">&quot;https://api.github.com/repos/huggingface/datasets/issues?page=1&amp;per_page=1&quot;</span>
response = requests.get(url)`,wrap:!1}}),A=new J({props:{code:"cmVzcG9uc2Uuc3RhdHVzX2NvZGU=",highlighted:"response.status_code",wrap:!1}}),q=new J({props:{code:"MjAw",highlighted:'<span class="hljs-number">200</span>',wrap:!1}}),D=new J({props:{code:"cmVzcG9uc2UuanNvbigp",highlighted:"response.json()",wrap:!1}}),P=new J({props:{code:"JTVCJTdCJ3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZhcGkuZ2l0aHViLmNvbSUyRnJlcG9zJTJGaHVnZ2luZ2ZhY2UlMkZkYXRhc2V0cyUyRmlzc3VlcyUyRjI3OTInJTJDJTBBJTIwJTIwJ3JlcG9zaXRvcnlfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmFwaS5naXRodWIuY29tJTJGcmVwb3MlMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJyUyQyUwQSUyMCUyMCdsYWJlbHNfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmFwaS5naXRodWIuY29tJTJGcmVwb3MlMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJTJGaXNzdWVzJTJGMjc5MiUyRmxhYmVscyU3QiUyRm5hbWUlN0QnJTJDJTBBJTIwJTIwJ2NvbW1lbnRzX3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZhcGkuZ2l0aHViLmNvbSUyRnJlcG9zJTJGaHVnZ2luZ2ZhY2UlMkZkYXRhc2V0cyUyRmlzc3VlcyUyRjI3OTIlMkZjb21tZW50cyclMkMlMEElMjAlMjAnZXZlbnRzX3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZhcGkuZ2l0aHViLmNvbSUyRnJlcG9zJTJGaHVnZ2luZ2ZhY2UlMkZkYXRhc2V0cyUyRmlzc3VlcyUyRjI3OTIlMkZldmVudHMnJTJDJTBBJTIwJTIwJ2h0bWxfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmdpdGh1Yi5jb20lMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJTJGcHVsbCUyRjI3OTInJTJDJTBBJTIwJTIwJ2lkJyUzQSUyMDk2ODY1MDI3NCUyQyUwQSUyMCUyMCdub2RlX2lkJyUzQSUyMCdNREV4T2xCMWJHeFNaWEYxWlhOME56RXdOelV5TWpjMCclMkMlMEElMjAlMjAnbnVtYmVyJyUzQSUyMDI3OTIlMkMlMEElMjAlMjAndGl0bGUnJTNBJTIwJ1VwZGF0ZSUyMEdvb0FRJyUyQyUwQSUyMCUyMCd1c2VyJyUzQSUyMCU3Qidsb2dpbiclM0ElMjAnYmhhdml0dnlhbWFsaWsnJTJDJTBBJTIwJTIwJTIwJ2lkJyUzQSUyMDE5NzE4ODE4JTJDJTBBJTIwJTIwJTIwJ25vZGVfaWQnJTNBJTIwJ01EUTZWWE5sY2pFNU56RTRPREU0JyUyQyUwQSUyMCUyMCUyMCdhdmF0YXJfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmF2YXRhcnMuZ2l0aHVidXNlcmNvbnRlbnQuY29tJTJGdSUyRjE5NzE4ODE4JTNGdiUzRDQnJTJDJTBBJTIwJTIwJTIwJ2dyYXZhdGFyX2lkJyUzQSUyMCcnJTJDJTBBJTIwJTIwJTIwJ3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZhcGkuZ2l0aHViLmNvbSUyRnVzZXJzJTJGYmhhdml0dnlhbWFsaWsnJTJDJTBBJTIwJTIwJTIwJ2h0bWxfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmdpdGh1Yi5jb20lMkZiaGF2aXR2eWFtYWxpayclMkMlMEElMjAlMjAlMjAnZm9sbG93ZXJzX3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZhcGkuZ2l0aHViLmNvbSUyRnVzZXJzJTJGYmhhdml0dnlhbWFsaWslMkZmb2xsb3dlcnMnJTJDJTBBJTIwJTIwJTIwJ2ZvbGxvd2luZ191cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGYXBpLmdpdGh1Yi5jb20lMkZ1c2VycyUyRmJoYXZpdHZ5YW1hbGlrJTJGZm9sbG93aW5nJTdCJTJGb3RoZXJfdXNlciU3RCclMkMlMEElMjAlMjAlMjAnZ2lzdHNfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmFwaS5naXRodWIuY29tJTJGdXNlcnMlMkZiaGF2aXR2eWFtYWxpayUyRmdpc3RzJTdCJTJGZ2lzdF9pZCU3RCclMkMlMEElMjAlMjAlMjAnc3RhcnJlZF91cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGYXBpLmdpdGh1Yi5jb20lMkZ1c2VycyUyRmJoYXZpdHZ5YW1hbGlrJTJGc3RhcnJlZCU3QiUyRm93bmVyJTdEJTdCJTJGcmVwbyU3RCclMkMlMEElMjAlMjAlMjAnc3Vic2NyaXB0aW9uc191cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGYXBpLmdpdGh1Yi5jb20lMkZ1c2VycyUyRmJoYXZpdHZ5YW1hbGlrJTJGc3Vic2NyaXB0aW9ucyclMkMlMEElMjAlMjAlMjAnb3JnYW5pemF0aW9uc191cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGYXBpLmdpdGh1Yi5jb20lMkZ1c2VycyUyRmJoYXZpdHZ5YW1hbGlrJTJGb3JncyclMkMlMEElMjAlMjAlMjAncmVwb3NfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmFwaS5naXRodWIuY29tJTJGdXNlcnMlMkZiaGF2aXR2eWFtYWxpayUyRnJlcG9zJyUyQyUwQSUyMCUyMCUyMCdldmVudHNfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmFwaS5naXRodWIuY29tJTJGdXNlcnMlMkZiaGF2aXR2eWFtYWxpayUyRmV2ZW50cyU3QiUyRnByaXZhY3klN0QnJTJDJTBBJTIwJTIwJTIwJ3JlY2VpdmVkX2V2ZW50c191cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGYXBpLmdpdGh1Yi5jb20lMkZ1c2VycyUyRmJoYXZpdHZ5YW1hbGlrJTJGcmVjZWl2ZWRfZXZlbnRzJyUyQyUwQSUyMCUyMCUyMCd0eXBlJyUzQSUyMCdVc2VyJyUyQyUwQSUyMCUyMCUyMCdzaXRlX2FkbWluJyUzQSUyMEZhbHNlJTdEJTJDJTBBJTIwJTIwJ2xhYmVscyclM0ElMjAlNUIlNUQlMkMlMEElMjAlMjAnc3RhdGUnJTNBJTIwJ29wZW4nJTJDJTBBJTIwJTIwJ2xvY2tlZCclM0ElMjBGYWxzZSUyQyUwQSUyMCUyMCdhc3NpZ25lZSclM0ElMjBOb25lJTJDJTBBJTIwJTIwJ2Fzc2lnbmVlcyclM0ElMjAlNUIlNUQlMkMlMEElMjAlMjAnbWlsZXN0b25lJyUzQSUyME5vbmUlMkMlMEElMjAlMjAnY29tbWVudHMnJTNBJTIwMSUyQyUwQSUyMCUyMCdjcmVhdGVkX2F0JyUzQSUyMCcyMDIxLTA4LTEyVDExJTNBNDAlM0ExOFonJTJDJTBBJTIwJTIwJ3VwZGF0ZWRfYXQnJTNBJTIwJzIwMjEtMDgtMTJUMTIlM0EzMSUzQTE3WiclMkMlMEElMjAlMjAnY2xvc2VkX2F0JyUzQSUyME5vbmUlMkMlMEElMjAlMjAnYXV0aG9yX2Fzc29jaWF0aW9uJyUzQSUyMCdDT05UUklCVVRPUiclMkMlMEElMjAlMjAnYWN0aXZlX2xvY2tfcmVhc29uJyUzQSUyME5vbmUlMkMlMEElMjAlMjAncHVsbF9yZXF1ZXN0JyUzQSUyMCU3Qid1cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGYXBpLmdpdGh1Yi5jb20lMkZyZXBvcyUyRmh1Z2dpbmdmYWNlJTJGZGF0YXNldHMlMkZwdWxscyUyRjI3OTInJTJDJTBBJTIwJTIwJTIwJ2h0bWxfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmdpdGh1Yi5jb20lMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJTJGcHVsbCUyRjI3OTInJTJDJTBBJTIwJTIwJTIwJ2RpZmZfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmdpdGh1Yi5jb20lMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJTJGcHVsbCUyRjI3OTIuZGlmZiclMkMlMEElMjAlMjAlMjAncGF0Y2hfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmdpdGh1Yi5jb20lMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJTJGcHVsbCUyRjI3OTIucGF0Y2gnJTdEJTJDJTBBJTIwJTIwJ2JvZHknJTNBJTIwJyU1Qkdvb0FRJTVEKGh0dHBzJTNBJTJGJTJGZ2l0aHViLmNvbSUyRmFsbGVuYWklMkZnb29hcSklMjBkYXRhc2V0JTIwd2FzJTIwcmVjZW50bHklMjB1cGRhdGVkJTIwYWZ0ZXIlMjBzcGxpdHMlMjB3ZXJlJTIwYWRkZWQlMjBmb3IlMjB0aGUlMjBzYW1lLiUyMFRoaXMlMjBQUiUyMGNvbnRhaW5zJTIwbmV3JTIwdXBkYXRlZCUyMEdvb0FRJTIwd2l0aCUyMHRyYWluJTJGdmFsJTJGdGVzdCUyMHNwbGl0cyUyMGFuZCUyMHVwZGF0ZWQlMjBSRUFETUUlMjBhcyUyMHdlbGwuJyUyQyUwQSUyMCUyMCdwZXJmb3JtZWRfdmlhX2dpdGh1Yl9hcHAnJTNBJTIwTm9uZSU3RCU1RA==",highlighted:`[{<span class="hljs-string">&#x27;url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/repos/huggingface/datasets/issues/2792&#x27;</span>,
<span class="hljs-string">&#x27;repository_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/repos/huggingface/datasets&#x27;</span>,
<span class="hljs-string">&#x27;labels_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/repos/huggingface/datasets/issues/2792/labels{/name}&#x27;</span>,
<span class="hljs-string">&#x27;comments_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/repos/huggingface/datasets/issues/2792/comments&#x27;</span>,
<span class="hljs-string">&#x27;events_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/repos/huggingface/datasets/issues/2792/events&#x27;</span>,
<span class="hljs-string">&#x27;html_url&#x27;</span>: <span class="hljs-string">&#x27;https://github.com/huggingface/datasets/pull/2792&#x27;</span>,
<span class="hljs-string">&#x27;id&#x27;</span>: <span class="hljs-number">968650274</span>,
<span class="hljs-string">&#x27;node_id&#x27;</span>: <span class="hljs-string">&#x27;MDExOlB1bGxSZXF1ZXN0NzEwNzUyMjc0&#x27;</span>,
<span class="hljs-string">&#x27;number&#x27;</span>: <span class="hljs-number">2792</span>,
<span class="hljs-string">&#x27;title&#x27;</span>: <span class="hljs-string">&#x27;Update GooAQ&#x27;</span>,
<span class="hljs-string">&#x27;user&#x27;</span>: {<span class="hljs-string">&#x27;login&#x27;</span>: <span class="hljs-string">&#x27;bhavitvyamalik&#x27;</span>,
<span class="hljs-string">&#x27;id&#x27;</span>: <span class="hljs-number">19718818</span>,
<span class="hljs-string">&#x27;node_id&#x27;</span>: <span class="hljs-string">&#x27;MDQ6VXNlcjE5NzE4ODE4&#x27;</span>,
<span class="hljs-string">&#x27;avatar_url&#x27;</span>: <span class="hljs-string">&#x27;https://avatars.githubusercontent.com/u/19718818?v=4&#x27;</span>,
<span class="hljs-string">&#x27;gravatar_id&#x27;</span>: <span class="hljs-string">&#x27;&#x27;</span>,
<span class="hljs-string">&#x27;url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik&#x27;</span>,
<span class="hljs-string">&#x27;html_url&#x27;</span>: <span class="hljs-string">&#x27;https://github.com/bhavitvyamalik&#x27;</span>,
<span class="hljs-string">&#x27;followers_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/followers&#x27;</span>,
<span class="hljs-string">&#x27;following_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/following{/other_user}&#x27;</span>,
<span class="hljs-string">&#x27;gists_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/gists{/gist_id}&#x27;</span>,
<span class="hljs-string">&#x27;starred_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/starred{/owner}{/repo}&#x27;</span>,
<span class="hljs-string">&#x27;subscriptions_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/subscriptions&#x27;</span>,
<span class="hljs-string">&#x27;organizations_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/orgs&#x27;</span>,
<span class="hljs-string">&#x27;repos_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/repos&#x27;</span>,
<span class="hljs-string">&#x27;events_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/events{/privacy}&#x27;</span>,
<span class="hljs-string">&#x27;received_events_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/received_events&#x27;</span>,
<span class="hljs-string">&#x27;type&#x27;</span>: <span class="hljs-string">&#x27;User&#x27;</span>,
<span class="hljs-string">&#x27;site_admin&#x27;</span>: <span class="hljs-literal">False</span>},
<span class="hljs-string">&#x27;labels&#x27;</span>: [],
<span class="hljs-string">&#x27;state&#x27;</span>: <span class="hljs-string">&#x27;open&#x27;</span>,
<span class="hljs-string">&#x27;locked&#x27;</span>: <span class="hljs-literal">False</span>,
<span class="hljs-string">&#x27;assignee&#x27;</span>: <span class="hljs-literal">None</span>,
<span class="hljs-string">&#x27;assignees&#x27;</span>: [],
<span class="hljs-string">&#x27;milestone&#x27;</span>: <span class="hljs-literal">None</span>,
<span class="hljs-string">&#x27;comments&#x27;</span>: <span class="hljs-number">1</span>,
<span class="hljs-string">&#x27;created_at&#x27;</span>: <span class="hljs-string">&#x27;2021-08-12T11:40:18Z&#x27;</span>,
<span class="hljs-string">&#x27;updated_at&#x27;</span>: <span class="hljs-string">&#x27;2021-08-12T12:31:17Z&#x27;</span>,
<span class="hljs-string">&#x27;closed_at&#x27;</span>: <span class="hljs-literal">None</span>,
<span class="hljs-string">&#x27;author_association&#x27;</span>: <span class="hljs-string">&#x27;CONTRIBUTOR&#x27;</span>,
<span class="hljs-string">&#x27;active_lock_reason&#x27;</span>: <span class="hljs-literal">None</span>,
<span class="hljs-string">&#x27;pull_request&#x27;</span>: {<span class="hljs-string">&#x27;url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/repos/huggingface/datasets/pulls/2792&#x27;</span>,
<span class="hljs-string">&#x27;html_url&#x27;</span>: <span class="hljs-string">&#x27;https://github.com/huggingface/datasets/pull/2792&#x27;</span>,
<span class="hljs-string">&#x27;diff_url&#x27;</span>: <span class="hljs-string">&#x27;https://github.com/huggingface/datasets/pull/2792.diff&#x27;</span>,
<span class="hljs-string">&#x27;patch_url&#x27;</span>: <span class="hljs-string">&#x27;https://github.com/huggingface/datasets/pull/2792.patch&#x27;</span>},
<span class="hljs-string">&#x27;body&#x27;</span>: <span class="hljs-string">&#x27;[GooAQ](https://github.com/allenai/gooaq) dataset was recently updated after splits were added for the same. This PR contains new updated GooAQ with train/val/test splits and updated README as well.&#x27;</span>,
<span class="hljs-string">&#x27;performed_via_github_app&#x27;</span>: <span class="hljs-literal">None</span>}]`,wrap:!1}}),ss=new J({props:{code:"R0lUSFVCX1RPS0VOJTIwJTNEJTIweHh4JTIwJTIwJTIzJTIwQ29weSUyMHlvdXIlMjBHaXRIdWIlMjB0b2tlbiUyMGhlcmUlMEFoZWFkZXJzJTIwJTNEJTIwJTdCJTIyQXV0aG9yaXphdGlvbiUyMiUzQSUyMGYlMjJ0b2tlbiUyMCU3QkdJVEhVQl9UT0tFTiU3RCUyMiU3RA==",highlighted:`GITHUB_TOKEN = xxx <span class="hljs-comment"># Copy your GitHub token here</span>
headers = {<span class="hljs-string">&quot;Authorization&quot;</span>: <span class="hljs-string">f&quot;token <span class="hljs-subst">{GITHUB_TOKEN}</span>&quot;</span>}`,wrap:!1}}),es=new J({props:{code:"aW1wb3J0JTIwdGltZSUwQWltcG9ydCUyMG1hdGglMEFmcm9tJTIwcGF0aGxpYiUyMGltcG9ydCUyMFBhdGglMEFpbXBvcnQlMjBwYW5kYXMlMjBhcyUyMHBkJTBBZnJvbSUyMHRxZG0ubm90ZWJvb2slMjBpbXBvcnQlMjB0cWRtJTBBJTBBJTBBZGVmJTIwZmV0Y2hfaXNzdWVzKCUwQSUyMCUyMCUyMCUyMG93bmVyJTNEJTIyaHVnZ2luZ2ZhY2UlMjIlMkMlMEElMjAlMjAlMjAlMjByZXBvJTNEJTIyZGF0YXNldHMlMjIlMkMlMEElMjAlMjAlMjAlMjBudW1faXNzdWVzJTNEMTBfMDAwJTJDJTBBJTIwJTIwJTIwJTIwcmF0ZV9saW1pdCUzRDVfMDAwJTJDJTBBJTIwJTIwJTIwJTIwaXNzdWVzX3BhdGglM0RQYXRoKCUyMi4lMjIpJTJDJTBBKSUzQSUwQSUyMCUyMCUyMCUyMGlmJTIwbm90JTIwaXNzdWVzX3BhdGguaXNfZGlyKCklM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBpc3N1ZXNfcGF0aC5ta2RpcihleGlzdF9vayUzRFRydWUpJTBBJTBBJTIwJTIwJTIwJTIwYmF0Y2glMjAlM0QlMjAlNUIlNUQlMEElMjAlMjAlMjAlMjBhbGxfaXNzdWVzJTIwJTNEJTIwJTVCJTVEJTBBJTIwJTIwJTIwJTIwcGVyX3BhZ2UlMjAlM0QlMjAxMDAlMjAlMjAlMjMlMjBOdW1iZXIlMjBvZiUyMGlzc3VlcyUyMHRvJTIwcmV0dXJuJTIwcGVyJTIwcGFnZSUwQSUyMCUyMCUyMCUyMG51bV9wYWdlcyUyMCUzRCUyMG1hdGguY2VpbChudW1faXNzdWVzJTIwJTJGJTIwcGVyX3BhZ2UpJTBBJTIwJTIwJTIwJTIwYmFzZV91cmwlMjAlM0QlMjAlMjJodHRwcyUzQSUyRiUyRmFwaS5naXRodWIuY29tJTJGcmVwb3MlMjIlMEElMEElMjAlMjAlMjAlMjBmb3IlMjBwYWdlJTIwaW4lMjB0cWRtKHJhbmdlKG51bV9wYWdlcykpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIzJTIwUXVlcnklMjB3aXRoJTIwc3RhdGUlM0RhbGwlMjB0byUyMGdldCUyMGJvdGglMjBvcGVuJTIwYW5kJTIwY2xvc2VkJTIwaXNzdWVzJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcXVlcnklMjAlM0QlMjBmJTIyaXNzdWVzJTNGcGFnZSUzRCU3QnBhZ2UlN0QlMjZwZXJfcGFnZSUzRCU3QnBlcl9wYWdlJTdEJTI2c3RhdGUlM0RhbGwlMjIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBpc3N1ZXMlMjAlM0QlMjByZXF1ZXN0cy5nZXQoZiUyMiU3QmJhc2VfdXJsJTdEJTJGJTdCb3duZXIlN0QlMkYlN0JyZXBvJTdEJTJGJTdCcXVlcnklN0QlMjIlMkMlMjBoZWFkZXJzJTNEaGVhZGVycyklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBiYXRjaC5leHRlbmQoaXNzdWVzLmpzb24oKSklMEElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBpZiUyMGxlbihiYXRjaCklMjAlM0UlMjByYXRlX2xpbWl0JTIwYW5kJTIwbGVuKGFsbF9pc3N1ZXMpJTIwJTNDJTIwbnVtX2lzc3VlcyUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGFsbF9pc3N1ZXMuZXh0ZW5kKGJhdGNoKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGJhdGNoJTIwJTNEJTIwJTVCJTVEJTIwJTIwJTIzJTIwRmx1c2glMjBiYXRjaCUyMGZvciUyMG5leHQlMjB0aW1lJTIwcGVyaW9kJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcHJpbnQoZiUyMlJlYWNoZWQlMjBHaXRIdWIlMjByYXRlJTIwbGltaXQuJTIwU2xlZXBpbmclMjBmb3IlMjBvbmUlMjBob3VyJTIwLi4uJTIyKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHRpbWUuc2xlZXAoNjAlMjAqJTIwNjAlMjAlMkIlMjAxKSUwQSUwQSUyMCUyMCUyMCUyMGFsbF9pc3N1ZXMuZXh0ZW5kKGJhdGNoKSUwQSUyMCUyMCUyMCUyMGRmJTIwJTNEJTIwcGQuRGF0YUZyYW1lLmZyb21fcmVjb3JkcyhhbGxfaXNzdWVzKSUwQSUyMCUyMCUyMCUyMGRmLnRvX2pzb24oZiUyMiU3Qmlzc3Vlc19wYXRoJTdEJTJGJTdCcmVwbyU3RC1pc3N1ZXMuanNvbmwlMjIlMkMlMjBvcmllbnQlM0QlMjJyZWNvcmRzJTIyJTJDJTIwbGluZXMlM0RUcnVlKSUwQSUyMCUyMCUyMCUyMHByaW50KCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGYlMjJEb3dubG9hZGVkJTIwYWxsJTIwdGhlJTIwaXNzdWVzJTIwZm9yJTIwJTdCcmVwbyU3RCElMjBEYXRhc2V0JTIwc3RvcmVkJTIwYXQlMjAlN0Jpc3N1ZXNfcGF0aCU3RCUyRiU3QnJlcG8lN0QtaXNzdWVzLmpzb25sJTIyJTBBJTIwJTIwJTIwJTIwKQ==",highlighted:`<span class="hljs-keyword">import</span> time
<span class="hljs-keyword">import</span> math
<span class="hljs-keyword">from</span> pathlib <span class="hljs-keyword">import</span> Path
<span class="hljs-keyword">import</span> pandas <span class="hljs-keyword">as</span> pd
<span class="hljs-keyword">from</span> tqdm.notebook <span class="hljs-keyword">import</span> tqdm
<span class="hljs-keyword">def</span> <span class="hljs-title function_">fetch_issues</span>(<span class="hljs-params">
owner=<span class="hljs-string">&quot;huggingface&quot;</span>,
repo=<span class="hljs-string">&quot;datasets&quot;</span>,
num_issues=<span class="hljs-number">10_000</span>,
rate_limit=<span class="hljs-number">5_000</span>,
issues_path=Path(<span class="hljs-params"><span class="hljs-string">&quot;.&quot;</span></span>),
</span>):
<span class="hljs-keyword">if</span> <span class="hljs-keyword">not</span> issues_path.is_dir():
issues_path.mkdir(exist_ok=<span class="hljs-literal">True</span>)
batch = []
all_issues = []
per_page = <span class="hljs-number">100</span> <span class="hljs-comment"># Number of issues to return per page</span>
num_pages = math.ceil(num_issues / per_page)
base_url = <span class="hljs-string">&quot;https://api.github.com/repos&quot;</span>
<span class="hljs-keyword">for</span> page <span class="hljs-keyword">in</span> tqdm(<span class="hljs-built_in">range</span>(num_pages)):
<span class="hljs-comment"># Query with state=all to get both open and closed issues</span>
query = <span class="hljs-string">f&quot;issues?page=<span class="hljs-subst">{page}</span>&amp;per_page=<span class="hljs-subst">{per_page}</span>&amp;state=all&quot;</span>
issues = requests.get(<span class="hljs-string">f&quot;<span class="hljs-subst">{base_url}</span>/<span class="hljs-subst">{owner}</span>/<span class="hljs-subst">{repo}</span>/<span class="hljs-subst">{query}</span>&quot;</span>, headers=headers)
batch.extend(issues.json())
<span class="hljs-keyword">if</span> <span class="hljs-built_in">len</span>(batch) &gt; rate_limit <span class="hljs-keyword">and</span> <span class="hljs-built_in">len</span>(all_issues) &lt; num_issues:
all_issues.extend(batch)
batch = [] <span class="hljs-comment"># Flush batch for next time period</span>
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;Reached GitHub rate limit. Sleeping for one hour ...&quot;</span>)
time.sleep(<span class="hljs-number">60</span> * <span class="hljs-number">60</span> + <span class="hljs-number">1</span>)
all_issues.extend(batch)
df = pd.DataFrame.from_records(all_issues)
df.to_json(<span class="hljs-string">f&quot;<span class="hljs-subst">{issues_path}</span>/<span class="hljs-subst">{repo}</span>-issues.jsonl&quot;</span>, orient=<span class="hljs-string">&quot;records&quot;</span>, lines=<span class="hljs-literal">True</span>)
<span class="hljs-built_in">print</span>(
<span class="hljs-string">f&quot;Downloaded all the issues for <span class="hljs-subst">{repo}</span>! Dataset stored at <span class="hljs-subst">{issues_path}</span>/<span class="hljs-subst">{repo}</span>-issues.jsonl&quot;</span>
)`,wrap:!1}}),ts=new J({props:{code:"JTIzJTIwRGVwZW5kaW5nJTIwb24lMjB5b3VyJTIwaW50ZXJuZXQlMjBjb25uZWN0aW9uJTJDJTIwdGhpcyUyMGNhbiUyMHRha2UlMjBzZXZlcmFsJTIwbWludXRlcyUyMHRvJTIwcnVuLi4uJTBBZmV0Y2hfaXNzdWVzKCk=",highlighted:`<span class="hljs-comment"># Depending on your internet connection, this can take several minutes to run...</span>
fetch_issues()`,wrap:!1}}),os=new J({props:{code:"aXNzdWVzX2RhdGFzZXQlMjAlM0QlMjBsb2FkX2RhdGFzZXQoJTIyanNvbiUyMiUyQyUyMGRhdGFfZmlsZXMlM0QlMjJkYXRhc2V0cy1pc3N1ZXMuanNvbmwlMjIlMkMlMjBzcGxpdCUzRCUyMnRyYWluJTIyKSUwQWlzc3Vlc19kYXRhc2V0",highlighted:`issues_dataset = load_dataset(<span class="hljs-string">&quot;json&quot;</span>, data_files=<span class="hljs-string">&quot;datasets-issues.jsonl&quot;</span>, split=<span class="hljs-string">&quot;train&quot;</span>)
issues_dataset`,wrap:!1}}),is=new J({props:{code:"RGF0YXNldCglN0IlMEElMjAlMjAlMjAlMjBmZWF0dXJlcyUzQSUyMCU1Qid1cmwnJTJDJTIwJ3JlcG9zaXRvcnlfdXJsJyUyQyUyMCdsYWJlbHNfdXJsJyUyQyUyMCdjb21tZW50c191cmwnJTJDJTIwJ2V2ZW50c191cmwnJTJDJTIwJ2h0bWxfdXJsJyUyQyUyMCdpZCclMkMlMjAnbm9kZV9pZCclMkMlMjAnbnVtYmVyJyUyQyUyMCd0aXRsZSclMkMlMjAndXNlciclMkMlMjAnbGFiZWxzJyUyQyUyMCdzdGF0ZSclMkMlMjAnbG9ja2VkJyUyQyUyMCdhc3NpZ25lZSclMkMlMjAnYXNzaWduZWVzJyUyQyUyMCdtaWxlc3RvbmUnJTJDJTIwJ2NvbW1lbnRzJyUyQyUyMCdjcmVhdGVkX2F0JyUyQyUyMCd1cGRhdGVkX2F0JyUyQyUyMCdjbG9zZWRfYXQnJTJDJTIwJ2F1dGhvcl9hc3NvY2lhdGlvbiclMkMlMjAnYWN0aXZlX2xvY2tfcmVhc29uJyUyQyUyMCdwdWxsX3JlcXVlc3QnJTJDJTIwJ2JvZHknJTJDJTIwJ3RpbWVsaW5lX3VybCclMkMlMjAncGVyZm9ybWVkX3ZpYV9naXRodWJfYXBwJyU1RCUyQyUwQSUyMCUyMCUyMCUyMG51bV9yb3dzJTNBJTIwMzAxOSUwQSU3RCk=",highlighted:`Dataset({
features: [<span class="hljs-string">&#x27;url&#x27;</span>, <span class="hljs-string">&#x27;repository_url&#x27;</span>, <span class="hljs-string">&#x27;labels_url&#x27;</span>, <span class="hljs-string">&#x27;comments_url&#x27;</span>, <span class="hljs-string">&#x27;events_url&#x27;</span>, <span class="hljs-string">&#x27;html_url&#x27;</span>, <span class="hljs-string">&#x27;id&#x27;</span>, <span class="hljs-string">&#x27;node_id&#x27;</span>, <span class="hljs-string">&#x27;number&#x27;</span>, <span class="hljs-string">&#x27;title&#x27;</span>, <span class="hljs-string">&#x27;user&#x27;</span>, <span class="hljs-string">&#x27;labels&#x27;</span>, <span class="hljs-string">&#x27;state&#x27;</span>, <span class="hljs-string">&#x27;locked&#x27;</span>, <span class="hljs-string">&#x27;assignee&#x27;</span>, <span class="hljs-string">&#x27;assignees&#x27;</span>, <span class="hljs-string">&#x27;milestone&#x27;</span>, <span class="hljs-string">&#x27;comments&#x27;</span>, <span class="hljs-string">&#x27;created_at&#x27;</span>, <span class="hljs-string">&#x27;updated_at&#x27;</span>, <span class="hljs-string">&#x27;closed_at&#x27;</span>, <span class="hljs-string">&#x27;author_association&#x27;</span>, <span class="hljs-string">&#x27;active_lock_reason&#x27;</span>, <span class="hljs-string">&#x27;pull_request&#x27;</span>, <span class="hljs-string">&#x27;body&#x27;</span>, <span class="hljs-string">&#x27;timeline_url&#x27;</span>, <span class="hljs-string">&#x27;performed_via_github_app&#x27;</span>],
num_rows: <span class="hljs-number">3019</span>
})`,wrap:!1}}),ds=new ca({props:{title:"Limpando os dados",local:"limpando-os-dados",headingTag:"h2"}}),ms=new J({props:{code:"c2FtcGxlJTIwJTNEJTIwaXNzdWVzX2RhdGFzZXQuc2h1ZmZsZShzZWVkJTNENjY2KS5zZWxlY3QocmFuZ2UoMykpJTBBJTBBJTIzJTIwUHJpbnQlMjBvdXQlMjB0aGUlMjBVUkwlMjBhbmQlMjBwdWxsJTIwcmVxdWVzdCUyMGVudHJpZXMlMEFmb3IlMjB1cmwlMkMlMjBwciUyMGluJTIwemlwKHNhbXBsZSU1QiUyMmh0bWxfdXJsJTIyJTVEJTJDJTIwc2FtcGxlJTVCJTIycHVsbF9yZXF1ZXN0JTIyJTVEKSUzQSUwQSUyMCUyMCUyMCUyMHByaW50KGYlMjIlM0UlM0UlMjBVUkwlM0ElMjAlN0J1cmwlN0QlMjIpJTBBJTIwJTIwJTIwJTIwcHJpbnQoZiUyMiUzRSUzRSUyMFB1bGwlMjByZXF1ZXN0JTNBJTIwJTdCcHIlN0QlNUNuJTIyKQ==",highlighted:`sample = issues_dataset.shuffle(seed=<span class="hljs-number">666</span>).select(<span class="hljs-built_in">range</span>(<span class="hljs-number">3</span>))
<span class="hljs-comment"># Print out the URL and pull request entries</span>
<span class="hljs-keyword">for</span> url, pr <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(sample[<span class="hljs-string">&quot;html_url&quot;</span>], sample[<span class="hljs-string">&quot;pull_request&quot;</span>]):
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;&gt;&gt; URL: <span class="hljs-subst">{url}</span>&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;&gt;&gt; Pull request: <span class="hljs-subst">{pr}</span>\\n&quot;</span>)`,wrap:!1}}),Ms=new J({props:{code:"JTNFJTNFJTIwVVJMJTNBJTIwaHR0cHMlM0ElMkYlMkZnaXRodWIuY29tJTJGaHVnZ2luZ2ZhY2UlMkZkYXRhc2V0cyUyRnB1bGwlMkY4NTAlMEElM0UlM0UlMjBQdWxsJTIwcmVxdWVzdCUzQSUyMCU3Qid1cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGYXBpLmdpdGh1Yi5jb20lMkZyZXBvcyUyRmh1Z2dpbmdmYWNlJTJGZGF0YXNldHMlMkZwdWxscyUyRjg1MCclMkMlMjAnaHRtbF91cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGZ2l0aHViLmNvbSUyRmh1Z2dpbmdmYWNlJTJGZGF0YXNldHMlMkZwdWxsJTJGODUwJyUyQyUyMCdkaWZmX3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZnaXRodWIuY29tJTJGaHVnZ2luZ2ZhY2UlMkZkYXRhc2V0cyUyRnB1bGwlMkY4NTAuZGlmZiclMkMlMjAncGF0Y2hfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmdpdGh1Yi5jb20lMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJTJGcHVsbCUyRjg1MC5wYXRjaCclN0QlMEElMEElM0UlM0UlMjBVUkwlM0ElMjBodHRwcyUzQSUyRiUyRmdpdGh1Yi5jb20lMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJTJGaXNzdWVzJTJGMjc3MyUwQSUzRSUzRSUyMFB1bGwlMjByZXF1ZXN0JTNBJTIwTm9uZSUwQSUwQSUzRSUzRSUyMFVSTCUzQSUyMGh0dHBzJTNBJTJGJTJGZ2l0aHViLmNvbSUyRmh1Z2dpbmdmYWNlJTJGZGF0YXNldHMlMkZwdWxsJTJGNzgzJTBBJTNFJTNFJTIwUHVsbCUyMHJlcXVlc3QlM0ElMjAlN0IndXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmFwaS5naXRodWIuY29tJTJGcmVwb3MlMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJTJGcHVsbHMlMkY3ODMnJTJDJTIwJ2h0bWxfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmdpdGh1Yi5jb20lMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJTJGcHVsbCUyRjc4MyclMkMlMjAnZGlmZl91cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGZ2l0aHViLmNvbSUyRmh1Z2dpbmdmYWNlJTJGZGF0YXNldHMlMkZwdWxsJTJGNzgzLmRpZmYnJTJDJTIwJ3BhdGNoX3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZnaXRodWIuY29tJTJGaHVnZ2luZ2ZhY2UlMkZkYXRhc2V0cyUyRnB1bGwlMkY3ODMucGF0Y2gnJTdE",highlighted:`&gt;&gt; URL: https://github.com/huggingface/datasets/pull/<span class="hljs-number">850</span>
&gt;&gt; Pull request: {<span class="hljs-string">&#x27;url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/repos/huggingface/datasets/pulls/850&#x27;</span>, <span class="hljs-string">&#x27;html_url&#x27;</span>: <span class="hljs-string">&#x27;https://github.com/huggingface/datasets/pull/850&#x27;</span>, <span class="hljs-string">&#x27;diff_url&#x27;</span>: <span class="hljs-string">&#x27;https://github.com/huggingface/datasets/pull/850.diff&#x27;</span>, <span class="hljs-string">&#x27;patch_url&#x27;</span>: <span class="hljs-string">&#x27;https://github.com/huggingface/datasets/pull/850.patch&#x27;</span>}
&gt;&gt; URL: https://github.com/huggingface/datasets/issues/<span class="hljs-number">2773</span>
&gt;&gt; Pull request: <span class="hljs-literal">None</span>
&gt;&gt; URL: https://github.com/huggingface/datasets/pull/<span class="hljs-number">783</span>
&gt;&gt; Pull request: {<span class="hljs-string">&#x27;url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/repos/huggingface/datasets/pulls/783&#x27;</span>, <span class="hljs-string">&#x27;html_url&#x27;</span>: <span class="hljs-string">&#x27;https://github.com/huggingface/datasets/pull/783&#x27;</span>, <span class="hljs-string">&#x27;diff_url&#x27;</span>: <span class="hljs-string">&#x27;https://github.com/huggingface/datasets/pull/783.diff&#x27;</span>, <span class="hljs-string">&#x27;patch_url&#x27;</span>: <span class="hljs-string">&#x27;https://github.com/huggingface/datasets/pull/783.patch&#x27;</span>}`,wrap:!1}}),ys=new J({props:{code:"aXNzdWVzX2RhdGFzZXQlMjAlM0QlMjBpc3N1ZXNfZGF0YXNldC5tYXAoJTBBJTIwJTIwJTIwJTIwbGFtYmRhJTIweCUzQSUyMCU3QiUyMmlzX3B1bGxfcmVxdWVzdCUyMiUzQSUyMEZhbHNlJTIwaWYlMjB4JTVCJTIycHVsbF9yZXF1ZXN0JTIyJTVEJTIwaXMlMjBOb25lJTIwZWxzZSUyMFRydWUlN0QlMEEp",highlighted:`issues_dataset = issues_dataset.<span class="hljs-built_in">map</span>(
<span class="hljs-keyword">lambda</span> x: {<span class="hljs-string">&quot;is_pull_request&quot;</span>: <span class="hljs-literal">False</span> <span class="hljs-keyword">if</span> x[<span class="hljs-string">&quot;pull_request&quot;</span>] <span class="hljs-keyword">is</span> <span class="hljs-literal">None</span> <span class="hljs-keyword">else</span> <span class="hljs-literal">True</span>}
)`,wrap:!1}}),Us=new ca({props:{title:"Aumentando o conjunto de dados",local:"aumentando-o-conjunto-de-dados",headingTag:"h2"}}),gs=new J({props:{code:"aXNzdWVfbnVtYmVyJTIwJTNEJTIwMjc5MiUwQXVybCUyMCUzRCUyMGYlMjJodHRwcyUzQSUyRiUyRmFwaS5naXRodWIuY29tJTJGcmVwb3MlMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJTJGaXNzdWVzJTJGJTdCaXNzdWVfbnVtYmVyJTdEJTJGY29tbWVudHMlMjIlMEFyZXNwb25zZSUyMCUzRCUyMHJlcXVlc3RzLmdldCh1cmwlMkMlMjBoZWFkZXJzJTNEaGVhZGVycyklMEFyZXNwb25zZS5qc29uKCk=",highlighted:`issue_number = <span class="hljs-number">2792</span>
url = <span class="hljs-string">f&quot;https://api.github.com/repos/huggingface/datasets/issues/<span class="hljs-subst">{issue_number}</span>/comments&quot;</span>
response = requests.get(url, headers=headers)
response.json()`,wrap:!1}}),ws=new J({props:{code:"JTVCJTdCJ3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZhcGkuZ2l0aHViLmNvbSUyRnJlcG9zJTJGaHVnZ2luZ2ZhY2UlMkZkYXRhc2V0cyUyRmlzc3VlcyUyRmNvbW1lbnRzJTJGODk3NTk0MTI4JyUyQyUwQSUyMCUyMCdodG1sX3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZnaXRodWIuY29tJTJGaHVnZ2luZ2ZhY2UlMkZkYXRhc2V0cyUyRnB1bGwlMkYyNzkyJTIzaXNzdWVjb21tZW50LTg5NzU5NDEyOCclMkMlMEElMjAlMjAnaXNzdWVfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmFwaS5naXRodWIuY29tJTJGcmVwb3MlMkZodWdnaW5nZmFjZSUyRmRhdGFzZXRzJTJGaXNzdWVzJTJGMjc5MiclMkMlMEElMjAlMjAnaWQnJTNBJTIwODk3NTk0MTI4JTJDJTBBJTIwJTIwJ25vZGVfaWQnJTNBJTIwJ0lDX2t3RE9EdW56cHM0MWdETVEnJTJDJTBBJTIwJTIwJ3VzZXInJTNBJTIwJTdCJ2xvZ2luJyUzQSUyMCdiaGF2aXR2eWFtYWxpayclMkMlMEElMjAlMjAlMjAnaWQnJTNBJTIwMTk3MTg4MTglMkMlMEElMjAlMjAlMjAnbm9kZV9pZCclM0ElMjAnTURRNlZYTmxjakU1TnpFNE9ERTQnJTJDJTBBJTIwJTIwJTIwJ2F2YXRhcl91cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGYXZhdGFycy5naXRodWJ1c2VyY29udGVudC5jb20lMkZ1JTJGMTk3MTg4MTglM0Z2JTNENCclMkMlMEElMjAlMjAlMjAnZ3JhdmF0YXJfaWQnJTNBJTIwJyclMkMlMEElMjAlMjAlMjAndXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmFwaS5naXRodWIuY29tJTJGdXNlcnMlMkZiaGF2aXR2eWFtYWxpayclMkMlMEElMjAlMjAlMjAnaHRtbF91cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGZ2l0aHViLmNvbSUyRmJoYXZpdHZ5YW1hbGlrJyUyQyUwQSUyMCUyMCUyMCdmb2xsb3dlcnNfdXJsJyUzQSUyMCdodHRwcyUzQSUyRiUyRmFwaS5naXRodWIuY29tJTJGdXNlcnMlMkZiaGF2aXR2eWFtYWxpayUyRmZvbGxvd2VycyclMkMlMEElMjAlMjAlMjAnZm9sbG93aW5nX3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZhcGkuZ2l0aHViLmNvbSUyRnVzZXJzJTJGYmhhdml0dnlhbWFsaWslMkZmb2xsb3dpbmclN0IlMkZvdGhlcl91c2VyJTdEJyUyQyUwQSUyMCUyMCUyMCdnaXN0c191cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGYXBpLmdpdGh1Yi5jb20lMkZ1c2VycyUyRmJoYXZpdHZ5YW1hbGlrJTJGZ2lzdHMlN0IlMkZnaXN0X2lkJTdEJyUyQyUwQSUyMCUyMCUyMCdzdGFycmVkX3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZhcGkuZ2l0aHViLmNvbSUyRnVzZXJzJTJGYmhhdml0dnlhbWFsaWslMkZzdGFycmVkJTdCJTJGb3duZXIlN0QlN0IlMkZyZXBvJTdEJyUyQyUwQSUyMCUyMCUyMCdzdWJzY3JpcHRpb25zX3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZhcGkuZ2l0aHViLmNvbSUyRnVzZXJzJTJGYmhhdml0dnlhbWFsaWslMkZzdWJzY3JpcHRpb25zJyUyQyUwQSUyMCUyMCUyMCdvcmdhbml6YXRpb25zX3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZhcGkuZ2l0aHViLmNvbSUyRnVzZXJzJTJGYmhhdml0dnlhbWFsaWslMkZvcmdzJyUyQyUwQSUyMCUyMCUyMCdyZXBvc191cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGYXBpLmdpdGh1Yi5jb20lMkZ1c2VycyUyRmJoYXZpdHZ5YW1hbGlrJTJGcmVwb3MnJTJDJTBBJTIwJTIwJTIwJ2V2ZW50c191cmwnJTNBJTIwJ2h0dHBzJTNBJTJGJTJGYXBpLmdpdGh1Yi5jb20lMkZ1c2VycyUyRmJoYXZpdHZ5YW1hbGlrJTJGZXZlbnRzJTdCJTJGcHJpdmFjeSU3RCclMkMlMEElMjAlMjAlMjAncmVjZWl2ZWRfZXZlbnRzX3VybCclM0ElMjAnaHR0cHMlM0ElMkYlMkZhcGkuZ2l0aHViLmNvbSUyRnVzZXJzJTJGYmhhdml0dnlhbWFsaWslMkZyZWNlaXZlZF9ldmVudHMnJTJDJTBBJTIwJTIwJTIwJ3R5cGUnJTNBJTIwJ1VzZXInJTJDJTBBJTIwJTIwJTIwJ3NpdGVfYWRtaW4nJTNBJTIwRmFsc2UlN0QlMkMlMEElMjAlMjAnY3JlYXRlZF9hdCclM0ElMjAnMjAyMS0wOC0xMlQxMiUzQTIxJTNBNTJaJyUyQyUwQSUyMCUyMCd1cGRhdGVkX2F0JyUzQSUyMCcyMDIxLTA4LTEyVDEyJTNBMzElM0ExN1onJTJDJTBBJTIwJTIwJ2F1dGhvcl9hc3NvY2lhdGlvbiclM0ElMjAnQ09OVFJJQlVUT1InJTJDJTBBJTIwJTIwJ2JvZHknJTNBJTIwJTIyJTQwYWxiZXJ0dmlsbGFub3ZhJTIwbXklMjB0ZXN0cyUyMGFyZSUyMGZhaWxpbmclMjBoZXJlJTNBJTVDciU1Q24lNjAlNjAlNjAlNUNyJTVDbmRhdGFzZXRfbmFtZSUyMCUzRCUyMCdnb29hcSclNUNyJTVDbiU1Q3IlNUNuJTIwJTIwJTIwJTIwZGVmJTIwdGVzdF9sb2FkX2RhdGFzZXQoc2VsZiUyQyUyMGRhdGFzZXRfbmFtZSklM0ElNUNyJTVDbiUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGNvbmZpZ3MlMjAlM0QlMjBzZWxmLmRhdGFzZXRfdGVzdGVyLmxvYWRfYWxsX2NvbmZpZ3MoZGF0YXNldF9uYW1lJTJDJTIwaXNfbG9jYWwlM0RUcnVlKSU1QiUzQTElNUQlNUNyJTVDbiUzRSUyMCUyMCUyMCUyMCUyMCUyMCUyMHNlbGYuZGF0YXNldF90ZXN0ZXIuY2hlY2tfbG9hZF9kYXRhc2V0KGRhdGFzZXRfbmFtZSUyQyUyMGNvbmZpZ3MlMkMlMjBpc19sb2NhbCUzRFRydWUlMkMlMjB1c2VfbG9jYWxfZHVtbXlfZGF0YSUzRFRydWUpJTVDciU1Q24lNUNyJTVDbnRlc3RzJTJGdGVzdF9kYXRhc2V0X2NvbW1vbi5weSUzQTIzNCUzQSUyMCU1Q3IlNUNuXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMCU1Q3IlNUNudGVzdHMlMkZ0ZXN0X2RhdGFzZXRfY29tbW9uLnB5JTNBMTg3JTNBJTIwaW4lMjBjaGVja19sb2FkX2RhdGFzZXQlNUNyJTVDbiUyMCUyMCUyMCUyMHNlbGYucGFyZW50LmFzc2VydFRydWUobGVuKGRhdGFzZXQlNUJzcGxpdCU1RCklMjAlM0UlMjAwKSU1Q3IlNUNuRSUyMCUyMCUyMEFzc2VydGlvbkVycm9yJTNBJTIwRmFsc2UlMjBpcyUyMG5vdCUyMHRydWUlNUNyJTVDbiU2MCU2MCU2MCU1Q3IlNUNuV2hlbiUyMEklMjB0cnklMjBsb2FkaW5nJTIwZGF0YXNldCUyMG9uJTIwbG9jYWwlMjBtYWNoaW5lJTIwaXQlMjB3b3JrcyUyMGZpbmUuJTIwQW55JTIwc3VnZ2VzdGlvbnMlMjBvbiUyMGhvdyUyMGNhbiUyMEklMjBhdm9pZCUyMHRoaXMlMjBlcnJvciUzRiUyMiUyQyUwQSUyMCUyMCdwZXJmb3JtZWRfdmlhX2dpdGh1Yl9hcHAnJTNBJTIwTm9uZSU3RCU1RA==",highlighted:`[{<span class="hljs-string">&#x27;url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/repos/huggingface/datasets/issues/comments/897594128&#x27;</span>,
<span class="hljs-string">&#x27;html_url&#x27;</span>: <span class="hljs-string">&#x27;https://github.com/huggingface/datasets/pull/2792#issuecomment-897594128&#x27;</span>,
<span class="hljs-string">&#x27;issue_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/repos/huggingface/datasets/issues/2792&#x27;</span>,
<span class="hljs-string">&#x27;id&#x27;</span>: <span class="hljs-number">897594128</span>,
<span class="hljs-string">&#x27;node_id&#x27;</span>: <span class="hljs-string">&#x27;IC_kwDODunzps41gDMQ&#x27;</span>,
<span class="hljs-string">&#x27;user&#x27;</span>: {<span class="hljs-string">&#x27;login&#x27;</span>: <span class="hljs-string">&#x27;bhavitvyamalik&#x27;</span>,
<span class="hljs-string">&#x27;id&#x27;</span>: <span class="hljs-number">19718818</span>,
<span class="hljs-string">&#x27;node_id&#x27;</span>: <span class="hljs-string">&#x27;MDQ6VXNlcjE5NzE4ODE4&#x27;</span>,
<span class="hljs-string">&#x27;avatar_url&#x27;</span>: <span class="hljs-string">&#x27;https://avatars.githubusercontent.com/u/19718818?v=4&#x27;</span>,
<span class="hljs-string">&#x27;gravatar_id&#x27;</span>: <span class="hljs-string">&#x27;&#x27;</span>,
<span class="hljs-string">&#x27;url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik&#x27;</span>,
<span class="hljs-string">&#x27;html_url&#x27;</span>: <span class="hljs-string">&#x27;https://github.com/bhavitvyamalik&#x27;</span>,
<span class="hljs-string">&#x27;followers_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/followers&#x27;</span>,
<span class="hljs-string">&#x27;following_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/following{/other_user}&#x27;</span>,
<span class="hljs-string">&#x27;gists_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/gists{/gist_id}&#x27;</span>,
<span class="hljs-string">&#x27;starred_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/starred{/owner}{/repo}&#x27;</span>,
<span class="hljs-string">&#x27;subscriptions_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/subscriptions&#x27;</span>,
<span class="hljs-string">&#x27;organizations_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/orgs&#x27;</span>,
<span class="hljs-string">&#x27;repos_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/repos&#x27;</span>,
<span class="hljs-string">&#x27;events_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/events{/privacy}&#x27;</span>,
<span class="hljs-string">&#x27;received_events_url&#x27;</span>: <span class="hljs-string">&#x27;https://api.github.com/users/bhavitvyamalik/received_events&#x27;</span>,
<span class="hljs-string">&#x27;type&#x27;</span>: <span class="hljs-string">&#x27;User&#x27;</span>,
<span class="hljs-string">&#x27;site_admin&#x27;</span>: <span class="hljs-literal">False</span>},
<span class="hljs-string">&#x27;created_at&#x27;</span>: <span class="hljs-string">&#x27;2021-08-12T12:21:52Z&#x27;</span>,
<span class="hljs-string">&#x27;updated_at&#x27;</span>: <span class="hljs-string">&#x27;2021-08-12T12:31:17Z&#x27;</span>,
<span class="hljs-string">&#x27;author_association&#x27;</span>: <span class="hljs-string">&#x27;CONTRIBUTOR&#x27;</span>,
<span class="hljs-string">&#x27;body&#x27;</span>: <span class="hljs-string">&quot;@albertvillanova my tests are failing here:\\r\\n\`\`\`\\r\\ndataset_name = &#x27;gooaq&#x27;\\r\\n\\r\\n def test_load_dataset(self, dataset_name):\\r\\n configs = self.dataset_tester.load_all_configs(dataset_name, is_local=True)[:1]\\r\\n&gt; self.dataset_tester.check_load_dataset(dataset_name, configs, is_local=True, use_local_dummy_data=True)\\r\\n\\r\\ntests/test_dataset_common.py:234: \\r\\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \\r\\ntests/test_dataset_common.py:187: in check_load_dataset\\r\\n self.parent.assertTrue(len(dataset[split]) &gt; 0)\\r\\nE AssertionError: False is not true\\r\\n\`\`\`\\r\\nWhen I try loading dataset on local machine it works fine. Any suggestions on how can I avoid this error?&quot;</span>,
<span class="hljs-string">&#x27;performed_via_github_app&#x27;</span>: <span class="hljs-literal">None</span>}]`,wrap:!1}}),xs=new J({props:{code:"ZGVmJTIwZ2V0X2NvbW1lbnRzKGlzc3VlX251bWJlciklM0ElMEElMjAlMjAlMjAlMjB1cmwlMjAlM0QlMjBmJTIyaHR0cHMlM0ElMkYlMkZhcGkuZ2l0aHViLmNvbSUyRnJlcG9zJTJGaHVnZ2luZ2ZhY2UlMkZkYXRhc2V0cyUyRmlzc3VlcyUyRiU3Qmlzc3VlX251bWJlciU3RCUyRmNvbW1lbnRzJTIyJTBBJTIwJTIwJTIwJTIwcmVzcG9uc2UlMjAlM0QlMjByZXF1ZXN0cy5nZXQodXJsJTJDJTIwaGVhZGVycyUzRGhlYWRlcnMpJTBBJTIwJTIwJTIwJTIwcmV0dXJuJTIwJTVCciU1QiUyMmJvZHklMjIlNUQlMjBmb3IlMjByJTIwaW4lMjByZXNwb25zZS5qc29uKCklNUQlMEElMEElMEElMjMlMjBUZXN0JTIwb3VyJTIwZnVuY3Rpb24lMjB3b3JrcyUyMGFzJTIwZXhwZWN0ZWQlMEFnZXRfY29tbWVudHMoMjc5Mik=",highlighted:`<span class="hljs-keyword">def</span> <span class="hljs-title function_">get_comments</span>(<span class="hljs-params">issue_number</span>):
url = <span class="hljs-string">f&quot;https://api.github.com/repos/huggingface/datasets/issues/<span class="hljs-subst">{issue_number}</span>/comments&quot;</span>
response = requests.get(url, headers=headers)
<span class="hljs-keyword">return</span> [r[<span class="hljs-string">&quot;body&quot;</span>] <span class="hljs-keyword">for</span> r <span class="hljs-keyword">in</span> response.json()]
<span class="hljs-comment"># Test our function works as expected</span>
get_comments(<span class="hljs-number">2792</span>)`,wrap:!1}}),Zs=new J({props:{code:"JTVCJTIyJTQwYWxiZXJ0dmlsbGFub3ZhJTIwbXklMjB0ZXN0cyUyMGFyZSUyMGZhaWxpbmclMjBoZXJlJTNBJTVDciU1Q24lNjAlNjAlNjAlNUNyJTVDbmRhdGFzZXRfbmFtZSUyMCUzRCUyMCdnb29hcSclNUNyJTVDbiU1Q3IlNUNuJTIwJTIwJTIwJTIwZGVmJTIwdGVzdF9sb2FkX2RhdGFzZXQoc2VsZiUyQyUyMGRhdGFzZXRfbmFtZSklM0ElNUNyJTVDbiUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGNvbmZpZ3MlMjAlM0QlMjBzZWxmLmRhdGFzZXRfdGVzdGVyLmxvYWRfYWxsX2NvbmZpZ3MoZGF0YXNldF9uYW1lJTJDJTIwaXNfbG9jYWwlM0RUcnVlKSU1QiUzQTElNUQlNUNyJTVDbiUzRSUyMCUyMCUyMCUyMCUyMCUyMCUyMHNlbGYuZGF0YXNldF90ZXN0ZXIuY2hlY2tfbG9hZF9kYXRhc2V0KGRhdGFzZXRfbmFtZSUyQyUyMGNvbmZpZ3MlMkMlMjBpc19sb2NhbCUzRFRydWUlMkMlMjB1c2VfbG9jYWxfZHVtbXlfZGF0YSUzRFRydWUpJTVDciU1Q24lNUNyJTVDbnRlc3RzJTJGdGVzdF9kYXRhc2V0X2NvbW1vbi5weSUzQTIzNCUzQSUyMCU1Q3IlNUNuXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMF8lMjBfJTIwXyUyMCU1Q3IlNUNudGVzdHMlMkZ0ZXN0X2RhdGFzZXRfY29tbW9uLnB5JTNBMTg3JTNBJTIwaW4lMjBjaGVja19sb2FkX2RhdGFzZXQlNUNyJTVDbiUyMCUyMCUyMCUyMHNlbGYucGFyZW50LmFzc2VydFRydWUobGVuKGRhdGFzZXQlNUJzcGxpdCU1RCklMjAlM0UlMjAwKSU1Q3IlNUNuRSUyMCUyMCUyMEFzc2VydGlvbkVycm9yJTNBJTIwRmFsc2UlMjBpcyUyMG5vdCUyMHRydWUlNUNyJTVDbiU2MCU2MCU2MCU1Q3IlNUNuV2hlbiUyMEklMjB0cnklMjBsb2FkaW5nJTIwZGF0YXNldCUyMG9uJTIwbG9jYWwlMjBtYWNoaW5lJTIwaXQlMjB3b3JrcyUyMGZpbmUuJTIwQW55JTIwc3VnZ2VzdGlvbnMlMjBvbiUyMGhvdyUyMGNhbiUyMEklMjBhdm9pZCUyMHRoaXMlMjBlcnJvciUzRiUyMiU1RA==",highlighted:'[<span class="hljs-string">&quot;@albertvillanova my tests are failing here:\\r\\n```\\r\\ndataset_name = &#x27;gooaq&#x27;\\r\\n\\r\\n def test_load_dataset(self, dataset_name):\\r\\n configs = self.dataset_tester.load_all_configs(dataset_name, is_local=True)[:1]\\r\\n&gt; self.dataset_tester.check_load_dataset(dataset_name, configs, is_local=True, use_local_dummy_data=True)\\r\\n\\r\\ntests/test_dataset_common.py:234: \\r\\n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \\r\\ntests/test_dataset_common.py:187: in check_load_dataset\\r\\n self.parent.assertTrue(len(dataset[split]) &gt; 0)\\r\\nE AssertionError: False is not true\\r\\n```\\r\\nWhen I try loading dataset on local machine it works fine. Any suggestions on how can I avoid this error?&quot;</span>]',wrap:!1}}),Rs=new J({props:{code:"JTIzJTIwRGVwZW5kaW5nJTIwb24lMjB5b3VyJTIwaW50ZXJuZXQlMjBjb25uZWN0aW9uJTJDJTIwdGhpcyUyMGNhbiUyMHRha2UlMjBhJTIwZmV3JTIwbWludXRlcy4uLiUwQWlzc3Vlc193aXRoX2NvbW1lbnRzX2RhdGFzZXQlMjAlM0QlMjBpc3N1ZXNfZGF0YXNldC5tYXAoJTBBJTIwJTIwJTIwJTIwbGFtYmRhJTIweCUzQSUyMCU3QiUyMmNvbW1lbnRzJTIyJTNBJTIwZ2V0X2NvbW1lbnRzKHglNUIlMjJudW1iZXIlMjIlNUQpJTdEJTBBKQ==",highlighted:`<span class="hljs-comment"># Depending on your internet connection, this can take a few minutes...</span>
issues_with_comments_dataset = issues_dataset.<span class="hljs-built_in">map</span>(
<span class="hljs-keyword">lambda</span> x: {<span class="hljs-string">&quot;comments&quot;</span>: get_comments(x[<span class="hljs-string">&quot;number&quot;</span>])}
)`,wrap:!1}}),vs=new J({props:{code:"aXNzdWVzX3dpdGhfY29tbWVudHNfZGF0YXNldC50b19qc29uKCUyMmlzc3Vlcy1kYXRhc2V0cy13aXRoLWNvbW1lbnRzLmpzb25sJTIyKQ==",highlighted:'issues_with_comments_dataset.to_json(<span class="hljs-string">&quot;issues-datasets-with-comments.jsonl&quot;</span>)',wrap:!1}}),Cs=new ca({props:{title:"Carregando o conjunto de dados para o Hugging Face Hub",local:"carregando-o-conjunto-de-dados-para-o-hugging-face-hub",headingTag:"h2"}}),Xs=new bt({props:{id:"HaN6qCr_Afc"}}),Ns=new J({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMGxpc3RfZGF0YXNldHMlMEElMEFhbGxfZGF0YXNldHMlMjAlM0QlMjBsaXN0X2RhdGFzZXRzKCklMEFwcmludChmJTIyTnVtYmVyJTIwb2YlMjBkYXRhc2V0cyUyMG9uJTIwSHViJTNBJTIwJTdCbGVuKGFsbF9kYXRhc2V0cyklN0QlMjIpJTBBcHJpbnQoYWxsX2RhdGFzZXRzJTVCMCU1RCk=",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> list_datasets
all_datasets = list_datasets()
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;Number of datasets on Hub: <span class="hljs-subst">{<span class="hljs-built_in">len</span>(all_datasets)}</span>&quot;</span>)
<span class="hljs-built_in">print</span>(all_datasets[<span class="hljs-number">0</span>])`,wrap:!1}}),zs=new J({props:{code:"TnVtYmVyJTIwb2YlMjBkYXRhc2V0cyUyMG9uJTIwSHViJTNBJTIwMTQ4NyUwQURhdGFzZXQlMjBOYW1lJTNBJTIwYWNyb255bV9pZGVudGlmaWNhdGlvbiUyQyUyMFRhZ3MlM0ElMjAlNUInYW5ub3RhdGlvbnNfY3JlYXRvcnMlM0FleHBlcnQtZ2VuZXJhdGVkJyUyQyUyMCdsYW5ndWFnZV9jcmVhdG9ycyUzQWZvdW5kJyUyQyUyMCdsYW5ndWFnZXMlM0FlbiclMkMlMjAnbGljZW5zZXMlM0FtaXQnJTJDJTIwJ211bHRpbGluZ3VhbGl0eSUzQW1vbm9saW5ndWFsJyUyQyUyMCdzaXplX2NhdGVnb3JpZXMlM0ExMEslM0NuJTNDMTAwSyclMkMlMjAnc291cmNlX2RhdGFzZXRzJTNBb3JpZ2luYWwnJTJDJTIwJ3Rhc2tfY2F0ZWdvcmllcyUzQXN0cnVjdHVyZS1wcmVkaWN0aW9uJyUyQyUyMCd0YXNrX2lkcyUzQXN0cnVjdHVyZS1wcmVkaWN0aW9uLW90aGVyLWFjcm9ueW0taWRlbnRpZmljYXRpb24nJTVE",highlighted:`Number of datasets on Hub: <span class="hljs-number">1487</span>
Dataset Name: acronym_identification, Tags: [<span class="hljs-string">&#x27;annotations_creators:expert-generated&#x27;</span>, <span class="hljs-string">&#x27;language_creators:found&#x27;</span>, <span class="hljs-string">&#x27;languages:en&#x27;</span>, <span class="hljs-string">&#x27;licenses:mit&#x27;</span>, <span class="hljs-string">&#x27;multilinguality:monolingual&#x27;</span>, <span class="hljs-string">&#x27;size_categories:10K&lt;n&lt;100K&#x27;</span>, <span class="hljs-string">&#x27;source_datasets:original&#x27;</span>, <span class="hljs-string">&#x27;task_categories:structure-prediction&#x27;</span>, <span class="hljs-string">&#x27;task_ids:structure-prediction-other-acronym-identification&#x27;</span>]`,wrap:!1}}),Bs=new J({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMG5vdGVib29rX2xvZ2luJTBBJTBBbm90ZWJvb2tfbG9naW4oKQ==",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> notebook_login
notebook_login()`,wrap:!1}}),Ws=new J({props:{code:"aHVnZ2luZ2ZhY2UtY2xpJTIwbG9naW4=",highlighted:"huggingface-cli login",wrap:!1}}),Qs=new J({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMGNyZWF0ZV9yZXBvJTBBJTBBcmVwb191cmwlMjAlM0QlMjBjcmVhdGVfcmVwbyhuYW1lJTNEJTIyZ2l0aHViLWlzc3VlcyUyMiUyQyUyMHJlcG9fdHlwZSUzRCUyMmRhdGFzZXQlMjIpJTBBcmVwb191cmw=",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> create_repo
repo_url = create_repo(name=<span class="hljs-string">&quot;github-issues&quot;</span>, repo_type=<span class="hljs-string">&quot;dataset&quot;</span>)
repo_url`,wrap:!1}}),Ys=new J({props:{code:"J2h0dHBzJTNBJTJGJTJGaHVnZ2luZ2ZhY2UuY28lMkZkYXRhc2V0cyUyRmxld3R1biUyRmdpdGh1Yi1pc3N1ZXMn",highlighted:'<span class="hljs-string">&#x27;https://huggingface.co/datasets/lewtun/github-issues&#x27;</span>',wrap:!1}}),Ss=new J({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMFJlcG9zaXRvcnklMEElMEFyZXBvJTIwJTNEJTIwUmVwb3NpdG9yeShsb2NhbF9kaXIlM0QlMjJnaXRodWItaXNzdWVzJTIyJTJDJTIwY2xvbmVfZnJvbSUzRHJlcG9fdXJsKSUwQSFjcCUyMGRhdGFzZXRzLWlzc3Vlcy13aXRoLWNvbW1lbnRzLmpzb25sJTIwZ2l0aHViLWlzc3VlcyUyRg==",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> Repository
repo = Repository(local_dir=<span class="hljs-string">&quot;github-issues&quot;</span>, clone_from=repo_url)
!cp datasets-issues-<span class="hljs-keyword">with</span>-comments.jsonl github-issues/`,wrap:!1}}),qs=new J({props:{code:"cmVwby5sZnNfdHJhY2soJTIyKi5qc29ubCUyMik=",highlighted:'repo.lfs_track(<span class="hljs-string">&quot;*.jsonl&quot;</span>)',wrap:!1}}),Ds=new J({props:{code:"cmVwby5wdXNoX3RvX2h1Yigp",highlighted:"repo.push_to_hub()",wrap:!1}}),Ks=new J({props:{code:"cmVtb3RlX2RhdGFzZXQlMjAlM0QlMjBsb2FkX2RhdGFzZXQoJTIybGV3dHVuJTJGZ2l0aHViLWlzc3VlcyUyMiUyQyUyMHNwbGl0JTNEJTIydHJhaW4lMjIpJTBBcmVtb3RlX2RhdGFzZXQ=",highlighted:`remote_dataset = load_dataset(<span class="hljs-string">&quot;lewtun/github-issues&quot;</span>, split=<span class="hljs-string">&quot;train&quot;</span>)
remote_dataset`,wrap:!1}}),sa=new J({props:{code:"RGF0YXNldCglN0IlMEElMjAlMjAlMjAlMjBmZWF0dXJlcyUzQSUyMCU1Qid1cmwnJTJDJTIwJ3JlcG9zaXRvcnlfdXJsJyUyQyUyMCdsYWJlbHNfdXJsJyUyQyUyMCdjb21tZW50c191cmwnJTJDJTIwJ2V2ZW50c191cmwnJTJDJTIwJ2h0bWxfdXJsJyUyQyUyMCdpZCclMkMlMjAnbm9kZV9pZCclMkMlMjAnbnVtYmVyJyUyQyUyMCd0aXRsZSclMkMlMjAndXNlciclMkMlMjAnbGFiZWxzJyUyQyUyMCdzdGF0ZSclMkMlMjAnbG9ja2VkJyUyQyUyMCdhc3NpZ25lZSclMkMlMjAnYXNzaWduZWVzJyUyQyUyMCdtaWxlc3RvbmUnJTJDJTIwJ2NvbW1lbnRzJyUyQyUyMCdjcmVhdGVkX2F0JyUyQyUyMCd1cGRhdGVkX2F0JyUyQyUyMCdjbG9zZWRfYXQnJTJDJTIwJ2F1dGhvcl9hc3NvY2lhdGlvbiclMkMlMjAnYWN0aXZlX2xvY2tfcmVhc29uJyUyQyUyMCdwdWxsX3JlcXVlc3QnJTJDJTIwJ2JvZHknJTJDJTIwJ3BlcmZvcm1lZF92aWFfZ2l0aHViX2FwcCclMkMlMjAnaXNfcHVsbF9yZXF1ZXN0JyU1RCUyQyUwQSUyMCUyMCUyMCUyMG51bV9yb3dzJTNBJTIwMjg1NSUwQSU3RCk=",highlighted:`Dataset({
features: [<span class="hljs-string">&#x27;url&#x27;</span>, <span class="hljs-string">&#x27;repository_url&#x27;</span>, <span class="hljs-string">&#x27;labels_url&#x27;</span>, <span class="hljs-string">&#x27;comments_url&#x27;</span>, <span class="hljs-string">&#x27;events_url&#x27;</span>, <span class="hljs-string">&#x27;html_url&#x27;</span>, <span class="hljs-string">&#x27;id&#x27;</span>, <span class="hljs-string">&#x27;node_id&#x27;</span>, <span class="hljs-string">&#x27;number&#x27;</span>, <span class="hljs-string">&#x27;title&#x27;</span>, <span class="hljs-string">&#x27;user&#x27;</span>, <span class="hljs-string">&#x27;labels&#x27;</span>, <span class="hljs-string">&#x27;state&#x27;</span>, <span class="hljs-string">&#x27;locked&#x27;</span>, <span class="hljs-string">&#x27;assignee&#x27;</span>, <span class="hljs-string">&#x27;assignees&#x27;</span>, <span class="hljs-string">&#x27;milestone&#x27;</span>, <span class="hljs-string">&#x27;comments&#x27;</span>, <span class="hljs-string">&#x27;created_at&#x27;</span>, <span class="hljs-string">&#x27;updated_at&#x27;</span>, <span class="hljs-string">&#x27;closed_at&#x27;</span>, <span class="hljs-string">&#x27;author_association&#x27;</span>, <span class="hljs-string">&#x27;active_lock_reason&#x27;</span>, <span class="hljs-string">&#x27;pull_request&#x27;</span>, <span class="hljs-string">&#x27;body&#x27;</span>, <span class="hljs-string">&#x27;performed_via_github_app&#x27;</span>, <span class="hljs-string">&#x27;is_pull_request&#x27;</span>],
num_rows: <span class="hljs-number">2855</span>
})`,wrap:!1}}),ea=new ca({props:{title:"Criando um cartão do datasets",local:"criando-um-cartão-do-datasets",headingTag:"h2"}}),pa=new jt({props:{source:"https://github.com/huggingface/course/blob/main/chapters/pt/chapter5/5.mdx"}}),{c(){h=o("meta"),ua=t(),ra=o("p"),ma=t(),c(X.$$.fragment),Ma=t(),c(k.$$.fragment),Ja=t(),c(N.$$.fragment),ya=t(),z=o("p"),z.innerHTML=il,ha=t(),V=o("ul"),V.innerHTML=pl,Ta=t(),_=o("p"),_.textContent=cl,Ua=t(),c(B.$$.fragment),ja=t(),H=o("p"),H.innerHTML=rl,ba=t(),T=o("div"),T.innerHTML=dl,ga=t(),W=o("p"),W.textContent=ul,wa=t(),U=o("div"),U.innerHTML=ml,fa=t(),F=o("p"),F.innerHTML=Ml,xa=t(),Q=o("p"),Q.innerHTML=Jl,Za=t(),c(Y.$$.fragment),Ga=t(),$=o("p"),$.innerHTML=yl,Ra=t(),c(E.$$.fragment),Ia=t(),S=o("p"),S.innerHTML=hl,va=t(),c(A.$$.fragment),Ca=t(),c(q.$$.fragment),Xa=t(),L=o("p"),L.innerHTML=Tl,ka=t(),c(D.$$.fragment),Na=t(),c(P.$$.fragment),za=t(),O=o("p"),O.innerHTML=Ul,Va=t(),j=o("blockquote"),j.innerHTML=jl,_a=t(),K=o("p"),K.innerHTML=bl,Ba=t(),c(ss.$$.fragment),Ha=t(),b=o("blockquote"),b.innerHTML=gl,Wa=t(),as=o("p"),as.textContent=wl,Fa=t(),c(es.$$.fragment),Qa=t(),ls=o("p"),ls.innerHTML=fl,Ya=t(),c(ts.$$.fragment),$a=t(),ns=o("p"),ns.innerHTML=xl,Ea=t(),c(os.$$.fragment),Sa=t(),c(is.$$.fragment),Aa=t(),ps=o("p"),ps.innerHTML=Zl,qa=t(),cs=o("blockquote"),cs.innerHTML=Gl,La=t(),rs=o("p"),rs.textContent=Rl,Da=t(),c(ds.$$.fragment),Pa=t(),us=o("p"),us.innerHTML=Il,Oa=t(),c(ms.$$.fragment),Ka=t(),c(Ms.$$.fragment),se=t(),Js=o("p"),Js.innerHTML=vl,ae=t(),c(ys.$$.fragment),ee=t(),g=o("blockquote"),g.innerHTML=Cl,le=t(),hs=o("p"),hs.textContent=Xl,te=t(),Ts=o("p"),Ts.textContent=kl,ne=t(),c(Us.$$.fragment),oe=t(),js=o("p"),js.textContent=Nl,ie=t(),w=o("div"),w.innerHTML=zl,pe=t(),bs=o("p"),bs.innerHTML=Vl,ce=t(),c(gs.$$.fragment),re=t(),c(ws.$$.fragment),de=t(),fs=o("p"),fs.innerHTML=_l,ue=t(),c(xs.$$.fragment),me=t(),c(Zs.$$.fragment),Me=t(),Gs=o("p"),Gs.innerHTML=Bl,Je=t(),c(Rs.$$.fragment),ye=t(),Is=o("p"),Is.textContent=Hl,he=t(),c(vs.$$.fragment),Te=t(),c(Cs.$$.fragment),Ue=t(),c(Xs.$$.fragment),je=t(),ks=o("p"),ks.innerHTML=Wl,be=t(),c(Ns.$$.fragment),ge=t(),c(zs.$$.fragment),we=t(),Vs=o("p"),Vs.innerHTML=Fl,fe=t(),_s=o("p"),_s.innerHTML=Ql,xe=t(),c(Bs.$$.fragment),Ze=t(),Hs=o("p"),Hs.innerHTML=Yl,Ge=t(),c(Ws.$$.fragment),Re=t(),Fs=o("p"),Fs.innerHTML=$l,Ie=t(),c(Qs.$$.fragment),ve=t(),c(Ys.$$.fragment),Ce=t(),$s=o("p"),$s.innerHTML=El,Xe=t(),f=o("blockquote"),f.innerHTML=Sl,ke=t(),Es=o("p"),Es.innerHTML=Al,Ne=t(),c(Ss.$$.fragment),ze=t(),As=o("p"),As.innerHTML=ql,Ve=t(),c(qs.$$.fragment),_e=t(),Ls=o("p"),Ls.innerHTML=Ll,Be=t(),c(Ds.$$.fragment),He=t(),Ps=o("p"),Ps.innerHTML=Dl,We=t(),x=o("div"),x.innerHTML=Pl,Fe=t(),Os=o("p"),Os.innerHTML=Ol,Qe=t(),c(Ks.$$.fragment),Ye=t(),c(sa.$$.fragment),$e=t(),aa=o("p"),aa.innerHTML=Kl,Ee=t(),Z=o("blockquote"),Z.innerHTML=st,Se=t(),c(ea.$$.fragment),Ae=t(),la=o("p"),la.textContent=at,qe=t(),ta=o("p"),ta.innerHTML=et,Le=t(),na=o("ol"),na.innerHTML=lt,De=t(),G=o("div"),G.innerHTML=tt,Pe=t(),R=o("ol"),R.innerHTML=nt,Oe=t(),oa=o("p"),oa.innerHTML=ot,Ke=t(),I=o("div"),I.innerHTML=it,sl=t(),v=o("blockquote"),v.innerHTML=pt,al=t(),ia=o("p"),ia.textContent=ct,el=t(),C=o("blockquote"),C.innerHTML=rt,ll=t(),c(pa.$$.fragment),tl=t(),da=o("p"),this.h()},l(s){const a=ht("svelte-u9bgzb",document.head);h=i(a,"META",{name:!0,content:!0}),a.forEach(e),ua=n(s),ra=i(s,"P",{}),dt(ra).forEach(e),ma=n(s),r(X.$$.fragment,s),Ma=n(s),r(k.$$.fragment,s),Ja=n(s),r(N.$$.fragment,s),ya=n(s),z=i(s,"P",{"data-svelte-h":!0}),p(z)!=="svelte-qzjzzu"&&(z.innerHTML=il),ha=n(s),V=i(s,"UL",{"data-svelte-h":!0}),p(V)!=="svelte-13fgdbo"&&(V.innerHTML=pl),Ta=n(s),_=i(s,"P",{"data-svelte-h":!0}),p(_)!=="svelte-1t92jpo"&&(_.textContent=cl),Ua=n(s),r(B.$$.fragment,s),ja=n(s),H=i(s,"P",{"data-svelte-h":!0}),p(H)!=="svelte-724jk"&&(H.innerHTML=rl),ba=n(s),T=i(s,"DIV",{class:!0,"data-svelte-h":!0}),p(T)!=="svelte-1htetkm"&&(T.innerHTML=dl),ga=n(s),W=i(s,"P",{"data-svelte-h":!0}),p(W)!=="svelte-1abvgj5"&&(W.textContent=ul),wa=n(s),U=i(s,"DIV",{class:!0,"data-svelte-h":!0}),p(U)!=="svelte-1jsgvzc"&&(U.innerHTML=ml),fa=n(s),F=i(s,"P",{"data-svelte-h":!0}),p(F)!=="svelte-jyvc3q"&&(F.innerHTML=Ml),xa=n(s),Q=i(s,"P",{"data-svelte-h":!0}),p(Q)!=="svelte-2cc0nx"&&(Q.innerHTML=Jl),Za=n(s),r(Y.$$.fragment,s),Ga=n(s),$=i(s,"P",{"data-svelte-h":!0}),p($)!=="svelte-nc35xk"&&($.innerHTML=yl),Ra=n(s),r(E.$$.fragment,s),Ia=n(s),S=i(s,"P",{"data-svelte-h":!0}),p(S)!=="svelte-1ag02sh"&&(S.innerHTML=hl),va=n(s),r(A.$$.fragment,s),Ca=n(s),r(q.$$.fragment,s),Xa=n(s),L=i(s,"P",{"data-svelte-h":!0}),p(L)!=="svelte-1t5kbkx"&&(L.innerHTML=Tl),ka=n(s),r(D.$$.fragment,s),Na=n(s),r(P.$$.fragment,s),za=n(s),O=i(s,"P",{"data-svelte-h":!0}),p(O)!=="svelte-15zd9ok"&&(O.innerHTML=Ul),Va=n(s),j=i(s,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),p(j)!=="svelte-d1d1zw"&&(j.innerHTML=jl),_a=n(s),K=i(s,"P",{"data-svelte-h":!0}),p(K)!=="svelte-nznw16"&&(K.innerHTML=bl),Ba=n(s),r(ss.$$.fragment,s),Ha=n(s),b=i(s,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),p(b)!=="svelte-p1p1gu"&&(b.innerHTML=gl),Wa=n(s),as=i(s,"P",{"data-svelte-h":!0}),p(as)!=="svelte-1u5zgji"&&(as.textContent=wl),Fa=n(s),r(es.$$.fragment,s),Qa=n(s),ls=i(s,"P",{"data-svelte-h":!0}),p(ls)!=="svelte-1enlzi5"&&(ls.innerHTML=fl),Ya=n(s),r(ts.$$.fragment,s),$a=n(s),ns=i(s,"P",{"data-svelte-h":!0}),p(ns)!=="svelte-16hcfk7"&&(ns.innerHTML=xl),Ea=n(s),r(os.$$.fragment,s),Sa=n(s),r(is.$$.fragment,s),Aa=n(s),ps=i(s,"P",{"data-svelte-h":!0}),p(ps)!=="svelte-97971k"&&(ps.innerHTML=Zl),qa=n(s),cs=i(s,"BLOCKQUOTE",{"data-svelte-h":!0}),p(cs)!=="svelte-127vg01"&&(cs.innerHTML=Gl),La=n(s),rs=i(s,"P",{"data-svelte-h":!0}),p(rs)!=="svelte-1uym4u1"&&(rs.textContent=Rl),Da=n(s),r(ds.$$.fragment,s),Pa=n(s),us=i(s,"P",{"data-svelte-h":!0}),p(us)!=="svelte-r1iiw0"&&(us.innerHTML=Il),Oa=n(s),r(ms.$$.fragment,s),Ka=n(s),r(Ms.$$.fragment,s),se=n(s),Js=i(s,"P",{"data-svelte-h":!0}),p(Js)!=="svelte-z4trhs"&&(Js.innerHTML=vl),ae=n(s),r(ys.$$.fragment,s),ee=n(s),g=i(s,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),p(g)!=="svelte-7lxoiz"&&(g.innerHTML=Cl),le=n(s),hs=i(s,"P",{"data-svelte-h":!0}),p(hs)!=="svelte-58dtlq"&&(hs.textContent=Xl),te=n(s),Ts=i(s,"P",{"data-svelte-h":!0}),p(Ts)!=="svelte-p477r1"&&(Ts.textContent=kl),ne=n(s),r(Us.$$.fragment,s),oe=n(s),js=i(s,"P",{"data-svelte-h":!0}),p(js)!=="svelte-18m93nm"&&(js.textContent=Nl),ie=n(s),w=i(s,"DIV",{class:!0,"data-svelte-h":!0}),p(w)!=="svelte-1fxxwaz"&&(w.innerHTML=zl),pe=n(s),bs=i(s,"P",{"data-svelte-h":!0}),p(bs)!=="svelte-1aq93c6"&&(bs.innerHTML=Vl),ce=n(s),r(gs.$$.fragment,s),re=n(s),r(ws.$$.fragment,s),de=n(s),fs=i(s,"P",{"data-svelte-h":!0}),p(fs)!=="svelte-1m47af7"&&(fs.innerHTML=_l),ue=n(s),r(xs.$$.fragment,s),me=n(s),r(Zs.$$.fragment,s),Me=n(s),Gs=i(s,"P",{"data-svelte-h":!0}),p(Gs)!=="svelte-an9xq1"&&(Gs.innerHTML=Bl),Je=n(s),r(Rs.$$.fragment,s),ye=n(s),Is=i(s,"P",{"data-svelte-h":!0}),p(Is)!=="svelte-inkiuv"&&(Is.textContent=Hl),he=n(s),r(vs.$$.fragment,s),Te=n(s),r(Cs.$$.fragment,s),Ue=n(s),r(Xs.$$.fragment,s),je=n(s),ks=i(s,"P",{"data-svelte-h":!0}),p(ks)!=="svelte-1pq8ng9"&&(ks.innerHTML=Wl),be=n(s),r(Ns.$$.fragment,s),ge=n(s),r(zs.$$.fragment,s),we=n(s),Vs=i(s,"P",{"data-svelte-h":!0}),p(Vs)!=="svelte-elk9an"&&(Vs.innerHTML=Fl),fe=n(s),_s=i(s,"P",{"data-svelte-h":!0}),p(_s)!=="svelte-mwq422"&&(_s.innerHTML=Ql),xe=n(s),r(Bs.$$.fragment,s),Ze=n(s),Hs=i(s,"P",{"data-svelte-h":!0}),p(Hs)!=="svelte-3ay613"&&(Hs.innerHTML=Yl),Ge=n(s),r(Ws.$$.fragment,s),Re=n(s),Fs=i(s,"P",{"data-svelte-h":!0}),p(Fs)!=="svelte-1sk1cve"&&(Fs.innerHTML=$l),Ie=n(s),r(Qs.$$.fragment,s),ve=n(s),r(Ys.$$.fragment,s),Ce=n(s),$s=i(s,"P",{"data-svelte-h":!0}),p($s)!=="svelte-1b7m61j"&&($s.innerHTML=El),Xe=n(s),f=i(s,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),p(f)!=="svelte-1u5lplr"&&(f.innerHTML=Sl),ke=n(s),Es=i(s,"P",{"data-svelte-h":!0}),p(Es)!=="svelte-ksirvg"&&(Es.innerHTML=Al),Ne=n(s),r(Ss.$$.fragment,s),ze=n(s),As=i(s,"P",{"data-svelte-h":!0}),p(As)!=="svelte-946zbw"&&(As.innerHTML=ql),Ve=n(s),r(qs.$$.fragment,s),_e=n(s),Ls=i(s,"P",{"data-svelte-h":!0}),p(Ls)!=="svelte-uq1g87"&&(Ls.innerHTML=Ll),Be=n(s),r(Ds.$$.fragment,s),He=n(s),Ps=i(s,"P",{"data-svelte-h":!0}),p(Ps)!=="svelte-ov0i84"&&(Ps.innerHTML=Dl),We=n(s),x=i(s,"DIV",{class:!0,"data-svelte-h":!0}),p(x)!=="svelte-18puw29"&&(x.innerHTML=Pl),Fe=n(s),Os=i(s,"P",{"data-svelte-h":!0}),p(Os)!=="svelte-1v7sua7"&&(Os.innerHTML=Ol),Qe=n(s),r(Ks.$$.fragment,s),Ye=n(s),r(sa.$$.fragment,s),$e=n(s),aa=i(s,"P",{"data-svelte-h":!0}),p(aa)!=="svelte-zqtc7l"&&(aa.innerHTML=Kl),Ee=n(s),Z=i(s,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),p(Z)!=="svelte-fv7hit"&&(Z.innerHTML=st),Se=n(s),r(ea.$$.fragment,s),Ae=n(s),la=i(s,"P",{"data-svelte-h":!0}),p(la)!=="svelte-dylmg9"&&(la.textContent=at),qe=n(s),ta=i(s,"P",{"data-svelte-h":!0}),p(ta)!=="svelte-17tg70u"&&(ta.innerHTML=et),Le=n(s),na=i(s,"OL",{"data-svelte-h":!0}),p(na)!=="svelte-gkbv46"&&(na.innerHTML=lt),De=n(s),G=i(s,"DIV",{class:!0,"data-svelte-h":!0}),p(G)!=="svelte-1gqifn5"&&(G.innerHTML=tt),Pe=n(s),R=i(s,"OL",{start:!0,"data-svelte-h":!0}),p(R)!=="svelte-1u3mfkz"&&(R.innerHTML=nt),Oe=n(s),oa=i(s,"P",{"data-svelte-h":!0}),p(oa)!=="svelte-1tv90dr"&&(oa.innerHTML=ot),Ke=n(s),I=i(s,"DIV",{class:!0,"data-svelte-h":!0}),p(I)!=="svelte-ct1wn8"&&(I.innerHTML=it),sl=n(s),v=i(s,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),p(v)!=="svelte-1w4xzxq"&&(v.innerHTML=pt),al=n(s),ia=i(s,"P",{"data-svelte-h":!0}),p(ia)!=="svelte-k6tkf8"&&(ia.textContent=ct),el=n(s),C=i(s,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),p(C)!=="svelte-1txz2a0"&&(C.innerHTML=rt),ll=n(s),r(pa.$$.fragment,s),tl=n(s),da=i(s,"P",{}),dt(da).forEach(e),this.h()},h(){y(h,"name","hf:doc:metadata"),y(h,"content",ft),y(T,"class","flex justify-center"),y(U,"class","flex justify-center"),y(j,"class","tip"),y(b,"class","warning"),y(g,"class","tip"),y(w,"class","flex justify-center"),y(f,"class","tip"),y(x,"class","flex justify-center"),y(Z,"class","tip"),y(G,"class","flex justify-center"),y(R,"start","2"),y(I,"class","flex justify-center"),y(v,"class","tip"),y(C,"class","tip")},m(s,a){Tt(document.head,h),l(s,ua,a),l(s,ra,a),l(s,ma,a),d(X,s,a),l(s,Ma,a),d(k,s,a),l(s,Ja,a),d(N,s,a),l(s,ya,a),l(s,z,a),l(s,ha,a),l(s,V,a),l(s,Ta,a),l(s,_,a),l(s,Ua,a),d(B,s,a),l(s,ja,a),l(s,H,a),l(s,ba,a),l(s,T,a),l(s,ga,a),l(s,W,a),l(s,wa,a),l(s,U,a),l(s,fa,a),l(s,F,a),l(s,xa,a),l(s,Q,a),l(s,Za,a),d(Y,s,a),l(s,Ga,a),l(s,$,a),l(s,Ra,a),d(E,s,a),l(s,Ia,a),l(s,S,a),l(s,va,a),d(A,s,a),l(s,Ca,a),d(q,s,a),l(s,Xa,a),l(s,L,a),l(s,ka,a),d(D,s,a),l(s,Na,a),d(P,s,a),l(s,za,a),l(s,O,a),l(s,Va,a),l(s,j,a),l(s,_a,a),l(s,K,a),l(s,Ba,a),d(ss,s,a),l(s,Ha,a),l(s,b,a),l(s,Wa,a),l(s,as,a),l(s,Fa,a),d(es,s,a),l(s,Qa,a),l(s,ls,a),l(s,Ya,a),d(ts,s,a),l(s,$a,a),l(s,ns,a),l(s,Ea,a),d(os,s,a),l(s,Sa,a),d(is,s,a),l(s,Aa,a),l(s,ps,a),l(s,qa,a),l(s,cs,a),l(s,La,a),l(s,rs,a),l(s,Da,a),d(ds,s,a),l(s,Pa,a),l(s,us,a),l(s,Oa,a),d(ms,s,a),l(s,Ka,a),d(Ms,s,a),l(s,se,a),l(s,Js,a),l(s,ae,a),d(ys,s,a),l(s,ee,a),l(s,g,a),l(s,le,a),l(s,hs,a),l(s,te,a),l(s,Ts,a),l(s,ne,a),d(Us,s,a),l(s,oe,a),l(s,js,a),l(s,ie,a),l(s,w,a),l(s,pe,a),l(s,bs,a),l(s,ce,a),d(gs,s,a),l(s,re,a),d(ws,s,a),l(s,de,a),l(s,fs,a),l(s,ue,a),d(xs,s,a),l(s,me,a),d(Zs,s,a),l(s,Me,a),l(s,Gs,a),l(s,Je,a),d(Rs,s,a),l(s,ye,a),l(s,Is,a),l(s,he,a),d(vs,s,a),l(s,Te,a),d(Cs,s,a),l(s,Ue,a),d(Xs,s,a),l(s,je,a),l(s,ks,a),l(s,be,a),d(Ns,s,a),l(s,ge,a),d(zs,s,a),l(s,we,a),l(s,Vs,a),l(s,fe,a),l(s,_s,a),l(s,xe,a),d(Bs,s,a),l(s,Ze,a),l(s,Hs,a),l(s,Ge,a),d(Ws,s,a),l(s,Re,a),l(s,Fs,a),l(s,Ie,a),d(Qs,s,a),l(s,ve,a),d(Ys,s,a),l(s,Ce,a),l(s,$s,a),l(s,Xe,a),l(s,f,a),l(s,ke,a),l(s,Es,a),l(s,Ne,a),d(Ss,s,a),l(s,ze,a),l(s,As,a),l(s,Ve,a),d(qs,s,a),l(s,_e,a),l(s,Ls,a),l(s,Be,a),d(Ds,s,a),l(s,He,a),l(s,Ps,a),l(s,We,a),l(s,x,a),l(s,Fe,a),l(s,Os,a),l(s,Qe,a),d(Ks,s,a),l(s,Ye,a),d(sa,s,a),l(s,$e,a),l(s,aa,a),l(s,Ee,a),l(s,Z,a),l(s,Se,a),d(ea,s,a),l(s,Ae,a),l(s,la,a),l(s,qe,a),l(s,ta,a),l(s,Le,a),l(s,na,a),l(s,De,a),l(s,G,a),l(s,Pe,a),l(s,R,a),l(s,Oe,a),l(s,oa,a),l(s,Ke,a),l(s,I,a),l(s,sl,a),l(s,v,a),l(s,al,a),l(s,ia,a),l(s,el,a),l(s,C,a),l(s,ll,a),d(pa,s,a),l(s,tl,a),l(s,da,a),nl=!0},p:mt,i(s){nl||(u(X.$$.fragment,s),u(k.$$.fragment,s),u(N.$$.fragment,s),u(B.$$.fragment,s),u(Y.$$.fragment,s),u(E.$$.fragment,s),u(A.$$.fragment,s),u(q.$$.fragment,s),u(D.$$.fragment,s),u(P.$$.fragment,s),u(ss.$$.fragment,s),u(es.$$.fragment,s),u(ts.$$.fragment,s),u(os.$$.fragment,s),u(is.$$.fragment,s),u(ds.$$.fragment,s),u(ms.$$.fragment,s),u(Ms.$$.fragment,s),u(ys.$$.fragment,s),u(Us.$$.fragment,s),u(gs.$$.fragment,s),u(ws.$$.fragment,s),u(xs.$$.fragment,s),u(Zs.$$.fragment,s),u(Rs.$$.fragment,s),u(vs.$$.fragment,s),u(Cs.$$.fragment,s),u(Xs.$$.fragment,s),u(Ns.$$.fragment,s),u(zs.$$.fragment,s),u(Bs.$$.fragment,s),u(Ws.$$.fragment,s),u(Qs.$$.fragment,s),u(Ys.$$.fragment,s),u(Ss.$$.fragment,s),u(qs.$$.fragment,s),u(Ds.$$.fragment,s),u(Ks.$$.fragment,s),u(sa.$$.fragment,s),u(ea.$$.fragment,s),u(pa.$$.fragment,s),nl=!0)},o(s){m(X.$$.fragment,s),m(k.$$.fragment,s),m(N.$$.fragment,s),m(B.$$.fragment,s),m(Y.$$.fragment,s),m(E.$$.fragment,s),m(A.$$.fragment,s),m(q.$$.fragment,s),m(D.$$.fragment,s),m(P.$$.fragment,s),m(ss.$$.fragment,s),m(es.$$.fragment,s),m(ts.$$.fragment,s),m(os.$$.fragment,s),m(is.$$.fragment,s),m(ds.$$.fragment,s),m(ms.$$.fragment,s),m(Ms.$$.fragment,s),m(ys.$$.fragment,s),m(Us.$$.fragment,s),m(gs.$$.fragment,s),m(ws.$$.fragment,s),m(xs.$$.fragment,s),m(Zs.$$.fragment,s),m(Rs.$$.fragment,s),m(vs.$$.fragment,s),m(Cs.$$.fragment,s),m(Xs.$$.fragment,s),m(Ns.$$.fragment,s),m(zs.$$.fragment,s),m(Bs.$$.fragment,s),m(Ws.$$.fragment,s),m(Qs.$$.fragment,s),m(Ys.$$.fragment,s),m(Ss.$$.fragment,s),m(qs.$$.fragment,s),m(Ds.$$.fragment,s),m(Ks.$$.fragment,s),m(sa.$$.fragment,s),m(ea.$$.fragment,s),m(pa.$$.fragment,s),nl=!1},d(s){s&&(e(ua),e(ra),e(ma),e(Ma),e(Ja),e(ya),e(z),e(ha),e(V),e(Ta),e(_),e(Ua),e(ja),e(H),e(ba),e(T),e(ga),e(W),e(wa),e(U),e(fa),e(F),e(xa),e(Q),e(Za),e(Ga),e($),e(Ra),e(Ia),e(S),e(va),e(Ca),e(Xa),e(L),e(ka),e(Na),e(za),e(O),e(Va),e(j),e(_a),e(K),e(Ba),e(Ha),e(b),e(Wa),e(as),e(Fa),e(Qa),e(ls),e(Ya),e($a),e(ns),e(Ea),e(Sa),e(Aa),e(ps),e(qa),e(cs),e(La),e(rs),e(Da),e(Pa),e(us),e(Oa),e(Ka),e(se),e(Js),e(ae),e(ee),e(g),e(le),e(hs),e(te),e(Ts),e(ne),e(oe),e(js),e(ie),e(w),e(pe),e(bs),e(ce),e(re),e(de),e(fs),e(ue),e(me),e(Me),e(Gs),e(Je),e(ye),e(Is),e(he),e(Te),e(Ue),e(je),e(ks),e(be),e(ge),e(we),e(Vs),e(fe),e(_s),e(xe),e(Ze),e(Hs),e(Ge),e(Re),e(Fs),e(Ie),e(ve),e(Ce),e($s),e(Xe),e(f),e(ke),e(Es),e(Ne),e(ze),e(As),e(Ve),e(_e),e(Ls),e(Be),e(He),e(Ps),e(We),e(x),e(Fe),e(Os),e(Qe),e(Ye),e($e),e(aa),e(Ee),e(Z),e(Se),e(Ae),e(la),e(qe),e(ta),e(Le),e(na),e(De),e(G),e(Pe),e(R),e(Oe),e(oa),e(Ke),e(I),e(sl),e(v),e(al),e(ia),e(el),e(C),e(ll),e(tl),e(da)),e(h),M(X,s),M(k,s),M(N,s),M(B,s),M(Y,s),M(E,s),M(A,s),M(q,s),M(D,s),M(P,s),M(ss,s),M(es,s),M(ts,s),M(os,s),M(is,s),M(ds,s),M(ms,s),M(Ms,s),M(ys,s),M(Us,s),M(gs,s),M(ws,s),M(xs,s),M(Zs,s),M(Rs,s),M(vs,s),M(Cs,s),M(Xs,s),M(Ns,s),M(zs,s),M(Bs,s),M(Ws,s),M(Qs,s),M(Ys,s),M(Ss,s),M(qs,s),M(Ds,s),M(Ks,s),M(sa,s),M(ea,s),M(pa,s)}}}const ft='{"title":"Criando seu próprio dataset","local":"criando-seu-próprio-dataset","sections":[{"title":"Obtendo os dados","local":"obtendo-os-dados","sections":[],"depth":2},{"title":"Limpando os dados","local":"limpando-os-dados","sections":[],"depth":2},{"title":"Aumentando o conjunto de dados","local":"aumentando-o-conjunto-de-dados","sections":[],"depth":2},{"title":"Carregando o conjunto de dados para o Hugging Face Hub","local":"carregando-o-conjunto-de-dados-para-o-hugging-face-hub","sections":[],"depth":2},{"title":"Criando um cartão do datasets","local":"criando-um-cartão-do-datasets","sections":[],"depth":2}],"depth":1}';function xt(ol){return Mt(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Xt extends Jt{constructor(h){super(),yt(this,h,xt,wt,ut,{})}}export{Xt as component};

Xet Storage Details

Size:
84.5 kB
·
Xet hash:
e706cb49be81654b0a9c58690b2b1947a09bd6caa08a71fc2b9193d4e049faf8

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.