Buckets:

rtrm's picture
download
raw
82 kB
import{s as Yn,o as Ln,n as Fa}from"../chunks/scheduler.37c15a92.js";import{S as Fn,i as Pn,g as j,s as t,r as M,A as On,h as y,f as a,c as p,j as Gn,u as d,x as U,k as Ya,y as Kn,a as n,v as m,t as i,b as Wa,d as c,w as J,p as Ha}from"../chunks/index.2bf4358c.js";import{T as La}from"../chunks/Tip.363c041f.js";import{Y as Va}from"../chunks/Youtube.1e50a667.js";import{C as x}from"../chunks/CodeBlock.4e987730.js";import{C as qn}from"../chunks/CourseFloatingBanner.6add7356.js";import{F as sl}from"../chunks/FrameworkSwitchCourse.8d4d4ab6.js";import{H as Je,E as el}from"../chunks/getInferenceSnippets.24b50994.js";function al(b){let l,u;return l=new qn({props:{chapter:6,classNames:"absolute z-10 right-0 top-0",notebooks:[{label:"Google Colab",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/pt/chapter6/section3_tf.ipynb"},{label:"Aws Studio",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/pt/chapter6/section3_tf.ipynb"}]}}),{c(){M(l.$$.fragment)},l(o){d(l.$$.fragment,o)},m(o,T){m(l,o,T),u=!0},i(o){u||(c(l.$$.fragment,o),u=!0)},o(o){i(l.$$.fragment,o),u=!1},d(o){J(l,o)}}}function nl(b){let l,u;return l=new qn({props:{chapter:6,classNames:"absolute z-10 right-0 top-0",notebooks:[{label:"Google Colab",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/pt/chapter6/section3_pt.ipynb"},{label:"Aws Studio",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/pt/chapter6/section3_pt.ipynb"}]}}),{c(){M(l.$$.fragment)},l(o){d(l.$$.fragment,o)},m(o,T){m(l,o,T),u=!0},i(o){u||(c(l.$$.fragment,o),u=!0)},o(o){i(l.$$.fragment,o),u=!1},d(o){J(l,o)}}}function ll(b){let l,u="⚠️ Ao tokenizar uma única frase, você nem sempre verá uma diferença de velocidade entre as versões lenta e rápida do mesmo tokenizador. Na verdade, a versão rápida pode ser mais lenta! É somente ao tokenizar muitos textos em paralelo ao mesmo tempo que você poderá ver a diferença com maior nitidez.";return{c(){l=j("p"),l.textContent=u},l(o){l=y(o,"P",{"data-svelte-h":!0}),U(l)!=="svelte-pisjvc"&&(l.textContent=u)},m(o,T){n(o,l,T)},p:Fa,d(o){o&&a(l)}}}function tl(b){let l,u="A noção do que é uma palavra é complicada. Por exemplo, “d’água” (uma contração de “da água”) conta como uma ou duas palavras? Na verdade, depende do tokenizador e da operação de pré-tokenização que é aplicada. Alguns tokenizadores apenas dividem em espaços, então eles considerarão isso como uma palavra. Outros usam pontuação em cima dos espaços, então considerarão duas palavras.",o,T,k="✏️ <strong>Experimente!</strong> Crie um tokenizador a partir dos checkpoints de <code>bert-base-cased </code>e <code>roberta-base</code> e tokenize ”81s” com eles. O que você observa? Quais são os IDs das palavras?";return{c(){l=j("p"),l.textContent=u,o=t(),T=j("p"),T.innerHTML=k},l(h){l=y(h,"P",{"data-svelte-h":!0}),U(l)!=="svelte-1t5orfq"&&(l.textContent=u),o=p(h),T=y(h,"P",{"data-svelte-h":!0}),U(T)!=="svelte-u906j4"&&(T.innerHTML=k)},m(h,I){n(h,l,I),n(h,o,I),n(h,T,I)},p:Fa,d(h){h&&(a(l),a(o),a(T))}}}function pl(b){let l,u="✏️ <strong>Experimente!</strong> Crie seu próprio texto de exemplo e veja se você consegue entender quais tokens estão associados ao ID da palavra e também como extrair os intervalos de caracteres para uma única palavra. Como bônus, tente usar duas frases como entrada e veja se os IDs das frases fazem sentido para você.";return{c(){l=j("p"),l.innerHTML=u},l(o){l=y(o,"P",{"data-svelte-h":!0}),U(l)!=="svelte-1u16bbj"&&(l.innerHTML=u)},m(o,T){n(o,l,T)},p:Fa,d(o){o&&a(l)}}}function ol(b){let l,u;return l=new Va({props:{id:"PrX4CjrVnNc"}}),{c(){M(l.$$.fragment)},l(o){d(l.$$.fragment,o)},m(o,T){m(l,o,T),u=!0},i(o){u||(c(l.$$.fragment,o),u=!0)},o(o){i(l.$$.fragment,o),u=!1},d(o){J(l,o)}}}function rl(b){let l,u;return l=new Va({props:{id:"0E7ltQB7fM8"}}),{c(){M(l.$$.fragment)},l(o){d(l.$$.fragment,o)},m(o,T){m(l,o,T),u=!0},i(o){u||(c(l.$$.fragment,o),u=!0)},o(o){i(l.$$.fragment,o),u=!1},d(o){J(l,o)}}}function il(b){let l,u='Primeiro, precisamos tokenizar nossa entrada e passá-la pelo modelo. Isso é feito exatamente como no <a href="/course/chapter2">Capítulo 2</a>; instanciamos o tokenizador e o modelo usando as classes <code>TFAutoXxx</code> e depois as usamos em nosso exemplo:',o,T,k,h,I="Como estamos usando <code>TFAutoModelForTokenClassification</code> neste caso, obtemos um conjunto de logits para cada token na sequência de entrada:",A,w,C,f,$;return T=new x({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMkMlMjBURkF1dG9Nb2RlbEZvclRva2VuQ2xhc3NpZmljYXRpb24lMEElMEFtb2RlbF9jaGVja3BvaW50JTIwJTNEJTIwJTIyZGJtZHolMkZiZXJ0LWxhcmdlLWNhc2VkLWZpbmV0dW5lZC1jb25sbDAzLWVuZ2xpc2glMjIlMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZChtb2RlbF9jaGVja3BvaW50KSUwQW1vZGVsJTIwJTNEJTIwVEZBdXRvTW9kZWxGb3JUb2tlbkNsYXNzaWZpY2F0aW9uLmZyb21fcHJldHJhaW5lZChtb2RlbF9jaGVja3BvaW50KSUwQSUwQWV4YW1wbGUlMjAlM0QlMjAlMjJNeSUyMG5hbWUlMjBpcyUyMFN5bHZhaW4lMjBhbmQlMjBJJTIwd29yayUyMGF0JTIwSHVnZ2luZyUyMEZhY2UlMjBpbiUyMEJyb29rbHluLiUyMiUwQWlucHV0cyUyMCUzRCUyMHRva2VuaXplcihleGFtcGxlJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJ0ZiUyMiklMEFvdXRwdXRzJTIwJTNEJTIwbW9kZWwoKippbnB1dHMp",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, TFAutoModelForTokenClassification
model_checkpoint = <span class="hljs-string">&quot;dbmdz/bert-large-cased-finetuned-conll03-english&quot;</span>
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = TFAutoModelForTokenClassification.from_pretrained(model_checkpoint)
example = <span class="hljs-string">&quot;My name is Sylvain and I work at Hugging Face in Brooklyn.&quot;</span>
inputs = tokenizer(example, return_tensors=<span class="hljs-string">&quot;tf&quot;</span>)
outputs = model(**inputs)`,wrap:!1}}),w=new x({props:{code:"cHJpbnQoaW5wdXRzJTVCJTIyaW5wdXRfaWRzJTIyJTVELnNoYXBlKSUwQXByaW50KG91dHB1dHMubG9naXRzLnNoYXBlKQ==",highlighted:`<span class="hljs-built_in">print</span>(inputs[<span class="hljs-string">&quot;input_ids&quot;</span>].shape)
<span class="hljs-built_in">print</span>(outputs.logits.shape)`,wrap:!1}}),f=new x({props:{code:"KDElMkMlMjAxOSklMEEoMSUyQyUyMDE5JTJDJTIwOSk=",highlighted:`(<span class="hljs-number">1</span>, <span class="hljs-number">19</span>)
(<span class="hljs-number">1</span>, <span class="hljs-number">19</span>, <span class="hljs-number">9</span>)`,wrap:!1}}),{c(){l=j("p"),l.innerHTML=u,o=t(),M(T.$$.fragment),k=t(),h=j("p"),h.innerHTML=I,A=t(),M(w.$$.fragment),C=t(),M(f.$$.fragment)},l(r){l=y(r,"P",{"data-svelte-h":!0}),U(l)!=="svelte-rfofjl"&&(l.innerHTML=u),o=p(r),d(T.$$.fragment,r),k=p(r),h=y(r,"P",{"data-svelte-h":!0}),U(h)!=="svelte-nkjp9c"&&(h.innerHTML=I),A=p(r),d(w.$$.fragment,r),C=p(r),d(f.$$.fragment,r)},m(r,g){n(r,l,g),n(r,o,g),m(T,r,g),n(r,k,g),n(r,h,g),n(r,A,g),m(w,r,g),n(r,C,g),m(f,r,g),$=!0},i(r){$||(c(T.$$.fragment,r),c(w.$$.fragment,r),c(f.$$.fragment,r),$=!0)},o(r){i(T.$$.fragment,r),i(w.$$.fragment,r),i(f.$$.fragment,r),$=!1},d(r){r&&(a(l),a(o),a(k),a(h),a(A),a(C)),J(T,r),J(w,r),J(f,r)}}}function cl(b){let l,u='Primeiro, precisamos tokenizar nossa entrada e passá-la pelo modelo. Isso é feito exatamente como no <a href="/course/chapter3">Capítulo 2</a>; instanciamos o tokenizador e o modelo usando as classes <code>AutoXxx</code> e depois as usamos em nosso exemplo:',o,T,k,h,I="Como estamos usando <code>AutoModelForTokenClassification</code> neste caso, obtemos um conjunto de logits para cada token na sequência de entrada:",A,w,C,f,$;return T=new x({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMkMlMjBBdXRvTW9kZWxGb3JUb2tlbkNsYXNzaWZpY2F0aW9uJTBBJTBBbW9kZWxfY2hlY2twb2ludCUyMCUzRCUyMCUyMmRibWR6JTJGYmVydC1sYXJnZS1jYXNlZC1maW5ldHVuZWQtY29ubGwwMy1lbmdsaXNoJTIyJTBBdG9rZW5pemVyJTIwJTNEJTIwQXV0b1Rva2VuaXplci5mcm9tX3ByZXRyYWluZWQobW9kZWxfY2hlY2twb2ludCklMEFtb2RlbCUyMCUzRCUyMEF1dG9Nb2RlbEZvclRva2VuQ2xhc3NpZmljYXRpb24uZnJvbV9wcmV0cmFpbmVkKG1vZGVsX2NoZWNrcG9pbnQpJTBBJTBBZXhhbXBsZSUyMCUzRCUyMCUyMk15JTIwbmFtZSUyMGlzJTIwU3lsdmFpbiUyMGFuZCUyMEklMjB3b3JrJTIwYXQlMjBIdWdnaW5nJTIwRmFjZSUyMGluJTIwQnJvb2tseW4uJTIyJTBBaW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyKGV4YW1wbGUlMkMlMjByZXR1cm5fdGVuc29ycyUzRCUyMnB0JTIyKSUwQW91dHB1dHMlMjAlM0QlMjBtb2RlbCgqKmlucHV0cyk=",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, AutoModelForTokenClassification
model_checkpoint = <span class="hljs-string">&quot;dbmdz/bert-large-cased-finetuned-conll03-english&quot;</span>
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForTokenClassification.from_pretrained(model_checkpoint)
example = <span class="hljs-string">&quot;My name is Sylvain and I work at Hugging Face in Brooklyn.&quot;</span>
inputs = tokenizer(example, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)
outputs = model(**inputs)`,wrap:!1}}),w=new x({props:{code:"cHJpbnQoaW5wdXRzJTVCJTIyaW5wdXRfaWRzJTIyJTVELnNoYXBlKSUwQXByaW50KG91dHB1dHMubG9naXRzLnNoYXBlKQ==",highlighted:`<span class="hljs-built_in">print</span>(inputs[<span class="hljs-string">&quot;input_ids&quot;</span>].shape)
<span class="hljs-built_in">print</span>(outputs.logits.shape)`,wrap:!1}}),f=new x({props:{code:"dG9yY2guU2l6ZSglNUIxJTJDJTIwMTklNUQpJTBBdG9yY2guU2l6ZSglNUIxJTJDJTIwMTklMkMlMjA5JTVEKQ==",highlighted:`torch.Size([<span class="hljs-number">1</span>, <span class="hljs-number">19</span>])
torch.Size([<span class="hljs-number">1</span>, <span class="hljs-number">19</span>, <span class="hljs-number">9</span>])`,wrap:!1}}),{c(){l=j("p"),l.innerHTML=u,o=t(),M(T.$$.fragment),k=t(),h=j("p"),h.innerHTML=I,A=t(),M(w.$$.fragment),C=t(),M(f.$$.fragment)},l(r){l=y(r,"P",{"data-svelte-h":!0}),U(l)!=="svelte-17oy2sy"&&(l.innerHTML=u),o=p(r),d(T.$$.fragment,r),k=p(r),h=y(r,"P",{"data-svelte-h":!0}),U(h)!=="svelte-1mf1ji6"&&(h.innerHTML=I),A=p(r),d(w.$$.fragment,r),C=p(r),d(f.$$.fragment,r)},m(r,g){n(r,l,g),n(r,o,g),m(T,r,g),n(r,k,g),n(r,h,g),n(r,A,g),m(w,r,g),n(r,C,g),m(f,r,g),$=!0},i(r){$||(c(T.$$.fragment,r),c(w.$$.fragment,r),c(f.$$.fragment,r),$=!0)},o(r){i(T.$$.fragment,r),i(w.$$.fragment,r),i(f.$$.fragment,r),$=!1},d(r){r&&(a(l),a(o),a(k),a(h),a(A),a(C)),J(T,r),J(w,r),J(f,r)}}}function Ml(b){let l,u;return l=new x({props:{code:"aW1wb3J0JTIwdGVuc29yZmxvdyUyMGFzJTIwdGYlMEElMEFwcm9iYWJpbGl0aWVzJTIwJTNEJTIwdGYubWF0aC5zb2Z0bWF4KG91dHB1dHMubG9naXRzJTJDJTIwYXhpcyUzRC0xKSU1QjAlNUQlMEFwcm9iYWJpbGl0aWVzJTIwJTNEJTIwcHJvYmFiaWxpdGllcy5udW1weSgpLnRvbGlzdCgpJTBBcHJlZGljdGlvbnMlMjAlM0QlMjB0Zi5tYXRoLmFyZ21heChvdXRwdXRzLmxvZ2l0cyUyQyUyMGF4aXMlM0QtMSklNUIwJTVEJTBBcHJlZGljdGlvbnMlMjAlM0QlMjBwcmVkaWN0aW9ucy5udW1weSgpLnRvbGlzdCgpJTBBcHJpbnQocHJlZGljdGlvbnMp",highlighted:`<span class="hljs-keyword">import</span> tensorflow <span class="hljs-keyword">as</span> tf
probabilities = tf.math.softmax(outputs.logits, axis=-<span class="hljs-number">1</span>)[<span class="hljs-number">0</span>]
probabilities = probabilities.numpy().tolist()
predictions = tf.math.argmax(outputs.logits, axis=-<span class="hljs-number">1</span>)[<span class="hljs-number">0</span>]
predictions = predictions.numpy().tolist()
<span class="hljs-built_in">print</span>(predictions)`,wrap:!1}}),{c(){M(l.$$.fragment)},l(o){d(l.$$.fragment,o)},m(o,T){m(l,o,T),u=!0},i(o){u||(c(l.$$.fragment,o),u=!0)},o(o){i(l.$$.fragment,o),u=!1},d(o){J(l,o)}}}function dl(b){let l,u;return l=new x({props:{code:"aW1wb3J0JTIwdG9yY2glMEElMEFwcm9iYWJpbGl0aWVzJTIwJTNEJTIwdG9yY2gubm4uZnVuY3Rpb25hbC5zb2Z0bWF4KG91dHB1dHMubG9naXRzJTJDJTIwZGltJTNELTEpJTVCMCU1RC50b2xpc3QoKSUwQXByZWRpY3Rpb25zJTIwJTNEJTIwb3V0cHV0cy5sb2dpdHMuYXJnbWF4KGRpbSUzRC0xKSU1QjAlNUQudG9saXN0KCklMEFwcmludChwcmVkaWN0aW9ucyk=",highlighted:`<span class="hljs-keyword">import</span> torch
probabilities = torch.nn.functional.softmax(outputs.logits, dim=-<span class="hljs-number">1</span>)[<span class="hljs-number">0</span>].tolist()
predictions = outputs.logits.argmax(dim=-<span class="hljs-number">1</span>)[<span class="hljs-number">0</span>].tolist()
<span class="hljs-built_in">print</span>(predictions)`,wrap:!1}}),{c(){M(l.$$.fragment)},l(o){d(l.$$.fragment,o)},m(o,T){m(l,o,T),u=!0},i(o){u||(c(l.$$.fragment,o),u=!0)},o(o){i(l.$$.fragment,o),u=!1},d(o){J(l,o)}}}function ml(b){let l,u,o,T,k,h,I,A,w,C,f,$,r='Nesta seção, examinaremos mais de perto os recursos dos tokenizadores em 🤗 Transformers. Até agora, só os usamos para tokenizar entradas ou decodificar IDs de volta em texto, mas tokenizadores - especialmente aqueles apoiados pela biblioteca 🤗 Tokenizers - podem fazer muito mais. Para ilustrar esses recursos adicionais, exploraremos como reproduzir os resultados dos pipelines <code>token-classification</code> (que chamamos de <code>ner</code>) e <code>question-answering</code> que encontramos pela primeira vez no <a href="/course/chapter1">Capítulo 1</a>.',g,W,je,H,Pa='Na discussão a seguir, muitas vezes faremos a distinção entre tokenizadores “lentos” e “rápidos”. Tokenizadores lentos são aqueles escritos em Python dentro da biblioteca 🤗 Transformers, enquanto as versões rápidas são aquelas fornecidas por 🤗 Tokenizers, que são escritos em Rust. Se você se lembrar da tabela do <a href="/course/chapter5/3">Capítulo 5</a> que informava quanto tempo levou um tokenizador rápido e um lento para tokenizar o conjunto de dados de revisão de medicamentos, você deve ter uma ideia do motivo pelo qual os chamamos de rápido e lento:',ye,V,Oa='<thead><tr><th align="center">Fast tokenizer</th> <th align="center">Slow tokenizer</th> <th align="center"></th></tr></thead> <tbody><tr><td align="center"><code>batched=True</code></td> <td align="center">10.8s</td> <td align="center">4min41s</td></tr> <tr><td align="center"><code>batched=False</code></td> <td align="center">59.2s</td> <td align="center">5min3s</td></tr></tbody>',Te,E,Ue,X,xe,D,he,G,Ka="A saída de um tokenizador não é um simples dicionário em Python; o que obtemos é, na verdade, um objeto especial chamado <code>BatchEncoding</code>. Este objeto é uma subclasse de um dicionário (e é por isso que conseguimos indexar esse resultado sem nenhum problema antes), mas com métodos adicionais que são usados ​​principalmente por tokenizadores rápidos.",we,q,sn="Além de seus recursos de paralelização, uma funcionalidade importante dos tokenizadores rápidos é que eles sempre acompanham o intervalo original de textos dos quais os tokens finais vêm - um recurso que chamamos de <em>mapeamento de offset</em>. Isso, por sua vez, desbloqueia recursos como o mapeamento de cada palavra para os tokens gerados ou mapeamento de cada caractere do texto original para o token que está dentro e vice-versa.",be,Y,en="Vamos analisar um exemplo:",ge,L,fe,F,an="Como mencionado anteriormente, nós obtemos um objeto <code>BatchEncoding</code> na saída do tokenizador:",ke,P,Ce,O,nn="Como a classe <code>AutoTokenizer</code> escolhe o tokenizador rápido como padrão, podemos usar os métodos adicionais que o objeto <code>BatchEncoding</code> fornece. Temos duas formas de verificar se o nosso tokenizador é rápido ou lento. Podemos, por exemplo, avaliar o atributo <code>is_fast</code> do tokenizador:",Ie,K,$e,ss,Ae,es,ln="ou checar o mesmo atributo do nosso <code>encoding</code>:",Qe,as,ze,ns,ve,ls,tn="Vejamos o que um tokenizador rápido nos permite fazer. Primeiro, podemos acessar os tokens sem precisar converter os IDs de volta em tokens:",Ze,ts,Be,ps,Ne,os,pn="No caso, o token no índice 5 é <code>##yl</code>, que faz parte da palavra “Sylvain” na sentença original. Nós podemos também usar o metodo <code>words_ids()</code> para obter o índice da palavra de onde cada palavra vem:",Ee,rs,Se,is,Re,cs,on="Podemos observar que as palavras especiais do tokenizador <code>[CLS]</code> e <code>[SEP]</code> são mapeados para <code>None</code>, e então cada token é mapeada para a palavra de onde se origina. Isso é especialmente útil para determinar se um token está no início da palavra ou se dois tokens estão em uma mesma palavra. Poderíamos contar com o prefix <code>##</code> para isso, mas apenas para tokenizadores do tipo BERT; este método funciona para qualquer tipo de tokenizador, desde que seja do tipo rápido. No próximo capítulo, nós veremos como podemos usar esse recurso para aplicar os rótulos que temos para cada palavra adequadamente aos tokens em tarefas como reconhecimento de entidade nomeada (em inglês, Named Entity Recognition, ou NER) e marcação de parte da fala (em inglês, part-of-speech, ou POS). Também podemos usá-lo para mascarar todos os tokens provenientes da mesma palavra na modelagem de linguagem mascarada (uma técnica chamada <em>mascaramento da palavra inteira</em>)",_e,S,We,Ms,rn="Da mesma forma, existe um método <code>sentence_ids()</code> que podemos usar para mapear um token para a sentença de onde veio (embora, neste caso, o <code>token_type_ids</code> retornado pelo tokenizador possa nos dar a mesma informação).",He,ds,cn="Por fim, podemos mapear qualquer palavra ou token para caracteres no texto original (e vice-versa) através dos métodos <code>word_to_chars()</code> ou <code>token_to_chars()</code> e <code>char_to_word()</code> ou <code>char_to_token()</code>. Por exemplo, o método <code>word_ids()</code> nos diz que <code>##yl</code> é parte da palavra no índice 3, mas qual palavra está na frase? Podemos descobrir da seguinte forma:",Ve,ms,Xe,Js,De,us,Mn="Como mencionamos anteriormente, isso é apoiado pelo fato de que o tokenizador rápido acompanha o intervalo de texto de cada token em uma lista de <em>offsets</em>. Para ilustrar seu uso, mostraremos a seguir como replicar manualmente os resultados do pipeline <code>token-classification</code>.",Ge,R,qe,js,Ye,ys,dn='No <a href="/course/chapter1">Capítulo 1</a> tivemos o primeiro gosto de aplicar o NER — onde a tarefa é identificar quais partes do texto correspondem a entidades como pessoas, locais ou organizações — com a função do 🤗 Transformers <code>pipeline()</code>. Então, no <a href="/course/chapter2">Capítulo 2</a>, vimos como um pipeline agrupa os três estágios necessários para obter as previsões de um texto: tokenização, passagem das entradas pelo modelo e pós-processamento. As duas primeiras etapas do pipeline <code>token-classification</code> são as mesmas de qualquer outro pipeline, mas o pós-processamento é um pouco mais complexo — vejamos como!',Le,Q,z,Me,Ts,Fe,Us,mn='Primeiro, vamos usar um pipeline de classificação de token para que possamos obter alguns resultados para comparar manualmente. O modelo usado por padrão é <a href="https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english" rel="nofollow"><code>dbmdz/bert-large-cased-finetuned-conll03-english</code></a>; ele executa NER em frases:',Pe,xs,Oe,hs,Ke,ws,Jn="O modelo identificou corretamente cada token gerado por “Sylvain” como uma pessoa, cada token gerado por “Hugging Face” como uma organização e o token “Brooklyn” como um local. Também podemos pedir ao pipeline para agrupar os tokens que correspondem à mesma entidade:",sa,bs,ea,gs,aa,fs,un="O parâmetro <code>aggregation_strategy</code> escolhido mudará as pontuações calculadas para cada entidade agrupada. Com o valor <code>&quot;simple&quot;</code>, a pontuação é apenas a média das pontuações de cada token na entidade dada: por exemplo, a pontuação de “Sylvain” é a média das pontuações que vimos no exemplo anterior para os tokens <code>S</code>, <code>##yl</code>, <code>##va</code>, e <code>##in</code>. Outras estratégias disponíveis são:",na,ks,jn="<li><code>&quot;first&quot;</code>, onde a pontuação de cada entidade é a pontuação do primeiro token dessa entidade (portanto, para “Sylvain” seria 0.993828, a pontuação do token <code>S</code>)</li> <li><code>&quot;max&quot;</code>, onde a pontuação de cada entidade é a pontuação máxima dos tokens naquela entidade (portanto, para “Hugging Face” seria 0.98879766, a pontuação do token <code>&quot;Face&quot;</code>)</li> <li><code>&quot;average&quot;</code>, onde a pontuação de cada entidade é a média das pontuações das palavras que compõem aquela entidade (assim para “Sylvain” não haveria diferença da estratégia <code>&quot;simple&quot;</code>, mas <code>&quot;Hugging Face&quot;</code> teria uma pontuação de 0.9819, a média das pontuações para <code>&quot;Hugging&quot;</code>, 0.975, e <code>&quot;Face&quot;</code>, 0.98879)</li>",la,Cs,yn="Agora vejamos como obter esses resultados sem usar a função <code>pipeline()</code>!",ta,Is,pa,v,Z,de,$s,Tn="Temos um lote com 1 sequência de 19 tokens e o modelo tem 9 rótulos diferentes, então a saída do modelo tem um tamanho de 1 x 19 x 9. Assim como para o pipeline de classificação de texto, usamos uma função softmax para converter esses logits para probabilidades, e pegamos o argmax para obter previsões (note que podemos pegar o argmax nos logits porque o softmax não altera a ordem):",oa,B,N,me,As,ra,Qs,Un="O atributo <code>model.config.id2label</code> contém o mapeamento de índices para rótulos que podemos usar para entender as previsões:",ia,zs,ca,vs,Ma,Zs,xn="Como vimos anteriormente, existem 9 rótulos: <code>O</code> é o rótulo para os tokens que não estão em nenhuma entidade nomeada, e então temos dois rótulos para cada tipo de entidade (miscelânia, pessoa, organização e localização). O rótulo <code>B-XXX</code> indica que o token está no início de uma entidade <code>XXX</code> e o rótulo <code>I-XXX</code> indica que o token está dentro da entidade <code>XXX</code>. No caso do exemplo atual, esperaríamos que o nosso modelo classificasse o token <code>S</code> como <code>B-PER</code> (início de uma entidade pessoa) e os tokens <code>##yl</code>, <code>##va</code> e <code>##in</code> como <code>I-PER</code> (dentro da entidade pessoa).",da,Bs,hn="Você pode pensar que o modelo estava errado neste caso, pois deu o rótulo <code>I-PER</code> a todos esses quatro tokens, mas isso não é totalmente verdade. Na realidade, existem dois formatos para esses rótulos: <code>B-</code> e <code>I-</code>: <em>IOB1</em> e <em>IOB2</em>. O formato IOB2 (em rosa abaixo), é o que introduzimos, enquanto que no formato IOB1 (em azul), os rótulos que começam com <code>B-</code> são usados apenas para separar duas entidades adjacentes do mesmo tipo. O modelo que estamos usando foi ajustado em um conjunto de dados usando esse formato, e é por isso que ele atribui o rótulo <code>I-PER</code> ao token <code>S</code>.",ma,_,wn='<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter6/IOB_versions.svg" alt="IOB1 vs IOB2 format"/> <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter6/IOB_versions-dark.svg" alt="IOB1 vs IOB2 format"/>',Ja,Ns,bn="Com este mapa, estamos prontos para reproduzir (quase inteiramente) os resultados do primeiro pipeline — podemos apenas pegar a pontuação e o rótulo de cada token que não foi classificado como <code>O</code>:",ua,Es,ja,Ss,ya,Rs,gn="Isso é muito parecido com o que tínhamos antes, com uma exceção: o pipeline também nos dava informações sobre o <code>start</code> e <code>end</code> de cada entidade na frase original. É aqui que nosso mapeamento de offset entrará em ação. Para obter tais offsets, basta definir <code>return_offsets_mapping=True</code> quando aplicamos o tokenizador às nossas entradas:",Ta,_s,Ua,Ws,xa,Hs,fn="Cada tupla é o intervalo de texto correspondente a cada token, onde <code>(0, 0)</code> é reservado para os tokens especiais. Vimos antes que o token no índice 5 é <code>##yl</code>, que tem <code>(12, 14)</code> como offset aqui. Se pegarmos a fatia correspondente em nosso exemplo:",ha,Vs,wa,Xs,kn="obtemos o intervalo adequado de texto sem o <code>##</code>:",ba,Ds,ga,Gs,Cn="Usando isso, agora podemos completar os resultados anteriores:",fa,qs,ka,Ys,Ca,Ls,In="Este é o mesmo resultado que obtivemos no primeiro pipeline!",Ia,Fs,$a,Ps,$n="Usar os offsets para determinar as chaves inicial e final de cada entidade é útil, mas essa informação não é estritamente necessária. Quando queremos agrupar as entidades, no entanto, os offsets nos pouparão muito código confuso. Por exemplo, se quisermos agrupar os tokens <code>Hu</code>, <code>##gging</code> e <code>Face</code>, podemos fazer regras especiais que digam que os dois primeiros devem ser anexados e removido o <code>##</code>, e o <code>Face</code> deve ser adicionado com um espaço, pois não começa com <code>##</code> — mas isso só funcionaria para esse tipo específico de tokenizador. Teríamos que escrever outro conjunto de regras para um tokenizador SentencePiece ou Byte-Pair-Encoding (discutido mais adiante neste capítulo).",Aa,Os,An="Com os offsets, todo esse código personalizado desaparece: podemos apenas pegar o intervalo no texto original que começa com o primeiro token e termina com o último token. Então, no caso dos tokens <code>Hu</code>, <code>##ging</code> e <code>Face</code>, devemos começar no caractere 33 (o início de <code>Hu</code>) e terminar antes do caractere 45 (o final de <code>Face</code>):",Qa,Ks,za,se,va,ee,Qn="Para escrever o código para o pós-processamento das previsões ao agrupar entidades, agruparemos entidades consecutivas e rotuladas com <code>I-XXX</code>, excento a primeira, que pode ser rotulada como <code>B-XXX</code> ou <code>I-XXX</code> (portanto, paramos de agrupar uma entidade quando obtemos um <code>O</code>, um novo tipo de entidade ou um <code>B-XXX</code> que nos informa que uma entidade do mesmo tipo está iniciando):",Za,ae,Ba,ne,zn="E obtemos os mesmos resultados do nosso segundo pipeline!",Na,le,Ea,te,vn="Outro exemplo de uma tarefa onde esses offsets são extremamente úteis é a resposta a perguntas. O conhecimento deste pipeline, que faremos na próxima seção, também nos permitirá dar uma olhada em um último recurso dos tokenizadores na biblioteca 🤗 Transformers: lidar com tokens em excesso quando truncamos uma entrada em um determinado comprimento.",Sa,pe,Ra,ue,_a;k=new sl({props:{fw:b[0]}}),I=new Je({props:{title:"Os poderes especiais dos tokenizadores rápidos",local:"os-poderes-especiais-dos-tokenizadores-rápidos",headingTag:"h1"}});const Zn=[nl,al],oe=[];function Bn(s,e){return s[0]==="pt"?0:1}w=Bn(b),C=oe[w]=Zn[w](b),W=new Va({props:{id:"g8quOxoqhHQ"}}),E=new La({props:{warning:!0,$$slots:{default:[ll]},$$scope:{ctx:b}}}),X=new Je({props:{title:"Codificação em lote",local:"codificação-em-lote",headingTag:"h2"}}),D=new Va({props:{id:"3umI3tm27Vw"}}),L=new x({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMEElMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCglMjJiZXJ0LWJhc2UtY2FzZWQlMjIpJTBBZXhhbXBsZSUyMCUzRCUyMCUyMk15JTIwbmFtZSUyMGlzJTIwU3lsdmFpbiUyMGFuZCUyMEklMjB3b3JrJTIwYXQlMjBIdWdnaW5nJTIwRmFjZSUyMGluJTIwQnJvb2tseW4uJTIyJTBBZW5jb2RpbmclMjAlM0QlMjB0b2tlbml6ZXIoZXhhbXBsZSklMEFwcmludCh0eXBlKGVuY29kaW5nKSk=",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">&quot;bert-base-cased&quot;</span>)
example = <span class="hljs-string">&quot;My name is Sylvain and I work at Hugging Face in Brooklyn.&quot;</span>
encoding = tokenizer(example)
<span class="hljs-built_in">print</span>(<span class="hljs-built_in">type</span>(encoding))`,wrap:!1}}),P=new x({props:{code:"JTNDY2xhc3MlMjAndHJhbnNmb3JtZXJzLnRva2VuaXphdGlvbl91dGlsc19iYXNlLkJhdGNoRW5jb2RpbmcnJTNF",highlighted:'&lt;<span class="hljs-keyword">class</span> <span class="hljs-string">&#x27;transformers.tokenization_utils_base.BatchEncoding&#x27;</span>&gt;',wrap:!1}}),K=new x({props:{code:"dG9rZW5pemVyLmlzX2Zhc3Q=",highlighted:"tokenizer.is_fast",wrap:!1}}),ss=new x({props:{code:"VHJ1ZQ==",highlighted:'<span class="hljs-literal">True</span>',wrap:!1}}),as=new x({props:{code:"ZW5jb2RpbmcuaXNfZmFzdA==",highlighted:"encoding.is_fast",wrap:!1}}),ns=new x({props:{code:"VHJ1ZQ==",highlighted:'<span class="hljs-literal">True</span>',wrap:!1}}),ts=new x({props:{code:"ZW5jb2RpbmcudG9rZW5zKCk=",highlighted:"encoding.tokens()",wrap:!1}}),ps=new x({props:{code:"JTVCJyU1QkNMUyU1RCclMkMlMjAnTXknJTJDJTIwJ25hbWUnJTJDJTIwJ2lzJyUyQyUyMCdTJyUyQyUyMCclMjMlMjN5bCclMkMlMjAnJTIzJTIzdmEnJTJDJTIwJyUyMyUyM2luJyUyQyUyMCdhbmQnJTJDJTIwJ0knJTJDJTIwJ3dvcmsnJTJDJTIwJ2F0JyUyQyUyMCdIdSclMkMlMjAnJTIzJTIzZ2dpbmcnJTJDJTIwJ0ZhY2UnJTJDJTIwJ2luJyUyQyUwQSUyMCdCcm9va2x5biclMkMlMjAnLiclMkMlMjAnJTVCU0VQJTVEJyU1RA==",highlighted:`[<span class="hljs-string">&#x27;[CLS]&#x27;</span>, <span class="hljs-string">&#x27;My&#x27;</span>, <span class="hljs-string">&#x27;name&#x27;</span>, <span class="hljs-string">&#x27;is&#x27;</span>, <span class="hljs-string">&#x27;S&#x27;</span>, <span class="hljs-string">&#x27;##yl&#x27;</span>, <span class="hljs-string">&#x27;##va&#x27;</span>, <span class="hljs-string">&#x27;##in&#x27;</span>, <span class="hljs-string">&#x27;and&#x27;</span>, <span class="hljs-string">&#x27;I&#x27;</span>, <span class="hljs-string">&#x27;work&#x27;</span>, <span class="hljs-string">&#x27;at&#x27;</span>, <span class="hljs-string">&#x27;Hu&#x27;</span>, <span class="hljs-string">&#x27;##gging&#x27;</span>, <span class="hljs-string">&#x27;Face&#x27;</span>, <span class="hljs-string">&#x27;in&#x27;</span>,
<span class="hljs-string">&#x27;Brooklyn&#x27;</span>, <span class="hljs-string">&#x27;.&#x27;</span>, <span class="hljs-string">&#x27;[SEP]&#x27;</span>]`,wrap:!1}}),rs=new x({props:{code:"ZW5jb2Rpbmcud29yZF9pZHMoKQ==",highlighted:"encoding.word_ids()",wrap:!1}}),is=new x({props:{code:"JTVCTm9uZSUyQyUyMDAlMkMlMjAxJTJDJTIwMiUyQyUyMDMlMkMlMjAzJTJDJTIwMyUyQyUyMDMlMkMlMjA0JTJDJTIwNSUyQyUyMDYlMkMlMjA3JTJDJTIwOCUyQyUyMDglMkMlMjA5JTJDJTIwMTAlMkMlMjAxMSUyQyUyMDEyJTJDJTIwTm9uZSU1RA==",highlighted:'[<span class="hljs-literal">None</span>, <span class="hljs-number">0</span>, <span class="hljs-number">1</span>, <span class="hljs-number">2</span>, <span class="hljs-number">3</span>, <span class="hljs-number">3</span>, <span class="hljs-number">3</span>, <span class="hljs-number">3</span>, <span class="hljs-number">4</span>, <span class="hljs-number">5</span>, <span class="hljs-number">6</span>, <span class="hljs-number">7</span>, <span class="hljs-number">8</span>, <span class="hljs-number">8</span>, <span class="hljs-number">9</span>, <span class="hljs-number">10</span>, <span class="hljs-number">11</span>, <span class="hljs-number">12</span>, <span class="hljs-literal">None</span>]',wrap:!1}}),S=new La({props:{$$slots:{default:[tl]},$$scope:{ctx:b}}}),ms=new x({props:{code:"c3RhcnQlMkMlMjBlbmQlMjAlM0QlMjBlbmNvZGluZy53b3JkX3RvX2NoYXJzKDMpJTBBZXhhbXBsZSU1QnN0YXJ0JTNBZW5kJTVE",highlighted:`start, end = encoding.word_to_chars(<span class="hljs-number">3</span>)
example[start:end]`,wrap:!1}}),Js=new x({props:{code:"U3lsdmFpbg==",highlighted:"Sylvain",wrap:!1}}),R=new La({props:{$$slots:{default:[pl]},$$scope:{ctx:b}}}),js=new Je({props:{title:"Dentro do pipeline token-classification",local:"dentro-do-pipeline-token-classification",headingTag:"h2"}});const Nn=[rl,ol],re=[];function En(s,e){return s[0]==="pt"?0:1}Q=En(b),z=re[Q]=Nn[Q](b),Ts=new Je({props:{title:"Obtendo os resultados básicos com o pipeline",local:"obtendo-os-resultados-básicos-com-o-pipeline",headingTag:"h3"}}),xs=new x({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMHBpcGVsaW5lJTBBJTBBdG9rZW5fY2xhc3NpZmllciUyMCUzRCUyMHBpcGVsaW5lKCUyMnRva2VuLWNsYXNzaWZpY2F0aW9uJTIyKSUwQXRva2VuX2NsYXNzaWZpZXIoJTIyTXklMjBuYW1lJTIwaXMlMjBTeWx2YWluJTIwYW5kJTIwSSUyMHdvcmslMjBhdCUyMEh1Z2dpbmclMjBGYWNlJTIwaW4lMjBCcm9va2x5bi4lMjIp",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
token_classifier = pipeline(<span class="hljs-string">&quot;token-classification&quot;</span>)
token_classifier(<span class="hljs-string">&quot;My name is Sylvain and I work at Hugging Face in Brooklyn.&quot;</span>)`,wrap:!1}}),hs=new x({props:{code:"JTVCJTdCJ2VudGl0eSclM0ElMjAnSS1QRVInJTJDJTIwJ3Njb3JlJyUzQSUyMDAuOTk5MzgyOCUyQyUyMCdpbmRleCclM0ElMjA0JTJDJTIwJ3dvcmQnJTNBJTIwJ1MnJTJDJTIwJ3N0YXJ0JyUzQSUyMDExJTJDJTIwJ2VuZCclM0ElMjAxMiU3RCUyQyUwQSUyMCU3QidlbnRpdHknJTNBJTIwJ0ktUEVSJyUyQyUyMCdzY29yZSclM0ElMjAwLjk5ODE1NDc2JTJDJTIwJ2luZGV4JyUzQSUyMDUlMkMlMjAnd29yZCclM0ElMjAnJTIzJTIzeWwnJTJDJTIwJ3N0YXJ0JyUzQSUyMDEyJTJDJTIwJ2VuZCclM0ElMjAxNCU3RCUyQyUwQSUyMCU3QidlbnRpdHknJTNBJTIwJ0ktUEVSJyUyQyUyMCdzY29yZSclM0ElMjAwLjk5NTkwNzI1JTJDJTIwJ2luZGV4JyUzQSUyMDYlMkMlMjAnd29yZCclM0ElMjAnJTIzJTIzdmEnJTJDJTIwJ3N0YXJ0JyUzQSUyMDE0JTJDJTIwJ2VuZCclM0ElMjAxNiU3RCUyQyUwQSUyMCU3QidlbnRpdHknJTNBJTIwJ0ktUEVSJyUyQyUyMCdzY29yZSclM0ElMjAwLjk5OTIzMjclMkMlMjAnaW5kZXgnJTNBJTIwNyUyQyUyMCd3b3JkJyUzQSUyMCclMjMlMjNpbiclMkMlMjAnc3RhcnQnJTNBJTIwMTYlMkMlMjAnZW5kJyUzQSUyMDE4JTdEJTJDJTBBJTIwJTdCJ2VudGl0eSclM0ElMjAnSS1PUkcnJTJDJTIwJ3Njb3JlJyUzQSUyMDAuOTczODkzMzQlMkMlMjAnaW5kZXgnJTNBJTIwMTIlMkMlMjAnd29yZCclM0ElMjAnSHUnJTJDJTIwJ3N0YXJ0JyUzQSUyMDMzJTJDJTIwJ2VuZCclM0ElMjAzNSU3RCUyQyUwQSUyMCU3QidlbnRpdHknJTNBJTIwJ0ktT1JHJyUyQyUyMCdzY29yZSclM0ElMjAwLjk3NjExNSUyQyUyMCdpbmRleCclM0ElMjAxMyUyQyUyMCd3b3JkJyUzQSUyMCclMjMlMjNnZ2luZyclMkMlMjAnc3RhcnQnJTNBJTIwMzUlMkMlMjAnZW5kJyUzQSUyMDQwJTdEJTJDJTBBJTIwJTdCJ2VudGl0eSclM0ElMjAnSS1PUkcnJTJDJTIwJ3Njb3JlJyUzQSUyMDAuOTg4Nzk3NjYlMkMlMjAnaW5kZXgnJTNBJTIwMTQlMkMlMjAnd29yZCclM0ElMjAnRmFjZSclMkMlMjAnc3RhcnQnJTNBJTIwNDElMkMlMjAnZW5kJyUzQSUyMDQ1JTdEJTJDJTBBJTIwJTdCJ2VudGl0eSclM0ElMjAnSS1MT0MnJTJDJTIwJ3Njb3JlJyUzQSUyMDAuOTkzMjEwNTUlMkMlMjAnaW5kZXgnJTNBJTIwMTYlMkMlMjAnd29yZCclM0ElMjAnQnJvb2tseW4nJTJDJTIwJ3N0YXJ0JyUzQSUyMDQ5JTJDJTIwJ2VuZCclM0ElMjA1NyU3RCU1RA==",highlighted:`[{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-PER&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.9993828</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">4</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;S&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">11</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">12</span>},
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-PER&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.99815476</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">5</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;##yl&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">12</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">14</span>},
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-PER&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.99590725</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">6</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;##va&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">14</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">16</span>},
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-PER&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.9992327</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">7</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;##in&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">16</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">18</span>},
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-ORG&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.97389334</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">12</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;Hu&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">33</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">35</span>},
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-ORG&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.976115</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">13</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;##gging&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">35</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">40</span>},
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-ORG&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.98879766</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">14</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;Face&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">41</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">45</span>},
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-LOC&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.99321055</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">16</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;Brooklyn&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">49</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">57</span>}]`,wrap:!1}}),bs=new x({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMHBpcGVsaW5lJTBBJTBBdG9rZW5fY2xhc3NpZmllciUyMCUzRCUyMHBpcGVsaW5lKCUyMnRva2VuLWNsYXNzaWZpY2F0aW9uJTIyJTJDJTIwYWdncmVnYXRpb25fc3RyYXRlZ3klM0QlMjJzaW1wbGUlMjIpJTBBdG9rZW5fY2xhc3NpZmllciglMjJNeSUyMG5hbWUlMjBpcyUyMFN5bHZhaW4lMjBhbmQlMjBJJTIwd29yayUyMGF0JTIwSHVnZ2luZyUyMEZhY2UlMjBpbiUyMEJyb29rbHluLiUyMik=",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
token_classifier = pipeline(<span class="hljs-string">&quot;token-classification&quot;</span>, aggregation_strategy=<span class="hljs-string">&quot;simple&quot;</span>)
token_classifier(<span class="hljs-string">&quot;My name is Sylvain and I work at Hugging Face in Brooklyn.&quot;</span>)`,wrap:!1}}),gs=new x({props:{code:"JTVCJTdCJ2VudGl0eV9ncm91cCclM0ElMjAnUEVSJyUyQyUyMCdzY29yZSclM0ElMjAwLjk5ODE2OTQlMkMlMjAnd29yZCclM0ElMjAnU3lsdmFpbiclMkMlMjAnc3RhcnQnJTNBJTIwMTElMkMlMjAnZW5kJyUzQSUyMDE4JTdEJTJDJTBBJTIwJTdCJ2VudGl0eV9ncm91cCclM0ElMjAnT1JHJyUyQyUyMCdzY29yZSclM0ElMjAwLjk3OTYwMjA0JTJDJTIwJ3dvcmQnJTNBJTIwJ0h1Z2dpbmclMjBGYWNlJyUyQyUyMCdzdGFydCclM0ElMjAzMyUyQyUyMCdlbmQnJTNBJTIwNDUlN0QlMkMlMEElMjAlN0InZW50aXR5X2dyb3VwJyUzQSUyMCdMT0MnJTJDJTIwJ3Njb3JlJyUzQSUyMDAuOTkzMjEwNTUlMkMlMjAnd29yZCclM0ElMjAnQnJvb2tseW4nJTJDJTIwJ3N0YXJ0JyUzQSUyMDQ5JTJDJTIwJ2VuZCclM0ElMjA1NyU3RCU1RA==",highlighted:`[{<span class="hljs-string">&#x27;entity_group&#x27;</span>: <span class="hljs-string">&#x27;PER&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.9981694</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;Sylvain&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">11</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">18</span>},
{<span class="hljs-string">&#x27;entity_group&#x27;</span>: <span class="hljs-string">&#x27;ORG&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.97960204</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;Hugging Face&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">33</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">45</span>},
{<span class="hljs-string">&#x27;entity_group&#x27;</span>: <span class="hljs-string">&#x27;LOC&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.99321055</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;Brooklyn&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">49</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">57</span>}]`,wrap:!1}}),Is=new Je({props:{title:"Das entradas às previsões",local:"das-entradas-às-previsões",headingTag:"h3"}});const Sn=[cl,il],ie=[];function Rn(s,e){return s[0]==="pt"?0:1}v=Rn(b),Z=ie[v]=Sn[v](b);const _n=[dl,Ml],ce=[];function Wn(s,e){return s[0]==="pt"?0:1}return B=Wn(b),N=ce[B]=_n[B](b),As=new x({props:{code:"JTVCMCUyQyUyMDAlMkMlMjAwJTJDJTIwMCUyQyUyMDQlMkMlMjA0JTJDJTIwNCUyQyUyMDQlMkMlMjAwJTJDJTIwMCUyQyUyMDAlMkMlMjAwJTJDJTIwNiUyQyUyMDYlMkMlMjA2JTJDJTIwMCUyQyUyMDglMkMlMjAwJTJDJTIwMCU1RA==",highlighted:'[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">4</span>, <span class="hljs-number">4</span>, <span class="hljs-number">4</span>, <span class="hljs-number">4</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">6</span>, <span class="hljs-number">6</span>, <span class="hljs-number">6</span>, <span class="hljs-number">0</span>, <span class="hljs-number">8</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]',wrap:!1}}),zs=new x({props:{code:"bW9kZWwuY29uZmlnLmlkMmxhYmVs",highlighted:"model.config.id2label",wrap:!1}}),vs=new x({props:{code:"JTdCMCUzQSUyMCdPJyUyQyUwQSUyMDElM0ElMjAnQi1NSVNDJyUyQyUwQSUyMDIlM0ElMjAnSS1NSVNDJyUyQyUwQSUyMDMlM0ElMjAnQi1QRVInJTJDJTBBJTIwNCUzQSUyMCdJLVBFUiclMkMlMEElMjA1JTNBJTIwJ0ItT1JHJyUyQyUwQSUyMDYlM0ElMjAnSS1PUkcnJTJDJTBBJTIwNyUzQSUyMCdCLUxPQyclMkMlMEElMjA4JTNBJTIwJ0ktTE9DJyU3RA==",highlighted:`{<span class="hljs-number">0</span>: <span class="hljs-string">&#x27;O&#x27;</span>,
<span class="hljs-number">1</span>: <span class="hljs-string">&#x27;B-MISC&#x27;</span>,
<span class="hljs-number">2</span>: <span class="hljs-string">&#x27;I-MISC&#x27;</span>,
<span class="hljs-number">3</span>: <span class="hljs-string">&#x27;B-PER&#x27;</span>,
<span class="hljs-number">4</span>: <span class="hljs-string">&#x27;I-PER&#x27;</span>,
<span class="hljs-number">5</span>: <span class="hljs-string">&#x27;B-ORG&#x27;</span>,
<span class="hljs-number">6</span>: <span class="hljs-string">&#x27;I-ORG&#x27;</span>,
<span class="hljs-number">7</span>: <span class="hljs-string">&#x27;B-LOC&#x27;</span>,
<span class="hljs-number">8</span>: <span class="hljs-string">&#x27;I-LOC&#x27;</span>}`,wrap:!1}}),Es=new x({props:{code:"cmVzdWx0cyUyMCUzRCUyMCU1QiU1RCUwQXRva2VucyUyMCUzRCUyMGlucHV0cy50b2tlbnMoKSUwQSUwQWZvciUyMGlkeCUyQyUyMHByZWQlMjBpbiUyMGVudW1lcmF0ZShwcmVkaWN0aW9ucyklM0ElMEElMjAlMjAlMjAlMjBsYWJlbCUyMCUzRCUyMG1vZGVsLmNvbmZpZy5pZDJsYWJlbCU1QnByZWQlNUQlMEElMjAlMjAlMjAlMjBpZiUyMGxhYmVsJTIwISUzRCUyMCUyMk8lMjIlM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjByZXN1bHRzLmFwcGVuZCglMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMjJlbnRpdHklMjIlM0ElMjBsYWJlbCUyQyUyMCUyMnNjb3JlJTIyJTNBJTIwcHJvYmFiaWxpdGllcyU1QmlkeCU1RCU1QnByZWQlNUQlMkMlMjAlMjJ3b3JkJTIyJTNBJTIwdG9rZW5zJTVCaWR4JTVEJTdEJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwKSUwQSUwQXByaW50KHJlc3VsdHMp",highlighted:`results = []
tokens = inputs.tokens()
<span class="hljs-keyword">for</span> idx, pred <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(predictions):
label = model.config.id2label[pred]
<span class="hljs-keyword">if</span> label != <span class="hljs-string">&quot;O&quot;</span>:
results.append(
{<span class="hljs-string">&quot;entity&quot;</span>: label, <span class="hljs-string">&quot;score&quot;</span>: probabilities[idx][pred], <span class="hljs-string">&quot;word&quot;</span>: tokens[idx]}
)
<span class="hljs-built_in">print</span>(results)`,wrap:!1}}),Ss=new x({props:{code:"JTVCJTdCJ2VudGl0eSclM0ElMjAnSS1QRVInJTJDJTIwJ3Njb3JlJyUzQSUyMDAuOTk5MzgyOCUyQyUyMCdpbmRleCclM0ElMjA0JTJDJTIwJ3dvcmQnJTNBJTIwJ1MnJTdEJTJDJTBBJTIwJTdCJ2VudGl0eSclM0ElMjAnSS1QRVInJTJDJTIwJ3Njb3JlJyUzQSUyMDAuOTk4MTU0NzYlMkMlMjAnaW5kZXgnJTNBJTIwNSUyQyUyMCd3b3JkJyUzQSUyMCclMjMlMjN5bCclN0QlMkMlMEElMjAlN0InZW50aXR5JyUzQSUyMCdJLVBFUiclMkMlMjAnc2NvcmUnJTNBJTIwMC45OTU5MDcyNSUyQyUyMCdpbmRleCclM0ElMjA2JTJDJTIwJ3dvcmQnJTNBJTIwJyUyMyUyM3ZhJyU3RCUyQyUwQSUyMCU3QidlbnRpdHknJTNBJTIwJ0ktUEVSJyUyQyUyMCdzY29yZSclM0ElMjAwLjk5OTIzMjclMkMlMjAnaW5kZXgnJTNBJTIwNyUyQyUyMCd3b3JkJyUzQSUyMCclMjMlMjNpbiclN0QlMkMlMEElMjAlN0InZW50aXR5JyUzQSUyMCdJLU9SRyclMkMlMjAnc2NvcmUnJTNBJTIwMC45NzM4OTMzNCUyQyUyMCdpbmRleCclM0ElMjAxMiUyQyUyMCd3b3JkJyUzQSUyMCdIdSclN0QlMkMlMEElMjAlN0InZW50aXR5JyUzQSUyMCdJLU9SRyclMkMlMjAnc2NvcmUnJTNBJTIwMC45NzYxMTUlMkMlMjAnaW5kZXgnJTNBJTIwMTMlMkMlMjAnd29yZCclM0ElMjAnJTIzJTIzZ2dpbmcnJTdEJTJDJTBBJTIwJTdCJ2VudGl0eSclM0ElMjAnSS1PUkcnJTJDJTIwJ3Njb3JlJyUzQSUyMDAuOTg4Nzk3NjYlMkMlMjAnaW5kZXgnJTNBJTIwMTQlMkMlMjAnd29yZCclM0ElMjAnRmFjZSclN0QlMkMlMEElMjAlN0InZW50aXR5JyUzQSUyMCdJLUxPQyclMkMlMjAnc2NvcmUnJTNBJTIwMC45OTMyMTA1NSUyQyUyMCdpbmRleCclM0ElMjAxNiUyQyUyMCd3b3JkJyUzQSUyMCdCcm9va2x5biclN0QlNUQ=",highlighted:`[{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-PER&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.9993828</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">4</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;S&#x27;</span>},
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-PER&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.99815476</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">5</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;##yl&#x27;</span>},
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-PER&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.99590725</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">6</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;##va&#x27;</span>},
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-PER&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.9992327</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">7</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;##in&#x27;</span>},
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-ORG&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.97389334</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">12</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;Hu&#x27;</span>},
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-ORG&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.976115</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">13</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;##gging&#x27;</span>},
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-ORG&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.98879766</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">14</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;Face&#x27;</span>},
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-LOC&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.99321055</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">16</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;Brooklyn&#x27;</span>}]`,wrap:!1}}),_s=new x({props:{code:"aW5wdXRzX3dpdGhfb2Zmc2V0cyUyMCUzRCUyMHRva2VuaXplcihleGFtcGxlJTJDJTIwcmV0dXJuX29mZnNldHNfbWFwcGluZyUzRFRydWUpJTBBaW5wdXRzX3dpdGhfb2Zmc2V0cyU1QiUyMm9mZnNldF9tYXBwaW5nJTIyJTVE",highlighted:`inputs_with_offsets = tokenizer(example, return_offsets_mapping=<span class="hljs-literal">True</span>)
inputs_with_offsets[<span class="hljs-string">&quot;offset_mapping&quot;</span>]`,wrap:!1}}),Ws=new x({props:{code:"JTVCKDAlMkMlMjAwKSUyQyUyMCgwJTJDJTIwMiklMkMlMjAoMyUyQyUyMDcpJTJDJTIwKDglMkMlMjAxMCklMkMlMjAoMTElMkMlMjAxMiklMkMlMjAoMTIlMkMlMjAxNCklMkMlMjAoMTQlMkMlMjAxNiklMkMlMjAoMTYlMkMlMjAxOCklMkMlMjAoMTklMkMlMjAyMiklMkMlMjAoMjMlMkMlMjAyNCklMkMlMjAoMjUlMkMlMjAyOSklMkMlMjAoMzAlMkMlMjAzMiklMkMlMEElMjAoMzMlMkMlMjAzNSklMkMlMjAoMzUlMkMlMjA0MCklMkMlMjAoNDElMkMlMjA0NSklMkMlMjAoNDYlMkMlMjA0OCklMkMlMjAoNDklMkMlMjA1NyklMkMlMjAoNTclMkMlMjA1OCklMkMlMjAoMCUyQyUyMDApJTVE",highlighted:`[(<span class="hljs-number">0</span>, <span class="hljs-number">0</span>), (<span class="hljs-number">0</span>, <span class="hljs-number">2</span>), (<span class="hljs-number">3</span>, <span class="hljs-number">7</span>), (<span class="hljs-number">8</span>, <span class="hljs-number">10</span>), (<span class="hljs-number">11</span>, <span class="hljs-number">12</span>), (<span class="hljs-number">12</span>, <span class="hljs-number">14</span>), (<span class="hljs-number">14</span>, <span class="hljs-number">16</span>), (<span class="hljs-number">16</span>, <span class="hljs-number">18</span>), (<span class="hljs-number">19</span>, <span class="hljs-number">22</span>), (<span class="hljs-number">23</span>, <span class="hljs-number">24</span>), (<span class="hljs-number">25</span>, <span class="hljs-number">29</span>), (<span class="hljs-number">30</span>, <span class="hljs-number">32</span>),
(<span class="hljs-number">33</span>, <span class="hljs-number">35</span>), (<span class="hljs-number">35</span>, <span class="hljs-number">40</span>), (<span class="hljs-number">41</span>, <span class="hljs-number">45</span>), (<span class="hljs-number">46</span>, <span class="hljs-number">48</span>), (<span class="hljs-number">49</span>, <span class="hljs-number">57</span>), (<span class="hljs-number">57</span>, <span class="hljs-number">58</span>), (<span class="hljs-number">0</span>, <span class="hljs-number">0</span>)]`,wrap:!1}}),Vs=new x({props:{code:"ZXhhbXBsZSU1QjEyJTNBMTQlNUQ=",highlighted:'example[<span class="hljs-number">12</span>:<span class="hljs-number">14</span>]',wrap:!1}}),Ds=new x({props:{code:"eWw=",highlighted:"yl",wrap:!1}}),qs=new x({props:{code:"cmVzdWx0cyUyMCUzRCUyMCU1QiU1RCUwQWlucHV0c193aXRoX29mZnNldHMlMjAlM0QlMjB0b2tlbml6ZXIoZXhhbXBsZSUyQyUyMHJldHVybl9vZmZzZXRzX21hcHBpbmclM0RUcnVlKSUwQXRva2VucyUyMCUzRCUyMGlucHV0c193aXRoX29mZnNldHMudG9rZW5zKCklMEFvZmZzZXRzJTIwJTNEJTIwaW5wdXRzX3dpdGhfb2Zmc2V0cyU1QiUyMm9mZnNldF9tYXBwaW5nJTIyJTVEJTBBJTBBZm9yJTIwaWR4JTJDJTIwcHJlZCUyMGluJTIwZW51bWVyYXRlKHByZWRpY3Rpb25zKSUzQSUwQSUyMCUyMCUyMCUyMGxhYmVsJTIwJTNEJTIwbW9kZWwuY29uZmlnLmlkMmxhYmVsJTVCcHJlZCU1RCUwQSUyMCUyMCUyMCUyMGlmJTIwbGFiZWwlMjAhJTNEJTIwJTIyTyUyMiUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHN0YXJ0JTJDJTIwZW5kJTIwJTNEJTIwb2Zmc2V0cyU1QmlkeCU1RCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHJlc3VsdHMuYXBwZW5kKCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmVudGl0eSUyMiUzQSUyMGxhYmVsJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyc2NvcmUlMjIlM0ElMjBwcm9iYWJpbGl0aWVzJTVCaWR4JTVEJTVCcHJlZCU1RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMndvcmQlMjIlM0ElMjB0b2tlbnMlNUJpZHglNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJzdGFydCUyMiUzQSUyMHN0YXJ0JTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyZW5kJTIyJTNBJTIwZW5kJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdEJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwKSUwQSUwQXByaW50KHJlc3VsdHMp",highlighted:`results = []
inputs_with_offsets = tokenizer(example, return_offsets_mapping=<span class="hljs-literal">True</span>)
tokens = inputs_with_offsets.tokens()
offsets = inputs_with_offsets[<span class="hljs-string">&quot;offset_mapping&quot;</span>]
<span class="hljs-keyword">for</span> idx, pred <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(predictions):
label = model.config.id2label[pred]
<span class="hljs-keyword">if</span> label != <span class="hljs-string">&quot;O&quot;</span>:
start, end = offsets[idx]
results.append(
{
<span class="hljs-string">&quot;entity&quot;</span>: label,
<span class="hljs-string">&quot;score&quot;</span>: probabilities[idx][pred],
<span class="hljs-string">&quot;word&quot;</span>: tokens[idx],
<span class="hljs-string">&quot;start&quot;</span>: start,
<span class="hljs-string">&quot;end&quot;</span>: end,
}
)
<span class="hljs-built_in">print</span>(results)`,wrap:!1}}),Ys=new x({props:{code:"JTVCJTdCJ2VudGl0eSclM0ElMjAnSS1QRVInJTJDJTIwJ3Njb3JlJyUzQSUyMDAuOTk5MzgyOCUyQyUyMCdpbmRleCclM0ElMjA0JTJDJTIwJ3dvcmQnJTNBJTIwJ1MnJTJDJTIwJ3N0YXJ0JyUzQSUyMDExJTJDJTIwJ2VuZCclM0ElMjAxMiU3RCUyQyUwQSUyMCU3QidlbnRpdHknJTNBJTIwJ0ktUEVSJyUyQyUyMCdzY29yZSclM0ElMjAwLjk5ODE1NDc2JTJDJTIwJ2luZGV4JyUzQSUyMDUlMkMlMjAnd29yZCclM0ElMjAnJTIzJTIzeWwnJTJDJTIwJ3N0YXJ0JyUzQSUyMDEyJTJDJTIwJ2VuZCclM0ElMjAxNCU3RCUyQyUwQSUyMCU3QidlbnRpdHknJTNBJTIwJ0ktUEVSJyUyQyUyMCdzY29yZSclM0ElMjAwLjk5NTkwNzI1JTJDJTIwJ2luZGV4JyUzQSUyMDYlMkMlMjAnd29yZCclM0ElMjAnJTIzJTIzdmEnJTJDJTIwJ3N0YXJ0JyUzQSUyMDE0JTJDJTIwJ2VuZCclM0ElMjAxNiU3RCUyQyUwQSUyMCU3QidlbnRpdHknJTNBJTIwJ0ktUEVSJyUyQyUyMCdzY29yZSclM0ElMjAwLjk5OTIzMjclMkMlMjAnaW5kZXgnJTNBJTIwNyUyQyUyMCd3b3JkJyUzQSUyMCclMjMlMjNpbiclMkMlMjAnc3RhcnQnJTNBJTIwMTYlMkMlMjAnZW5kJyUzQSUyMDE4JTdEJTJDJTBBJTIwJTdCJ2VudGl0eSclM0ElMjAnSS1PUkcnJTJDJTIwJ3Njb3JlJyUzQSUyMDAuOTczODkzMzQlMkMlMjAnaW5kZXgnJTNBJTIwMTIlMkMlMjAnd29yZCclM0ElMjAnSHUnJTJDJTIwJ3N0YXJ0JyUzQSUyMDMzJTJDJTIwJ2VuZCclM0ElMjAzNSU3RCUyQyUwQSUyMCU3QidlbnRpdHknJTNBJTIwJ0ktT1JHJyUyQyUyMCdzY29yZSclM0ElMjAwLjk3NjExNSUyQyUyMCdpbmRleCclM0ElMjAxMyUyQyUyMCd3b3JkJyUzQSUyMCclMjMlMjNnZ2luZyclMkMlMjAnc3RhcnQnJTNBJTIwMzUlMkMlMjAnZW5kJyUzQSUyMDQwJTdEJTJDJTBBJTIwJTdCJ2VudGl0eSclM0ElMjAnSS1PUkcnJTJDJTIwJ3Njb3JlJyUzQSUyMDAuOTg4Nzk3NjYlMkMlMjAnaW5kZXgnJTNBJTIwMTQlMkMlMjAnd29yZCclM0ElMjAnRmFjZSclMkMlMjAnc3RhcnQnJTNBJTIwNDElMkMlMjAnZW5kJyUzQSUyMDQ1JTdEJTJDJTBBJTIwJTdCJ2VudGl0eSclM0ElMjAnSS1MT0MnJTJDJTIwJ3Njb3JlJyUzQSUyMDAuOTkzMjEwNTUlMkMlMjAnaW5kZXgnJTNBJTIwMTYlMkMlMjAnd29yZCclM0ElMjAnQnJvb2tseW4nJTJDJTIwJ3N0YXJ0JyUzQSUyMDQ5JTJDJTIwJ2VuZCclM0ElMjA1NyU3RCU1RA==",highlighted:`[{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-PER&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.9993828</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">4</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;S&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">11</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">12</span>},
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-PER&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.99815476</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">5</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;##yl&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">12</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">14</span>},
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-PER&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.99590725</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">6</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;##va&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">14</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">16</span>},
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-PER&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.9992327</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">7</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;##in&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">16</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">18</span>},
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-ORG&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.97389334</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">12</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;Hu&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">33</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">35</span>},
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-ORG&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.976115</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">13</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;##gging&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">35</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">40</span>},
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-ORG&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.98879766</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">14</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;Face&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">41</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">45</span>},
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-LOC&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.99321055</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">16</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;Brooklyn&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">49</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">57</span>}]`,wrap:!1}}),Fs=new Je({props:{title:"Agrupando entidades",local:"agrupando-entidades",headingTag:"h3"}}),Ks=new x({props:{code:"ZXhhbXBsZSU1QjMzJTNBNDUlNUQ=",highlighted:'example[<span class="hljs-number">33</span>:<span class="hljs-number">45</span>]',wrap:!1}}),se=new x({props:{code:"SHVnZ2luZyUyMEZhY2U=",highlighted:"Hugging Face",wrap:!1}}),ae=new x({props:{code:"aW1wb3J0JTIwbnVtcHklMjBhcyUyMG5wJTBBJTBBcmVzdWx0cyUyMCUzRCUyMCU1QiU1RCUwQWlucHV0c193aXRoX29mZnNldHMlMjAlM0QlMjB0b2tlbml6ZXIoZXhhbXBsZSUyQyUyMHJldHVybl9vZmZzZXRzX21hcHBpbmclM0RUcnVlKSUwQXRva2VucyUyMCUzRCUyMGlucHV0c193aXRoX29mZnNldHMudG9rZW5zKCklMEFvZmZzZXRzJTIwJTNEJTIwaW5wdXRzX3dpdGhfb2Zmc2V0cyU1QiUyMm9mZnNldF9tYXBwaW5nJTIyJTVEJTBBJTBBaWR4JTIwJTNEJTIwMCUwQXdoaWxlJTIwaWR4JTIwJTNDJTIwbGVuKHByZWRpY3Rpb25zKSUzQSUwQSUyMCUyMCUyMCUyMHByZWQlMjAlM0QlMjBwcmVkaWN0aW9ucyU1QmlkeCU1RCUwQSUyMCUyMCUyMCUyMGxhYmVsJTIwJTNEJTIwbW9kZWwuY29uZmlnLmlkMmxhYmVsJTVCcHJlZCU1RCUwQSUyMCUyMCUyMCUyMGlmJTIwbGFiZWwlMjAhJTNEJTIwJTIyTyUyMiUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMyUyMFJlbW92ZW5kbyUyMG8lMjBCLSUyMG91JTIwSS0lMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBsYWJlbCUyMCUzRCUyMGxhYmVsJTVCMiUzQSU1RCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHN0YXJ0JTJDJTIwXyUyMCUzRCUyMG9mZnNldHMlNUJpZHglNUQlMEElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjMlMjBWYW1vcyUyMHBlZ2FyJTIwdG9kb3MlMjBvcyUyMHRva2VucyUyMHJvdHVsYWRvcyUyMGNvbSUyMEktJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwYWxsX3Njb3JlcyUyMCUzRCUyMCU1QiU1RCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHdoaWxlJTIwKCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGlkeCUyMCUzQyUyMGxlbihwcmVkaWN0aW9ucyklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBhbmQlMjBtb2RlbC5jb25maWcuaWQybGFiZWwlNUJwcmVkaWN0aW9ucyU1QmlkeCU1RCU1RCUyMCUzRCUzRCUyMGYlMjJJLSU3QmxhYmVsJTdEJTIyJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwKSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGFsbF9zY29yZXMuYXBwZW5kKHByb2JhYmlsaXRpZXMlNUJpZHglNUQlNUJwcmVkJTVEKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMF8lMkMlMjBlbmQlMjAlM0QlMjBvZmZzZXRzJTVCaWR4JTVEJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwaWR4JTIwJTJCJTNEJTIwMSUwQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMyUyMEElMjBwb250dWElQzMlQTclQzMlQTNvJTIwJUMzJUE5JTIwYSUyMG0lQzMlQTlkaWElMjBkZSUyMHRvZGFzJTIwYXMlMjBwb250dWElQzMlQTclQzMlQjVlcyUyMGRvcyUyMHRva2VucyUyMGRhJTIwZW50aWRhZGUlMjBhZ3J1cGFkYSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHNjb3JlJTIwJTNEJTIwbnAubWVhbihhbGxfc2NvcmVzKS5pdGVtKCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjB3b3JkJTIwJTNEJTIwZXhhbXBsZSU1QnN0YXJ0JTNBZW5kJTVEJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcmVzdWx0cy5hcHBlbmQoJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyZW50aXR5X2dyb3VwJTIyJTNBJTIwbGFiZWwlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJzY29yZSUyMiUzQSUyMHNjb3JlJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyd29yZCUyMiUzQSUyMHdvcmQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJzdGFydCUyMiUzQSUyMHN0YXJ0JTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyZW5kJTIyJTNBJTIwZW5kJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdEJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwKSUwQSUyMCUyMCUyMCUyMGlkeCUyMCUyQiUzRCUyMDElMEElMEFwcmludChyZXN1bHRzKQ==",highlighted:`<span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
results = []
inputs_with_offsets = tokenizer(example, return_offsets_mapping=<span class="hljs-literal">True</span>)
tokens = inputs_with_offsets.tokens()
offsets = inputs_with_offsets[<span class="hljs-string">&quot;offset_mapping&quot;</span>]
idx = <span class="hljs-number">0</span>
<span class="hljs-keyword">while</span> idx &lt; <span class="hljs-built_in">len</span>(predictions):
pred = predictions[idx]
label = model.config.id2label[pred]
<span class="hljs-keyword">if</span> label != <span class="hljs-string">&quot;O&quot;</span>:
<span class="hljs-comment"># Removendo o B- ou I-</span>
label = label[<span class="hljs-number">2</span>:]
start, _ = offsets[idx]
<span class="hljs-comment"># Vamos pegar todos os tokens rotulados com I-</span>
all_scores = []
<span class="hljs-keyword">while</span> (
idx &lt; <span class="hljs-built_in">len</span>(predictions)
<span class="hljs-keyword">and</span> model.config.id2label[predictions[idx]] == <span class="hljs-string">f&quot;I-<span class="hljs-subst">{label}</span>&quot;</span>
):
all_scores.append(probabilities[idx][pred])
_, end = offsets[idx]
idx += <span class="hljs-number">1</span>
<span class="hljs-comment"># A pontuação é a média de todas as pontuações dos tokens da entidade agrupada</span>
score = np.mean(all_scores).item()
word = example[start:end]
results.append(
{
<span class="hljs-string">&quot;entity_group&quot;</span>: label,
<span class="hljs-string">&quot;score&quot;</span>: score,
<span class="hljs-string">&quot;word&quot;</span>: word,
<span class="hljs-string">&quot;start&quot;</span>: start,
<span class="hljs-string">&quot;end&quot;</span>: end,
}
)
idx += <span class="hljs-number">1</span>
<span class="hljs-built_in">print</span>(results)`,wrap:!1}}),le=new x({props:{code:"JTVCJTdCJ2VudGl0eV9ncm91cCclM0ElMjAnUEVSJyUyQyUyMCdzY29yZSclM0ElMjAwLjk5ODE2OTQlMkMlMjAnd29yZCclM0ElMjAnU3lsdmFpbiclMkMlMjAnc3RhcnQnJTNBJTIwMTElMkMlMjAnZW5kJyUzQSUyMDE4JTdEJTJDJTBBJTIwJTdCJ2VudGl0eV9ncm91cCclM0ElMjAnT1JHJyUyQyUyMCdzY29yZSclM0ElMjAwLjk3OTYwMjA0JTJDJTIwJ3dvcmQnJTNBJTIwJ0h1Z2dpbmclMjBGYWNlJyUyQyUyMCdzdGFydCclM0ElMjAzMyUyQyUyMCdlbmQnJTNBJTIwNDUlN0QlMkMlMEElMjAlN0InZW50aXR5X2dyb3VwJyUzQSUyMCdMT0MnJTJDJTIwJ3Njb3JlJyUzQSUyMDAuOTkzMjEwNTUlMkMlMjAnd29yZCclM0ElMjAnQnJvb2tseW4nJTJDJTIwJ3N0YXJ0JyUzQSUyMDQ5JTJDJTIwJ2VuZCclM0ElMjA1NyU3RCU1RA==",highlighted:`[{<span class="hljs-string">&#x27;entity_group&#x27;</span>: <span class="hljs-string">&#x27;PER&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.9981694</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;Sylvain&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">11</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">18</span>},
{<span class="hljs-string">&#x27;entity_group&#x27;</span>: <span class="hljs-string">&#x27;ORG&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.97960204</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;Hugging Face&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">33</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">45</span>},
{<span class="hljs-string">&#x27;entity_group&#x27;</span>: <span class="hljs-string">&#x27;LOC&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.99321055</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;Brooklyn&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">49</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">57</span>}]`,wrap:!1}}),pe=new el({props:{source:"https://github.com/huggingface/course/blob/main/chapters/pt/chapter6/3.mdx"}}),{c(){l=j("meta"),u=t(),o=j("p"),T=t(),M(k.$$.fragment),h=t(),M(I.$$.fragment),A=t(),C.c(),f=t(),$=j("p"),$.innerHTML=r,g=t(),M(W.$$.fragment),je=t(),H=j("p"),H.innerHTML=Pa,ye=t(),V=j("table"),V.innerHTML=Oa,Te=t(),M(E.$$.fragment),Ue=t(),M(X.$$.fragment),xe=t(),M(D.$$.fragment),he=t(),G=j("p"),G.innerHTML=Ka,we=t(),q=j("p"),q.innerHTML=sn,be=t(),Y=j("p"),Y.textContent=en,ge=t(),M(L.$$.fragment),fe=t(),F=j("p"),F.innerHTML=an,ke=t(),M(P.$$.fragment),Ce=t(),O=j("p"),O.innerHTML=nn,Ie=t(),M(K.$$.fragment),$e=t(),M(ss.$$.fragment),Ae=t(),es=j("p"),es.innerHTML=ln,Qe=t(),M(as.$$.fragment),ze=t(),M(ns.$$.fragment),ve=t(),ls=j("p"),ls.textContent=tn,Ze=t(),M(ts.$$.fragment),Be=t(),M(ps.$$.fragment),Ne=t(),os=j("p"),os.innerHTML=pn,Ee=t(),M(rs.$$.fragment),Se=t(),M(is.$$.fragment),Re=t(),cs=j("p"),cs.innerHTML=on,_e=t(),M(S.$$.fragment),We=t(),Ms=j("p"),Ms.innerHTML=rn,He=t(),ds=j("p"),ds.innerHTML=cn,Ve=t(),M(ms.$$.fragment),Xe=t(),M(Js.$$.fragment),De=t(),us=j("p"),us.innerHTML=Mn,Ge=t(),M(R.$$.fragment),qe=t(),M(js.$$.fragment),Ye=t(),ys=j("p"),ys.innerHTML=dn,Le=t(),z.c(),Me=t(),M(Ts.$$.fragment),Fe=t(),Us=j("p"),Us.innerHTML=mn,Pe=t(),M(xs.$$.fragment),Oe=t(),M(hs.$$.fragment),Ke=t(),ws=j("p"),ws.textContent=Jn,sa=t(),M(bs.$$.fragment),ea=t(),M(gs.$$.fragment),aa=t(),fs=j("p"),fs.innerHTML=un,na=t(),ks=j("ul"),ks.innerHTML=jn,la=t(),Cs=j("p"),Cs.innerHTML=yn,ta=t(),M(Is.$$.fragment),pa=t(),Z.c(),de=t(),$s=j("p"),$s.textContent=Tn,oa=t(),N.c(),me=t(),M(As.$$.fragment),ra=t(),Qs=j("p"),Qs.innerHTML=Un,ia=t(),M(zs.$$.fragment),ca=t(),M(vs.$$.fragment),Ma=t(),Zs=j("p"),Zs.innerHTML=xn,da=t(),Bs=j("p"),Bs.innerHTML=hn,ma=t(),_=j("div"),_.innerHTML=wn,Ja=t(),Ns=j("p"),Ns.innerHTML=bn,ua=t(),M(Es.$$.fragment),ja=t(),M(Ss.$$.fragment),ya=t(),Rs=j("p"),Rs.innerHTML=gn,Ta=t(),M(_s.$$.fragment),Ua=t(),M(Ws.$$.fragment),xa=t(),Hs=j("p"),Hs.innerHTML=fn,ha=t(),M(Vs.$$.fragment),wa=t(),Xs=j("p"),Xs.innerHTML=kn,ba=t(),M(Ds.$$.fragment),ga=t(),Gs=j("p"),Gs.textContent=Cn,fa=t(),M(qs.$$.fragment),ka=t(),M(Ys.$$.fragment),Ca=t(),Ls=j("p"),Ls.textContent=In,Ia=t(),M(Fs.$$.fragment),$a=t(),Ps=j("p"),Ps.innerHTML=$n,Aa=t(),Os=j("p"),Os.innerHTML=An,Qa=t(),M(Ks.$$.fragment),za=t(),M(se.$$.fragment),va=t(),ee=j("p"),ee.innerHTML=Qn,Za=t(),M(ae.$$.fragment),Ba=t(),ne=j("p"),ne.textContent=zn,Na=t(),M(le.$$.fragment),Ea=t(),te=j("p"),te.textContent=vn,Sa=t(),M(pe.$$.fragment),Ra=t(),ue=j("p"),this.h()},l(s){const e=On("svelte-u9bgzb",document.head);l=y(e,"META",{name:!0,content:!0}),e.forEach(a),u=p(s),o=y(s,"P",{}),Gn(o).forEach(a),T=p(s),d(k.$$.fragment,s),h=p(s),d(I.$$.fragment,s),A=p(s),C.l(s),f=p(s),$=y(s,"P",{"data-svelte-h":!0}),U($)!=="svelte-1jitoog"&&($.innerHTML=r),g=p(s),d(W.$$.fragment,s),je=p(s),H=y(s,"P",{"data-svelte-h":!0}),U(H)!=="svelte-15b804s"&&(H.innerHTML=Pa),ye=p(s),V=y(s,"TABLE",{"data-svelte-h":!0}),U(V)!=="svelte-1hjgutj"&&(V.innerHTML=Oa),Te=p(s),d(E.$$.fragment,s),Ue=p(s),d(X.$$.fragment,s),xe=p(s),d(D.$$.fragment,s),he=p(s),G=y(s,"P",{"data-svelte-h":!0}),U(G)!=="svelte-1viig75"&&(G.innerHTML=Ka),we=p(s),q=y(s,"P",{"data-svelte-h":!0}),U(q)!=="svelte-1u5skgk"&&(q.innerHTML=sn),be=p(s),Y=y(s,"P",{"data-svelte-h":!0}),U(Y)!=="svelte-1cd6wox"&&(Y.textContent=en),ge=p(s),d(L.$$.fragment,s),fe=p(s),F=y(s,"P",{"data-svelte-h":!0}),U(F)!=="svelte-1yguwg6"&&(F.innerHTML=an),ke=p(s),d(P.$$.fragment,s),Ce=p(s),O=y(s,"P",{"data-svelte-h":!0}),U(O)!=="svelte-ub2nw7"&&(O.innerHTML=nn),Ie=p(s),d(K.$$.fragment,s),$e=p(s),d(ss.$$.fragment,s),Ae=p(s),es=y(s,"P",{"data-svelte-h":!0}),U(es)!=="svelte-1f3fl4t"&&(es.innerHTML=ln),Qe=p(s),d(as.$$.fragment,s),ze=p(s),d(ns.$$.fragment,s),ve=p(s),ls=y(s,"P",{"data-svelte-h":!0}),U(ls)!=="svelte-1i4znjt"&&(ls.textContent=tn),Ze=p(s),d(ts.$$.fragment,s),Be=p(s),d(ps.$$.fragment,s),Ne=p(s),os=y(s,"P",{"data-svelte-h":!0}),U(os)!=="svelte-1u83blj"&&(os.innerHTML=pn),Ee=p(s),d(rs.$$.fragment,s),Se=p(s),d(is.$$.fragment,s),Re=p(s),cs=y(s,"P",{"data-svelte-h":!0}),U(cs)!=="svelte-1ros2ng"&&(cs.innerHTML=on),_e=p(s),d(S.$$.fragment,s),We=p(s),Ms=y(s,"P",{"data-svelte-h":!0}),U(Ms)!=="svelte-sxhiau"&&(Ms.innerHTML=rn),He=p(s),ds=y(s,"P",{"data-svelte-h":!0}),U(ds)!=="svelte-1kolpwn"&&(ds.innerHTML=cn),Ve=p(s),d(ms.$$.fragment,s),Xe=p(s),d(Js.$$.fragment,s),De=p(s),us=y(s,"P",{"data-svelte-h":!0}),U(us)!=="svelte-meb73i"&&(us.innerHTML=Mn),Ge=p(s),d(R.$$.fragment,s),qe=p(s),d(js.$$.fragment,s),Ye=p(s),ys=y(s,"P",{"data-svelte-h":!0}),U(ys)!=="svelte-mhniwm"&&(ys.innerHTML=dn),Le=p(s),z.l(s),Me=p(s),d(Ts.$$.fragment,s),Fe=p(s),Us=y(s,"P",{"data-svelte-h":!0}),U(Us)!=="svelte-168l0sz"&&(Us.innerHTML=mn),Pe=p(s),d(xs.$$.fragment,s),Oe=p(s),d(hs.$$.fragment,s),Ke=p(s),ws=y(s,"P",{"data-svelte-h":!0}),U(ws)!=="svelte-18rkslh"&&(ws.textContent=Jn),sa=p(s),d(bs.$$.fragment,s),ea=p(s),d(gs.$$.fragment,s),aa=p(s),fs=y(s,"P",{"data-svelte-h":!0}),U(fs)!=="svelte-t2vyer"&&(fs.innerHTML=un),na=p(s),ks=y(s,"UL",{"data-svelte-h":!0}),U(ks)!=="svelte-1uxjhjt"&&(ks.innerHTML=jn),la=p(s),Cs=y(s,"P",{"data-svelte-h":!0}),U(Cs)!=="svelte-s5kce"&&(Cs.innerHTML=yn),ta=p(s),d(Is.$$.fragment,s),pa=p(s),Z.l(s),de=p(s),$s=y(s,"P",{"data-svelte-h":!0}),U($s)!=="svelte-w2qpc1"&&($s.textContent=Tn),oa=p(s),N.l(s),me=p(s),d(As.$$.fragment,s),ra=p(s),Qs=y(s,"P",{"data-svelte-h":!0}),U(Qs)!=="svelte-1hnvqpu"&&(Qs.innerHTML=Un),ia=p(s),d(zs.$$.fragment,s),ca=p(s),d(vs.$$.fragment,s),Ma=p(s),Zs=y(s,"P",{"data-svelte-h":!0}),U(Zs)!=="svelte-1n2pna6"&&(Zs.innerHTML=xn),da=p(s),Bs=y(s,"P",{"data-svelte-h":!0}),U(Bs)!=="svelte-gaed70"&&(Bs.innerHTML=hn),ma=p(s),_=y(s,"DIV",{class:!0,"data-svelte-h":!0}),U(_)!=="svelte-qf2tof"&&(_.innerHTML=wn),Ja=p(s),Ns=y(s,"P",{"data-svelte-h":!0}),U(Ns)!=="svelte-1iiizsq"&&(Ns.innerHTML=bn),ua=p(s),d(Es.$$.fragment,s),ja=p(s),d(Ss.$$.fragment,s),ya=p(s),Rs=y(s,"P",{"data-svelte-h":!0}),U(Rs)!=="svelte-12aappk"&&(Rs.innerHTML=gn),Ta=p(s),d(_s.$$.fragment,s),Ua=p(s),d(Ws.$$.fragment,s),xa=p(s),Hs=y(s,"P",{"data-svelte-h":!0}),U(Hs)!=="svelte-1bd52yi"&&(Hs.innerHTML=fn),ha=p(s),d(Vs.$$.fragment,s),wa=p(s),Xs=y(s,"P",{"data-svelte-h":!0}),U(Xs)!=="svelte-1lq59os"&&(Xs.innerHTML=kn),ba=p(s),d(Ds.$$.fragment,s),ga=p(s),Gs=y(s,"P",{"data-svelte-h":!0}),U(Gs)!=="svelte-16rx5s6"&&(Gs.textContent=Cn),fa=p(s),d(qs.$$.fragment,s),ka=p(s),d(Ys.$$.fragment,s),Ca=p(s),Ls=y(s,"P",{"data-svelte-h":!0}),U(Ls)!=="svelte-qxmlm5"&&(Ls.textContent=In),Ia=p(s),d(Fs.$$.fragment,s),$a=p(s),Ps=y(s,"P",{"data-svelte-h":!0}),U(Ps)!=="svelte-15bf4s6"&&(Ps.innerHTML=$n),Aa=p(s),Os=y(s,"P",{"data-svelte-h":!0}),U(Os)!=="svelte-4ed2tv"&&(Os.innerHTML=An),Qa=p(s),d(Ks.$$.fragment,s),za=p(s),d(se.$$.fragment,s),va=p(s),ee=y(s,"P",{"data-svelte-h":!0}),U(ee)!=="svelte-1ap6akw"&&(ee.innerHTML=Qn),Za=p(s),d(ae.$$.fragment,s),Ba=p(s),ne=y(s,"P",{"data-svelte-h":!0}),U(ne)!=="svelte-ahkyax"&&(ne.textContent=zn),Na=p(s),d(le.$$.fragment,s),Ea=p(s),te=y(s,"P",{"data-svelte-h":!0}),U(te)!=="svelte-yul2c3"&&(te.textContent=vn),Sa=p(s),d(pe.$$.fragment,s),Ra=p(s),ue=y(s,"P",{}),Gn(ue).forEach(a),this.h()},h(){Ya(l,"name","hf:doc:metadata"),Ya(l,"content",Jl),Ya(_,"class","flex justify-center")},m(s,e){Kn(document.head,l),n(s,u,e),n(s,o,e),n(s,T,e),m(k,s,e),n(s,h,e),m(I,s,e),n(s,A,e),oe[w].m(s,e),n(s,f,e),n(s,$,e),n(s,g,e),m(W,s,e),n(s,je,e),n(s,H,e),n(s,ye,e),n(s,V,e),n(s,Te,e),m(E,s,e),n(s,Ue,e),m(X,s,e),n(s,xe,e),m(D,s,e),n(s,he,e),n(s,G,e),n(s,we,e),n(s,q,e),n(s,be,e),n(s,Y,e),n(s,ge,e),m(L,s,e),n(s,fe,e),n(s,F,e),n(s,ke,e),m(P,s,e),n(s,Ce,e),n(s,O,e),n(s,Ie,e),m(K,s,e),n(s,$e,e),m(ss,s,e),n(s,Ae,e),n(s,es,e),n(s,Qe,e),m(as,s,e),n(s,ze,e),m(ns,s,e),n(s,ve,e),n(s,ls,e),n(s,Ze,e),m(ts,s,e),n(s,Be,e),m(ps,s,e),n(s,Ne,e),n(s,os,e),n(s,Ee,e),m(rs,s,e),n(s,Se,e),m(is,s,e),n(s,Re,e),n(s,cs,e),n(s,_e,e),m(S,s,e),n(s,We,e),n(s,Ms,e),n(s,He,e),n(s,ds,e),n(s,Ve,e),m(ms,s,e),n(s,Xe,e),m(Js,s,e),n(s,De,e),n(s,us,e),n(s,Ge,e),m(R,s,e),n(s,qe,e),m(js,s,e),n(s,Ye,e),n(s,ys,e),n(s,Le,e),re[Q].m(s,e),n(s,Me,e),m(Ts,s,e),n(s,Fe,e),n(s,Us,e),n(s,Pe,e),m(xs,s,e),n(s,Oe,e),m(hs,s,e),n(s,Ke,e),n(s,ws,e),n(s,sa,e),m(bs,s,e),n(s,ea,e),m(gs,s,e),n(s,aa,e),n(s,fs,e),n(s,na,e),n(s,ks,e),n(s,la,e),n(s,Cs,e),n(s,ta,e),m(Is,s,e),n(s,pa,e),ie[v].m(s,e),n(s,de,e),n(s,$s,e),n(s,oa,e),ce[B].m(s,e),n(s,me,e),m(As,s,e),n(s,ra,e),n(s,Qs,e),n(s,ia,e),m(zs,s,e),n(s,ca,e),m(vs,s,e),n(s,Ma,e),n(s,Zs,e),n(s,da,e),n(s,Bs,e),n(s,ma,e),n(s,_,e),n(s,Ja,e),n(s,Ns,e),n(s,ua,e),m(Es,s,e),n(s,ja,e),m(Ss,s,e),n(s,ya,e),n(s,Rs,e),n(s,Ta,e),m(_s,s,e),n(s,Ua,e),m(Ws,s,e),n(s,xa,e),n(s,Hs,e),n(s,ha,e),m(Vs,s,e),n(s,wa,e),n(s,Xs,e),n(s,ba,e),m(Ds,s,e),n(s,ga,e),n(s,Gs,e),n(s,fa,e),m(qs,s,e),n(s,ka,e),m(Ys,s,e),n(s,Ca,e),n(s,Ls,e),n(s,Ia,e),m(Fs,s,e),n(s,$a,e),n(s,Ps,e),n(s,Aa,e),n(s,Os,e),n(s,Qa,e),m(Ks,s,e),n(s,za,e),m(se,s,e),n(s,va,e),n(s,ee,e),n(s,Za,e),m(ae,s,e),n(s,Ba,e),n(s,ne,e),n(s,Na,e),m(le,s,e),n(s,Ea,e),n(s,te,e),n(s,Sa,e),m(pe,s,e),n(s,Ra,e),n(s,ue,e),_a=!0},p(s,[e]){const Hn={};e&1&&(Hn.fw=s[0]),k.$set(Hn);let Xa=w;w=Bn(s),w!==Xa&&(Ha(),i(oe[Xa],1,1,()=>{oe[Xa]=null}),Wa(),C=oe[w],C||(C=oe[w]=Zn[w](s),C.c()),c(C,1),C.m(f.parentNode,f));const Vn={};e&2&&(Vn.$$scope={dirty:e,ctx:s}),E.$set(Vn);const Xn={};e&2&&(Xn.$$scope={dirty:e,ctx:s}),S.$set(Xn);const Dn={};e&2&&(Dn.$$scope={dirty:e,ctx:s}),R.$set(Dn);let Da=Q;Q=En(s),Q!==Da&&(Ha(),i(re[Da],1,1,()=>{re[Da]=null}),Wa(),z=re[Q],z||(z=re[Q]=Nn[Q](s),z.c()),c(z,1),z.m(Me.parentNode,Me));let Ga=v;v=Rn(s),v!==Ga&&(Ha(),i(ie[Ga],1,1,()=>{ie[Ga]=null}),Wa(),Z=ie[v],Z||(Z=ie[v]=Sn[v](s),Z.c()),c(Z,1),Z.m(de.parentNode,de));let qa=B;B=Wn(s),B!==qa&&(Ha(),i(ce[qa],1,1,()=>{ce[qa]=null}),Wa(),N=ce[B],N||(N=ce[B]=_n[B](s),N.c()),c(N,1),N.m(me.parentNode,me))},i(s){_a||(c(k.$$.fragment,s),c(I.$$.fragment,s),c(C),c(W.$$.fragment,s),c(E.$$.fragment,s),c(X.$$.fragment,s),c(D.$$.fragment,s),c(L.$$.fragment,s),c(P.$$.fragment,s),c(K.$$.fragment,s),c(ss.$$.fragment,s),c(as.$$.fragment,s),c(ns.$$.fragment,s),c(ts.$$.fragment,s),c(ps.$$.fragment,s),c(rs.$$.fragment,s),c(is.$$.fragment,s),c(S.$$.fragment,s),c(ms.$$.fragment,s),c(Js.$$.fragment,s),c(R.$$.fragment,s),c(js.$$.fragment,s),c(z),c(Ts.$$.fragment,s),c(xs.$$.fragment,s),c(hs.$$.fragment,s),c(bs.$$.fragment,s),c(gs.$$.fragment,s),c(Is.$$.fragment,s),c(Z),c(N),c(As.$$.fragment,s),c(zs.$$.fragment,s),c(vs.$$.fragment,s),c(Es.$$.fragment,s),c(Ss.$$.fragment,s),c(_s.$$.fragment,s),c(Ws.$$.fragment,s),c(Vs.$$.fragment,s),c(Ds.$$.fragment,s),c(qs.$$.fragment,s),c(Ys.$$.fragment,s),c(Fs.$$.fragment,s),c(Ks.$$.fragment,s),c(se.$$.fragment,s),c(ae.$$.fragment,s),c(le.$$.fragment,s),c(pe.$$.fragment,s),_a=!0)},o(s){i(k.$$.fragment,s),i(I.$$.fragment,s),i(C),i(W.$$.fragment,s),i(E.$$.fragment,s),i(X.$$.fragment,s),i(D.$$.fragment,s),i(L.$$.fragment,s),i(P.$$.fragment,s),i(K.$$.fragment,s),i(ss.$$.fragment,s),i(as.$$.fragment,s),i(ns.$$.fragment,s),i(ts.$$.fragment,s),i(ps.$$.fragment,s),i(rs.$$.fragment,s),i(is.$$.fragment,s),i(S.$$.fragment,s),i(ms.$$.fragment,s),i(Js.$$.fragment,s),i(R.$$.fragment,s),i(js.$$.fragment,s),i(z),i(Ts.$$.fragment,s),i(xs.$$.fragment,s),i(hs.$$.fragment,s),i(bs.$$.fragment,s),i(gs.$$.fragment,s),i(Is.$$.fragment,s),i(Z),i(N),i(As.$$.fragment,s),i(zs.$$.fragment,s),i(vs.$$.fragment,s),i(Es.$$.fragment,s),i(Ss.$$.fragment,s),i(_s.$$.fragment,s),i(Ws.$$.fragment,s),i(Vs.$$.fragment,s),i(Ds.$$.fragment,s),i(qs.$$.fragment,s),i(Ys.$$.fragment,s),i(Fs.$$.fragment,s),i(Ks.$$.fragment,s),i(se.$$.fragment,s),i(ae.$$.fragment,s),i(le.$$.fragment,s),i(pe.$$.fragment,s),_a=!1},d(s){s&&(a(u),a(o),a(T),a(h),a(A),a(f),a($),a(g),a(je),a(H),a(ye),a(V),a(Te),a(Ue),a(xe),a(he),a(G),a(we),a(q),a(be),a(Y),a(ge),a(fe),a(F),a(ke),a(Ce),a(O),a(Ie),a($e),a(Ae),a(es),a(Qe),a(ze),a(ve),a(ls),a(Ze),a(Be),a(Ne),a(os),a(Ee),a(Se),a(Re),a(cs),a(_e),a(We),a(Ms),a(He),a(ds),a(Ve),a(Xe),a(De),a(us),a(Ge),a(qe),a(Ye),a(ys),a(Le),a(Me),a(Fe),a(Us),a(Pe),a(Oe),a(Ke),a(ws),a(sa),a(ea),a(aa),a(fs),a(na),a(ks),a(la),a(Cs),a(ta),a(pa),a(de),a($s),a(oa),a(me),a(ra),a(Qs),a(ia),a(ca),a(Ma),a(Zs),a(da),a(Bs),a(ma),a(_),a(Ja),a(Ns),a(ua),a(ja),a(ya),a(Rs),a(Ta),a(Ua),a(xa),a(Hs),a(ha),a(wa),a(Xs),a(ba),a(ga),a(Gs),a(fa),a(ka),a(Ca),a(Ls),a(Ia),a($a),a(Ps),a(Aa),a(Os),a(Qa),a(za),a(va),a(ee),a(Za),a(Ba),a(ne),a(Na),a(Ea),a(te),a(Sa),a(Ra),a(ue)),a(l),J(k,s),J(I,s),oe[w].d(s),J(W,s),J(E,s),J(X,s),J(D,s),J(L,s),J(P,s),J(K,s),J(ss,s),J(as,s),J(ns,s),J(ts,s),J(ps,s),J(rs,s),J(is,s),J(S,s),J(ms,s),J(Js,s),J(R,s),J(js,s),re[Q].d(s),J(Ts,s),J(xs,s),J(hs,s),J(bs,s),J(gs,s),J(Is,s),ie[v].d(s),ce[B].d(s),J(As,s),J(zs,s),J(vs,s),J(Es,s),J(Ss,s),J(_s,s),J(Ws,s),J(Vs,s),J(Ds,s),J(qs,s),J(Ys,s),J(Fs,s),J(Ks,s),J(se,s),J(ae,s),J(le,s),J(pe,s)}}}const Jl='{"title":"Os poderes especiais dos tokenizadores rápidos","local":"os-poderes-especiais-dos-tokenizadores-rápidos","sections":[{"title":"Codificação em lote","local":"codificação-em-lote","sections":[],"depth":2},{"title":"Dentro do pipeline token-classification","local":"dentro-do-pipeline-token-classification","sections":[{"title":"Obtendo os resultados básicos com o pipeline","local":"obtendo-os-resultados-básicos-com-o-pipeline","sections":[],"depth":3},{"title":"Das entradas às previsões","local":"das-entradas-às-previsões","sections":[],"depth":3},{"title":"Agrupando entidades","local":"agrupando-entidades","sections":[],"depth":3}],"depth":2}],"depth":1}';function ul(b,l,u){let o="pt";return Ln(()=>{const T=new URLSearchParams(window.location.search);u(0,o=T.get("fw")||"pt")}),[o]}class gl extends Fn{constructor(l){super(),Pn(this,l,ul,ml,Yn,{})}}export{gl as component};

Xet Storage Details

Size:
82 kB
·
Xet hash:
be8e89eba955336fdb83df244f792706b47799a27e982f4534b8d23f1eea5339

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.