Buckets:

rtrm's picture
download
raw
23.8 kB
/*
 * Compiled, minified page module for the Portuguese course page "Colocando tudo junto".
 * The `rs` component at the end of the page receives the source URL
 * https://github.com/huggingface/course/blob/main/chapters/pt/chapter2/6.mdx.
 * NOTE(review): the `svelte-...` hydration markers suggest this is Svelte compiler output;
 * if so, change the .mdx source and rebuild instead of editing this file by hand.
 *
 * Import aliases used below:
 *   ss = CourseFloatingBanner, j = CodeBlock, ps = FrameworkSwitchCourse,
 *   ve / rs = exports H / E of the getInferenceSnippets chunk
 *   (ve is constructed with title/local/headingTag props, rs with a `source` prop).
 *
 * Every fragment factory returns an object with the methods c, l, m, i, o, d (ds also has
 * h and p) - presumably create / claim / mount / intro / outro / destroy / hydrate / update;
 * confirm against the framework runtime in ../chunks/index.
 *
 *   cs(w)  - banner (ss) for chapter 2 with notebook links to section6_tf.ipynb.
 *   ms(w)  - same banner with links to section6_pt.ipynb.
 *   us(w)  - code block (j) holding the TensorFlow sample. `highlighted` is the HTML markup;
 *            `code` looks like base64 of the percent-encoded plain text.
 *   Ms(w)  - code block (j) holding the PyTorch sample.
 *   ds(w)  - the page fragment: a <meta name="hf:doc:metadata"> element appended to
 *            document.head, the framework switch (fw = w[0]), headings, paragraphs and
 *            code samples. Two branch pairs depend on w[0]:
 *              He picks from Fe = [ms, cs]  (index 0 when w[0] === "pt", otherwise 1)
 *              Ae picks from De = [Ms, us]  (same rule)
 *            p() forwards a changed w[0] to the switch as `fw` and swaps either branch
 *            when its index changes.
 *   bs     - JSON string (title, local anchor, sections) used as the meta tag's content.
 *   Js(w, n, p) - instance function: starts with t = "pt"; the callback registered through
 *            `ts` reads the `fw` query parameter from window.location.search and stores it
 *            in slot 0, falling back to "pt" when it is missing or empty. Returns [t].
 *   Is     - component class wired with Js and ds; exported under the name `component`.
 *
 * NOTE(review): in this copy the double-quoted strings `Se` and `Ye` inside ds span a
 * physical line break. A raw line feed is not allowed inside a double-quoted literal
 * (U+2028 / U+2029 are), so check the shipped file before relying on this copy parsing.
 */
import{s as ls,o as ts}from"../chunks/scheduler.37c15a92.js";import{S as as,i as ns,g as b,s as o,r as m,A as os,h as J,f as l,c as i,j as Le,u,x as T,k as Ke,y as is,a,v as M,t as r,b as Oe,d as c,w as d,p as es}from"../chunks/index.2bf4358c.js";import{C as j}from"../chunks/CodeBlock.4e987730.js";import{C as ss}from"../chunks/CourseFloatingBanner.6add7356.js";import{F as ps}from"../chunks/FrameworkSwitchCourse.8d4d4ab6.js";import{H as ve,E as rs}from"../chunks/getInferenceSnippets.24b50994.js";function cs(w){let n,p;return n=new ss({props:{chapter:2,classNames:"absolute z-10 right-0 top-0",notebooks:[{label:"Google Colab",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/pt/chapter2/section6_tf.ipynb"},{label:"Aws Studio",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/pt/chapter2/section6_tf.ipynb"}]}}),{c(){m(n.$$.fragment)},l(t){u(n.$$.fragment,t)},m(t,y){M(n,t,y),p=!0},i(t){p||(c(n.$$.fragment,t),p=!0)},o(t){r(n.$$.fragment,t),p=!1},d(t){d(n,t)}}}function ms(w){let n,p;return n=new ss({props:{chapter:2,classNames:"absolute z-10 right-0 top-0",notebooks:[{label:"Google Colab",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/pt/chapter2/section6_pt.ipynb"},{label:"Aws Studio",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/pt/chapter2/section6_pt.ipynb"}]}}),{c(){m(n.$$.fragment)},l(t){u(n.$$.fragment,t)},m(t,y){M(n,t,y),p=!0},i(t){p||(c(n.$$.fragment,t),p=!0)},o(t){r(n.$$.fragment,t),p=!1},d(t){d(n,t)}}}function us(w){let n,p;return n=new 
j({props:{code:"aW1wb3J0JTIwdGVuc29yZmxvdyUyMGFzJTIwdGYlMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b1Rva2VuaXplciUyQyUyMFRGQXV0b01vZGVsRm9yU2VxdWVuY2VDbGFzc2lmaWNhdGlvbiUwQSUwQWNoZWNrcG9pbnQlMjAlM0QlMjAlMjJkaXN0aWxiZXJ0LWJhc2UtdW5jYXNlZC1maW5ldHVuZWQtc3N0LTItZW5nbGlzaCUyMiUwQXRva2VuaXplciUyMCUzRCUyMEF1dG9Ub2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKGNoZWNrcG9pbnQpJTBBbW9kZWwlMjAlM0QlMjBURkF1dG9Nb2RlbEZvclNlcXVlbmNlQ2xhc3NpZmljYXRpb24uZnJvbV9wcmV0cmFpbmVkKGNoZWNrcG9pbnQpJTBBc2VxdWVuY2VzJTIwJTNEJTIwJTVCJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTJDJTIwJTIyU28lMjBoYXZlJTIwSSElMjIlNUQlMEElMEF0b2tlbnMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzJTJDJTIwcGFkZGluZyUzRFRydWUlMkMlMjB0cnVuY2F0aW9uJTNEVHJ1ZSUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIydGYlMjIpJTBBb3V0cHV0JTIwJTNEJTIwbW9kZWwoKip0b2tlbnMp",highlighted:`<span class="hljs-keyword">import</span> tensorflow <span class="hljs-keyword">as</span> tf
<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, TFAutoModelForSequenceClassification
checkpoint = <span class="hljs-string">&quot;distilbert-base-uncased-finetuned-sst-2-english&quot;</span>
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
sequences = [<span class="hljs-string">&quot;I&#x27;ve been waiting for a HuggingFace course my whole life.&quot;</span>, <span class="hljs-string">&quot;So have I!&quot;</span>]
tokens = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">&quot;tf&quot;</span>)
output = model(**tokens)`,wrap:!1}}),{c(){m(n.$$.fragment)},l(t){u(n.$$.fragment,t)},m(t,y){M(n,t,y),p=!0},i(t){p||(c(n.$$.fragment,t),p=!0)},o(t){r(n.$$.fragment,t),p=!1},d(t){d(n,t)}}}function Ms(w){let n,p;return n=new j({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b1Rva2VuaXplciUyQyUyMEF1dG9Nb2RlbEZvclNlcXVlbmNlQ2xhc3NpZmljYXRpb24lMEElMEFjaGVja3BvaW50JTIwJTNEJTIwJTIyZGlzdGlsYmVydC1iYXNlLXVuY2FzZWQtZmluZXR1bmVkLXNzdC0yLWVuZ2xpc2glMjIlMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZChjaGVja3BvaW50KSUwQW1vZGVsJTIwJTNEJTIwQXV0b01vZGVsRm9yU2VxdWVuY2VDbGFzc2lmaWNhdGlvbi5mcm9tX3ByZXRyYWluZWQoY2hlY2twb2ludCklMEFzZXF1ZW5jZXMlMjAlM0QlMjAlNUIlMjJJJ3ZlJTIwYmVlbiUyMHdhaXRpbmclMjBmb3IlMjBhJTIwSHVnZ2luZ0ZhY2UlMjBjb3Vyc2UlMjBteSUyMHdob2xlJTIwbGlmZS4lMjIlMkMlMjAlMjJTbyUyMGhhdmUlMjBJISUyMiU1RCUwQSUwQXRva2VucyUyMCUzRCUyMHRva2VuaXplcihzZXF1ZW5jZXMlMkMlMjBwYWRkaW5nJTNEVHJ1ZSUyQyUyMHRydW5jYXRpb24lM0RUcnVlJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJwdCUyMiklMEFvdXRwdXQlMjAlM0QlMjBtb2RlbCgqKnRva2Vucyk=",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, AutoModelForSequenceClassification
checkpoint = <span class="hljs-string">&quot;distilbert-base-uncased-finetuned-sst-2-english&quot;</span>
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
sequences = [<span class="hljs-string">&quot;I&#x27;ve been waiting for a HuggingFace course my whole life.&quot;</span>, <span class="hljs-string">&quot;So have I!&quot;</span>]
tokens = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)
output = model(**tokens)`,wrap:!1}}),{c(){m(n.$$.fragment)},l(t){u(n.$$.fragment,t)},m(t,y){M(n,t,y),p=!0},i(t){p||(c(n.$$.fragment,t),p=!0)},o(t){r(n.$$.fragment,t),p=!1},d(t){d(n,t)}}}function ds(w){let n,p,t,y,Z,te,V,ae,f,h,ee,k,qe="Nas últimas seções, temos feito o nosso melhor para fazer a maior parte do trabalho à mão. Exploramos como funcionam os tokenizers e analisamos a tokenização, conversão para IDs de entrada, padding, truncagem e máscaras de atenção.",ne,g,Ge="Entretanto, como vimos na seção 2, a API dos 🤗 Transformers pode tratar de tudo isso para nós com uma função de alto nível, na qual mergulharemos aqui. Quando você chama seu <code>tokenizer</code> diretamente na frase, você recebe de volta entradas que estão prontas para passar pelo seu modelo:",oe,$,ie,W,Xe="Aqui, a variável <code>model_inputs</code> contém tudo o que é necessário para que um modelo funcione bem. Para DistilBERT, isso inclui os IDs de entrada, bem como a máscara de atenção. Outros modelos que aceitam entradas adicionais também terão essas saídas pelo objeto <code>tokenizer</code>.",pe,B,Ee="Como veremos em alguns exemplos abaixo, este método é muito poderoso. Primeiro, ele pode simbolizar uma única sequência:",re,z,ce,v,Ne="Também lida com várias sequências de cada vez, sem nenhuma mudança na API:",me,q,ue,G,xe="Ela pode ser aplicada de acordo com vários objetivos:",Me,X,de,E,_e="Também pode truncar sequências:",be,N,Je,x,Se="O objeto <code>tokenizer</code> pode lidar com a conversão para tensores de estrutura específicos, que podem então ser enviados diretamente para o modelo. 
Por exemplo, na seguinte amostra de código, estamos solicitando que o tokenizer retorne tensores de diferentes estruturas - <code>&quot;pt&quot;</code> retorna tensores PyTorch, <code>&quot;tf&quot;</code> retorna tensores TensorFlow, e <code>&quot;np&quot;</code> retorna arrays NumPy:",ye,_,Te,S,je,Q,Qe="Se dermos uma olhada nos IDs de entrada devolvidos pelo tokenizer, veremos que eles são um pouco diferentes do que tínhamos anteriormente:",we,R,fe,Y,he,C,Re="Um token ID foi adicionada no início e uma no final. Vamos decodificar as duas sequências de IDs acima para ver do que se trata:",Ie,F,Ue,H,Ze,D,Ye="O tokenizer acrescentou a palavra especial <code>[CLS]</code> no início e a palavra especial <code>[SEP]</code> no final. Isto porque o modelo foi pré-treinado com esses, então para obter os mesmos resultados para inferência, precisamos adicioná-los também. Note que alguns modelos não acrescentam palavras especiais, ou acrescentam palavras diferentes; os modelos também podem acrescentar estas palavras especiais apenas no início, ou apenas no final. 
Em qualquer caso, o tokenizer sabe quais são as palavras que são esperadas e tratará disso para você.",Ve,A,ke,P,Ce="Agora que já vimos todos os passos individuais que o objeto <code>tokenizer</code> utiliza quando aplicado em textos, vamos ver uma última vez como ele pode lidar com múltiplas sequências (padding!), sequências muito longas (truncagem!), e múltiplos tipos de tensores com seu API principal:",ge,I,U,se,L,$e,le,We;Z=new ps({props:{fw:w[0]}}),V=new ve({props:{title:"Colocando tudo junto",local:"colocando-tudo-junto",headingTag:"h1"}});const Fe=[ms,cs],K=[];function He(e,s){return e[0]==="pt"?0:1}f=He(w),h=K[f]=Fe[f](w),$=new j({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMEElMEFjaGVja3BvaW50JTIwJTNEJTIwJTIyZGlzdGlsYmVydC1iYXNlLXVuY2FzZWQtZmluZXR1bmVkLXNzdC0yLWVuZ2xpc2glMjIlMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZChjaGVja3BvaW50KSUwQSUwQXNlcXVlbmNlJTIwJTNEJTIwJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTBBJTBBbW9kZWxfaW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyKHNlcXVlbmNlKQ==",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer
checkpoint = <span class="hljs-string">&quot;distilbert-base-uncased-finetuned-sst-2-english&quot;</span>
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
sequence = <span class="hljs-string">&quot;I&#x27;ve been waiting for a HuggingFace course my whole life.&quot;</span>
model_inputs = tokenizer(sequence)`,wrap:!1}}),z=new j({props:{code:"c2VxdWVuY2UlMjAlM0QlMjAlMjJJJ3ZlJTIwYmVlbiUyMHdhaXRpbmclMjBmb3IlMjBhJTIwSHVnZ2luZ0ZhY2UlMjBjb3Vyc2UlMjBteSUyMHdob2xlJTIwbGlmZS4lMjIlMEElMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2Up",highlighted:`sequence = <span class="hljs-string">&quot;I&#x27;ve been waiting for a HuggingFace course my whole life.&quot;</span>
model_inputs = tokenizer(sequence)`,wrap:!1}}),q=new j({props:{code:"c2VxdWVuY2VzJTIwJTNEJTIwJTVCJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTJDJTIwJTIyU28lMjBoYXZlJTIwSSElMjIlNUQlMEElMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzKQ==",highlighted:`sequences = [<span class="hljs-string">&quot;I&#x27;ve been waiting for a HuggingFace course my whole life.&quot;</span>, <span class="hljs-string">&quot;So have I!&quot;</span>]
model_inputs = tokenizer(sequences)`,wrap:!1}}),X=new j({props:{code:"JTIzJTIwSXIlQzMlQTElMjBwcmVlbmNoZXIlMjBhcyUyMHNlcXUlQzMlQUFuY2lhcyUyMGF0JUMzJUE5JTIwbyUyMGNvbXByaW1lbnRvJTIwbSVDMyVBMXhpbW8lMjBkYSUyMHNlcXUlQzMlQUFuY2lhJTBBbW9kZWxfaW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyKHNlcXVlbmNlcyUyQyUyMHBhZGRpbmclM0QlMjJsb25nZXN0JTIyKSUwQSUwQSUyMyUyMElyJUMzJUExJTIwcHJlZW5jaGVyJTIwYXMlMjBzZXF1JUMzJUFBbmNpYXMlMjBhdCVDMyVBOSUyMG8lMjBjb21wcmltZW50byUyMG0lQzMlQTF4aW1vJTIwZG8lMjBtb2RlbG8lMEElMjMlMjAoNTEyJTIwcGFyYSUyMG8lMjBtb2RlbG8lMjBCRVJUJTIwb3UlMjBEaXN0aWxCRVJUKSUwQW1vZGVsX2lucHV0cyUyMCUzRCUyMHRva2VuaXplcihzZXF1ZW5jZXMlMkMlMjBwYWRkaW5nJTNEJTIybWF4X2xlbmd0aCUyMiklMEElMEElMjMlMjBJciVDMyVBMSUyMHByZWVuY2hlciUyMGFzJTIwc2VxdSVDMyVBQW5jaWFzJTIwYXQlQzMlQTklMjBvJTIwY29tcHJpbWVudG8lMjBtJUMzJUExeGltbyUyMGVzcGVjaWZpY2FkbyUwQW1vZGVsX2lucHV0cyUyMCUzRCUyMHRva2VuaXplcihzZXF1ZW5jZXMlMkMlMjBwYWRkaW5nJTNEJTIybWF4X2xlbmd0aCUyMiUyQyUyMG1heF9sZW5ndGglM0Q4KQ==",highlighted:`<span class="hljs-comment"># Irá preencher as sequências até o comprimento máximo da sequência</span>
model_inputs = tokenizer(sequences, padding=<span class="hljs-string">&quot;longest&quot;</span>)
<span class="hljs-comment"># Irá preencher as sequências até o comprimento máximo do modelo</span>
<span class="hljs-comment"># (512 para o modelo BERT ou DistilBERT)</span>
model_inputs = tokenizer(sequences, padding=<span class="hljs-string">&quot;max_length&quot;</span>)
<span class="hljs-comment"># Irá preencher as sequências até o comprimento máximo especificado</span>
model_inputs = tokenizer(sequences, padding=<span class="hljs-string">&quot;max_length&quot;</span>, max_length=<span class="hljs-number">8</span>)`,wrap:!1}}),N=new j({props:{code:"c2VxdWVuY2VzJTIwJTNEJTIwJTVCJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTJDJTIwJTIyU28lMjBoYXZlJTIwSSElMjIlNUQlMEElMEElMjMlMjBJciVDMyVBMSUyMHByZWVuY2hlciUyMGFzJTIwc2VxdSVDMyVBQW5jaWFzJTIwYXQlQzMlQTklMjBvJTIwY29tcHJpbWVudG8lMjBtJUMzJUExeGltbyUyMGRvJTIwbW9kZWxvJTBBJTIzJTIwKDUxMiUyMHBhcmElMjBvJTIwbW9kZWxvJTIwQkVSVCUyMG91JTIwRGlzdGlsQkVSVCklMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzJTJDJTIwdHJ1bmNhdGlvbiUzRFRydWUpJTBBJTBBJTIzJTIwVHJ1bmNhciVDMyVBMSUyMGFzJTIwc2VxdSVDMyVBQW5jaWFzJTIwcXVlJTIwcyVDMyVBM28lMjBtYWlzJTIwbG9uZ2FzJTIwZG8lMjBxdWUlMjBvJTIwY29tcHJpbWVudG8lMjBtJUMzJUExeGltbyUyMGVzcGVjaWZpY2FkbyUwQW1vZGVsX2lucHV0cyUyMCUzRCUyMHRva2VuaXplcihzZXF1ZW5jZXMlMkMlMjBtYXhfbGVuZ3RoJTNEOCUyQyUyMHRydW5jYXRpb24lM0RUcnVlKQ==",highlighted:`sequences = [<span class="hljs-string">&quot;I&#x27;ve been waiting for a HuggingFace course my whole life.&quot;</span>, <span class="hljs-string">&quot;So have I!&quot;</span>]
<span class="hljs-comment"># Irá preencher as sequências até o comprimento máximo do modelo</span>
<span class="hljs-comment"># (512 para o modelo BERT ou DistilBERT)</span>
model_inputs = tokenizer(sequences, truncation=<span class="hljs-literal">True</span>)
<span class="hljs-comment"># Truncará as sequências que são mais longas do que o comprimento máximo especificado</span>
model_inputs = tokenizer(sequences, max_length=<span class="hljs-number">8</span>, truncation=<span class="hljs-literal">True</span>)`,wrap:!1}}),_=new j({props:{code:"c2VxdWVuY2VzJTIwJTNEJTIwJTVCJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTJDJTIwJTIyU28lMjBoYXZlJTIwSSElMjIlNUQlMEElMEElMjMlMjBSZXRvcm5hJTIwdGVuc29yZXMlMjBQeVRvcmNoJTBBbW9kZWxfaW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyKHNlcXVlbmNlcyUyQyUyMHBhZGRpbmclM0RUcnVlJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJwdCUyMiklMEElMEElMjMlMjBSZXRvcm5hJTIwdGVuc29yZXMlMjBUZW5zb3JGbG93JTBBbW9kZWxfaW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyKHNlcXVlbmNlcyUyQyUyMHBhZGRpbmclM0RUcnVlJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJ0ZiUyMiklMEElMEElMjMlMjBSZXRvcm5hJTIwTnVtUHklMjBhcnJheXMlMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzJTJDJTIwcGFkZGluZyUzRFRydWUlMkMlMjByZXR1cm5fdGVuc29ycyUzRCUyMm5wJTIyKQ==",highlighted:`sequences = [<span class="hljs-string">&quot;I&#x27;ve been waiting for a HuggingFace course my whole life.&quot;</span>, <span class="hljs-string">&quot;So have I!&quot;</span>]
<span class="hljs-comment"># Retorna tensores PyTorch</span>
model_inputs = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)
<span class="hljs-comment"># Retorna tensores TensorFlow</span>
model_inputs = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">&quot;tf&quot;</span>)
<span class="hljs-comment"># Retorna NumPy arrays</span>
model_inputs = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">&quot;np&quot;</span>)`,wrap:!1}}),S=new ve({props:{title:"Tokens especiais",local:"tokens-especiais",headingTag:"h2"}}),R=new j({props:{code:"c2VxdWVuY2UlMjAlM0QlMjAlMjJJJ3ZlJTIwYmVlbiUyMHdhaXRpbmclMjBmb3IlMjBhJTIwSHVnZ2luZ0ZhY2UlMjBjb3Vyc2UlMjBteSUyMHdob2xlJTIwbGlmZS4lMjIlMEElMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2UpJTBBcHJpbnQobW9kZWxfaW5wdXRzJTVCJTIyaW5wdXRfaWRzJTIyJTVEKSUwQSUwQXRva2VucyUyMCUzRCUyMHRva2VuaXplci50b2tlbml6ZShzZXF1ZW5jZSklMEFpZHMlMjAlM0QlMjB0b2tlbml6ZXIuY29udmVydF90b2tlbnNfdG9faWRzKHRva2VucyklMEFwcmludChpZHMp",highlighted:`sequence = <span class="hljs-string">&quot;I&#x27;ve been waiting for a HuggingFace course my whole life.&quot;</span>
model_inputs = tokenizer(sequence)
<span class="hljs-built_in">print</span>(model_inputs[<span class="hljs-string">&quot;input_ids&quot;</span>])
tokens = tokenizer.tokenize(sequence)
ids = tokenizer.convert_tokens_to_ids(tokens)
<span class="hljs-built_in">print</span>(ids)`,wrap:!1}}),Y=new j({props:{code:"JTVCMTAxJTJDJTIwMTA0NSUyQyUyMDEwMDUlMkMlMjAyMzEwJTJDJTIwMjA0MiUyQyUyMDM0MDMlMkMlMjAyMDA1JTJDJTIwMTAzNyUyQyUyMDE3NjYyJTJDJTIwMTIxNzIlMkMlMjAyNjA3JTJDJTIwMjAyNiUyQyUyMDI4NzglMkMlMjAyMTY2JTJDJTIwMTAxMiUyQyUyMDEwMiU1RCUwQSU1QjEwNDUlMkMlMjAxMDA1JTJDJTIwMjMxMCUyQyUyMDIwNDIlMkMlMjAzNDAzJTJDJTIwMjAwNSUyQyUyMDEwMzclMkMlMjAxNzY2MiUyQyUyMDEyMTcyJTJDJTIwMjYwNyUyQyUyMDIwMjYlMkMlMjAyODc4JTJDJTIwMjE2NiUyQyUyMDEwMTIlNUQ=",highlighted:`[<span class="hljs-number">101</span>, <span class="hljs-number">1045</span>, <span class="hljs-number">1005</span>, <span class="hljs-number">2310</span>, <span class="hljs-number">2042</span>, <span class="hljs-number">3403</span>, <span class="hljs-number">2005</span>, <span class="hljs-number">1037</span>, <span class="hljs-number">17662</span>, <span class="hljs-number">12172</span>, <span class="hljs-number">2607</span>, <span class="hljs-number">2026</span>, <span class="hljs-number">2878</span>, <span class="hljs-number">2166</span>, <span class="hljs-number">1012</span>, <span class="hljs-number">102</span>]
[<span class="hljs-number">1045</span>, <span class="hljs-number">1005</span>, <span class="hljs-number">2310</span>, <span class="hljs-number">2042</span>, <span class="hljs-number">3403</span>, <span class="hljs-number">2005</span>, <span class="hljs-number">1037</span>, <span class="hljs-number">17662</span>, <span class="hljs-number">12172</span>, <span class="hljs-number">2607</span>, <span class="hljs-number">2026</span>, <span class="hljs-number">2878</span>, <span class="hljs-number">2166</span>, <span class="hljs-number">1012</span>]`,wrap:!1}}),F=new j({props:{code:"cHJpbnQodG9rZW5pemVyLmRlY29kZShtb2RlbF9pbnB1dHMlNUIlMjJpbnB1dF9pZHMlMjIlNUQpKSUwQXByaW50KHRva2VuaXplci5kZWNvZGUoaWRzKSk=",highlighted:`<span class="hljs-built_in">print</span>(tokenizer.decode(model_inputs[<span class="hljs-string">&quot;input_ids&quot;</span>]))
<span class="hljs-built_in">print</span>(tokenizer.decode(ids))`,wrap:!1}}),H=new j({props:{code:"JTIyJTVCQ0xTJTVEJTIwaSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMGh1Z2dpbmdmYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIwJTVCU0VQJTVEJTIyJTBBJTIyaSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMGh1Z2dpbmdmYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIy",highlighted:`<span class="hljs-string">&quot;[CLS] i&#x27;ve been waiting for a huggingface course my whole life. [SEP]&quot;</span>
<span class="hljs-string">&quot;i&#x27;ve been waiting for a huggingface course my whole life.&quot;</span>`,wrap:!1}}),A=new ve({props:{title:"Do tokenizer ao modelo",local:"do-tokenizer-ao-modelo",headingTag:"h2"}});const De=[Ms,us],O=[];function Ae(e,s){return e[0]==="pt"?0:1}return I=Ae(w),U=O[I]=De[I](w),L=new rs({props:{source:"https://github.com/huggingface/course/blob/main/chapters/pt/chapter2/6.mdx"}}),{c(){n=b("meta"),p=o(),t=b("p"),y=o(),m(Z.$$.fragment),te=o(),m(V.$$.fragment),ae=o(),h.c(),ee=o(),k=b("p"),k.textContent=qe,ne=o(),g=b("p"),g.innerHTML=Ge,oe=o(),m($.$$.fragment),ie=o(),W=b("p"),W.innerHTML=Xe,pe=o(),B=b("p"),B.textContent=Ee,re=o(),m(z.$$.fragment),ce=o(),v=b("p"),v.textContent=Ne,me=o(),m(q.$$.fragment),ue=o(),G=b("p"),G.textContent=xe,Me=o(),m(X.$$.fragment),de=o(),E=b("p"),E.textContent=_e,be=o(),m(N.$$.fragment),Je=o(),x=b("p"),x.innerHTML=Se,ye=o(),m(_.$$.fragment),Te=o(),m(S.$$.fragment),je=o(),Q=b("p"),Q.textContent=Qe,we=o(),m(R.$$.fragment),fe=o(),m(Y.$$.fragment),he=o(),C=b("p"),C.textContent=Re,Ie=o(),m(F.$$.fragment),Ue=o(),m(H.$$.fragment),Ze=o(),D=b("p"),D.innerHTML=Ye,Ve=o(),m(A.$$.fragment),ke=o(),P=b("p"),P.innerHTML=Ce,ge=o(),U.c(),se=o(),m(L.$$.fragment),$e=o(),le=b("p"),this.h()},l(e){const 
s=os("svelte-u9bgzb",document.head);n=J(s,"META",{name:!0,content:!0}),s.forEach(l),p=i(e),t=J(e,"P",{}),Le(t).forEach(l),y=i(e),u(Z.$$.fragment,e),te=i(e),u(V.$$.fragment,e),ae=i(e),h.l(e),ee=i(e),k=J(e,"P",{"data-svelte-h":!0}),T(k)!=="svelte-xmqek0"&&(k.textContent=qe),ne=i(e),g=J(e,"P",{"data-svelte-h":!0}),T(g)!=="svelte-562dly"&&(g.innerHTML=Ge),oe=i(e),u($.$$.fragment,e),ie=i(e),W=J(e,"P",{"data-svelte-h":!0}),T(W)!=="svelte-2razkz"&&(W.innerHTML=Xe),pe=i(e),B=J(e,"P",{"data-svelte-h":!0}),T(B)!=="svelte-cyywnn"&&(B.textContent=Ee),re=i(e),u(z.$$.fragment,e),ce=i(e),v=J(e,"P",{"data-svelte-h":!0}),T(v)!=="svelte-19p2zjb"&&(v.textContent=Ne),me=i(e),u(q.$$.fragment,e),ue=i(e),G=J(e,"P",{"data-svelte-h":!0}),T(G)!=="svelte-11z2iie"&&(G.textContent=xe),Me=i(e),u(X.$$.fragment,e),de=i(e),E=J(e,"P",{"data-svelte-h":!0}),T(E)!=="svelte-1ipipqt"&&(E.textContent=_e),be=i(e),u(N.$$.fragment,e),Je=i(e),x=J(e,"P",{"data-svelte-h":!0}),T(x)!=="svelte-1ycw8i5"&&(x.innerHTML=Se),ye=i(e),u(_.$$.fragment,e),Te=i(e),u(S.$$.fragment,e),je=i(e),Q=J(e,"P",{"data-svelte-h":!0}),T(Q)!=="svelte-vd6zjh"&&(Q.textContent=Qe),we=i(e),u(R.$$.fragment,e),fe=i(e),u(Y.$$.fragment,e),he=i(e),C=J(e,"P",{"data-svelte-h":!0}),T(C)!=="svelte-bf4s2h"&&(C.textContent=Re),Ie=i(e),u(F.$$.fragment,e),Ue=i(e),u(H.$$.fragment,e),Ze=i(e),D=J(e,"P",{"data-svelte-h":!0}),T(D)!=="svelte-n5lvwy"&&(D.innerHTML=Ye),Ve=i(e),u(A.$$.fragment,e),ke=i(e),P=J(e,"P",{"data-svelte-h":!0}),T(P)!=="svelte-1yrr9no"&&(P.innerHTML=Ce),ge=i(e),U.l(e),se=i(e),u(L.$$.fragment,e),$e=i(e),le=J(e,"P",{}),Le(le).forEach(l),this.h()},h(){Ke(n,"name","hf:doc:metadata"),Ke(n,"content",bs)},m(e,s){is(document.head,n),a(e,p,s),a(e,t,s),a(e,y,s),M(Z,e,s),a(e,te,s),M(V,e,s),a(e,ae,s),K[f].m(e,s),a(e,ee,s),a(e,k,s),a(e,ne,s),a(e,g,s),a(e,oe,s),M($,e,s),a(e,ie,s),a(e,W,s),a(e,pe,s),a(e,B,s),a(e,re,s),M(z,e,s),a(e,ce,s),a(e,v,s),a(e,me,s),M(q,e,s),a(e,ue,s),a(e,G,s),a(e,Me,s),M(X,e,s),a(e,de,s),a(e,E,s),a(e,be,s),M(N,e,s),a(e,Je,s),a(e,x
,s),a(e,ye,s),M(_,e,s),a(e,Te,s),M(S,e,s),a(e,je,s),a(e,Q,s),a(e,we,s),M(R,e,s),a(e,fe,s),M(Y,e,s),a(e,he,s),a(e,C,s),a(e,Ie,s),M(F,e,s),a(e,Ue,s),M(H,e,s),a(e,Ze,s),a(e,D,s),a(e,Ve,s),M(A,e,s),a(e,ke,s),a(e,P,s),a(e,ge,s),O[I].m(e,s),a(e,se,s),M(L,e,s),a(e,$e,s),a(e,le,s),We=!0},p(e,[s]){const Pe={};s&1&&(Pe.fw=e[0]),Z.$set(Pe);let Be=f;f=He(e),f!==Be&&(es(),r(K[Be],1,1,()=>{K[Be]=null}),Oe(),h=K[f],h||(h=K[f]=Fe[f](e),h.c()),c(h,1),h.m(ee.parentNode,ee));let ze=I;I=Ae(e),I!==ze&&(es(),r(O[ze],1,1,()=>{O[ze]=null}),Oe(),U=O[I],U||(U=O[I]=De[I](e),U.c()),c(U,1),U.m(se.parentNode,se))},i(e){We||(c(Z.$$.fragment,e),c(V.$$.fragment,e),c(h),c($.$$.fragment,e),c(z.$$.fragment,e),c(q.$$.fragment,e),c(X.$$.fragment,e),c(N.$$.fragment,e),c(_.$$.fragment,e),c(S.$$.fragment,e),c(R.$$.fragment,e),c(Y.$$.fragment,e),c(F.$$.fragment,e),c(H.$$.fragment,e),c(A.$$.fragment,e),c(U),c(L.$$.fragment,e),We=!0)},o(e){r(Z.$$.fragment,e),r(V.$$.fragment,e),r(h),r($.$$.fragment,e),r(z.$$.fragment,e),r(q.$$.fragment,e),r(X.$$.fragment,e),r(N.$$.fragment,e),r(_.$$.fragment,e),r(S.$$.fragment,e),r(R.$$.fragment,e),r(Y.$$.fragment,e),r(F.$$.fragment,e),r(H.$$.fragment,e),r(A.$$.fragment,e),r(U),r(L.$$.fragment,e),We=!1},d(e){e&&(l(p),l(t),l(y),l(te),l(ae),l(ee),l(k),l(ne),l(g),l(oe),l(ie),l(W),l(pe),l(B),l(re),l(ce),l(v),l(me),l(ue),l(G),l(Me),l(de),l(E),l(be),l(Je),l(x),l(ye),l(Te),l(je),l(Q),l(we),l(fe),l(he),l(C),l(Ie),l(Ue),l(Ze),l(D),l(Ve),l(ke),l(P),l(ge),l(se),l($e),l(le)),l(n),d(Z,e),d(V,e),K[f].d(e),d($,e),d(z,e),d(q,e),d(X,e),d(N,e),d(_,e),d(S,e),d(R,e),d(Y,e),d(F,e),d(H,e),d(A,e),O[I].d(e),d(L,e)}}}const bs='{"title":"Colocando tudo junto","local":"colocando-tudo-junto","sections":[{"title":"Tokens especiais","local":"tokens-especiais","sections":[],"depth":2},{"title":"Do tokenizer ao modelo","local":"do-tokenizer-ao-modelo","sections":[],"depth":2}],"depth":1}';function Js(w,n,p){let t="pt";return ts(()=>{const y=new 
URLSearchParams(window.location.search);p(0,t=y.get("fw")||"pt")}),[t]}class Is extends as{constructor(n){super(),ns(this,n,Js,ds,ls,{})}}export{Is as component};

Xet Storage Details

Size:
23.8 kB
·
Xet hash:
f0c04d7bb96582fd20e63ee4e3ebd436b8af064b9377f145bd4047ca49da80bc

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.