Buckets:

rtrm's picture
download
raw
24 kB
/*
 * Minified, generated page module. The `svelte-…` hashes and `data-svelte-h`
 * attributes suggest compiled Svelte output, so this file should presumably not be
 * edited by hand; the source document it points at (see the `rl` instance created in
 * bl) is chapters/rum/chapter2/6.mdx of the course repository.
 *
 * Layout of the module:
 *   cl(w), ol(w)  Each builds one `ll` instance (imported from the CourseFloatingBanner
 *                 chunk) with chapter: 2 and two notebook links (Google Colab and
 *                 Studio Lab). cl links to section6_tf.ipynb, ol to section6_pt.ipynb.
 *   ml(w), Ml(w)  Each builds one `j` instance (imported from the CodeBlock chunk) for
 *                 the closing code sample. `code` is a base64 payload, `highlighted`
 *                 is the pre-rendered HTML. ml is the TensorFlow variant
 *                 (return_tensors="tf"), Ml the PyTorch variant (return_tensors="pt").
 *   bl(w)         Main fragment: declares every text string, heading (`Be`), code
 *                 block (`j`) and the framework switch (`ul`, fed with w[0]).
 *                 Fe(e) and Ae(e) both return 0 when e[0] === "pt" and 1 otherwise;
 *                 the result indexes qe = [ol, cl] and De = [Ml, ml], so "pt" selects
 *                 the PyTorch banner / sample and anything else the TensorFlow ones.
 *                 p(e, [l]) forwards e[0] as `fw` to the framework switch when bit 1
 *                 of `l` is set, and, when a selector result changed, transitions the
 *                 old block out, clears its cache slot (K / O) and creates and mounts
 *                 the newly selected block before the anchor nodes ee / le.
 *   dl            JSON string passed to Ke(a, "content", dl) for the <meta> element
 *                 whose name is set to "hf:doc:metadata".
 *   Jl(w, a, u)   Instance function: s starts as "pt"; the callback handed to `sl`
 *                 reads the `fw` query parameter from window.location.search and calls
 *                 u(0, s = value || "pt"). Returns [s].
 *   Ul            Class extending `nl`; its constructor calls
 *                 al(this, a, Jl, bl, tl, {}). Exported under the name `component`.
 *
 * Every fragment factory returns an object with the methods c, l, m, i, o, d (bl adds
 * h and p); presumably these are the create / claim / mount / intro / outro / destroy
 * hooks expected by the runtime imported from ../chunks/index. TODO confirm there.
 *
 * NOTE(review): as this copy of the file shows them, the double-quoted strings Xe and
 * He contain raw line breaks, which is not valid inside a double-quoted JavaScript
 * string. Presumably they were escape sequences before extraction; verify against the
 * real build output before relying on this text.
 * NOTE(review): the last heading has the title "Încheiere: De la tokenizer la model"
 * but the anchor "încheiere-de-la-tokenizator-la-model". The same pair is repeated in
 * dl, so links stay consistent; only the wording differs.
 */
import{s as tl,o as sl}from"../chunks/scheduler.37c15a92.js";import{S as nl,i as al,g as d,s as i,r as o,A as il,h as J,f as t,c as p,j as Le,u as m,x as T,k as Ke,y as pl,a as n,v as M,t as r,b as Oe,d as c,w as b,p as el}from"../chunks/index.2bf4358c.js";import{C as j}from"../chunks/CodeBlock.4e987730.js";import{C as ll}from"../chunks/CourseFloatingBanner.6add7356.js";import{F as ul}from"../chunks/FrameworkSwitchCourse.8d4d4ab6.js";import{H as Be,E as rl}from"../chunks/getInferenceSnippets.ebf8be91.js";function cl(w){let a,u;return a=new ll({props:{chapter:2,classNames:"absolute z-10 right-0 top-0",notebooks:[{label:"Google Colab",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/en/chapter2/section6_tf.ipynb"},{label:"Aws Studio",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/en/chapter2/section6_tf.ipynb"}]}}),{c(){o(a.$$.fragment)},l(s){m(a.$$.fragment,s)},m(s,y){M(a,s,y),u=!0},i(s){u||(c(a.$$.fragment,s),u=!0)},o(s){r(a.$$.fragment,s),u=!1},d(s){b(a,s)}}}function ol(w){let a,u;return a=new ll({props:{chapter:2,classNames:"absolute z-10 right-0 top-0",notebooks:[{label:"Google Colab",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/en/chapter2/section6_pt.ipynb"},{label:"Aws Studio",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/en/chapter2/section6_pt.ipynb"}]}}),{c(){o(a.$$.fragment)},l(s){m(a.$$.fragment,s)},m(s,y){M(a,s,y),u=!0},i(s){u||(c(a.$$.fragment,s),u=!0)},o(s){r(a.$$.fragment,s),u=!1},d(s){b(a,s)}}}function ml(w){let a,u;return a=new 
j({props:{code:"aW1wb3J0JTIwdGVuc29yZmxvdyUyMGFzJTIwdGYlMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b1Rva2VuaXplciUyQyUyMFRGQXV0b01vZGVsRm9yU2VxdWVuY2VDbGFzc2lmaWNhdGlvbiUwQSUwQWNoZWNrcG9pbnQlMjAlM0QlMjAlMjJkaXN0aWxiZXJ0LWJhc2UtdW5jYXNlZC1maW5ldHVuZWQtc3N0LTItZW5nbGlzaCUyMiUwQXRva2VuaXplciUyMCUzRCUyMEF1dG9Ub2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKGNoZWNrcG9pbnQpJTBBbW9kZWwlMjAlM0QlMjBURkF1dG9Nb2RlbEZvclNlcXVlbmNlQ2xhc3NpZmljYXRpb24uZnJvbV9wcmV0cmFpbmVkKGNoZWNrcG9pbnQpJTBBc2VxdWVuY2VzJTIwJTNEJTIwJTVCJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTJDJTIwJTIyU28lMjBoYXZlJTIwSSElMjIlNUQlMEElMEF0b2tlbnMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzJTJDJTIwcGFkZGluZyUzRFRydWUlMkMlMjB0cnVuY2F0aW9uJTNEVHJ1ZSUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIydGYlMjIpJTBBb3V0cHV0JTIwJTNEJTIwbW9kZWwoKip0b2tlbnMp",highlighted:`<span class="hljs-keyword">import</span> tensorflow <span class="hljs-keyword">as</span> tf
<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, TFAutoModelForSequenceClassification
checkpoint = <span class="hljs-string">&quot;distilbert-base-uncased-finetuned-sst-2-english&quot;</span>
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
sequences = [<span class="hljs-string">&quot;I&#x27;ve been waiting for a HuggingFace course my whole life.&quot;</span>, <span class="hljs-string">&quot;So have I!&quot;</span>]
tokens = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">&quot;tf&quot;</span>)
output = model(**tokens)`,wrap:!1}}),{c(){o(a.$$.fragment)},l(s){m(a.$$.fragment,s)},m(s,y){M(a,s,y),u=!0},i(s){u||(c(a.$$.fragment,s),u=!0)},o(s){r(a.$$.fragment,s),u=!1},d(s){b(a,s)}}}function Ml(w){let a,u;return a=new j({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b1Rva2VuaXplciUyQyUyMEF1dG9Nb2RlbEZvclNlcXVlbmNlQ2xhc3NpZmljYXRpb24lMEElMEFjaGVja3BvaW50JTIwJTNEJTIwJTIyZGlzdGlsYmVydC1iYXNlLXVuY2FzZWQtZmluZXR1bmVkLXNzdC0yLWVuZ2xpc2glMjIlMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZChjaGVja3BvaW50KSUwQW1vZGVsJTIwJTNEJTIwQXV0b01vZGVsRm9yU2VxdWVuY2VDbGFzc2lmaWNhdGlvbi5mcm9tX3ByZXRyYWluZWQoY2hlY2twb2ludCklMEFzZXF1ZW5jZXMlMjAlM0QlMjAlNUIlMjJJJ3ZlJTIwYmVlbiUyMHdhaXRpbmclMjBmb3IlMjBhJTIwSHVnZ2luZ0ZhY2UlMjBjb3Vyc2UlMjBteSUyMHdob2xlJTIwbGlmZS4lMjIlMkMlMjAlMjJTbyUyMGhhdmUlMjBJISUyMiU1RCUwQSUwQXRva2VucyUyMCUzRCUyMHRva2VuaXplcihzZXF1ZW5jZXMlMkMlMjBwYWRkaW5nJTNEVHJ1ZSUyQyUyMHRydW5jYXRpb24lM0RUcnVlJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJwdCUyMiklMEFvdXRwdXQlMjAlM0QlMjBtb2RlbCgqKnRva2Vucyk=",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, AutoModelForSequenceClassification
checkpoint = <span class="hljs-string">&quot;distilbert-base-uncased-finetuned-sst-2-english&quot;</span>
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
sequences = [<span class="hljs-string">&quot;I&#x27;ve been waiting for a HuggingFace course my whole life.&quot;</span>, <span class="hljs-string">&quot;So have I!&quot;</span>]
tokens = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)
output = model(**tokens)`,wrap:!1}}),{c(){o(a.$$.fragment)},l(s){m(a.$$.fragment,s)},m(s,y){M(a,s,y),u=!0},i(s){u||(c(a.$$.fragment,s),u=!0)},o(s){r(a.$$.fragment,s),u=!1},d(s){b(a,s)}}}function bl(w){let a,u,s,y,Z,se,g,ne,f,h,ee,V,Se="În ultimele secțiuni, ne-am străduit să facem cea mai mare parte a muncii manual. Am explorat modul în care funcționează tokenizatoarele și am analizat tokenizarea, conversia în ID-uri de intrare, padding, trunchiere și măști de atenție.",ae,k,Ne="Cu toate acestea, după cum am văzut în secțiunea 2, API-ul 🤗 Transformers poate gestiona toate acestea pentru noi cu o funcție de nivel înalt în care ne vom adânci aici. Atunci când apelați <code>tokenizer</code> direct pe propoziție, primiți înapoi intrări care sunt gata să treacă prin modelul dvs:",ie,$,pe,W,Ge="Aici, variabila <code>model_inputs</code> conține tot ceea ce este necesar pentru ca un model să funcționeze bine. Pentru DistilBERT, aceasta include ID-urile de intrare, precum și masca de atenție. Alte modele care acceptă intrări suplimentare le vor avea, de asemenea, la ieșire prin obiectul <code>tokenizer</code>.",ue,z,Ee="După cum vom vedea în câteva exemple de mai jos, această metodă este foarte puternică. În primul rând, poate tokeniza o singură secvență:",re,v,ce,B,_e="De asemenea, gestionează mai multe secvențe simultan, fără nicio modificare a API-ului:",oe,S,me,N,Qe="Poate aplica padding în funcție de mai multe obiective:",Me,G,be,E,xe="De asemenea, poate trunchia secvențele:",de,_,Je,Q,Xe="Obiectul <code>tokenizer</code> poate gestiona conversia în tensori specifici framework-ului, care pot fi apoi trimiși direct la model. 
De exemplu, în următorul exemplu de cod, solicităm tokenizatorului să returneze tensori din diferite framework-uri - <code>„pt”</code> returnează tensori PyTorch, <code>„tf”</code> returnează tensori TensorFlow, iar <code>„np”</code> returnează matrici NumPy:",ye,x,Te,X,je,Y,Ye="Dacă aruncăm o privire la ID-urile de intrare returnate de tokenizer, vom vedea că sunt puțin diferite de cele pe care le-am avut mai devreme:",we,R,fe,C,he,H,Re="Un token ID a fost adăugat la început, iar unul la sfârșit. Să decodificăm cele două secvențe de ID-uri de mai sus pentru a vedea despre ce este vorba:",Ue,q,Ie,F,Ze,D,Ce="Tokenizatorul a adăugat cuvântul special <code>[CLS]</code> la început și cuvântul special <code>[SEP]</code> la sfârșit. Acest lucru se datorează faptului că modelul a fost preantrenat cu aceste cuvinte, deci pentru a obține aceleași rezultate pentru inferență trebuie să le adăugăm și pe acestea. Rețineți că unele modele nu adaugă cuvinte speciale sau adaugă cuvinte diferite; de asemenea, modelele pot adăuga aceste cuvinte speciale doar la început sau doar la sfârșit. În orice caz, tokenizatorul știe care sunt cele așteptate și se va ocupa de acest lucru pentru dumneavoastră.",ge,A,Ve,P,He="Acum că am văzut toți pașii individuali pe care îi utilizează obiectul <code>tokenizer</code> atunci când este aplicat pe texte, să vedem o ultimă dată cum poate gestiona secvențe multiple (padding!), secvențe foarte lungi (trunchiere!) 
și mai multe tipuri de tensori cu API-ul său principal:",ke,U,I,le,L,$e,te,We;Z=new ul({props:{fw:w[0]}}),g=new Be({props:{title:"Să punem totul cap la cap",local:"să-punem-totul-cap-la-cap",headingTag:"h1"}});const qe=[ol,cl],K=[];function Fe(e,l){return e[0]==="pt"?0:1}f=Fe(w),h=K[f]=qe[f](w),$=new j({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMEElMEFjaGVja3BvaW50JTIwJTNEJTIwJTIyZGlzdGlsYmVydC1iYXNlLXVuY2FzZWQtZmluZXR1bmVkLXNzdC0yLWVuZ2xpc2glMjIlMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZChjaGVja3BvaW50KSUwQSUwQXNlcXVlbmNlJTIwJTNEJTIwJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTBBJTBBbW9kZWxfaW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyKHNlcXVlbmNlKQ==",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer
checkpoint = <span class="hljs-string">&quot;distilbert-base-uncased-finetuned-sst-2-english&quot;</span>
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
sequence = <span class="hljs-string">&quot;I&#x27;ve been waiting for a HuggingFace course my whole life.&quot;</span>
model_inputs = tokenizer(sequence)`,wrap:!1}}),v=new j({props:{code:"c2VxdWVuY2UlMjAlM0QlMjAlMjJJJ3ZlJTIwYmVlbiUyMHdhaXRpbmclMjBmb3IlMjBhJTIwSHVnZ2luZ0ZhY2UlMjBjb3Vyc2UlMjBteSUyMHdob2xlJTIwbGlmZS4lMjIlMEElMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2Up",highlighted:`sequence = <span class="hljs-string">&quot;I&#x27;ve been waiting for a HuggingFace course my whole life.&quot;</span>
model_inputs = tokenizer(sequence)`,wrap:!1}}),S=new j({props:{code:"c2VxdWVuY2VzJTIwJTNEJTIwJTVCJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTJDJTIwJTIyU28lMjBoYXZlJTIwSSElMjIlNUQlMEElMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzKQ==",highlighted:`sequences = [<span class="hljs-string">&quot;I&#x27;ve been waiting for a HuggingFace course my whole life.&quot;</span>, <span class="hljs-string">&quot;So have I!&quot;</span>]
model_inputs = tokenizer(sequences)`,wrap:!1}}),G=new j({props:{code:"JTIzJTIwVmElMjB1bXBsZSUyMHNlY3ZlbiVDOCU5QmVsZSUyMHAlQzMlQTJuJUM0JTgzJTIwbGElMjBsdW5naW1lYSUyMG1heGltJUM0JTgzJTIwYSUyMHNlY3ZlbiVDOCU5QmVpJTBBbW9kZWxfaW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyKHNlcXVlbmNlcyUyQyUyMHBhZGRpbmclM0QlMjJsb25nZXN0JTIyKSUwQSUwQSUyMyUyMFZhJTIwdW1wbGUlMjBzZWN2ZW4lQzglOUJlbGUlMjBwJUMzJUEybiVDNCU4MyUyMGxhJTIwbHVuZ2ltZWElMjBtYXhpbSVDNCU4MyUyMGElMjBtb2RlbHVsdWklMEElMjMlMjAoNTEyJTIwcGVudHJ1JTIwQkVSVCUyMHNhdSUyMERpc3RpbEJFUlQpJTBBbW9kZWxfaW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyKHNlcXVlbmNlcyUyQyUyMHBhZGRpbmclM0QlMjJtYXhfbGVuZ3RoJTIyKSUwQSUwQSUyMyUyMFZhJTIwdW1wbGUlMjBzZWN2ZW4lQzglOUJlbGUlMjBwJUMzJUEybiVDNCU4MyUyMGxhJTIwbHVuZ2ltZWElMjBtYXhpbSVDNCU4MyUyMHNwZWNpZmljYXQlQzQlODMlMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzJTJDJTIwcGFkZGluZyUzRCUyMm1heF9sZW5ndGglMjIlMkMlMjBtYXhfbGVuZ3RoJTNEOCk=",highlighted:`<span class="hljs-comment"># Va umple secvențele până la lungimea maximă a secvenței</span>
model_inputs = tokenizer(sequences, padding=<span class="hljs-string">&quot;longest&quot;</span>)
<span class="hljs-comment"># Va umple secvențele până la lungimea maximă a modelului</span>
<span class="hljs-comment"># (512 pentru BERT sau DistilBERT)</span>
model_inputs = tokenizer(sequences, padding=<span class="hljs-string">&quot;max_length&quot;</span>)
<span class="hljs-comment"># Va umple secvențele până la lungimea maximă specificată</span>
model_inputs = tokenizer(sequences, padding=<span class="hljs-string">&quot;max_length&quot;</span>, max_length=<span class="hljs-number">8</span>)`,wrap:!1}}),_=new j({props:{code:"c2VxdWVuY2VzJTIwJTNEJTIwJTVCJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTJDJTIwJTIyU28lMjBoYXZlJTIwSSElMjIlNUQlMEElMEElMjMlMjBWYSUyMHRydW5jaGlhJTIwc2VjdmVuJUM4JTlCZWxlJTIwY2FyZSUyMHN1bnQlMjBtYWklMjBsdW5naSUyMGRlYyVDMyVBMnQlMjBsdW5naW1lYSUyMG1heGltJUM0JTgzJTIwYSUyMG1vZGVsdWx1aSUwQSUyMyUyMCg1MTIlMjBwZW50cnUlMjBCRVJUJTIwc2F1JTIwRGlzdGlsQkVSVCklMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzJTJDJTIwdHJ1bmNhdGlvbiUzRFRydWUpJTBBJTBBJTIzJTIwVmElMjB0cnVuY2hpYSUyMHNlY3ZlbiVDOCU5QmVsZSUyMGNhcmUlMjBzdW50JTIwbWFpJTIwbHVuZ2klMjBkZWMlQzMlQTJ0JTIwbHVuZ2ltZWElMjBtYXhpbSVDNCU4MyUyMHNwZWNpZmljYXQlQzQlODMlMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzJTJDJTIwbWF4X2xlbmd0aCUzRDglMkMlMjB0cnVuY2F0aW9uJTNEVHJ1ZSk=",highlighted:`sequences = [<span class="hljs-string">&quot;I&#x27;ve been waiting for a HuggingFace course my whole life.&quot;</span>, <span class="hljs-string">&quot;So have I!&quot;</span>]
<span class="hljs-comment"># Va trunchia secvențele care sunt mai lungi decât lungimea maximă a modelului</span>
<span class="hljs-comment"># (512 pentru BERT sau DistilBERT)</span>
model_inputs = tokenizer(sequences, truncation=<span class="hljs-literal">True</span>)
<span class="hljs-comment"># Va trunchia secvențele care sunt mai lungi decât lungimea maximă specificată</span>
model_inputs = tokenizer(sequences, max_length=<span class="hljs-number">8</span>, truncation=<span class="hljs-literal">True</span>)`,wrap:!1}}),x=new j({props:{code:"c2VxdWVuY2VzJTIwJTNEJTIwJTVCJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTJDJTIwJTIyU28lMjBoYXZlJTIwSSElMjIlNUQlMEElMEElMjMlMjBSZXR1cm5lYXolQzQlODMlMjB0ZW5zb3JpJTIwUHlUb3JjaCUwQW1vZGVsX2lucHV0cyUyMCUzRCUyMHRva2VuaXplcihzZXF1ZW5jZXMlMkMlMjBwYWRkaW5nJTNEVHJ1ZSUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIpJTBBJTBBJTIzJTIwUmV0dXJuZWF6JUM0JTgzJTIwdGVuc29yaSUyMFRlbnNvckZsb3clMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzJTJDJTIwcGFkZGluZyUzRFRydWUlMkMlMjByZXR1cm5fdGVuc29ycyUzRCUyMnRmJTIyKSUwQSUwQSUyMyUyMFJldHVybmVheiVDNCU4MyUyMGFycmF5LXVyaSUyME51bVB5JTBBbW9kZWxfaW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyKHNlcXVlbmNlcyUyQyUyMHBhZGRpbmclM0RUcnVlJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJucCUyMik=",highlighted:`sequences = [<span class="hljs-string">&quot;I&#x27;ve been waiting for a HuggingFace course my whole life.&quot;</span>, <span class="hljs-string">&quot;So have I!&quot;</span>]
<span class="hljs-comment"># Returnează tensori PyTorch</span>
model_inputs = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)
<span class="hljs-comment"># Returnează tensori TensorFlow</span>
model_inputs = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">&quot;tf&quot;</span>)
<span class="hljs-comment"># Returnează array-uri NumPy</span>
model_inputs = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">&quot;np&quot;</span>)`,wrap:!1}}),X=new Be({props:{title:"Token-uri speciale",local:"token-uri-speciale",headingTag:"h2"}}),R=new j({props:{code:"c2VxdWVuY2UlMjAlM0QlMjAlMjJJJ3ZlJTIwYmVlbiUyMHdhaXRpbmclMjBmb3IlMjBhJTIwSHVnZ2luZ0ZhY2UlMjBjb3Vyc2UlMjBteSUyMHdob2xlJTIwbGlmZS4lMjIlMEElMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2UpJTBBcHJpbnQobW9kZWxfaW5wdXRzJTVCJTIyaW5wdXRfaWRzJTIyJTVEKSUwQSUwQXRva2VucyUyMCUzRCUyMHRva2VuaXplci50b2tlbml6ZShzZXF1ZW5jZSklMEFpZHMlMjAlM0QlMjB0b2tlbml6ZXIuY29udmVydF90b2tlbnNfdG9faWRzKHRva2VucyklMEFwcmludChpZHMp",highlighted:`sequence = <span class="hljs-string">&quot;I&#x27;ve been waiting for a HuggingFace course my whole life.&quot;</span>
model_inputs = tokenizer(sequence)
<span class="hljs-built_in">print</span>(model_inputs[<span class="hljs-string">&quot;input_ids&quot;</span>])
tokens = tokenizer.tokenize(sequence)
ids = tokenizer.convert_tokens_to_ids(tokens)
<span class="hljs-built_in">print</span>(ids)`,wrap:!1}}),C=new j({props:{code:"JTVCMTAxJTJDJTIwMTA0NSUyQyUyMDEwMDUlMkMlMjAyMzEwJTJDJTIwMjA0MiUyQyUyMDM0MDMlMkMlMjAyMDA1JTJDJTIwMTAzNyUyQyUyMDE3NjYyJTJDJTIwMTIxNzIlMkMlMjAyNjA3JTJDJTIwMjAyNiUyQyUyMDI4NzglMkMlMjAyMTY2JTJDJTIwMTAxMiUyQyUyMDEwMiU1RCUwQSU1QjEwNDUlMkMlMjAxMDA1JTJDJTIwMjMxMCUyQyUyMDIwNDIlMkMlMjAzNDAzJTJDJTIwMjAwNSUyQyUyMDEwMzclMkMlMjAxNzY2MiUyQyUyMDEyMTcyJTJDJTIwMjYwNyUyQyUyMDIwMjYlMkMlMjAyODc4JTJDJTIwMjE2NiUyQyUyMDEwMTIlNUQ=",highlighted:`[<span class="hljs-number">101</span>, <span class="hljs-number">1045</span>, <span class="hljs-number">1005</span>, <span class="hljs-number">2310</span>, <span class="hljs-number">2042</span>, <span class="hljs-number">3403</span>, <span class="hljs-number">2005</span>, <span class="hljs-number">1037</span>, <span class="hljs-number">17662</span>, <span class="hljs-number">12172</span>, <span class="hljs-number">2607</span>, <span class="hljs-number">2026</span>, <span class="hljs-number">2878</span>, <span class="hljs-number">2166</span>, <span class="hljs-number">1012</span>, <span class="hljs-number">102</span>]
[<span class="hljs-number">1045</span>, <span class="hljs-number">1005</span>, <span class="hljs-number">2310</span>, <span class="hljs-number">2042</span>, <span class="hljs-number">3403</span>, <span class="hljs-number">2005</span>, <span class="hljs-number">1037</span>, <span class="hljs-number">17662</span>, <span class="hljs-number">12172</span>, <span class="hljs-number">2607</span>, <span class="hljs-number">2026</span>, <span class="hljs-number">2878</span>, <span class="hljs-number">2166</span>, <span class="hljs-number">1012</span>]`,wrap:!1}}),q=new j({props:{code:"cHJpbnQodG9rZW5pemVyLmRlY29kZShtb2RlbF9pbnB1dHMlNUIlMjJpbnB1dF9pZHMlMjIlNUQpKSUwQXByaW50KHRva2VuaXplci5kZWNvZGUoaWRzKSk=",highlighted:`<span class="hljs-built_in">print</span>(tokenizer.decode(model_inputs[<span class="hljs-string">&quot;input_ids&quot;</span>]))
<span class="hljs-built_in">print</span>(tokenizer.decode(ids))`,wrap:!1}}),F=new j({props:{code:"JTIyJTVCQ0xTJTVEJTIwaSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMGh1Z2dpbmdmYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIwJTVCU0VQJTVEJTIyJTBBJTIyaSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMGh1Z2dpbmdmYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIy",highlighted:`<span class="hljs-string">&quot;[CLS] i&#x27;ve been waiting for a huggingface course my whole life. [SEP]&quot;</span>
<span class="hljs-string">&quot;i&#x27;ve been waiting for a huggingface course my whole life.&quot;</span>`,wrap:!1}}),A=new Be({props:{title:"Încheiere: De la tokenizer la model",local:"încheiere-de-la-tokenizator-la-model",headingTag:"h2"}});const De=[Ml,ml],O=[];function Ae(e,l){return e[0]==="pt"?0:1}return U=Ae(w),I=O[U]=De[U](w),L=new rl({props:{source:"https://github.com/huggingface/course/blob/main/chapters/rum/chapter2/6.mdx"}}),{c(){a=d("meta"),u=i(),s=d("p"),y=i(),o(Z.$$.fragment),se=i(),o(g.$$.fragment),ne=i(),h.c(),ee=i(),V=d("p"),V.textContent=Se,ae=i(),k=d("p"),k.innerHTML=Ne,ie=i(),o($.$$.fragment),pe=i(),W=d("p"),W.innerHTML=Ge,ue=i(),z=d("p"),z.textContent=Ee,re=i(),o(v.$$.fragment),ce=i(),B=d("p"),B.textContent=_e,oe=i(),o(S.$$.fragment),me=i(),N=d("p"),N.textContent=Qe,Me=i(),o(G.$$.fragment),be=i(),E=d("p"),E.textContent=xe,de=i(),o(_.$$.fragment),Je=i(),Q=d("p"),Q.innerHTML=Xe,ye=i(),o(x.$$.fragment),Te=i(),o(X.$$.fragment),je=i(),Y=d("p"),Y.textContent=Ye,we=i(),o(R.$$.fragment),fe=i(),o(C.$$.fragment),he=i(),H=d("p"),H.textContent=Re,Ue=i(),o(q.$$.fragment),Ie=i(),o(F.$$.fragment),Ze=i(),D=d("p"),D.innerHTML=Ce,ge=i(),o(A.$$.fragment),Ve=i(),P=d("p"),P.innerHTML=He,ke=i(),I.c(),le=i(),o(L.$$.fragment),$e=i(),te=d("p"),this.h()},l(e){const 
l=il("svelte-u9bgzb",document.head);a=J(l,"META",{name:!0,content:!0}),l.forEach(t),u=p(e),s=J(e,"P",{}),Le(s).forEach(t),y=p(e),m(Z.$$.fragment,e),se=p(e),m(g.$$.fragment,e),ne=p(e),h.l(e),ee=p(e),V=J(e,"P",{"data-svelte-h":!0}),T(V)!=="svelte-debj4s"&&(V.textContent=Se),ae=p(e),k=J(e,"P",{"data-svelte-h":!0}),T(k)!=="svelte-1hco84t"&&(k.innerHTML=Ne),ie=p(e),m($.$$.fragment,e),pe=p(e),W=J(e,"P",{"data-svelte-h":!0}),T(W)!=="svelte-5ylyo3"&&(W.innerHTML=Ge),ue=p(e),z=J(e,"P",{"data-svelte-h":!0}),T(z)!=="svelte-xxf8ed"&&(z.textContent=Ee),re=p(e),m(v.$$.fragment,e),ce=p(e),B=J(e,"P",{"data-svelte-h":!0}),T(B)!=="svelte-19jvf1t"&&(B.textContent=_e),oe=p(e),m(S.$$.fragment,e),me=p(e),N=J(e,"P",{"data-svelte-h":!0}),T(N)!=="svelte-12j5yca"&&(N.textContent=Qe),Me=p(e),m(G.$$.fragment,e),be=p(e),E=J(e,"P",{"data-svelte-h":!0}),T(E)!=="svelte-k2u6eu"&&(E.textContent=xe),de=p(e),m(_.$$.fragment,e),Je=p(e),Q=J(e,"P",{"data-svelte-h":!0}),T(Q)!=="svelte-1hpqg7u"&&(Q.innerHTML=Xe),ye=p(e),m(x.$$.fragment,e),Te=p(e),m(X.$$.fragment,e),je=p(e),Y=J(e,"P",{"data-svelte-h":!0}),T(Y)!=="svelte-ydjxmx"&&(Y.textContent=Ye),we=p(e),m(R.$$.fragment,e),fe=p(e),m(C.$$.fragment,e),he=p(e),H=J(e,"P",{"data-svelte-h":!0}),T(H)!=="svelte-i2lejn"&&(H.textContent=Re),Ue=p(e),m(q.$$.fragment,e),Ie=p(e),m(F.$$.fragment,e),Ze=p(e),D=J(e,"P",{"data-svelte-h":!0}),T(D)!=="svelte-1wvuh16"&&(D.innerHTML=Ce),ge=p(e),m(A.$$.fragment,e),Ve=p(e),P=J(e,"P",{"data-svelte-h":!0}),T(P)!=="svelte-pyekyx"&&(P.innerHTML=He),ke=p(e),I.l(e),le=p(e),m(L.$$.fragment,e),$e=p(e),te=J(e,"P",{}),Le(te).forEach(t),this.h()},h(){Ke(a,"name","hf:doc:metadata"),Ke(a,"content",dl)},m(e,l){pl(document.head,a),n(e,u,l),n(e,s,l),n(e,y,l),M(Z,e,l),n(e,se,l),M(g,e,l),n(e,ne,l),K[f].m(e,l),n(e,ee,l),n(e,V,l),n(e,ae,l),n(e,k,l),n(e,ie,l),M($,e,l),n(e,pe,l),n(e,W,l),n(e,ue,l),n(e,z,l),n(e,re,l),M(v,e,l),n(e,ce,l),n(e,B,l),n(e,oe,l),M(S,e,l),n(e,me,l),n(e,N,l),n(e,Me,l),M(G,e,l),n(e,be,l),n(e,E,l),n(e,de,l),M(_,e,l),n(e,Je,l),n(e,Q
,l),n(e,ye,l),M(x,e,l),n(e,Te,l),M(X,e,l),n(e,je,l),n(e,Y,l),n(e,we,l),M(R,e,l),n(e,fe,l),M(C,e,l),n(e,he,l),n(e,H,l),n(e,Ue,l),M(q,e,l),n(e,Ie,l),M(F,e,l),n(e,Ze,l),n(e,D,l),n(e,ge,l),M(A,e,l),n(e,Ve,l),n(e,P,l),n(e,ke,l),O[U].m(e,l),n(e,le,l),M(L,e,l),n(e,$e,l),n(e,te,l),We=!0},p(e,[l]){const Pe={};l&1&&(Pe.fw=e[0]),Z.$set(Pe);let ze=f;f=Fe(e),f!==ze&&(el(),r(K[ze],1,1,()=>{K[ze]=null}),Oe(),h=K[f],h||(h=K[f]=qe[f](e),h.c()),c(h,1),h.m(ee.parentNode,ee));let ve=U;U=Ae(e),U!==ve&&(el(),r(O[ve],1,1,()=>{O[ve]=null}),Oe(),I=O[U],I||(I=O[U]=De[U](e),I.c()),c(I,1),I.m(le.parentNode,le))},i(e){We||(c(Z.$$.fragment,e),c(g.$$.fragment,e),c(h),c($.$$.fragment,e),c(v.$$.fragment,e),c(S.$$.fragment,e),c(G.$$.fragment,e),c(_.$$.fragment,e),c(x.$$.fragment,e),c(X.$$.fragment,e),c(R.$$.fragment,e),c(C.$$.fragment,e),c(q.$$.fragment,e),c(F.$$.fragment,e),c(A.$$.fragment,e),c(I),c(L.$$.fragment,e),We=!0)},o(e){r(Z.$$.fragment,e),r(g.$$.fragment,e),r(h),r($.$$.fragment,e),r(v.$$.fragment,e),r(S.$$.fragment,e),r(G.$$.fragment,e),r(_.$$.fragment,e),r(x.$$.fragment,e),r(X.$$.fragment,e),r(R.$$.fragment,e),r(C.$$.fragment,e),r(q.$$.fragment,e),r(F.$$.fragment,e),r(A.$$.fragment,e),r(I),r(L.$$.fragment,e),We=!1},d(e){e&&(t(u),t(s),t(y),t(se),t(ne),t(ee),t(V),t(ae),t(k),t(ie),t(pe),t(W),t(ue),t(z),t(re),t(ce),t(B),t(oe),t(me),t(N),t(Me),t(be),t(E),t(de),t(Je),t(Q),t(ye),t(Te),t(je),t(Y),t(we),t(fe),t(he),t(H),t(Ue),t(Ie),t(Ze),t(D),t(ge),t(Ve),t(P),t(ke),t(le),t($e),t(te)),t(a),b(Z,e),b(g,e),K[f].d(e),b($,e),b(v,e),b(S,e),b(G,e),b(_,e),b(x,e),b(X,e),b(R,e),b(C,e),b(q,e),b(F,e),b(A,e),O[U].d(e),b(L,e)}}}const dl='{"title":"Să punem totul cap la cap","local":"să-punem-totul-cap-la-cap","sections":[{"title":"Token-uri speciale","local":"token-uri-speciale","sections":[],"depth":2},{"title":"Încheiere: De la tokenizer la model","local":"încheiere-de-la-tokenizator-la-model","sections":[],"depth":2}],"depth":1}';function Jl(w,a,u){let s="pt";return sl(()=>{const y=new 
URLSearchParams(window.location.search);u(0,s=y.get("fw")||"pt")}),[s]}class Ul extends nl{constructor(a){super(),al(this,a,Jl,bl,tl,{})}}export{Ul as component};

Xet Storage Details

Size:
24 kB
·
Xet hash:
33e60107e7dea2c1dcec131994dd1e25900b499b8c01fe41f73ae62e1b2fbbcd

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.