Buckets:
| import{s as ni,f as ii,o as ri,n as Ee}from"../chunks/scheduler.37c15a92.js";import{S as Mi,i as pi,g as U,s as i,r as d,A as ci,h as T,f as t,c as r,j as Fs,u as m,x as j,k as he,y as ui,a,v as y,t as c,b as _e,d as p,w as J,p as ge}from"../chunks/index.2bf4358c.js";import{T as Ye}from"../chunks/Tip.363c041f.js";import{Y as zs}from"../chunks/Youtube.1e50a667.js";import{C as h}from"../chunks/CodeBlock.4e987730.js";import{C as si}from"../chunks/CourseFloatingBanner.6add7356.js";import{F as oi}from"../chunks/FrameworkSwitchCourse.8d4d4ab6.js";import{H as Se,E as di}from"../chunks/getInferenceSnippets.ebf8be91.js";function mi(I){let s,o;return s=new si({props:{chapter:7,classNames:"absolute z-10 right-0 top-0",notebooks:[{label:"Google Colab",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/en/chapter7/section3_tf.ipynb"},{label:"Aws Studio",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/en/chapter7/section3_tf.ipynb"}]}}),{c(){d(s.$$.fragment)},l(n){m(s.$$.fragment,n)},m(n,u){y(s,n,u),o=!0},i(n){o||(p(s.$$.fragment,n),o=!0)},o(n){c(s.$$.fragment,n),o=!1},d(n){J(s,n)}}}function yi(I){let s,o;return s=new si({props:{chapter:7,classNames:"absolute z-10 right-0 top-0",notebooks:[{label:"Google Colab",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/en/chapter7/section3_pt.ipynb"},{label:"Aws Studio",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/en/chapter7/section3_pt.ipynb"}]}}),{c(){d(s.$$.fragment)},l(n){m(s.$$.fragment,n)},m(n,u){y(s,n,u),o=!0},i(n){o||(p(s.$$.fragment,n),o=!0)},o(n){c(s.$$.fragment,n),o=!1},d(n){J(s,n)}}}function Ji(I){let s,o='🙋 Dacă termenii “masked language modeling” și “pretrained model” nu vă sună familiar, mergeți să verificați <a href="/course/chapter1">Capitolul 1</a>, unde vă explicăm toate aceste concepte de bază, cu videoclipuri!';return{c(){s=U("p"),s.innerHTML=o},l(n){s=T(n,"P",{"data-svelte-h":!0}),j(s)!=="svelte-p8df98"&&(s.innerHTML=o)},m(n,u){a(n,s,u)},p:Ee,d(n){n&&t(s)}}}function Ui(I){let s,o="Să continuăm și să descărcăm modelul DistilBERT folosind clasa <code>TFAutoModelForMaskedLM</code>:",n,u,g,w,R="Putem vedea câți parametri are acest model prin apelarea metodei <code>summary()</code>:",z,_,v,k,B;return u=new h({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMFRGQXV0b01vZGVsRm9yTWFza2VkTE0lMEElMEFtb2RlbF9jaGVja3BvaW50JTIwJTNEJTIwJTIyZGlzdGlsYmVydC1iYXNlLXVuY2FzZWQlMjIlMEFtb2RlbCUyMCUzRCUyMFRGQXV0b01vZGVsRm9yTWFza2VkTE0uZnJvbV9wcmV0cmFpbmVkKG1vZGVsX2NoZWNrcG9pbnQp",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> TFAutoModelForMaskedLM | |
| model_checkpoint = <span class="hljs-string">"distilbert-base-uncased"</span> | |
| model = TFAutoModelForMaskedLM.from_pretrained(model_checkpoint)`,wrap:!1}}),_=new h({props:{code:"bW9kZWwuc3VtbWFyeSgp",highlighted:"model.summary()",wrap:!1}}),k=new h({props:{code:"TW9kZWwlM0ElMjAlMjJ0Zl9kaXN0aWxfYmVydF9mb3JfbWFza2VkX2xtJTIyJTBBX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX18lMEFMYXllciUyMCh0eXBlKSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyME91dHB1dCUyMFNoYXBlJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwUGFyYW0lMjAlMjMlMjAlMjAlMjAlMEElM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlM0QlMEFkaXN0aWxiZXJ0JTIwKFRGRGlzdGlsQmVydE1haW4lMjBtdWx0aXBsZSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMDY2MzYyODgwJTIwJTIwJTBBX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX18lMEF2b2NhYl90cmFuc2Zvcm0lMjAoRGVuc2UpJTIwJTIwJTIwJTIwJTIwJTIwbXVsdGlwbGUlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjA1OTA1OTIlMjAlMjAlMjAlMjAlMEFfX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fXyUwQXZvY2FiX2xheWVyX25vcm0lMjAoTGF5ZXJOb3JtYSUyMG11bHRpcGxlJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwMTUzNiUyMCUyMCUyMCUyMCUyMCUyMCUwQV9fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fJTBBdm9jYWJfcHJvamVjdG9yJTIwKFRGRGlzdGlsQmVyJTIwbXVsdGlwbGUlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAyMzg2NjE3MCUyMCUyMCUwQSUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUzRCUwQVRvdGFsJTIwcGFyYW1zJTNBJTIwNjYlMkM5ODUlMkM1MzAlMEFUcmFpbmFibGUlMjBwYXJhbXMlM0ElMjA2NiUyQzk4NSUyQzUzMCUwQU5vbi10cmFpbmFibGUlMjBwYXJhbXMlM0ElMjAwJTBBX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX18=",highlighted:`Model: <span class="hljs-string">"tf_distil_bert_for_masked_lm"</span> | |
| _________________________________________________________________ | |
| Layer (<span class="hljs-built_in">type</span>) Output Shape Param <span class="hljs-comment"># </span> | |
| ================================================================= | |
| distilbert (TFDistilBertMain multiple <span class="hljs-number">66362880</span> | |
| _________________________________________________________________ | |
| vocab_transform (Dense) multiple <span class="hljs-number">590592</span> | |
| _________________________________________________________________ | |
| vocab_layer_norm (LayerNorma multiple <span class="hljs-number">1536</span> | |
| _________________________________________________________________ | |
| vocab_projector (TFDistilBer multiple <span class="hljs-number">23866170</span> | |
| ================================================================= | |
| Total params: <span class="hljs-number">66</span>,<span class="hljs-number">985</span>,<span class="hljs-number">530</span> | |
| Trainable params: <span class="hljs-number">66</span>,<span class="hljs-number">985</span>,<span class="hljs-number">530</span> | |
| Non-trainable params: <span class="hljs-number">0</span> | |
| _________________________________________________________________`,wrap:!1}}),{c(){s=U("p"),s.innerHTML=o,n=i(),d(u.$$.fragment),g=i(),w=U("p"),w.innerHTML=R,z=i(),d(_.$$.fragment),v=i(),d(k.$$.fragment)},l(b){s=T(b,"P",{"data-svelte-h":!0}),j(s)!=="svelte-1ec5el9"&&(s.innerHTML=o),n=r(b),m(u.$$.fragment,b),g=r(b),w=T(b,"P",{"data-svelte-h":!0}),j(w)!=="svelte-nv8dz4"&&(w.innerHTML=R),z=r(b),m(_.$$.fragment,b),v=r(b),m(k.$$.fragment,b)},m(b,G){a(b,s,G),a(b,n,G),y(u,b,G),a(b,g,G),a(b,w,G),a(b,z,G),y(_,b,G),a(b,v,G),y(k,b,G),B=!0},i(b){B||(p(u.$$.fragment,b),p(_.$$.fragment,b),p(k.$$.fragment,b),B=!0)},o(b){c(u.$$.fragment,b),c(_.$$.fragment,b),c(k.$$.fragment,b),B=!1},d(b){b&&(t(s),t(n),t(g),t(w),t(z),t(v)),J(u,b),J(_,b),J(k,b)}}}function Ti(I){let s,o="Să continuăm și să descărcăm modelul DistilBERT folosind clasa <code>AutoModelForMaskedLM</code>:",n,u,g,w,R="Putem vedea câți parametri are acest model prin apelarea metodei <code>num_parameters()</code>:",z,_,v,k,B;return u=new h({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvck1hc2tlZExNJTBBJTBBbW9kZWxfY2hlY2twb2ludCUyMCUzRCUyMCUyMmRpc3RpbGJlcnQtYmFzZS11bmNhc2VkJTIyJTBBbW9kZWwlMjAlM0QlMjBBdXRvTW9kZWxGb3JNYXNrZWRMTS5mcm9tX3ByZXRyYWluZWQobW9kZWxfY2hlY2twb2ludCk=",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForMaskedLM | |
| model_checkpoint = <span class="hljs-string">"distilbert-base-uncased"</span> | |
| model = AutoModelForMaskedLM.from_pretrained(model_checkpoint)`,wrap:!1}}),_=new h({props:{code:"ZGlzdGlsYmVydF9udW1fcGFyYW1ldGVycyUyMCUzRCUyMG1vZGVsLm51bV9wYXJhbWV0ZXJzKCklMjAlMkYlMjAxXzAwMF8wMDAlMEFwcmludChmJTIyJyUzRSUzRSUzRSUyMERpc3RpbEJFUlQlMjBudW1iZXIlMjBvZiUyMHBhcmFtZXRlcnMlM0ElMjAlN0Jyb3VuZChkaXN0aWxiZXJ0X251bV9wYXJhbWV0ZXJzKSU3RE0nJTIyKSUwQXByaW50KGYlMjInJTNFJTNFJTNFJTIwQkVSVCUyMG51bWJlciUyMG9mJTIwcGFyYW1ldGVycyUzQSUyMDExME0nJTIyKQ==",highlighted:`distilbert_num_parameters = model.num_parameters() / <span class="hljs-number">1_000_000</span> | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"'>>> DistilBERT number of parameters: <span class="hljs-subst">{<span class="hljs-built_in">round</span>(distilbert_num_parameters)}</span>M'"</span>) | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"'>>> BERT number of parameters: 110M'"</span>)`,wrap:!1}}),k=new h({props:{code:"JyUzRSUzRSUzRSUyMERpc3RpbEJFUlQlMjBudW1iZXIlMjBvZiUyMHBhcmFtZXRlcnMlM0ElMjA2N00nJTBBJyUzRSUzRSUzRSUyMEJFUlQlMjBudW1iZXIlMjBvZiUyMHBhcmFtZXRlcnMlM0ElMjAxMTBNJw==",highlighted:`<span class="hljs-string">'>>> DistilBERT number of parameters: 67M'</span> | |
| <span class="hljs-string">'>>> BERT number of parameters: 110M'</span>`,wrap:!1}}),{c(){s=U("p"),s.innerHTML=o,n=i(),d(u.$$.fragment),g=i(),w=U("p"),w.innerHTML=R,z=i(),d(_.$$.fragment),v=i(),d(k.$$.fragment)},l(b){s=T(b,"P",{"data-svelte-h":!0}),j(s)!=="svelte-1o905j3"&&(s.innerHTML=o),n=r(b),m(u.$$.fragment,b),g=r(b),w=T(b,"P",{"data-svelte-h":!0}),j(w)!=="svelte-1wdkcab"&&(w.innerHTML=R),z=r(b),m(_.$$.fragment,b),v=r(b),m(k.$$.fragment,b)},m(b,G){a(b,s,G),a(b,n,G),y(u,b,G),a(b,g,G),a(b,w,G),a(b,z,G),y(_,b,G),a(b,v,G),y(k,b,G),B=!0},i(b){B||(p(u.$$.fragment,b),p(_.$$.fragment,b),p(k.$$.fragment,b),B=!0)},o(b){c(u.$$.fragment,b),c(_.$$.fragment,b),c(k.$$.fragment,b),B=!1},d(b){b&&(t(s),t(n),t(g),t(w),t(z),t(v)),J(u,b),J(_,b),J(k,b)}}}function wi(I){let s,o;return s=new h({props:{code:"aW1wb3J0JTIwbnVtcHklMjBhcyUyMG5wJTBBaW1wb3J0JTIwdGVuc29yZmxvdyUyMGFzJTIwdGYlMEElMEFpbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIodGV4dCUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIybnAlMjIpJTBBdG9rZW5fbG9naXRzJTIwJTNEJTIwbW9kZWwoKippbnB1dHMpLmxvZ2l0cyUwQSUyMyUyMEZpbmQlMjB0aGUlMjBsb2NhdGlvbiUyMG9mJTIwJTVCTUFTSyU1RCUyMGFuZCUyMGV4dHJhY3QlMjBpdHMlMjBsb2dpdHMlMEFtYXNrX3Rva2VuX2luZGV4JTIwJTNEJTIwbnAuYXJnd2hlcmUoaW5wdXRzJTVCJTIyaW5wdXRfaWRzJTIyJTVEJTIwJTNEJTNEJTIwdG9rZW5pemVyLm1hc2tfdG9rZW5faWQpJTVCMCUyQyUyMDElNUQlMEFtYXNrX3Rva2VuX2xvZ2l0cyUyMCUzRCUyMHRva2VuX2xvZ2l0cyU1QjAlMkMlMjBtYXNrX3Rva2VuX2luZGV4JTJDJTIwJTNBJTVEJTBBJTIzJTIwUGljayUyMHRoZSUyMCU1Qk1BU0slNUQlMjBjYW5kaWRhdGVzJTIwd2l0aCUyMHRoZSUyMGhpZ2hlc3QlMjBsb2dpdHMlMEElMjMlMjBXZSUyMG5lZ2F0ZSUyMHRoZSUyMGFycmF5JTIwYmVmb3JlJTIwYXJnc29ydCUyMHRvJTIwZ2V0JTIwdGhlJTIwbGFyZ2VzdCUyQyUyMG5vdCUyMHRoZSUyMHNtYWxsZXN0JTJDJTIwbG9naXRzJTBBdG9wXzVfdG9rZW5zJTIwJTNEJTIwbnAuYXJnc29ydCgtbWFza190b2tlbl9sb2dpdHMpJTVCJTNBNSU1RC50b2xpc3QoKSUwQSUwQWZvciUyMHRva2VuJTIwaW4lMjB0b3BfNV90b2tlbnMlM0ElMEElMjAlMjAlMjAlMjBwcmludChmJTIyJTNFJTNFJTNFJTIwJTdCdGV4dC5yZXBsYWNlKHRva2VuaXplci5tYXNrX3Rva2VuJTJDJTIwdG9rZW5pemVyLmRlY29kZSglNUJ0b2tlbiU1RCkpJTdEJTIyKQ==",highlighted:`<span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| <span class="hljs-keyword">import</span> tensorflow <span class="hljs-keyword">as</span> tf | |
| inputs = tokenizer(text, return_tensors=<span class="hljs-string">"np"</span>) | |
| token_logits = model(**inputs).logits | |
| <span class="hljs-comment"># Find the location of [MASK] and extract its logits</span> | |
| mask_token_index = np.argwhere(inputs[<span class="hljs-string">"input_ids"</span>] == tokenizer.mask_token_id)[<span class="hljs-number">0</span>, <span class="hljs-number">1</span>] | |
| mask_token_logits = token_logits[<span class="hljs-number">0</span>, mask_token_index, :] | |
| <span class="hljs-comment"># Pick the [MASK] candidates with the highest logits</span> | |
| <span class="hljs-comment"># We negate the array before argsort to get the largest, not the smallest, logits</span> | |
| top_5_tokens = np.argsort(-mask_token_logits)[:<span class="hljs-number">5</span>].tolist() | |
| <span class="hljs-keyword">for</span> token <span class="hljs-keyword">in</span> top_5_tokens: | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f">>> <span class="hljs-subst">{text.replace(tokenizer.mask_token, tokenizer.decode([token]))}</span>"</span>)`,wrap:!1}}),{c(){d(s.$$.fragment)},l(n){m(s.$$.fragment,n)},m(n,u){y(s,n,u),o=!0},i(n){o||(p(s.$$.fragment,n),o=!0)},o(n){c(s.$$.fragment,n),o=!1},d(n){J(s,n)}}}function ji(I){let s,o;return s=new h({props:{code:"aW1wb3J0JTIwdG9yY2glMEElMEFpbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIodGV4dCUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIpJTBBdG9rZW5fbG9naXRzJTIwJTNEJTIwbW9kZWwoKippbnB1dHMpLmxvZ2l0cyUwQSUyMyUyMEZpbmQlMjB0aGUlMjBsb2NhdGlvbiUyMG9mJTIwJTVCTUFTSyU1RCUyMGFuZCUyMGV4dHJhY3QlMjBpdHMlMjBsb2dpdHMlMEFtYXNrX3Rva2VuX2luZGV4JTIwJTNEJTIwdG9yY2gud2hlcmUoaW5wdXRzJTVCJTIyaW5wdXRfaWRzJTIyJTVEJTIwJTNEJTNEJTIwdG9rZW5pemVyLm1hc2tfdG9rZW5faWQpJTVCMSU1RCUwQW1hc2tfdG9rZW5fbG9naXRzJTIwJTNEJTIwdG9rZW5fbG9naXRzJTVCMCUyQyUyMG1hc2tfdG9rZW5faW5kZXglMkMlMjAlM0ElNUQlMEElMjMlMjBQaWNrJTIwdGhlJTIwJTVCTUFTSyU1RCUyMGNhbmRpZGF0ZXMlMjB3aXRoJTIwdGhlJTIwaGlnaGVzdCUyMGxvZ2l0cyUwQXRvcF81X3Rva2VucyUyMCUzRCUyMHRvcmNoLnRvcGsobWFza190b2tlbl9sb2dpdHMlMkMlMjA1JTJDJTIwZGltJTNEMSkuaW5kaWNlcyU1QjAlNUQudG9saXN0KCklMEElMEFmb3IlMjB0b2tlbiUyMGluJTIwdG9wXzVfdG9rZW5zJTNBJTBBJTIwJTIwJTIwJTIwcHJpbnQoZiUyMiclM0UlM0UlM0UlMjAlN0J0ZXh0LnJlcGxhY2UodG9rZW5pemVyLm1hc2tfdG9rZW4lMkMlMjB0b2tlbml6ZXIuZGVjb2RlKCU1QnRva2VuJTVEKSklN0QnJTIyKQ==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| inputs = tokenizer(text, return_tensors=<span class="hljs-string">"pt"</span>) | |
| token_logits = model(**inputs).logits | |
| <span class="hljs-comment"># Find the location of [MASK] and extract its logits</span> | |
| mask_token_index = torch.where(inputs[<span class="hljs-string">"input_ids"</span>] == tokenizer.mask_token_id)[<span class="hljs-number">1</span>] | |
| mask_token_logits = token_logits[<span class="hljs-number">0</span>, mask_token_index, :] | |
| <span class="hljs-comment"># Pick the [MASK] candidates with the highest logits</span> | |
| top_5_tokens = torch.topk(mask_token_logits, <span class="hljs-number">5</span>, dim=<span class="hljs-number">1</span>).indices[<span class="hljs-number">0</span>].tolist() | |
| <span class="hljs-keyword">for</span> token <span class="hljs-keyword">in</span> top_5_tokens: | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"'>>> <span class="hljs-subst">{text.replace(tokenizer.mask_token, tokenizer.decode([token]))}</span>'"</span>)`,wrap:!1}}),{c(){d(s.$$.fragment)},l(n){m(s.$$.fragment,n)},m(n,u){y(s,n,u),o=!0},i(n){o||(p(s.$$.fragment,n),o=!0)},o(n){c(s.$$.fragment,n),o=!1},d(n){J(s,n)}}}function bi(I){let s,o="✏️ <strong>Încearcă!</strong> Creați un sample aleatoriu din segmentul <code>unsupervised</code> și verificați că labelurile nu sunt nici <code>0</code>, nici <code>1</code>. În același timp, ați putea verifica și dacă labelurile din segmentele <code>train</code> și <code>test</code> sunt într-adevăr <code>0</code> sau <code>1</code> - aceasta este o verificare utilă pe care orice practicant NLP ar trebui să o efectueze la începutul unui nou proiect!";return{c(){s=U("p"),s.innerHTML=o},l(n){s=T(n,"P",{"data-svelte-h":!0}),j(s)!=="svelte-1ecg9iy"&&(s.innerHTML=o)},m(n,u){a(n,s,u)},p:Ee,d(n){n&&t(s)}}}function hi(I){let s,o='✏️ <strong>Încearcă!</strong> Unele modele Transformer, precum <a href="https://huggingface.co/google/bigbird-roberta-base" rel="nofollow">BigBird</a> și <a href="hf.co/allenai/longformer-base-4096">Longformer</a>, au o lungime de context mult mai mare decât BERT și alte modele Transformer mai vechi. Inițializați tokenizerul pentru unul dintre aceste checkpointuri și verificați dacă <code>model_max_length</code> este în concordanță cu ceea ce este menționat pe model card.';return{c(){s=U("p"),s.innerHTML=o},l(n){s=T(n,"P",{"data-svelte-h":!0}),j(s)!=="svelte-1wypaju"&&(s.innerHTML=o)},m(n,u){a(n,s,u)},p:Ee,d(n){n&&t(s)}}}function fi(I){let s,o="Rețineți că utilizarea unei dimensiuni mici a chunkurilor poate fi dăunător în scenariile din lumea reală, astfel încât ar trebui să utilizați o dimensiune care corespunde cazului de utilizare la care veți aplica modelul.";return{c(){s=U("p"),s.textContent=o},l(n){s=T(n,"P",{"data-svelte-h":!0}),j(s)!=="svelte-tdcvyb"&&(s.textContent=o)},m(n,u){a(n,s,u)},p:Ee,d(n){n&&t(s)}}}function Ii(I){let s,o="✏️ <strong>Încercați!</strong> Rulați fragmentul de cod de mai sus de mai multe ori pentru a vedea cum se întâmplă mascarea aleatorie în fața ochilor voștri! De asemenea, înlocuiți metoda <code>tokenizer.decode()</code> cu <code>tokenizer.convert_ids_to_tokens()</code> pentru a vedea că uneori un singur token dintr-un cuvânt dat este mascat, și nu celelalte.";return{c(){s=U("p"),s.innerHTML=o},l(n){s=T(n,"P",{"data-svelte-h":!0}),j(s)!=="svelte-cf6kd0"&&(s.innerHTML=o)},m(n,u){a(n,s,u)},p:Ee,d(n){n&&t(s)}}}function li(I){let s,o="Un efect secundar al mascării aleatorii este faptul că metricile noastre de evaluare nu vor fi deterministe atunci când folosim <code>Trainer</code>, deoarece folosim același data collator pentru seturile de antrenare și testare. Vom vedea mai târziu, când ne vom uita la aplicarea fine-tuningului cu 🤗 Accelerate, cum putem folosi flexibilitatea unei bucle de evaluare personalizate pentru a îngheța caracterul aleatoriu.";return{c(){s=U("p"),s.innerHTML=o},l(n){s=T(n,"P",{"data-svelte-h":!0}),j(s)!=="svelte-skx281"&&(s.innerHTML=o)},m(n,u){a(n,s,u)},d(n){n&&t(s)}}}function Ci(I){let s,o;return s=new h({props:{code:"aW1wb3J0JTIwY29sbGVjdGlvbnMlMEFpbXBvcnQlMjBudW1weSUyMGFzJTIwbnAlMEElMEFmcm9tJTIwdHJhbnNmb3JtZXJzLmRhdGEuZGF0YV9jb2xsYXRvciUyMGltcG9ydCUyMHRmX2RlZmF1bHRfZGF0YV9jb2xsYXRvciUwQSUwQXd3bV9wcm9iYWJpbGl0eSUyMCUzRCUyMDAuMiUwQSUwQSUwQWRlZiUyMHdob2xlX3dvcmRfbWFza2luZ19kYXRhX2NvbGxhdG9yKGZlYXR1cmVzKSUzQSUwQSUyMCUyMCUyMCUyMGZvciUyMGZlYXR1cmUlMjBpbiUyMGZlYXR1cmVzJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwd29yZF9pZHMlMjAlM0QlMjBmZWF0dXJlLnBvcCglMjJ3b3JkX2lkcyUyMiklMEElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjMlMjBDcmVhdGUlMjBhJTIwbWFwJTIwYmV0d2VlbiUyMHdvcmRzJTIwYW5kJTIwY29ycmVzcG9uZGluZyUyMHRva2VuJTIwaW5kaWNlcyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG1hcHBpbmclMjAlM0QlMjBjb2xsZWN0aW9ucy5kZWZhdWx0ZGljdChsaXN0KSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGN1cnJlbnRfd29yZF9pbmRleCUyMCUzRCUyMC0xJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwY3VycmVudF93b3JkJTIwJTNEJTIwTm9uZSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGZvciUyMGlkeCUyQyUyMHdvcmRfaWQlMjBpbiUyMGVudW1lcmF0ZSh3b3JkX2lkcyklM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBpZiUyMHdvcmRfaWQlMjBpcyUyMG5vdCUyME5vbmUlM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBpZiUyMHdvcmRfaWQlMjAhJTNEJTIwY3VycmVudF93b3JkJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwY3VycmVudF93b3JkJTIwJTNEJTIwd29yZF9pZCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGN1cnJlbnRfd29yZF9pbmRleCUyMCUyQiUzRCUyMDElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBtYXBwaW5nJTVCY3VycmVudF93b3JkX2luZGV4JTVELmFwcGVuZChpZHgpJTBBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIzJTIwUmFuZG9tbHklMjBtYXNrJTIwd29yZHMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBtYXNrJTIwJTNEJTIwbnAucmFuZG9tLmJpbm9taWFsKDElMkMlMjB3d21fcHJvYmFiaWxpdHklMkMlMjAobGVuKG1hcHBpbmcpJTJDKSklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBpbnB1dF9pZHMlMjAlM0QlMjBmZWF0dXJlJTVCJTIyaW5wdXRfaWRzJTIyJTVEJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbGFiZWxzJTIwJTNEJTIwZmVhdHVyZSU1QiUyMmxhYmVscyUyMiU1RCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG5ld19sYWJlbHMlMjAlM0QlMjAlNUItMTAwJTVEJTIwKiUyMGxlbihsYWJlbHMpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZm9yJTIwd29yZF9pZCUyMGluJTIwbnAud2hlcmUobWFzayklNUIwJTVEJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwd29yZF9pZCUyMCUzRCUyMHdvcmRfaWQuaXRlbSgpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZm9yJTIwaWR4JTIwaW4lMjBtYXBwaW5nJTVCd29yZF9pZCU1RCUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG5ld19sYWJlbHMlNUJpZHglNUQlMjAlM0QlMjBsYWJlbHMlNUJpZHglNUQlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBpbnB1dF9pZHMlNUJpZHglNUQlMjAlM0QlMjB0b2tlbml6ZXIubWFza190b2tlbl9pZCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGZlYXR1cmUlNUIlMjJsYWJlbHMlMjIlNUQlMjAlM0QlMjBuZXdfbGFiZWxzJTBBJTBBJTIwJTIwJTIwJTIwcmV0dXJuJTIwdGZfZGVmYXVsdF9kYXRhX2NvbGxhdG9yKGZlYXR1cmVzKQ==",highlighted:`<span class="hljs-keyword">import</span> collections | |
| <span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| <span class="hljs-keyword">from</span> transformers.data.data_collator <span class="hljs-keyword">import</span> tf_default_data_collator | |
| wwm_probability = <span class="hljs-number">0.2</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">whole_word_masking_data_collator</span>(<span class="hljs-params">features</span>): | |
| <span class="hljs-keyword">for</span> feature <span class="hljs-keyword">in</span> features: | |
| word_ids = feature.pop(<span class="hljs-string">"word_ids"</span>) | |
| <span class="hljs-comment"># Create a map between words and corresponding token indices</span> | |
| mapping = collections.defaultdict(<span class="hljs-built_in">list</span>) | |
| current_word_index = -<span class="hljs-number">1</span> | |
| current_word = <span class="hljs-literal">None</span> | |
| <span class="hljs-keyword">for</span> idx, word_id <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(word_ids): | |
| <span class="hljs-keyword">if</span> word_id <span class="hljs-keyword">is</span> <span class="hljs-keyword">not</span> <span class="hljs-literal">None</span>: | |
| <span class="hljs-keyword">if</span> word_id != current_word: | |
| current_word = word_id | |
| current_word_index += <span class="hljs-number">1</span> | |
| mapping[current_word_index].append(idx) | |
| <span class="hljs-comment"># Randomly mask words</span> | |
| mask = np.random.binomial(<span class="hljs-number">1</span>, wwm_probability, (<span class="hljs-built_in">len</span>(mapping),)) | |
| input_ids = feature[<span class="hljs-string">"input_ids"</span>] | |
| labels = feature[<span class="hljs-string">"labels"</span>] | |
| new_labels = [-<span class="hljs-number">100</span>] * <span class="hljs-built_in">len</span>(labels) | |
| <span class="hljs-keyword">for</span> word_id <span class="hljs-keyword">in</span> np.where(mask)[<span class="hljs-number">0</span>]: | |
| word_id = word_id.item() | |
| <span class="hljs-keyword">for</span> idx <span class="hljs-keyword">in</span> mapping[word_id]: | |
| new_labels[idx] = labels[idx] | |
| input_ids[idx] = tokenizer.mask_token_id | |
| feature[<span class="hljs-string">"labels"</span>] = new_labels | |
| <span class="hljs-keyword">return</span> tf_default_data_collator(features)`,wrap:!1}}),{c(){d(s.$$.fragment)},l(n){m(s.$$.fragment,n)},m(n,u){y(s,n,u),o=!0},i(n){o||(p(s.$$.fragment,n),o=!0)},o(n){c(s.$$.fragment,n),o=!1},d(n){J(s,n)}}}function _i(I){let s,o;return s=new h({props:{code:"aW1wb3J0JTIwY29sbGVjdGlvbnMlMEFpbXBvcnQlMjBudW1weSUyMGFzJTIwbnAlMEElMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwZGVmYXVsdF9kYXRhX2NvbGxhdG9yJTBBJTBBd3dtX3Byb2JhYmlsaXR5JTIwJTNEJTIwMC4yJTBBJTBBJTBBZGVmJTIwd2hvbGVfd29yZF9tYXNraW5nX2RhdGFfY29sbGF0b3IoZmVhdHVyZXMpJTNBJTBBJTIwJTIwJTIwJTIwZm9yJTIwZmVhdHVyZSUyMGluJTIwZmVhdHVyZXMlM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjB3b3JkX2lkcyUyMCUzRCUyMGZlYXR1cmUucG9wKCUyMndvcmRfaWRzJTIyKSUwQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMyUyMENyZWF0ZSUyMGElMjBtYXAlMjBiZXR3ZWVuJTIwd29yZHMlMjBhbmQlMjBjb3JyZXNwb25kaW5nJTIwdG9rZW4lMjBpbmRpY2VzJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbWFwcGluZyUyMCUzRCUyMGNvbGxlY3Rpb25zLmRlZmF1bHRkaWN0KGxpc3QpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwY3VycmVudF93b3JkX2luZGV4JTIwJTNEJTIwLTElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBjdXJyZW50X3dvcmQlMjAlM0QlMjBOb25lJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZm9yJTIwaWR4JTJDJTIwd29yZF9pZCUyMGluJTIwZW51bWVyYXRlKHdvcmRfaWRzKSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGlmJTIwd29yZF9pZCUyMGlzJTIwbm90JTIwTm9uZSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGlmJTIwd29yZF9pZCUyMCElM0QlMjBjdXJyZW50X3dvcmQlM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBjdXJyZW50X3dvcmQlMjAlM0QlMjB3b3JkX2lkJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwY3VycmVudF93b3JkX2luZGV4JTIwJTJCJTNEJTIwMSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG1hcHBpbmclNUJjdXJyZW50X3dvcmRfaW5kZXglNUQuYXBwZW5kKGlkeCklMEElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjMlMjBSYW5kb21seSUyMG1hc2slMjB3b3JkcyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG1hc2slMjAlM0QlMjBucC5yYW5kb20uYmlub21pYWwoMSUyQyUyMHd3bV9wcm9iYWJpbGl0eSUyQyUyMChsZW4obWFwcGluZyklMkMpKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGlucHV0X2lkcyUyMCUzRCUyMGZlYXR1cmUlNUIlMjJpbnB1dF9pZHMlMjIlNUQlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBsYWJlbHMlMjAlM0QlMjBmZWF0dXJlJTVCJTIybGFiZWxzJTIyJTVEJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbmV3X2xhYmVscyUyMCUzRCUyMCU1Qi0xMDAlNUQlMjAqJTIwbGVuKGxhYmVscyklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBmb3IlMjB3b3JkX2lkJTIwaW4lMjBucC53aGVyZShtYXNrKSU1QjAlNUQlM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjB3b3JkX2lkJTIwJTNEJTIwd29yZF9pZC5pdGVtKCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBmb3IlMjBpZHglMjBpbiUyMG1hcHBpbmclNUJ3b3JkX2lkJTVEJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbmV3X2xhYmVscyU1QmlkeCU1RCUyMCUzRCUyMGxhYmVscyU1QmlkeCU1RCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGlucHV0X2lkcyU1QmlkeCU1RCUyMCUzRCUyMHRva2VuaXplci5tYXNrX3Rva2VuX2lkJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZmVhdHVyZSU1QiUyMmxhYmVscyUyMiU1RCUyMCUzRCUyMG5ld19sYWJlbHMlMEElMEElMjAlMjAlMjAlMjByZXR1cm4lMjBkZWZhdWx0X2RhdGFfY29sbGF0b3IoZmVhdHVyZXMp",highlighted:`<span class="hljs-keyword">import</span> collections | |
| <span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> default_data_collator | |
| wwm_probability = <span class="hljs-number">0.2</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">whole_word_masking_data_collator</span>(<span class="hljs-params">features</span>): | |
| <span class="hljs-keyword">for</span> feature <span class="hljs-keyword">in</span> features: | |
| word_ids = feature.pop(<span class="hljs-string">"word_ids"</span>) | |
| <span class="hljs-comment"># Create a map between words and corresponding token indices</span> | |
| mapping = collections.defaultdict(<span class="hljs-built_in">list</span>) | |
| current_word_index = -<span class="hljs-number">1</span> | |
| current_word = <span class="hljs-literal">None</span> | |
| <span class="hljs-keyword">for</span> idx, word_id <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(word_ids): | |
| <span class="hljs-keyword">if</span> word_id <span class="hljs-keyword">is</span> <span class="hljs-keyword">not</span> <span class="hljs-literal">None</span>: | |
| <span class="hljs-keyword">if</span> word_id != current_word: | |
| current_word = word_id | |
| current_word_index += <span class="hljs-number">1</span> | |
| mapping[current_word_index].append(idx) | |
| <span class="hljs-comment"># Randomly mask words</span> | |
| mask = np.random.binomial(<span class="hljs-number">1</span>, wwm_probability, (<span class="hljs-built_in">len</span>(mapping),)) | |
| input_ids = feature[<span class="hljs-string">"input_ids"</span>] | |
| labels = feature[<span class="hljs-string">"labels"</span>] | |
| new_labels = [-<span class="hljs-number">100</span>] * <span class="hljs-built_in">len</span>(labels) | |
| <span class="hljs-keyword">for</span> word_id <span class="hljs-keyword">in</span> np.where(mask)[<span class="hljs-number">0</span>]: | |
| word_id = word_id.item() | |
| <span class="hljs-keyword">for</span> idx <span class="hljs-keyword">in</span> mapping[word_id]: | |
| new_labels[idx] = labels[idx] | |
| input_ids[idx] = tokenizer.mask_token_id | |
| feature[<span class="hljs-string">"labels"</span>] = new_labels | |
| <span class="hljs-keyword">return</span> default_data_collator(features)`,wrap:!1}}),{c(){d(s.$$.fragment)},l(n){m(s.$$.fragment,n)},m(n,u){y(s,n,u),o=!0},i(n){o||(p(s.$$.fragment,n),o=!0)},o(n){c(s.$$.fragment,n),o=!1},d(n){J(s,n)}}}function gi(I){let s,o="✏️ <strong>Încercați!</strong> Rulați fragmentul de cod de mai sus de mai multe ori pentru a vedea cum se întâmplă mascarea aleatorie în fața ochilor voștri! De asemenea, înlocuiți metoda <code>tokenizer.decode()</code> cu <code>tokenizer.convert_ids_to_tokens()</code> pentru a vedea că tokenii dintr-un cuvânt dat sunt întotdeauna mascați împreună.";return{c(){s=U("p"),s.innerHTML=o},l(n){s=T(n,"P",{"data-svelte-h":!0}),j(s)!=="svelte-1awm0x5"&&(s.innerHTML=o)},m(n,u){a(n,s,u)},p:Ee,d(n){n&&t(s)}}}function Gi(I){let s,o="Odată ce suntem conectați, putem specifica argumentele pentru <code>Trainer</code>:",n,u,g,w,R="Aici am modificat câteva dintre opțiunile implicite, inclusiv <code>logging_steps</code> pentru a ne asigura că urmărim pierderea de antrenare cu fiecare epocă. De asemenea, am folosit <code>fp16=True</code> pentru a activa antrenarea cu precizie mixtă, ceea ce ne oferă un alt impuls vitezei. În mod implicit, <code>Trainer</code> va elimina toate coloanele care nu fac parte din metoda <code>forward()</code> a modelului. Aceasta înseamnă că, dacă utilizați whole word masking collator, va trebui să setați și <code>remove_unused_columns=False</code> pentru a vă asigura că nu pierdem coloana <code>word_ids</code> în timpul antrenamentului.",z,_,v='Rețineți că puteți specifica numele repositoriului către care doriți să faceți push cu argumentul <code>hub_model_id</code> (în special, va trebui să utilizați acest argument pentru a face push către o organizație). De exemplu, atunci când am făcut push modelului către organizația <a href="https://huggingface.co/huggingface-course" rel="nofollow"><code>huggingface-course</code></a>, am adăugat <code>hub_model_id="huggingface-course/distilbert-finetuned-imdb"</code> la <code>TrainingArguments</code>. În mod implicit, repositoriul utilizat va fi în namespaceul vostru și denumit după output directory-ul pe care l-ați stabilit, deci în cazul nostru va fi <code>"lewtun/distilbert-finetuned-imdb"</code>.',k,B,b="Acum avem toate ingredientele pentru inițializarea <code>Trainer</code>. Aici folosim doar <code>data_collator</code> standard, dar puteți încerca whole word masking collator și să comparați rezultatele ca un exercițiu:",G,W,Q,C,X="Acum suntem gata să rulăm <code>trainer.train()</code> - dar înainte de a face acest lucru, să analizăm pe scurt <em>perplexitatea</em>, care este o metrică comună de evaluare a performanței modelelor de limbaj.",E;return u=new h({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMFRyYWluaW5nQXJndW1lbnRzJTBBJTBBYmF0Y2hfc2l6ZSUyMCUzRCUyMDY0JTBBJTIzJTIwU2hvdyUyMHRoZSUyMHRyYWluaW5nJTIwbG9zcyUyMHdpdGglMjBldmVyeSUyMGVwb2NoJTBBbG9nZ2luZ19zdGVwcyUyMCUzRCUyMGxlbihkb3duc2FtcGxlZF9kYXRhc2V0JTVCJTIydHJhaW4lMjIlNUQpJTIwJTJGJTJGJTIwYmF0Y2hfc2l6ZSUwQW1vZGVsX25hbWUlMjAlM0QlMjBtb2RlbF9jaGVja3BvaW50LnNwbGl0KCUyMiUyRiUyMiklNUItMSU1RCUwQSUwQXRyYWluaW5nX2FyZ3MlMjAlM0QlMjBUcmFpbmluZ0FyZ3VtZW50cyglMEElMjAlMjAlMjAlMjBvdXRwdXRfZGlyJTNEZiUyMiU3Qm1vZGVsX25hbWUlN0QtZmluZXR1bmVkLWltZGIlMjIlMkMlMEElMjAlMjAlMjAlMjBvdmVyd3JpdGVfb3V0cHV0X2RpciUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjBldmFsdWF0aW9uX3N0cmF0ZWd5JTNEJTIyZXBvY2glMjIlMkMlMEElMjAlMjAlMjAlMjBsZWFybmluZ19yYXRlJTNEMmUtNSUyQyUwQSUyMCUyMCUyMCUyMHdlaWdodF9kZWNheSUzRDAuMDElMkMlMEElMjAlMjAlMjAlMjBwZXJfZGV2aWNlX3RyYWluX2JhdGNoX3NpemUlM0RiYXRjaF9zaXplJTJDJTBBJTIwJTIwJTIwJTIwcGVyX2RldmljZV9ldmFsX2JhdGNoX3NpemUlM0RiYXRjaF9zaXplJTJDJTBBJTIwJTIwJTIwJTIwcHVzaF90b19odWIlM0RUcnVlJTJDJTBBJTIwJTIwJTIwJTIwZnAxNiUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjBsb2dnaW5nX3N0ZXBzJTNEbG9nZ2luZ19zdGVwcyUyQyUwQSk=",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> TrainingArguments | |
| batch_size = <span class="hljs-number">64</span> | |
| <span class="hljs-comment"># Show the training loss with every epoch</span> | |
| logging_steps = <span class="hljs-built_in">len</span>(downsampled_dataset[<span class="hljs-string">"train"</span>]) // batch_size | |
| model_name = model_checkpoint.split(<span class="hljs-string">"/"</span>)[-<span class="hljs-number">1</span>] | |
| training_args = TrainingArguments( | |
| output_dir=<span class="hljs-string">f"<span class="hljs-subst">{model_name}</span>-finetuned-imdb"</span>, | |
| overwrite_output_dir=<span class="hljs-literal">True</span>, | |
| evaluation_strategy=<span class="hljs-string">"epoch"</span>, | |
| learning_rate=<span class="hljs-number">2e-5</span>, | |
| weight_decay=<span class="hljs-number">0.01</span>, | |
| per_device_train_batch_size=batch_size, | |
| per_device_eval_batch_size=batch_size, | |
| push_to_hub=<span class="hljs-literal">True</span>, | |
| fp16=<span class="hljs-literal">True</span>, | |
| logging_steps=logging_steps, | |
| )`,wrap:!1}}),W=new h({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMFRyYWluZXIlMEElMEF0cmFpbmVyJTIwJTNEJTIwVHJhaW5lciglMEElMjAlMjAlMjAlMjBtb2RlbCUzRG1vZGVsJTJDJTBBJTIwJTIwJTIwJTIwYXJncyUzRHRyYWluaW5nX2FyZ3MlMkMlMEElMjAlMjAlMjAlMjB0cmFpbl9kYXRhc2V0JTNEZG93bnNhbXBsZWRfZGF0YXNldCU1QiUyMnRyYWluJTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwZXZhbF9kYXRhc2V0JTNEZG93bnNhbXBsZWRfZGF0YXNldCU1QiUyMnRlc3QlMjIlNUQlMkMlMEElMjAlMjAlMjAlMjBkYXRhX2NvbGxhdG9yJTNEZGF0YV9jb2xsYXRvciUyQyUwQSUyMCUyMCUyMCUyMHRva2VuaXplciUzRHRva2VuaXplciUyQyUwQSk=",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> Trainer | |
| trainer = Trainer( | |
| model=model, | |
| args=training_args, | |
| train_dataset=downsampled_dataset[<span class="hljs-string">"train"</span>], | |
| eval_dataset=downsampled_dataset[<span class="hljs-string">"test"</span>], | |
| data_collator=data_collator, | |
| tokenizer=tokenizer, | |
| )`,wrap:!1}}),{c(){s=U("p"),s.innerHTML=o,n=i(),d(u.$$.fragment),g=i(),w=U("p"),w.innerHTML=R,z=i(),_=U("p"),_.innerHTML=v,k=i(),B=U("p"),B.innerHTML=b,G=i(),d(W.$$.fragment),Q=i(),C=U("p"),C.innerHTML=X},l(Z){s=T(Z,"P",{"data-svelte-h":!0}),j(s)!=="svelte-1g4pnjh"&&(s.innerHTML=o),n=r(Z),m(u.$$.fragment,Z),g=r(Z),w=T(Z,"P",{"data-svelte-h":!0}),j(w)!=="svelte-1ntxxw7"&&(w.innerHTML=R),z=r(Z),_=T(Z,"P",{"data-svelte-h":!0}),j(_)!=="svelte-cxhak2"&&(_.innerHTML=v),k=r(Z),B=T(Z,"P",{"data-svelte-h":!0}),j(B)!=="svelte-1d8nnn6"&&(B.innerHTML=b),G=r(Z),m(W.$$.fragment,Z),Q=r(Z),C=T(Z,"P",{"data-svelte-h":!0}),j(C)!=="svelte-7nvjao"&&(C.innerHTML=X)},m(Z,$){a(Z,s,$),a(Z,n,$),y(u,Z,$),a(Z,g,$),a(Z,w,$),a(Z,z,$),a(Z,_,$),a(Z,k,$),a(Z,B,$),a(Z,G,$),y(W,Z,$),a(Z,Q,$),a(Z,C,$),E=!0},i(Z){E||(p(u.$$.fragment,Z),p(W.$$.fragment,Z),E=!0)},o(Z){c(u.$$.fragment,Z),c(W.$$.fragment,Z),E=!1},d(Z){Z&&(t(s),t(n),t(g),t(w),t(z),t(_),t(k),t(B),t(G),t(Q),t(C)),J(u,Z),J(W,Z)}}}function Zi(I){let s,o="Odată ce ne-am conectat, putem crea dataseturile <code>tf.data</code>. Pentru a face acest lucru, vom utiliza metoda <code>prepare_tf_dataset()</code>, care utilizează modelul nostru pentru a deduce automat ce coloane ar trebui să intre în dataset. Dacă doriți să controlați exact ce coloane să utilizați, puteți folosi în schimb metoda <code>Dataset.to_tf_dataset()</code>. Pentru a simplifica lucrurile, vom utiliza aici doar data collatorul standard, dar puteți încerca și whole word masking collator și puteți compara rezultatele ca un exercițiu:",n,u,g,w,R="În continuare, setăm hiperparametrii de antrenare și compilăm modelul nostru. Utilizăm funcția <code>create_optimizer()</code> din biblioteca 🤗 Transformers, care ne oferă un optimizator <code>AdamW</code> cu o scădere liniară a ratei de învățare. Utilizăm, de asemenea, pierderea încorporată în model, care este cea implicită atunci când nu este specificată nicio pierdere ca argument pentru <code>compile()</code>, și setăm precizia de antrenare la <code>"mixed_float16"</code>. Rețineți că, dacă utilizați un GPU Colab sau alt GPU care nu are suport accelerat pentru float16, ar trebui probabil să comentați această linie.",z,_,v='În plus, am configurat un <code>PushToHubCallback</code> care va salva modelul în Hub după fiecare epocă. Puteți specifica numele repositoriului către care doriți să faceți push cu argumentul <code>hub_model_id</code> (în special, va trebui să utilizați acest argument pentru a face push către o organizație). De exemplu, pentru a trimite modelul către organizația <a href="https://huggingface.co/huggingface-course" rel="nofollow"><code>huggingface-course</code></a>, am adăugat <code>hub_model_id="huggingface-course/distilbert-finetuned-imdb"</code>. În mod implicit, repositoriul utilizat va fi în namespaceul vostru și numit după output directory-ul pe care l-ați stabilit, deci în cazul nostru va fi <code>"lewtun/distilbert-finetuned-imdb"</code>.',k,B,b,G,W="Acum suntem gata să executăm <code>model.fit()</code> - dar înainte de a face acest lucru, să ne uităm pe scurt la <em>perplexitate</em>, care este o metrică comună pentru a evalua performanța modelelor de limbaj.",Q;return u=new h({props:{code:"dGZfdHJhaW5fZGF0YXNldCUyMCUzRCUyMG1vZGVsLnByZXBhcmVfdGZfZGF0YXNldCglMEElMjAlMjAlMjAlMjBkb3duc2FtcGxlZF9kYXRhc2V0JTVCJTIydHJhaW4lMjIlNUQlMkMlMEElMjAlMjAlMjAlMjBjb2xsYXRlX2ZuJTNEZGF0YV9jb2xsYXRvciUyQyUwQSUyMCUyMCUyMCUyMHNodWZmbGUlM0RUcnVlJTJDJTBBJTIwJTIwJTIwJTIwYmF0Y2hfc2l6ZSUzRDMyJTJDJTBBKSUwQSUwQXRmX2V2YWxfZGF0YXNldCUyMCUzRCUyMG1vZGVsLnByZXBhcmVfdGZfZGF0YXNldCglMEElMjAlMjAlMjAlMjBkb3duc2FtcGxlZF9kYXRhc2V0JTVCJTIydGVzdCUyMiU1RCUyQyUwQSUyMCUyMCUyMCUyMGNvbGxhdGVfZm4lM0RkYXRhX2NvbGxhdG9yJTJDJTBBJTIwJTIwJTIwJTIwc2h1ZmZsZSUzREZhbHNlJTJDJTBBJTIwJTIwJTIwJTIwYmF0Y2hfc2l6ZSUzRDMyJTJDJTBBKQ==",highlighted:`tf_train_dataset = model.prepare_tf_dataset( | |
| downsampled_dataset[<span class="hljs-string">"train"</span>], | |
| collate_fn=data_collator, | |
| shuffle=<span class="hljs-literal">True</span>, | |
| batch_size=<span class="hljs-number">32</span>, | |
| ) | |
| tf_eval_dataset = model.prepare_tf_dataset( | |
| downsampled_dataset[<span class="hljs-string">"test"</span>], | |
| collate_fn=data_collator, | |
| shuffle=<span class="hljs-literal">False</span>, | |
| batch_size=<span class="hljs-number">32</span>, | |
| )`,wrap:!1}}),B=new h({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMGNyZWF0ZV9vcHRpbWl6ZXIlMEFmcm9tJTIwdHJhbnNmb3JtZXJzLmtlcmFzX2NhbGxiYWNrcyUyMGltcG9ydCUyMFB1c2hUb0h1YkNhbGxiYWNrJTBBaW1wb3J0JTIwdGVuc29yZmxvdyUyMGFzJTIwdGYlMEElMEFudW1fdHJhaW5fc3RlcHMlMjAlM0QlMjBsZW4odGZfdHJhaW5fZGF0YXNldCklMEFvcHRpbWl6ZXIlMkMlMjBzY2hlZHVsZSUyMCUzRCUyMGNyZWF0ZV9vcHRpbWl6ZXIoJTBBJTIwJTIwJTIwJTIwaW5pdF9sciUzRDJlLTUlMkMlMEElMjAlMjAlMjAlMjBudW1fd2FybXVwX3N0ZXBzJTNEMV8wMDAlMkMlMEElMjAlMjAlMjAlMjBudW1fdHJhaW5fc3RlcHMlM0RudW1fdHJhaW5fc3RlcHMlMkMlMEElMjAlMjAlMjAlMjB3ZWlnaHRfZGVjYXlfcmF0ZSUzRDAuMDElMkMlMEEpJTBBbW9kZWwuY29tcGlsZShvcHRpbWl6ZXIlM0RvcHRpbWl6ZXIpJTBBJTBBJTIzJTIwVHJhaW4lMjBpbiUyMG1peGVkLXByZWNpc2lvbiUyMGZsb2F0MTYlMEF0Zi5rZXJhcy5taXhlZF9wcmVjaXNpb24uc2V0X2dsb2JhbF9wb2xpY3koJTIybWl4ZWRfZmxvYXQxNiUyMiklMEElMEFtb2RlbF9uYW1lJTIwJTNEJTIwbW9kZWxfY2hlY2twb2ludC5zcGxpdCglMjIlMkYlMjIpJTVCLTElNUQlMEFjYWxsYmFjayUyMCUzRCUyMFB1c2hUb0h1YkNhbGxiYWNrKCUwQSUyMCUyMCUyMCUyMG91dHB1dF9kaXIlM0RmJTIyJTdCbW9kZWxfbmFtZSU3RC1maW5ldHVuZWQtaW1kYiUyMiUyQyUyMHRva2VuaXplciUzRHRva2VuaXplciUwQSk=",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> create_optimizer | |
| <span class="hljs-keyword">from</span> transformers.keras_callbacks <span class="hljs-keyword">import</span> PushToHubCallback | |
| <span class="hljs-keyword">import</span> tensorflow <span class="hljs-keyword">as</span> tf | |
| num_train_steps = <span class="hljs-built_in">len</span>(tf_train_dataset) | |
| optimizer, schedule = create_optimizer( | |
| init_lr=<span class="hljs-number">2e-5</span>, | |
| num_warmup_steps=<span class="hljs-number">1_000</span>, | |
| num_train_steps=num_train_steps, | |
| weight_decay_rate=<span class="hljs-number">0.01</span>, | |
| ) | |
| model.<span class="hljs-built_in">compile</span>(optimizer=optimizer) | |
| <span class="hljs-comment"># Train in mixed-precision float16</span> | |
| tf.keras.mixed_precision.set_global_policy(<span class="hljs-string">"mixed_float16"</span>) | |
| model_name = model_checkpoint.split(<span class="hljs-string">"/"</span>)[-<span class="hljs-number">1</span>] | |
| callback = PushToHubCallback( | |
| output_dir=<span class="hljs-string">f"<span class="hljs-subst">{model_name}</span>-finetuned-imdb"</span>, tokenizer=tokenizer | |
| )`,wrap:!1}}),{c(){s=U("p"),s.innerHTML=o,n=i(),d(u.$$.fragment),g=i(),w=U("p"),w.innerHTML=R,z=i(),_=U("p"),_.innerHTML=v,k=i(),d(B.$$.fragment),b=i(),G=U("p"),G.innerHTML=W},l(C){s=T(C,"P",{"data-svelte-h":!0}),j(s)!=="svelte-1ew67dq"&&(s.innerHTML=o),n=r(C),m(u.$$.fragment,C),g=r(C),w=T(C,"P",{"data-svelte-h":!0}),j(w)!=="svelte-19uajgb"&&(w.innerHTML=R),z=r(C),_=T(C,"P",{"data-svelte-h":!0}),j(_)!=="svelte-11qfh74"&&(_.innerHTML=v),k=r(C),m(B.$$.fragment,C),b=r(C),G=T(C,"P",{"data-svelte-h":!0}),j(G)!=="svelte-drf4xh"&&(G.innerHTML=W)},m(C,X){a(C,s,X),a(C,n,X),y(u,C,X),a(C,g,X),a(C,w,X),a(C,z,X),a(C,_,X),a(C,k,X),y(B,C,X),a(C,b,X),a(C,G,X),Q=!0},i(C){Q||(p(u.$$.fragment,C),p(B.$$.fragment,C),Q=!0)},o(C){c(u.$$.fragment,C),c(B.$$.fragment,C),Q=!1},d(C){C&&(t(s),t(n),t(g),t(w),t(z),t(_),t(k),t(b),t(G)),J(u,C),J(B,C)}}}function Bi(I){let s,o="Presupunând că setul nostru de testare constă în cea mai mare parte din propoziții corecte din punct de vedere gramatical, atunci o modalitate de a măsura calitatea modelului nostru lingvistic este de a calcula probabilitățile pe care le atribuie următorului cuvânt în toate propozițiile din setul de testare. Probabilitățile ridicate indică faptul că modelul indică faptul că modelul nu este “surprins” sau “perplex” de exemplele nevăzute și sugerează că a învățat modelele de bază ale gramaticii limbii. Există diverse definiții matematice ale perplexității, dar cea pe care o vom folosi o definește ca the exponential of the cross-entropy loss. Astfel, putem calcula perplexitatea modelului nostru preantrenat folosind metoda <code>model.evaluate()</code> pentru a calcula pierderea de entropie încrucișată pe setul de testare și apoi luând exponențiala rezultatului:",n,u,g;return u=new h({props:{code:"aW1wb3J0JTIwbWF0aCUwQSUwQWV2YWxfbG9zcyUyMCUzRCUyMG1vZGVsLmV2YWx1YXRlKHRmX2V2YWxfZGF0YXNldCklMEFwcmludChmJTIyUGVycGxleGl0eSUzQSUyMCU3Qm1hdGguZXhwKGV2YWxfbG9zcyklM0EuMmYlN0QlMjIp",highlighted:`<span class="hljs-keyword">import</span> math | |
| eval_loss = model.evaluate(tf_eval_dataset) | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"Perplexity: <span class="hljs-subst">{math.exp(eval_loss):<span class="hljs-number">.2</span>f}</span>"</span>)`,wrap:!1}}),{c(){s=U("p"),s.innerHTML=o,n=i(),d(u.$$.fragment)},l(w){s=T(w,"P",{"data-svelte-h":!0}),j(s)!=="svelte-bzoqdy"&&(s.innerHTML=o),n=r(w),m(u.$$.fragment,w)},m(w,R){a(w,s,R),a(w,n,R),y(u,w,R),g=!0},i(w){g||(p(u.$$.fragment,w),g=!0)},o(w){c(u.$$.fragment,w),g=!1},d(w){w&&(t(s),t(n)),J(u,w)}}}function ki(I){let s,o="Presupunând că setul nostru de testare constă în cea mai mare parte din propoziții corecte din punct de vedere gramatical, atunci o modalitate de a măsura calitatea modelului nostru lingvistic este de a calcula probabilitățile pe care le atribuie următorului cuvânt în toate propozițiile din setul de testare. Probabilitatea ridicată indică faptul că modelul nu este “surprins” sau “perplex” de exemplele nevăzute și sugerează că a învățat tiparele gramaticale de bază ale limbii. Există diverse definiții matematice ale perplexității, dar cea pe care o vom utiliza o definește ca the exponential of the cross-entropy loss. Astfel, putem calcula perplexitatea modelului nostru preantrenat utilizând funcția <code>Trainer.evaluate()</code> pentru a calcula pierderea de cross-entropy pe setul de testare și apoi luând exponențiala rezultatului:",n,u,g;return u=new h({props:{code:"aW1wb3J0JTIwbWF0aCUwQSUwQWV2YWxfcmVzdWx0cyUyMCUzRCUyMHRyYWluZXIuZXZhbHVhdGUoKSUwQXByaW50KGYlMjIlM0UlM0UlM0UlMjBQZXJwbGV4aXR5JTNBJTIwJTdCbWF0aC5leHAoZXZhbF9yZXN1bHRzJTVCJ2V2YWxfbG9zcyclNUQpJTNBLjJmJTdEJTIyKQ==",highlighted:`<span class="hljs-keyword">import</span> math | |
| eval_results = trainer.evaluate() | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f">>> Perplexity: <span class="hljs-subst">{math.exp(eval_results[<span class="hljs-string">'eval_loss'</span>]):<span class="hljs-number">.2</span>f}</span>"</span>)`,wrap:!1}}),{c(){s=U("p"),s.innerHTML=o,n=i(),d(u.$$.fragment)},l(w){s=T(w,"P",{"data-svelte-h":!0}),j(s)!=="svelte-104exk3"&&(s.innerHTML=o),n=r(w),m(u.$$.fragment,w)},m(w,R){a(w,s,R),a(w,n,R),y(u,w,R),g=!0},i(w){g||(p(u.$$.fragment,w),g=!0)},o(w){c(u.$$.fragment,w),g=!1},d(w){w&&(t(s),t(n)),J(u,w)}}}function Ri(I){let s,o;return s=new h({props:{code:"bW9kZWwuZml0KHRmX3RyYWluX2RhdGFzZXQlMkMlMjB2YWxpZGF0aW9uX2RhdGElM0R0Zl9ldmFsX2RhdGFzZXQlMkMlMjBjYWxsYmFja3MlM0QlNUJjYWxsYmFjayU1RCk=",highlighted:"model.fit(tf_train_dataset, validation_data=tf_eval_dataset, callbacks=[callback])",wrap:!1}}),{c(){d(s.$$.fragment)},l(n){m(s.$$.fragment,n)},m(n,u){y(s,n,u),o=!0},i(n){o||(p(s.$$.fragment,n),o=!0)},o(n){c(s.$$.fragment,n),o=!1},d(n){J(s,n)}}}function vi(I){let s,o;return s=new h({props:{code:"dHJhaW5lci50cmFpbigp",highlighted:"trainer.train()",wrap:!1}}),{c(){d(s.$$.fragment)},l(n){m(s.$$.fragment,n)},m(n,u){y(s,n,u),o=!0},i(n){o||(p(s.$$.fragment,n),o=!0)},o(n){c(s.$$.fragment,n),o=!1},d(n){J(s,n)}}}function zi(I){let s,o;return s=new h({props:{code:"ZXZhbF9sb3NzJTIwJTNEJTIwbW9kZWwuZXZhbHVhdGUodGZfZXZhbF9kYXRhc2V0KSUwQXByaW50KGYlMjJQZXJwbGV4aXR5JTNBJTIwJTdCbWF0aC5leHAoZXZhbF9sb3NzKSUzQS4yZiU3RCUyMik=",highlighted:`eval_loss = model.evaluate(tf_eval_dataset) | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"Perplexity: <span class="hljs-subst">{math.exp(eval_loss):<span class="hljs-number">.2</span>f}</span>"</span>)`,wrap:!1}}),{c(){d(s.$$.fragment)},l(n){m(s.$$.fragment,n)},m(n,u){y(s,n,u),o=!0},i(n){o||(p(s.$$.fragment,n),o=!0)},o(n){c(s.$$.fragment,n),o=!1},d(n){J(s,n)}}}function Xi(I){let s,o;return s=new h({props:{code:"ZXZhbF9yZXN1bHRzJTIwJTNEJTIwdHJhaW5lci5ldmFsdWF0ZSgpJTBBcHJpbnQoZiUyMiUzRSUzRSUzRSUyMFBlcnBsZXhpdHklM0ElMjAlN0JtYXRoLmV4cChldmFsX3Jlc3VsdHMlNUInZXZhbF9sb3NzJyU1RCklM0EuMmYlN0QlMjIp",highlighted:`eval_results = trainer.evaluate() | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f">>> Perplexity: <span class="hljs-subst">{math.exp(eval_results[<span class="hljs-string">'eval_loss'</span>]):<span class="hljs-number">.2</span>f}</span>"</span>)`,wrap:!1}}),{c(){d(s.$$.fragment)},l(n){m(s.$$.fragment,n)},m(n,u){y(s,n,u),o=!0},i(n){o||(p(s.$$.fragment,n),o=!0)},o(n){c(s.$$.fragment,n),o=!1},d(n){J(s,n)}}}function ti(I){let s,o="Odată ce antrenarea este finalizată, putem trimite cardul modelului cu informațiile de antrenare către Hub (checkpointurile sunt salvate în timpul antrenare):",n,u,g;return u=new h({props:{code:"dHJhaW5lci5wdXNoX3RvX2h1Yigp",highlighted:"trainer.push_to_hub()",wrap:!1}}),{c(){s=U("p"),s.textContent=o,n=i(),d(u.$$.fragment)},l(w){s=T(w,"P",{"data-svelte-h":!0}),j(s)!=="svelte-75txcs"&&(s.textContent=o),n=r(w),m(u.$$.fragment,w)},m(w,R){a(w,s,R),a(w,n,R),y(u,w,R),g=!0},i(w){g||(p(u.$$.fragment,w),g=!0)},o(w){c(u.$$.fragment,w),g=!1},d(w){w&&(t(s),t(n)),J(u,w)}}}function $i(I){let s,o="✏️ <strong>Rândul tău!</strong> Rulați antrenamentul de mai sus după schimbarea data collatorului cu whole word masking collator. Obțineți rezultate mai bune?";return{c(){s=U("p"),s.innerHTML=o},l(n){s=T(n,"P",{"data-svelte-h":!0}),j(s)!=="svelte-3ph1g7"&&(s.innerHTML=o)},m(n,u){a(n,s,u)},p:Ee,d(n){n&&t(s)}}}function ai(I){let s,o="În cazul nostru de utilizare, nu a fost nevoie să facem nimic special cu bucla de antrenare, dar în unele cazuri s-ar putea să fie nevoie să implementați o logică personalizată. Pentru aceste aplicații, puteți utiliza 🤗 Accelerate — să aruncăm o privire!",n,u,g,w,R='Așa cum am văzut cu <code>Trainer</code>, fine-tuningul unui model de limbaj mascat este foarte asemănător cu exemplul de clasificare a textului din <a href="/course/chapter3">Capitolul 3</a>. De fapt, singura subtilitate este utilizarea unui data collator special, pe care l-am abordat mai devreme în această secțiune!',z,_,v="Cu toate acestea, am văzut că <code>DataCollatorForLanguageModeling</code> aplică, de asemenea, o mascare aleatorie cu fiecare evaluare, astfel încât vom vedea unele fluctuații în scorurile noastre de perplexitate cu fiecare rulare de antrenament. O modalitate de a elimina această sursă de dezordine este de a aplica mascarea <em>o singură dată</em> pe întregul set de teste și apoi de a utiliza data collatorul implicit din 🤗 Transformers pentru a colecta batch-urile în timpul evaluării. Pentru a vedea cum funcționează acest lucru, să implementăm o funcție simplă care aplică mascarea pe un batch, similară cu prima noastră întâlnire cu <code>DataCollatorForLanguageModeling</code>:",k,B,b,G,W="În continuare, vom aplica această funcție setului nostru de testare și vom elimina coloanele nemascate pentru a le putea înlocui cu cele mascate. Puteți utiliza whole word masking prin înlocuirea <code>data_collator</code> de mai sus cu cel corespunzător, caz în care trebuie să eliminați prima linie de aici:",Q,C,X,E,Z="Putem configura apoi dataloaderele ca de obicei, dar vom folosi <code>default_data_collator</code> de la 🤗 Transformers pentru setul de evaluare:",$,fe,Ge,H,Yt="De aici, vom urma pașii standard cu 🤗 Accelerate. În primul rând, se încarcă o versiune nouă a modelului antrenat:",Ze,V,He,ae,Ie="Apoi trebuie să specificăm optimizatorul; vom folosi standardul <code>AdamW</code>:",Le,se,ne,ie,ce="Cu aceste obiecte, acum putem pregăti totul pentru antrenare cu obiectul <code>Accelerator</code>:",Be,L,ke,D,St="Acum că modelul, optimizatorul și dataloaderul sunt configurate, putem specifica learning rate schedulerul cum urmează:",Re,F,De,re,Ce="Mai este un singur lucru de făcut înainte de antrenare: creați un repositoriu de modele pe Hugging Face Hub! Putem utiliza biblioteca 🤗 Hub pentru a genera mai întâi numele complet al repositoriul nostru:",qe,Me,Y,x,ue,q,Et="apoi creați și clonați repositoriul folosind clasa <code>Repository</code> din 🤗 Hub:",ve,K,ze,P,Ht="Odată ce acest lucru este făcut, trebuie doar să scriem ciclul complet de antrenare și evaluare:",Xe,O,$e,ee,Ke,pe,te="Mișto, am reușit să evaluăm perplexitatea cu fiecare epocă și să ne asigurăm că mai multe runde de antrenament sunt reproductibile!",S;return u=new Se({props:{title:"Fine-tuningul DistilBERT cu 🤗 Accelerate",local:"fine-tuning-distilbert-with-accelerate",headingTag:"h2"}}),B=new h({props:{code:"ZGVmJTIwaW5zZXJ0X3JhbmRvbV9tYXNrKGJhdGNoKSUzQSUwQSUyMCUyMCUyMCUyMGZlYXR1cmVzJTIwJTNEJTIwJTVCZGljdCh6aXAoYmF0Y2glMkMlMjB0KSklMjBmb3IlMjB0JTIwaW4lMjB6aXAoKmJhdGNoLnZhbHVlcygpKSU1RCUwQSUyMCUyMCUyMCUyMG1hc2tlZF9pbnB1dHMlMjAlM0QlMjBkYXRhX2NvbGxhdG9yKGZlYXR1cmVzKSUwQSUyMCUyMCUyMCUyMCUyMyUyMENyZWF0ZSUyMGElMjBuZXclMjAlMjJtYXNrZWQlMjIlMjBjb2x1bW4lMjBmb3IlMjBlYWNoJTIwY29sdW1uJTIwaW4lMjB0aGUlMjBkYXRhc2V0JTBBJTIwJTIwJTIwJTIwcmV0dXJuJTIwJTdCJTIybWFza2VkXyUyMiUyMCUyQiUyMGslM0ElMjB2Lm51bXB5KCklMjBmb3IlMjBrJTJDJTIwdiUyMGluJTIwbWFza2VkX2lucHV0cy5pdGVtcygpJTdE",highlighted:`<span class="hljs-keyword">def</span> <span class="hljs-title function_">insert_random_mask</span>(<span class="hljs-params">batch</span>): | |
| features = [<span class="hljs-built_in">dict</span>(<span class="hljs-built_in">zip</span>(batch, t)) <span class="hljs-keyword">for</span> t <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(*batch.values())] | |
| masked_inputs = data_collator(features) | |
| <span class="hljs-comment"># Create a new "masked" column for each column in the dataset</span> | |
| <span class="hljs-keyword">return</span> {<span class="hljs-string">"masked_"</span> + k: v.numpy() <span class="hljs-keyword">for</span> k, v <span class="hljs-keyword">in</span> masked_inputs.items()}`,wrap:!1}}),C=new h({props:{code:"ZG93bnNhbXBsZWRfZGF0YXNldCUyMCUzRCUyMGRvd25zYW1wbGVkX2RhdGFzZXQucmVtb3ZlX2NvbHVtbnMoJTVCJTIyd29yZF9pZHMlMjIlNUQpJTBBZXZhbF9kYXRhc2V0JTIwJTNEJTIwZG93bnNhbXBsZWRfZGF0YXNldCU1QiUyMnRlc3QlMjIlNUQubWFwKCUwQSUyMCUyMCUyMCUyMGluc2VydF9yYW5kb21fbWFzayUyQyUwQSUyMCUyMCUyMCUyMGJhdGNoZWQlM0RUcnVlJTJDJTBBJTIwJTIwJTIwJTIwcmVtb3ZlX2NvbHVtbnMlM0Rkb3duc2FtcGxlZF9kYXRhc2V0JTVCJTIydGVzdCUyMiU1RC5jb2x1bW5fbmFtZXMlMkMlMEEpJTBBZXZhbF9kYXRhc2V0JTIwJTNEJTIwZXZhbF9kYXRhc2V0LnJlbmFtZV9jb2x1bW5zKCUwQSUyMCUyMCUyMCUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMm1hc2tlZF9pbnB1dF9pZHMlMjIlM0ElMjAlMjJpbnB1dF9pZHMlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJtYXNrZWRfYXR0ZW50aW9uX21hc2slMjIlM0ElMjAlMjJhdHRlbnRpb25fbWFzayUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMm1hc2tlZF9sYWJlbHMlMjIlM0ElMjAlMjJsYWJlbHMlMjIlMkMlMEElMjAlMjAlMjAlMjAlN0QlMEEp",highlighted:`downsampled_dataset = downsampled_dataset.remove_columns([<span class="hljs-string">"word_ids"</span>]) | |
| eval_dataset = downsampled_dataset[<span class="hljs-string">"test"</span>].<span class="hljs-built_in">map</span>( | |
| insert_random_mask, | |
| batched=<span class="hljs-literal">True</span>, | |
| remove_columns=downsampled_dataset[<span class="hljs-string">"test"</span>].column_names, | |
| ) | |
| eval_dataset = eval_dataset.rename_columns( | |
| { | |
| <span class="hljs-string">"masked_input_ids"</span>: <span class="hljs-string">"input_ids"</span>, | |
| <span class="hljs-string">"masked_attention_mask"</span>: <span class="hljs-string">"attention_mask"</span>, | |
| <span class="hljs-string">"masked_labels"</span>: <span class="hljs-string">"labels"</span>, | |
| } | |
| )`,wrap:!1}}),fe=new h({props:{code:"ZnJvbSUyMHRvcmNoLnV0aWxzLmRhdGElMjBpbXBvcnQlMjBEYXRhTG9hZGVyJTBBZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMGRlZmF1bHRfZGF0YV9jb2xsYXRvciUwQSUwQWJhdGNoX3NpemUlMjAlM0QlMjA2NCUwQXRyYWluX2RhdGFsb2FkZXIlMjAlM0QlMjBEYXRhTG9hZGVyKCUwQSUyMCUyMCUyMCUyMGRvd25zYW1wbGVkX2RhdGFzZXQlNUIlMjJ0cmFpbiUyMiU1RCUyQyUwQSUyMCUyMCUyMCUyMHNodWZmbGUlM0RUcnVlJTJDJTBBJTIwJTIwJTIwJTIwYmF0Y2hfc2l6ZSUzRGJhdGNoX3NpemUlMkMlMEElMjAlMjAlMjAlMjBjb2xsYXRlX2ZuJTNEZGF0YV9jb2xsYXRvciUyQyUwQSklMEFldmFsX2RhdGFsb2FkZXIlMjAlM0QlMjBEYXRhTG9hZGVyKCUwQSUyMCUyMCUyMCUyMGV2YWxfZGF0YXNldCUyQyUyMGJhdGNoX3NpemUlM0RiYXRjaF9zaXplJTJDJTIwY29sbGF0ZV9mbiUzRGRlZmF1bHRfZGF0YV9jb2xsYXRvciUwQSk=",highlighted:`<span class="hljs-keyword">from</span> torch.utils.data <span class="hljs-keyword">import</span> DataLoader | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> default_data_collator | |
| batch_size = <span class="hljs-number">64</span> | |
| train_dataloader = DataLoader( | |
| downsampled_dataset[<span class="hljs-string">"train"</span>], | |
| shuffle=<span class="hljs-literal">True</span>, | |
| batch_size=batch_size, | |
| collate_fn=data_collator, | |
| ) | |
| eval_dataloader = DataLoader( | |
| eval_dataset, batch_size=batch_size, collate_fn=default_data_collator | |
| )`,wrap:!1}}),V=new h({props:{code:"bW9kZWwlMjAlM0QlMjBBdXRvTW9kZWxGb3JNYXNrZWRMTS5mcm9tX3ByZXRyYWluZWQobW9kZWxfY2hlY2twb2ludCk=",highlighted:'model = <span class="hljs-module-access"><span class="hljs-module"><span class="hljs-identifier">AutoModelForMaskedLM</span>.</span></span>from<span class="hljs-constructor">_pretrained(<span class="hljs-params">model_checkpoint</span>)</span>',wrap:!1}}),se=new h({props:{code:"ZnJvbSUyMHRvcmNoLm9wdGltJTIwaW1wb3J0JTIwQWRhbVclMEElMEFvcHRpbWl6ZXIlMjAlM0QlMjBBZGFtVyhtb2RlbC5wYXJhbWV0ZXJzKCklMkMlMjBsciUzRDVlLTUp",highlighted:`<span class="hljs-keyword">from</span> torch.optim <span class="hljs-keyword">import</span> AdamW | |
| optimizer = AdamW(model.parameters(), lr=<span class="hljs-number">5e-5</span>)`,wrap:!1}}),L=new h({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQW1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwdHJhaW5fZGF0YWxvYWRlciUyQyUyMGV2YWxfZGF0YWxvYWRlciUyMCUzRCUyMGFjY2VsZXJhdG9yLnByZXBhcmUoJTBBJTIwJTIwJTIwJTIwbW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjB0cmFpbl9kYXRhbG9hZGVyJTJDJTIwZXZhbF9kYXRhbG9hZGVyJTBBKQ==",highlighted:`<span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> Accelerator | |
| accelerator = Accelerator() | |
| model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare( | |
| model, optimizer, train_dataloader, eval_dataloader | |
| )`,wrap:!1}}),F=new h({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMGdldF9zY2hlZHVsZXIlMEElMEFudW1fdHJhaW5fZXBvY2hzJTIwJTNEJTIwMyUwQW51bV91cGRhdGVfc3RlcHNfcGVyX2Vwb2NoJTIwJTNEJTIwbGVuKHRyYWluX2RhdGFsb2FkZXIpJTBBbnVtX3RyYWluaW5nX3N0ZXBzJTIwJTNEJTIwbnVtX3RyYWluX2Vwb2NocyUyMColMjBudW1fdXBkYXRlX3N0ZXBzX3Blcl9lcG9jaCUwQSUwQWxyX3NjaGVkdWxlciUyMCUzRCUyMGdldF9zY2hlZHVsZXIoJTBBJTIwJTIwJTIwJTIwJTIybGluZWFyJTIyJTJDJTBBJTIwJTIwJTIwJTIwb3B0aW1pemVyJTNEb3B0aW1pemVyJTJDJTBBJTIwJTIwJTIwJTIwbnVtX3dhcm11cF9zdGVwcyUzRDAlMkMlMEElMjAlMjAlMjAlMjBudW1fdHJhaW5pbmdfc3RlcHMlM0RudW1fdHJhaW5pbmdfc3RlcHMlMkMlMEEp",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> get_scheduler | |
| num_train_epochs = <span class="hljs-number">3</span> | |
| num_update_steps_per_epoch = <span class="hljs-built_in">len</span>(train_dataloader) | |
| num_training_steps = num_train_epochs * num_update_steps_per_epoch | |
| lr_scheduler = get_scheduler( | |
| <span class="hljs-string">"linear"</span>, | |
| optimizer=optimizer, | |
| num_warmup_steps=<span class="hljs-number">0</span>, | |
| num_training_steps=num_training_steps, | |
| )`,wrap:!1}}),Me=new h({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMGdldF9mdWxsX3JlcG9fbmFtZSUwQSUwQW1vZGVsX25hbWUlMjAlM0QlMjAlMjJkaXN0aWxiZXJ0LWJhc2UtdW5jYXNlZC1maW5ldHVuZWQtaW1kYi1hY2NlbGVyYXRlJTIyJTBBcmVwb19uYW1lJTIwJTNEJTIwZ2V0X2Z1bGxfcmVwb19uYW1lKG1vZGVsX25hbWUpJTBBcmVwb19uYW1l",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> get_full_repo_name | |
| model_name = <span class="hljs-string">"distilbert-base-uncased-finetuned-imdb-accelerate"</span> | |
| repo_name = get_full_repo_name(model_name) | |
| repo_name`,wrap:!1}}),x=new h({props:{code:"J2xld3R1biUyRmRpc3RpbGJlcnQtYmFzZS11bmNhc2VkLWZpbmV0dW5lZC1pbWRiLWFjY2VsZXJhdGUn",highlighted:'<span class="hljs-string">'lewtun/distilbert-base-uncased-finetuned-imdb-accelerate'</span>',wrap:!1}}),K=new h({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMFJlcG9zaXRvcnklMEElMEFvdXRwdXRfZGlyJTIwJTNEJTIwbW9kZWxfbmFtZSUwQXJlcG8lMjAlM0QlMjBSZXBvc2l0b3J5KG91dHB1dF9kaXIlMkMlMjBjbG9uZV9mcm9tJTNEcmVwb19uYW1lKQ==",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> Repository | |
| output_dir = model_name | |
| repo = Repository(output_dir, clone_from=repo_name)`,wrap:!1}}),O=new h({props:{code:"ZnJvbSUyMHRxZG0uYXV0byUyMGltcG9ydCUyMHRxZG0lMEFpbXBvcnQlMjB0b3JjaCUwQWltcG9ydCUyMG1hdGglMEElMEFwcm9ncmVzc19iYXIlMjAlM0QlMjB0cWRtKHJhbmdlKG51bV90cmFpbmluZ19zdGVwcykpJTBBJTBBZm9yJTIwZXBvY2glMjBpbiUyMHJhbmdlKG51bV90cmFpbl9lcG9jaHMpJTNBJTBBJTIwJTIwJTIwJTIwJTIzJTIwVHJhaW5pbmclMEElMjAlMjAlMjAlMjBtb2RlbC50cmFpbigpJTBBJTIwJTIwJTIwJTIwZm9yJTIwYmF0Y2glMjBpbiUyMHRyYWluX2RhdGFsb2FkZXIlM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBvdXRwdXRzJTIwJTNEJTIwbW9kZWwoKipiYXRjaCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBsb3NzJTIwJTNEJTIwb3V0cHV0cy5sb3NzJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwYWNjZWxlcmF0b3IuYmFja3dhcmQobG9zcyklMEElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBvcHRpbWl6ZXIuc3RlcCgpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbHJfc2NoZWR1bGVyLnN0ZXAoKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG9wdGltaXplci56ZXJvX2dyYWQoKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHByb2dyZXNzX2Jhci51cGRhdGUoMSklMEElMEElMjAlMjAlMjAlMjAlMjMlMjBFdmFsdWF0aW9uJTBBJTIwJTIwJTIwJTIwbW9kZWwuZXZhbCgpJTBBJTIwJTIwJTIwJTIwbG9zc2VzJTIwJTNEJTIwJTVCJTVEJTBBJTIwJTIwJTIwJTIwZm9yJTIwc3RlcCUyQyUyMGJhdGNoJTIwaW4lMjBlbnVtZXJhdGUoZXZhbF9kYXRhbG9hZGVyKSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHdpdGglMjB0b3JjaC5ub19ncmFkKCklM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBvdXRwdXRzJTIwJTNEJTIwbW9kZWwoKipiYXRjaCklMEElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBsb3NzJTIwJTNEJTIwb3V0cHV0cy5sb3NzJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbG9zc2VzLmFwcGVuZChhY2NlbGVyYXRvci5nYXRoZXIobG9zcy5yZXBlYXQoYmF0Y2hfc2l6ZSkpKSUwQSUwQSUyMCUyMCUyMCUyMGxvc3NlcyUyMCUzRCUyMHRvcmNoLmNhdChsb3NzZXMpJTBBJTIwJTIwJTIwJTIwbG9zc2VzJTIwJTNEJTIwbG9zc2VzJTVCJTNBJTIwbGVuKGV2YWxfZGF0YXNldCklNUQlMEElMjAlMjAlMjAlMjB0cnklM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBwZXJwbGV4aXR5JTIwJTNEJTIwbWF0aC5leHAodG9yY2gubWVhbihsb3NzZXMpKSUwQSUyMCUyMCUyMCUyMGV4Y2VwdCUyME92ZXJmbG93RXJyb3IlM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBwZXJwbGV4aXR5JTIwJTNEJTIwZmxvYXQoJTIyaW5mJTIyKSUwQSUwQSUyMCUyMCUyMCUyMHByaW50KGYlMjIlM0UlM0UlM0UlMjBFcG9jaCUyMCU3QmVwb2NoJTdEJTNBJTIwUGVycGxleGl0eSUzQSUyMCU3QnBlcnBsZXhpdHklN0QlMjIpJTBBJTBBJTIwJTIwJTIwJTIwJTIzJTIwU2F2ZSUyMGFuZCUyMHVwbG9hZCUwQSUyMCUyMCUyMCUyMGFjY2VsZXJhdG9yLndhaXRfZm9yX2V2ZXJ5b25lKCklMEElMjAlMjAlMjAlMjB1bndyYXBwZWRfbW9kZWwlMjAlM0QlMjBhY2NlbGVyYXRvci51bndyYXBfbW9kZWwobW9kZWwpJTBBJTIwJTIwJTIwJTIwdW53cmFwcGVkX21vZGVsLnNhdmVfcHJldHJhaW5lZChvdXRwdXRfZGlyJTJDJTIwc2F2ZV9mdW5jdGlvbiUzRGFjY2VsZXJhdG9yLnNhdmUpJTBBJTIwJTIwJTIwJTIwaWYlMjBhY2NlbGVyYXRvci5pc19tYWluX3Byb2Nlc3MlM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjB0b2tlbml6ZXIuc2F2ZV9wcmV0cmFpbmVkKG91dHB1dF9kaXIpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcmVwby5wdXNoX3RvX2h1YiglMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBjb21taXRfbWVzc2FnZSUzRGYlMjJUcmFpbmluZyUyMGluJTIwcHJvZ3Jlc3MlMjBlcG9jaCUyMCU3QmVwb2NoJTdEJTIyJTJDJTIwYmxvY2tpbmclM0RGYWxzZSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCk=",highlighted:`<span class="hljs-keyword">from</span> tqdm.auto <span class="hljs-keyword">import</span> tqdm | |
| <span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">import</span> math | |
| progress_bar = tqdm(<span class="hljs-built_in">range</span>(num_training_steps)) | |
| <span class="hljs-keyword">for</span> epoch <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(num_train_epochs): | |
| <span class="hljs-comment"># Training</span> | |
| model.train() | |
| <span class="hljs-keyword">for</span> batch <span class="hljs-keyword">in</span> train_dataloader: | |
| outputs = model(**batch) | |
| loss = outputs.loss | |
| accelerator.backward(loss) | |
| optimizer.step() | |
| lr_scheduler.step() | |
| optimizer.zero_grad() | |
| progress_bar.update(<span class="hljs-number">1</span>) | |
| <span class="hljs-comment"># Evaluation</span> | |
| model.<span class="hljs-built_in">eval</span>() | |
| losses = [] | |
| <span class="hljs-keyword">for</span> step, batch <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(eval_dataloader): | |
| <span class="hljs-keyword">with</span> torch.no_grad(): | |
| outputs = model(**batch) | |
| loss = outputs.loss | |
| losses.append(accelerator.gather(loss.repeat(batch_size))) | |
| losses = torch.cat(losses) | |
| losses = losses[: <span class="hljs-built_in">len</span>(eval_dataset)] | |
| <span class="hljs-keyword">try</span>: | |
| perplexity = math.exp(torch.mean(losses)) | |
| <span class="hljs-keyword">except</span> OverflowError: | |
| perplexity = <span class="hljs-built_in">float</span>(<span class="hljs-string">"inf"</span>) | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f">>> Epoch <span class="hljs-subst">{epoch}</span>: Perplexity: <span class="hljs-subst">{perplexity}</span>"</span>) | |
| <span class="hljs-comment"># Save and upload</span> | |
| accelerator.wait_for_everyone() | |
| unwrapped_model = accelerator.unwrap_model(model) | |
| unwrapped_model.save_pretrained(output_dir, save_function=accelerator.save) | |
| <span class="hljs-keyword">if</span> accelerator.is_main_process: | |
| tokenizer.save_pretrained(output_dir) | |
| repo.push_to_hub( | |
| commit_message=<span class="hljs-string">f"Training in progress epoch <span class="hljs-subst">{epoch}</span>"</span>, blocking=<span class="hljs-literal">False</span> | |
| )`,wrap:!1}}),ee=new h({props:{code:"RXBvY2glMjAwJTNBJTIwUGVycGxleGl0eSUzQSUyMDExLjM5NzU0NTMwNzkwMDQ3MiUwQUVwb2NoJTIwMSUzQSUyMFBlcnBsZXhpdHklM0ElMjAxMC45MDQ5MDkzMzA5ODMwOTIlMEFFcG9jaCUyMDIlM0ElMjBQZXJwbGV4aXR5JTNBJTIwMTAuNzI5NTAzNTA1MzQwNDA5",highlighted:`<span class="hljs-meta">>>> </span>Epoch <span class="hljs-number">0</span>: Perplexity: <span class="hljs-number">11.397545307900472</span> | |
| <span class="hljs-meta">>>> </span>Epoch <span class="hljs-number">1</span>: Perplexity: <span class="hljs-number">10.904909330983092</span> | |
| <span class="hljs-meta">>>> </span>Epoch <span class="hljs-number">2</span>: Perplexity: <span class="hljs-number">10.729503505340409</span>`,wrap:!1}}),{c(){s=U("p"),s.textContent=o,n=i(),d(u.$$.fragment),g=i(),w=U("p"),w.innerHTML=R,z=i(),_=U("p"),_.innerHTML=v,k=i(),d(B.$$.fragment),b=i(),G=U("p"),G.innerHTML=W,Q=i(),d(C.$$.fragment),X=i(),E=U("p"),E.innerHTML=Z,$=i(),d(fe.$$.fragment),Ge=i(),H=U("p"),H.textContent=Yt,Ze=i(),d(V.$$.fragment),He=i(),ae=U("p"),ae.innerHTML=Ie,Le=i(),d(se.$$.fragment),ne=i(),ie=U("p"),ie.innerHTML=ce,Be=i(),d(L.$$.fragment),ke=i(),D=U("p"),D.textContent=St,Re=i(),d(F.$$.fragment),De=i(),re=U("p"),re.textContent=Ce,qe=i(),d(Me.$$.fragment),Y=i(),d(x.$$.fragment),ue=i(),q=U("p"),q.innerHTML=Et,ve=i(),d(K.$$.fragment),ze=i(),P=U("p"),P.textContent=Ht,Xe=i(),d(O.$$.fragment),$e=i(),d(ee.$$.fragment),Ke=i(),pe=U("p"),pe.textContent=te},l(M){s=T(M,"P",{"data-svelte-h":!0}),j(s)!=="svelte-10xp4ra"&&(s.textContent=o),n=r(M),m(u.$$.fragment,M),g=r(M),w=T(M,"P",{"data-svelte-h":!0}),j(w)!=="svelte-1epdxoj"&&(w.innerHTML=R),z=r(M),_=T(M,"P",{"data-svelte-h":!0}),j(_)!=="svelte-nhehjb"&&(_.innerHTML=v),k=r(M),m(B.$$.fragment,M),b=r(M),G=T(M,"P",{"data-svelte-h":!0}),j(G)!=="svelte-1dvdug7"&&(G.innerHTML=W),Q=r(M),m(C.$$.fragment,M),X=r(M),E=T(M,"P",{"data-svelte-h":!0}),j(E)!=="svelte-1t7rb9m"&&(E.innerHTML=Z),$=r(M),m(fe.$$.fragment,M),Ge=r(M),H=T(M,"P",{"data-svelte-h":!0}),j(H)!=="svelte-12af5vb"&&(H.textContent=Yt),Ze=r(M),m(V.$$.fragment,M),He=r(M),ae=T(M,"P",{"data-svelte-h":!0}),j(ae)!=="svelte-i6gmne"&&(ae.innerHTML=Ie),Le=r(M),m(se.$$.fragment,M),ne=r(M),ie=T(M,"P",{"data-svelte-h":!0}),j(ie)!=="svelte-1fu1rck"&&(ie.innerHTML=ce),Be=r(M),m(L.$$.fragment,M),ke=r(M),D=T(M,"P",{"data-svelte-h":!0}),j(D)!=="svelte-16ytijo"&&(D.textContent=St),Re=r(M),m(F.$$.fragment,M),De=r(M),re=T(M,"P",{"data-svelte-h":!0}),j(re)!=="svelte-dqvp2"&&(re.textContent=Ce),qe=r(M),m(Me.$$.fragment,M),Y=r(M),m(x.$$.fragment,M),ue=r(M),q=T(M,"P",{"data-svelte-h":!0}),j(q)!=="svelte-yj1sgy"&&(q.innerHTML=Et),ve=r(M),m(K.$$.fragment,M),ze=r(M),P=T(M,"P",{"data-svelte-h":!0}),j(P)!=="svelte-96p1ml"&&(P.textContent=Ht),Xe=r(M),m(O.$$.fragment,M),$e=r(M),m(ee.$$.fragment,M),Ke=r(M),pe=T(M,"P",{"data-svelte-h":!0}),j(pe)!=="svelte-1b1diss"&&(pe.textContent=te)},m(M,f){a(M,s,f),a(M,n,f),y(u,M,f),a(M,g,f),a(M,w,f),a(M,z,f),a(M,_,f),a(M,k,f),y(B,M,f),a(M,b,f),a(M,G,f),a(M,Q,f),y(C,M,f),a(M,X,f),a(M,E,f),a(M,$,f),y(fe,M,f),a(M,Ge,f),a(M,H,f),a(M,Ze,f),y(V,M,f),a(M,He,f),a(M,ae,f),a(M,Le,f),y(se,M,f),a(M,ne,f),a(M,ie,f),a(M,Be,f),y(L,M,f),a(M,ke,f),a(M,D,f),a(M,Re,f),y(F,M,f),a(M,De,f),a(M,re,f),a(M,qe,f),y(Me,M,f),a(M,Y,f),y(x,M,f),a(M,ue,f),a(M,q,f),a(M,ve,f),y(K,M,f),a(M,ze,f),a(M,P,f),a(M,Xe,f),y(O,M,f),a(M,$e,f),y(ee,M,f),a(M,Ke,f),a(M,pe,f),S=!0},i(M){S||(p(u.$$.fragment,M),p(B.$$.fragment,M),p(C.$$.fragment,M),p(fe.$$.fragment,M),p(V.$$.fragment,M),p(se.$$.fragment,M),p(L.$$.fragment,M),p(F.$$.fragment,M),p(Me.$$.fragment,M),p(x.$$.fragment,M),p(K.$$.fragment,M),p(O.$$.fragment,M),p(ee.$$.fragment,M),S=!0)},o(M){c(u.$$.fragment,M),c(B.$$.fragment,M),c(C.$$.fragment,M),c(fe.$$.fragment,M),c(V.$$.fragment,M),c(se.$$.fragment,M),c(L.$$.fragment,M),c(F.$$.fragment,M),c(Me.$$.fragment,M),c(x.$$.fragment,M),c(K.$$.fragment,M),c(O.$$.fragment,M),c(ee.$$.fragment,M),S=!1},d(M){M&&(t(s),t(n),t(g),t(w),t(z),t(_),t(k),t(b),t(G),t(Q),t(X),t(E),t($),t(Ge),t(H),t(Ze),t(He),t(ae),t(Le),t(ne),t(ie),t(Be),t(ke),t(D),t(Re),t(De),t(re),t(qe),t(Y),t(ue),t(q),t(ve),t(ze),t(P),t(Xe),t($e),t(Ke),t(pe)),J(u,M),J(B,M),J(C,M),J(fe,M),J(V,M),J(se,M),J(L,M),J(F,M),J(Me,M),J(x,M),J(K,M),J(O,M),J(ee,M)}}}function Wi(I){let s,o='✏️ <strong>Încercați!</strong> Pentru a cuantifica beneficiile adaptării domeniului, faceți fine-tune unui clasificator pe labelurile IMDb atât pentru checkpointurile DistilBERT preantrenate, cât și pentru cele fine-tuned. Dacă aveți nevoie de o recapitulare a clasificării textului, consultați <a href="/course/chapter3">Capitolul 3</a>.';return{c(){s=U("p"),s.innerHTML=o},l(n){s=T(n,"P",{"data-svelte-h":!0}),j(s)!=="svelte-1wijmsj"&&(s.innerHTML=o)},m(n,u){a(n,s,u)},p:Ee,d(n){n&&t(s)}}}function Vi(I){let s,o,n,u,g,w,R,z,_,v,k,B,b="Pentru multe aplicații NLP care implică modele Transformer, puteți lua pur și simplu un model preantrenat de pe Hugging Face Hub și să îl faceți fine-tune direct pe datele voastre pentru sarcina dată. Cu condiția că corpusul utilizat pentru preantrenare să nu fie prea diferit de corpusul utilizat pentru fine-tuning, învățarea prin transfer va produce de obicei rezultate bune.",G,W,Q="Cu toate acestea, există câteva cazuri în care veți dori să faceți fine-tune mai întâi modelelor lingvistice pe datele voastre, înainte de a antrena un head specific sarcinii. De exemplu, dacă datasetul vostru conține contracte juridice sau articole științifice, un model Transformer obișnuit, precum BERT, va trata de obicei cuvintele specifice domeniului din corpus ca pe niște tokeni rari, iar performanța rezultată poate fi mai puțin satisfăcătoare. Prin fine-tuningul modelului lingvistic pe baza datelor din domeniu, puteți crește performanța multor sarcini, ceea ce înseamnă că, de obicei, trebuie să efectuați acest pas o singură dată!",C,X,E='Acest proces de fine-tuning a unui model lingvistic preantrenat pe date din domeniu se numește de obicei <em>adaptare la domeniu</em>. Acesta a fost popularizat în 2018 de <a href="https://arxiv.org/abs/1801.06146" rel="nofollow">ULMFiT</a>, care a fost una dintre primele arhitecturi neuronale (bazate pe LSTM-uri) care a făcut ca învățarea prin transfer să funcționeze cu adevărat pentru NLP. Un exemplu de adaptare la domeniu cu ULMFiT este prezentat în imaginea de mai jos; în această secțiune vom face ceva similar, dar cu un Transformer în loc de un LSTM!',Z,$,fe='<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter7/ulmfit.svg" alt="ULMFiT."/> <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter7/ulmfit-dark.svg" alt="ULMFiT."/>',Ge,H,Yt='Până la sfârșitul acestei secțiuni, veți avea un <a href="https://huggingface.co/huggingface-course/distilbert-base-uncased-finetuned-imdb?text=This+is+a+great+%5BMASK%5D." rel="nofollow">model de limbaj mascat</a> pe Hub, care poate completa automat propoziții, după cum se poate vedea mai jos:',Ze,V,He,ae,Ie,Le="Hai să începem!",se,ne,ie,ce,Be,L,ke,D,St='Pentru a începe, să alegem un model preantrenat adecvat pentru modelarea limbajului mascat. După cum se vede în următoarea captură de ecran, puteți găsi o listă de candidați prin aplicarea filtrului “Fill-Mask” pe <a href="https://huggingface.co/models?pipeline_tag=fill-mask&sort=downloads" rel="nofollow">Hugging Face Hub</a>:',Re,F,De='<img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter7/mlm-models.png" alt="Hub models." width="80%"/>',re,Ce,qe=`Deși modelele din familia BERT și RoBERTa sunt cele mai descărcate, vom utiliza un model numit <a href="https://huggingface.co/distilbert-base-uncased" rel="nofollow">DistilBERT</a> | |
| care poate fi antrenat mult mai rapid, cu o pierdere mică sau nulă a performanței în aval. Acest model a fost antrenat folosind o tehnică specială numită <a href="https://en.wikipedia.org/wiki/Knowledge_distillation" rel="nofollow"><em>knowledge distillation</em></a>, în care un “model profesor” mare, precum BERT, este folosit pentru a ghida antrenarea unui “model elev” care are mult mai puțini parametrii. O explicație a detaliilor privind distilarea cunoștințelor ne-ar duce prea departe în această secțiune, dar dacă ești interesat, poți citi totul despre aceasta în <a href="https://www.oreilly.com/library/view/natural-language-processing/9781098136789/" rel="nofollow"><em>Natural Language Processing with Transformers</em></a> (cunoscut sub numele colocvial de Transformers textbooks).`,Me,Y,x,ue,q,Et="Cu aproximativ 67 de milioane de parametri, DistilBERT este de aproximativ două ori mai mic decât modelul de bază BERT, ceea ce se traduce aproximativ printr-o creștere de două ori a vitezei de antrenare - super! Să vedem acum ce tipuri de tokeni prezice acest model ca fiind cele mai probabile completări ale unui mic sample de text:",ve,K,ze,P,Ht='Ca oameni, ne putem imagina multe posibilități pentru tokenul <code>[MASK]</code>, cum ar fi “day”, “ride” sau “painting”. Pentru modelele preantrenate, predicțiile depind de corpusul pe care modelul a fost antrenat, deoarece acesta învață să detecteze tiparele statistice prezente în date. La fel ca BERT, DistilBERT a fost preantrenat pe dataseturile <a href="https://huggingface.co/datasets/wikipedia" rel="nofollow">English Wikipedia</a> și <a href="https://huggingface.co/datasets/bookcorpus" rel="nofollow">BookCorpus</a>, astfel încât ne așteptăm ca predicțiile pentru <code>[MASK]</code> să reflecte aceste domenii. Pentru a prezice masca, avem nevoie de tokenizerul DistilBERT pentru a produce inputurile pentru model, deci hai să-l descărcăm și pe acesta din Hub:',Xe,O,$e,ee,Ke="Cu un tokenizer și un model, putem acum să transmitem exemplul nostru de text modelului, să extragem logiturile și să tipărim primii 5 candidați:",pe,te,S,M,f,Dt,Pe,Ys="Putem vedea din rezultate că predicțiile modelului se referă la termeni din viața de zi cu zi, ceea ce poate că nu este surprinzător având în vedere fundamentul Wikipedia în limba engleză. Să vedem cum putem schimba acest domeniu în ceva puțin mai nișat - recenzii de filme foarte polarizate!",qt,Oe,Kt,el,Ss='Pentru a prezenta adaptarea la domeniu, vom utiliza faimosul [Large Movie Review Dataset] (<a href="https://huggingface.co/datasets/imdb" rel="nofollow">https://huggingface.co/datasets/imdb</a>) (sau IMDb pe scurt), care este un corpus de recenzii de filme care este adesea utilizat pentru a evalua modelele de analiză a sentimentelor. Prin fine-tuningul aplicat asupra DistilBERT pe acest corpus, ne așteptăm ca modelul de limbaj să își adapteze vocabularul de la datele factuale din Wikipedia pe care a fost antrenat în prealabil la elementele mai subiective ale recenziilor de film. Putem obține datele din Hugging Face Hub cu funcția <code>load_dataset()</code> din 🤗 Datasets:',Pt,ll,Ot,tl,ea,al,Es="Putem vedea că segmentele <code>train</code> și <code>test</code> conțin fiecare 25.000 de recenzii, în timp ce există un segment fără label numită <code>unsupervised</code> care conține 50.000 de recenzii. Să aruncăm o privire la câteva sampleuri pentru a ne face o idee despre tipul de text cu care avem de-a face. Așa cum am făcut în capitolele anterioare ale cursului, vom combina funcțiile <code>Dataset.shuffle()</code> și <code>Dataset.select()</code> pentru a crea un sample aleatoriu:",la,sl,ta,nl,aa,il,Hs="Da, acestea sunt cu siguranță recenzii de film și, dacă sunteți suficient de bătrâni, ați putea chiar înțelege comentariul din ultima recenzie despre deținerea unei versiuni VHS 😜! Deși nu vom avea nevoie de labeluri pentru modelarea limbajului, putem vedea deja că un <code>0</code> denotă o recenzie negativă, în timp ce un <code>1</code> corespunde uneia pozitive.",sa,We,na,rl,Ls='Acum că am aruncat o privire rapidă asupra datelor, să ne apucăm să le pregătim pentru modelarea limbajului mascat. După cum vom vedea, există câteva etape suplimentare pe care trebuie să le parcurgem în comparație cu sarcinile de clasificare a secvențelor pe care le-am văzut în <a href="/course/chapter3">Capitolul 3</a>. Să începem!',ia,Ml,ra,pl,Ma,cl,Ds="Atât pentru auto-regressive cât și pentru masked language modeling, un pas comun de preprocesare este concatenarea tuturor exemplelor și apoi împărțirea întregului corpus în bucăți de dimensiuni egale. Acest lucru este destul de diferit de abordarea noastră obișnuită, în care pur și simplu tokenizăm exemplele individuale. De ce să concatenăm totul împreună? Motivul este că exemplele individuale ar putea fi trunchiate dacă sunt prea lungi, ceea ce ar duce la pierderea de informații care ar putea fi utile pentru sarcina de modelare a limbajului!",pa,ul,qs='Deci, pentru a începe, vom tokeniza mai întâi corpusul nostru ca de obicei, dar <em>fără</em> a seta opțiunea <code>truncation=True</code> în tokenizerul nostru. De asemenea, vom prelua ID-urile cuvintelor dacă acestea sunt disponibile (ceea ce va fi cazul dacă folosim un tokenizer rapid, așa cum este descris în <a href="/course/chapter6/3">Capitolul 6</a>), deoarece vom avea nevoie de ele mai târziu pentru a face mascarea întregului cuvânt. Vom include acest lucru într-o funcție simplă și, în același timp, vom elimina coloanele <code>text</code> și <code>label</code>, deoarece nu mai avem nevoie de ele:',ca,ol,ua,dl,oa,ml,Ks="Deoarece DistilBERT este un model de tip BERT, putem vedea că textele codate constau din <code>input_ids</code> și <code>attention_mask</code> pe care le-am văzut în alte capitole, precum și din <code>word_ids</code> pe care le-am adăugat.",da,yl,Ps="Acum că am tokenizat recenziile de filme, următorul pas este să le grupăm pe toate și să împărțim rezultatul în chunkuri. Dar cât de mari ar trebui să fie aceste chunkuri? Acest lucru va fi determinat în cele din urmă de cantitatea de memorie GPU pe care o aveți disponibilă, dar un bun punct de plecare este să vedeți care este dimensiunea maximă a contextului modelului. Aceasta poate fi dedusă prin inspectarea atributului <code>model_max_length</code> al tokenizerului:",ma,Jl,ya,Ul,Ja,Tl,Os="Această valoare este derivată din fișierul <em>tokenizer_config.json</em> asociat cu un checkpoint; în acest caz putem vedea că dimensiunea contextului este de 512 tokeni, la fel ca în cazul BERT.",Ua,Ve,Ta,wl,en="Prin urmare, pentru a derula experimentele pe GPU-uri precum cele de pe Google Colab, vom alege ceva mai mic care să încapă în memorie:",wa,jl,ja,xe,ba,bl,ln="Acum vine partea distractivă. Pentru a arăta cum funcționează concatenarea, să luăm câteva recenzii din setul nostru de antrenare tokenizat și să imprimăm numărul de tokeni per recenzie:",ha,hl,fa,fl,Ia,Il,tn="Putem apoi concatena toate aceste exemple cu un dictionary comprehension, după cum urmează:",Ca,Cl,_a,_l,ga,gl,an="Minunat, lungimea totală se verifică - așa că acum să împărțim recenziile concatenate în chunkuri de dimensiunea dată de <code>chunk_size</code>. Pentru a face acest lucru, iterăm peste caracteristicile din <code>concatenated_examples</code> și folosim un list comprehension pentru a crea slice-uri ale fiecărei caracteristici. Rezultatul este un dicționar de chunkuri pentru fiecare caracteristică:",Ga,Gl,Za,Zl,Ba,Bl,sn="După cum puteți vedea în acest exemplu, ultimul fragment va fi în general mai mic decât dimensiunea maximă a fragmentului. Există două strategii principale pentru a face față acestei situații:",ka,kl,nn="<li>Aruncați ultimul chunk dacă este mai mic decât <code>chunk_size</code>.</li> <li>Faceți padding ultimului chunk până când lungimea sa este egală cu <code>chunk_size</code>.</li>",Ra,Rl,rn="Vom adopta prima abordare aici, așa că hai să încorporăm toată logica de mai sus într-o singură funcție pe care o putem aplica dataseturilor tokenizate:",va,vl,za,zl,Mn="Observați că în ultimul pas al <code>group_texts()</code> creăm o nouă coloană <code>labels</code> care este o copie a coloanei <code>input_ids</code>. După cum vom vedea în curând, acest lucru se datorează faptului că în modelarea limbajului mascat obiectivul este de a prezice tokeni mascați aleatoriu în input batch, iar prin crearea unei coloane <code>labels</code> furnizăm adevărul de bază din care modelul nostru de limbaj poate să învețe.",Xa,Xl,pn="Să aplicăm acum funcția <code>group_texts()</code> dataseturilor tokenizate folosind funcția noastră de încredere <code>Dataset.map()</code>:",$a,$l,Wa,Wl,Va,Vl,cn="Puteți vedea că gruparea și apoi fragmentarea textelor a produs mult mai multe exemple decât cele 25.000 inițiale pentru spliturile <code>train</code> și <code>test</code>. Acest lucru se datorează faptului că acum avem exemple care implică <em>contigous tokens</em> care se întind pe mai multe exemple din corpusul original. Puteți vedea acest lucru în mod explicit căutând tokenii speciali <code>[SEP]</code> și <code>[CLS]</code> într-unul dintre chunkuri:",xa,xl,Aa,Al,Na,Nl,un="În acest exemplu puteți vedea două recenzii de film care se suprapun, una despre un film de liceu și cealaltă despre persoanele fără adăpost. Să verificăm, de asemenea, cum arată labelurile pentru modelarea limbajului mascat:",Qa,Ql,Fa,Fl,Ya,Yl,on="Așa cum era de așteptat de la funcția noastră <code>group_texts()</code> de mai sus, acest lucru pare identic cu <code>input_ids</code> decodificat - dar atunci cum poate modelul nostru să învețe ceva? Ne lipsește un pas cheie: inserarea tokenilor <code>[MASK]</code> în poziții aleatorii în inputuri! Să vedem cum putem face acest lucru din mers, în timpul fine-tuningului, folosind un data collator special.",Sa,Sl,Ea,El,dn='Fine-tuningul unui model lingvistic mascat este aproape identic cu fine-tuningul a unui model de clasificare a secvențelor, așa cum am făcut în <a href="/course/chapter3">Capitolul 3</a>. Singura diferență este că avem nevoie de un data collator care poate masca aleatoriu o parte dintre tokeni din fiecare batch de texte. Din fericire, 🤗 Transformers vine pregătit cu un <code>DataCollatorForLanguageModeling</code> dedicat tocmai pentru această sarcină. Trebuie doar să îi transmitem tokenizerul și un argument <code>mlm_probability</code> care specifică ce fracțiune din tokeni trebuie mascată. Vom alege 15%, care este cantitatea utilizată pentru BERT și o alegere comună în literatura de specialitate:',Ha,Hl,La,Ll,mn="Pentru a vedea cum funcționează mascarea aleatorie, să introducem câteva exemple în data collator. Deoarece se așteaptă la o listă de “dict”-uri, în care fiecare “dict” reprezintă un singur chunk de text continuu, mai întâi iterăm peste dataset înainte de a trimite batchul către data collator. Eliminăm cheia <code>"word_ids"</code> pentru acest data collator, deoarece acesta nu o așteaptă:",Da,Dl,qa,ql,Ka,Kl,yn="Frumos, a funcționat! Putem vedea că tokenul <code>[MASK]</code> a fost inserat aleatoriu în diferite locuri din textul nostru. Acestea vor fi tokenii pe care modelul nostru va trebui să le prezică în timpul antrenamentului - iar frumusețea data collatorului este că va introduce aleatoriu tokenul <code>[MASK]</code> cu fiecare batch!",Pa,Ae,Oa,$t,Pl,Jn="La antrenarea modelelor pentru modelarea limbajului mascat, o tehnică care poate fi utilizată este mascarea cuvintelor întregi împreună, nu doar a tokenilor individuali. Această abordare se numește <em>whole word masking</em>. Dacă dorim să utilizăm mascarea întregului cuvânt, va trebui să construim noi înșine un data collator. Un data collator este doar o funcție care preia o listă de sampleuri și le convertește într-un batch, așa că hai să facem asta acum! Vom utiliza ID-urile cuvintelor calculate mai devreme pentru a realiza o hartă între indicii cuvintelor și tokenii corespunzători, apoi vom decide aleatoriu ce cuvinte să mascăm și vom aplica masca respectivă asupra inputurilor. Rețineți că labelurile sunt toate <code>-100</code>, cu excepția celor care corespund cuvintelor mascate.",es,oe,de,Wt,Ol,Un="În continuare, îl putem încerca pe aceleași sampleuri ca înainte:",ls,et,ts,lt,as,Ne,ss,tt,Tn='Acum, că avem două data collators, restul pașilor fine-tuning sunt standard. Pregătirea poate dura ceva timp pe Google Colab dacă nu sunteți suficient de norocos să obțineți un GPU P100 mitic 😭, așa că vom reduce mai întâi dimensiunea setului de antrenare la câteva mii de exemple. Nu vă faceți griji, vom obține în continuare un model lingvistic destul de decent! O modalitate rapidă de a reduce sampleurile unui dataset în 🤗 Datasets este prin intermediul funcției <code>Dataset.train_test_split()</code> pe care am văzut-o în <a href="/course/chapter5">Capitolul 5</a>:',ns,at,is,st,rs,nt,wn="Acest lucru a creat în mod automat noi splituri de <code>train</code> și <code>test</code>, cu dimensiunea setului de antrenare setată la 10.000 de exemple și a setului de validare la 10% din aceasta - nu ezitați să măriți această valoare dacă aveți un GPU puternic! Următorul lucru pe care trebuie să îl facem este să ne conectăm la Hugging Face Hub. Dacă executați acest cod într-un notebook, puteți face acest lucru cu următoarea funcție utilitară:",Ms,it,ps,rt,jn="care va afișa un widget în care vă puteți introduce credențialele. Alternativ, puteți rula:",cs,Mt,us,pt,bn="in your favorite terminal and log in there.",os,me,ye,Vt,ct,ds,ut,ms,ot,hn="Spre deosebire de alte sarcini, cum ar fi clasificarea textului sau răspunderea la întrebări, unde ni se oferă un corpus labeled pe care să antrenăm, cu modelarea limbajului nu avem labeluri explicite. Așadar, cum determinăm ce face un model lingvistic bun? La fel ca în cazul funcției de autocorectare din telefon, un model lingvistic bun este unul care atribuie probabilități ridicate propozițiilor corecte din punct de vedere gramatical și probabilități scăzute propozițiilor fără sens. Pentru a vă face o idee mai bună despre cum arată acest lucru, puteți găsi online seturi întregi de “autocorrect fails”, în care modelul din telefonul unei persoane a produs niște completări destul de amuzante (și adesea nepotrivite)!",ys,Je,Ue,xt,dt,Js,mt,fn="Un scor de perplexitate mai mic înseamnă un model lingvistic mai bun, iar aici putem vedea că modelul nostru inițial are o valoare oarecum mare. Să vedem dacă o putem reduce prin fine-tuning! Pentru a face acest lucru, vom rula mai întâi bucla de antrenare:",Us,Te,we,At,yt,In="și apoi calculați perplexitatea rezultată pe setul de testare ca înainte:",Ts,je,be,Nt,Jt,ws,Ut,Cn="Grozav - aceasta este o reducere destul de mare a perplexității, ceea ce ne spune că modelul a învățat ceva despre domeniul recenziilor de filme!",js,Qt,Qe,bs,Ft,Tt,hs,wt,_n="Puteți interacționa cu modelul vostru fine-tuned utilizând widgetul său de pe Hub, fie local cu <code>pipeline</code> din 🤗 Transformers. Să folosim aceasta din urmă pentru a descărca modelul nostru folosind pipelineuul <code>fill-mask</code>:",fs,jt,Is,bt,gn="Putem apoi să alimentăm pipelineul cu exemplul nostru de text “This is a hrea [MASK]” și să vedem care sunt primele 5 predicții:",Cs,ht,_s,ft,gs,It,Gn="Frumos - modelul nostru și-a adaptat în mod clar weighturile pentru a prezice cuvintele care sunt asociate mai puternic cu filmele!",Gs,Ct,Zs,_t,Zn='Acest lucru încheie primul nostru experiment de antrenare a unui model lingvistic. În <a href="/course/ro/chapter7/6">secțiunea 6</a> veți învăța cum să antrenați de la zero un model auto-regressive precum GPT-2; mergeți acolo dacă doriți să vedeți cum vă puteți preantrena propriul model Transformer!',Bs,Fe,ks,gt,Rs,Lt,vs;g=new oi({props:{fw:I[0]}}),R=new Se({props:{title:"Fine-tuningul la un masked language model",local:"fine-tuning-a-masked-language-model",headingTag:"h1"}});const Bn=[yi,mi],Gt=[];function kn(e,l){return e[0]==="pt"?0:1}_=kn(I),v=Gt[_]=Bn[_](I),ne=new zs({props:{id:"mqElG5QJWUg"}}),ce=new Ye({props:{$$slots:{default:[Ji]},$$scope:{ctx:I}}}),L=new Se({props:{title:"Alegerea unui model preantrenat pentru masked language modeling",local:"picking-a-pretrained-model-for-masked-language-modeling",headingTag:"h2"}});const Rn=[Ti,Ui],Zt=[];function vn(e,l){return e[0]==="pt"?0:1}Y=vn(I),x=Zt[Y]=Rn[Y](I),K=new h({props:{code:"dGV4dCUyMCUzRCUyMCUyMlRoaXMlMjBpcyUyMGElMjBncmVhdCUyMCU1Qk1BU0slNUQuJTIy",highlighted:'text = <span class="hljs-string">"This is a great [MASK]."</span>',wrap:!1}}),O=new h({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMEElMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZChtb2RlbF9jaGVja3BvaW50KQ==",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer | |
| tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)`,wrap:!1}});const zn=[ji,wi],Bt=[];function Xn(e,l){return e[0]==="pt"?0:1}te=Xn(I),S=Bt[te]=zn[te](I),f=new h({props:{code:"JyUzRSUzRSUzRSUyMFRoaXMlMjBpcyUyMGElMjBncmVhdCUyMGRlYWwuJyUwQSclM0UlM0UlM0UlMjBUaGlzJTIwaXMlMjBhJTIwZ3JlYXQlMjBzdWNjZXNzLiclMEEnJTNFJTNFJTNFJTIwVGhpcyUyMGlzJTIwYSUyMGdyZWF0JTIwYWR2ZW50dXJlLiclMEEnJTNFJTNFJTNFJTIwVGhpcyUyMGlzJTIwYSUyMGdyZWF0JTIwaWRlYS4nJTBBJyUzRSUzRSUzRSUyMFRoaXMlMjBpcyUyMGElMjBncmVhdCUyMGZlYXQuJw==",highlighted:`<span class="hljs-string">'>>> This is a great deal.'</span> | |
| <span class="hljs-string">'>>> This is a great success.'</span> | |
| <span class="hljs-string">'>>> This is a great adventure.'</span> | |
| <span class="hljs-string">'>>> This is a great idea.'</span> | |
| <span class="hljs-string">'>>> This is a great feat.'</span>`,wrap:!1}}),Oe=new Se({props:{title:"Datasetul",local:"the-dataset",headingTag:"h2"}}),ll=new h({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBaW1kYl9kYXRhc2V0JTIwJTNEJTIwbG9hZF9kYXRhc2V0KCUyMmltZGIlMjIpJTBBaW1kYl9kYXRhc2V0",highlighted:`<span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| imdb_dataset = load_dataset(<span class="hljs-string">"imdb"</span>) | |
| imdb_dataset`,wrap:!1}}),tl=new h({props:{code:"RGF0YXNldERpY3QoJTdCJTBBJTIwJTIwJTIwJTIwdHJhaW4lM0ElMjBEYXRhc2V0KCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGZlYXR1cmVzJTNBJTIwJTVCJ3RleHQnJTJDJTIwJ2xhYmVsJyU1RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG51bV9yb3dzJTNBJTIwMjUwMDAlMEElMjAlMjAlMjAlMjAlN0QpJTBBJTIwJTIwJTIwJTIwdGVzdCUzQSUyMERhdGFzZXQoJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZmVhdHVyZXMlM0ElMjAlNUIndGV4dCclMkMlMjAnbGFiZWwnJTVEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbnVtX3Jvd3MlM0ElMjAyNTAwMCUwQSUyMCUyMCUyMCUyMCU3RCklMEElMjAlMjAlMjAlMjB1bnN1cGVydmlzZWQlM0ElMjBEYXRhc2V0KCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGZlYXR1cmVzJTNBJTIwJTVCJ3RleHQnJTJDJTIwJ2xhYmVsJyU1RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG51bV9yb3dzJTNBJTIwNTAwMDAlMEElMjAlMjAlMjAlMjAlN0QpJTBBJTdEKQ==",highlighted:`DatasetDict({ | |
| train: Dataset({ | |
| features: [<span class="hljs-string">'text'</span>, <span class="hljs-string">'label'</span>], | |
| num_rows: <span class="hljs-number">25000</span> | |
| }) | |
| test: Dataset({ | |
| features: [<span class="hljs-string">'text'</span>, <span class="hljs-string">'label'</span>], | |
| num_rows: <span class="hljs-number">25000</span> | |
| }) | |
| unsupervised: Dataset({ | |
| features: [<span class="hljs-string">'text'</span>, <span class="hljs-string">'label'</span>], | |
| num_rows: <span class="hljs-number">50000</span> | |
| }) | |
| })`,wrap:!1}}),sl=new h({props:{code:"c2FtcGxlJTIwJTNEJTIwaW1kYl9kYXRhc2V0JTVCJTIydHJhaW4lMjIlNUQuc2h1ZmZsZShzZWVkJTNENDIpLnNlbGVjdChyYW5nZSgzKSklMEElMEFmb3IlMjByb3clMjBpbiUyMHNhbXBsZSUzQSUwQSUyMCUyMCUyMCUyMHByaW50KGYlMjIlNUNuJyUzRSUzRSUzRSUyMFJldmlldyUzQSUyMCU3QnJvdyU1Qid0ZXh0JyU1RCU3RCclMjIpJTBBJTIwJTIwJTIwJTIwcHJpbnQoZiUyMiclM0UlM0UlM0UlMjBMYWJlbCUzQSUyMCU3QnJvdyU1QidsYWJlbCclNUQlN0QnJTIyKQ==",highlighted:`sample = imdb_dataset[<span class="hljs-string">"train"</span>].shuffle(seed=<span class="hljs-number">42</span>).select(<span class="hljs-built_in">range</span>(<span class="hljs-number">3</span>)) | |
| <span class="hljs-keyword">for</span> row <span class="hljs-keyword">in</span> sample: | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"\\n'>>> Review: <span class="hljs-subst">{row[<span class="hljs-string">'text'</span>]}</span>'"</span>) | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"'>>> Label: <span class="hljs-subst">{row[<span class="hljs-string">'label'</span>]}</span>'"</span>)`,wrap:!1}}),nl=new h({props:{code:"JTBBJyUzRSUzRSUzRSUyMFJldmlldyUzQSUyMFRoaXMlMjBpcyUyMHlvdXIlMjB0eXBpY2FsJTIwUHJpeWFkYXJzaGFuJTIwbW92aWUtLWElMjBidW5jaCUyMG9mJTIwbG9vbnklMjBjaGFyYWN0ZXJzJTIwb3V0JTIwb24lMjBzb21lJTIwc2lsbHklMjBtaXNzaW9uLiUyMEhpcyUyMHNpZ25hdHVyZSUyMGNsaW1heCUyMGhhcyUyMHRoZSUyMGVudGlyZSUyMGNhc3QlMjBvZiUyMHRoZSUyMGZpbG0lMjBjb21pbmclMjB0b2dldGhlciUyMGFuZCUyMGZpZ2h0aW5nJTIwZWFjaCUyMG90aGVyJTIwaW4lMjBzb21lJTIwY3JhenklMjBtb3NocGl0JTIwb3ZlciUyMGhpZGRlbiUyMG1vbmV5LiUyMFdoZXRoZXIlMjBpdCUyMGlzJTIwYSUyMHdpbm5pbmclMjBsb3R0ZXJ5JTIwdGlja2V0JTIwaW4lMjBNYWxhbWFhbCUyMFdlZWtseSUyQyUyMGJsYWNrJTIwbW9uZXklMjBpbiUyMEhlcmElMjBQaGVyaSUyQyUyMCUyMmtvZG9rb28lMjIlMjBpbiUyMFBoaXIlMjBIZXJhJTIwUGhlcmklMkMlMjBldGMuJTJDJTIwZXRjLiUyQyUyMHRoZSUyMGRpcmVjdG9yJTIwaXMlMjBiZWNvbWluZyUyMHJpZGljdWxvdXNseSUyMHByZWRpY3RhYmxlLiUyMERvbiU1Qyd0JTIwZ2V0JTIwbWUlMjB3cm9uZyUzQiUyMGFzJTIwY2xpY2glQzMlQTlkJTIwYW5kJTIwcHJlcG9zdGVyb3VzJTIwaGlzJTIwbW92aWVzJTIwbWF5JTIwYmUlMkMlMjBJJTIwdXN1YWxseSUyMGVuZCUyMHVwJTIwZW5qb3lpbmclMjB0aGUlMjBjb21lZHkuJTIwSG93ZXZlciUyQyUyMGluJTIwbW9zdCUyMGhpcyUyMHByZXZpb3VzJTIwbW92aWVzJTIwdGhlcmUlMjBoYXMlMjBhY3R1YWxseSUyMGJlZW4lMjBzb21lJTIwZ29vZCUyMGh1bW9yJTJDJTIwKEh1bmdhbWElMjBhbmQlMjBIZXJhJTIwUGhlcmklMjBiZWluZyUyMG5vdGV3b3J0aHklMjBvbmVzKS4lMjBOb3clMkMlMjB0aGUlMjBoaWxhcml0eSUyMG9mJTIwaGlzJTIwZmlsbXMlMjBpcyUyMGZhZGluZyUyMGFzJTIwaGUlMjBpcyUyMHVzaW5nJTIwdGhlJTIwc2FtZSUyMGZvcm11bGElMjBvdmVyJTIwYW5kJTIwb3ZlciUyMGFnYWluLiUzQ2JyJTIwJTJGJTNFJTNDYnIlMjAlMkYlM0VTb25ncyUyMGFyZSUyMGdvb2QuJTIwVGFudXNocmVlJTIwRGF0dGElMjBsb29rcyUyMGF3ZXNvbWUuJTIwUmFqcGFsJTIwWWFkYXYlMjBpcyUyMGlycml0YXRpbmclMkMlMjBhbmQlMjBUdXNzaGFyJTIwaXMlMjBub3QlMjBhJTIwd2hvbGUlMjBsb3QlMjBiZXR0ZXIuJTIwS3VuYWwlMjBLaGVtdSUyMGlzJTIwT0slMkMlMjBhbmQlMjBTaGFybWFuJTIwSm9zaGklMjBpcyUyMHRoZSUyMGJlc3QuJyUwQSclM0UlM0UlM0UlMjBMYWJlbCUzQSUyMDAnJTBBJTBBJyUzRSUzRSUzRSUyMFJldmlldyUzQSUyME9rYXklMkMlMjB0aGUlMjBzdG9yeSUyMG1ha2VzJTIwbm8lMjBzZW5zZSUyQyUyMHRoZSUyMGNoYXJhY3RlcnMlMjBsYWNrJTIwYW55JTIwZGltZW5zaW9uYWxseSUyQyUyMHRoZSUyMGJlc3QlMjBkaWFsb2d1ZSUyMGlzJTIwYWQtbGlicyUyMGFib3V0JTIwdGhlJTIwbG93JTIwcXVhbGl0eSUyMG9mJTIwbW92aWUlMkMlMjB0aGUlMjBjaW5lbWF0b2dyYXBoeSUyMGlzJTIwZGlzbWFsJTJDJTIwYW5kJTIwb25seSUyMGVkaXRpbmclMjBzYXZlcyUyMGElMjBiaXQlMjBvZiUyMHRoZSUyMG11ZGRsZSUyQyUyMGJ1dCUyMFNhbSUyMiUyMFBlY2tpbnBhaCUyMGRpcmVjdGVkJTIwdGhlJTIwZmlsbS4lMjBTb21laG93JTJDJTIwaGlzJTIwZGlyZWN0aW9uJTIwaXMlMjBub3QlMjBlbm91Z2guJTIwRm9yJTIwdGhvc2UlMjB3aG8lMjBhcHByZWNpYXRlJTIwUGVja2lucGFoJTIwYW5kJTIwaGlzJTIwZ3JlYXQlMjB3b3JrJTJDJTIwdGhpcyUyMG1vdmllJTIwaXMlMjBhJTIwZGlzYXBwb2ludG1lbnQuJTIwRXZlbiUyMGElMjBncmVhdCUyMGNhc3QlMjBjYW5ub3QlMjByZWRlZW0lMjB0aGUlMjB0aW1lJTIwdGhlJTIwdmlld2VyJTIwd2FzdGVzJTIwd2l0aCUyMHRoaXMlMjBtaW5pbWFsJTIwZWZmb3J0LiUzQ2JyJTIwJTJGJTNFJTNDYnIlMjAlMkYlM0VUaGUlMjBwcm9wZXIlMjByZXNwb25zZSUyMHRvJTIwdGhlJTIwbW92aWUlMjBpcyUyMHRoZSUyMGNvbnRlbXB0JTIwdGhhdCUyMHRoZSUyMGRpcmVjdG9yJTIwU2FuJTIwUGVja2lucGFoJTJDJTIwSmFtZXMlMjBDYWFuJTJDJTIwUm9iZXJ0JTIwRHV2YWxsJTJDJTIwQnVydCUyMFlvdW5nJTJDJTIwQm8lMjBIb3BraW5zJTJDJTIwQXJ0aHVyJTIwSGlsbCUyQyUyMGFuZCUyMGV2ZW4lMjBHaWclMjBZb3VuZyUyMGJyaW5nJTIwdG8lMjB0aGVpciUyMHdvcmsuJTIwV2F0Y2glMjB0aGUlMjBncmVhdCUyMFBlY2tpbnBhaCUyMGZpbG1zLiUyMFNraXAlMjB0aGlzJTIwbWVzcy4nJTBBJyUzRSUzRSUzRSUyMExhYmVsJTNBJTIwMCclMEElMEEnJTNFJTNFJTNFJTIwUmV2aWV3JTNBJTIwSSUyMHNhdyUyMHRoaXMlMjBtb3ZpZSUyMGF0JTIwdGhlJTIwdGhlYXRlcnMlMjB3aGVuJTIwSSUyMHdhcyUyMGFib3V0JTIwNiUyMG9yJTIwNyUyMHllYXJzJTIwb2xkLiUyMEklMjBsb3ZlZCUyMGl0JTIwdGhlbiUyQyUyMGFuZCUyMGhhdmUlMjByZWNlbnRseSUyMGNvbWUlMjB0byUyMG93biUyMGElMjBWSFMlMjB2ZXJzaW9uLiUyMCUzQ2JyJTIwJTJGJTNFJTNDYnIlMjAlMkYlM0VNeSUyMDQlMjBhbmQlMjA2JTIweWVhciUyMG9sZCUyMGNoaWxkcmVuJTIwbG92ZSUyMHRoaXMlMjBtb3ZpZSUyMGFuZCUyMGhhdmUlMjBiZWVuJTIwYXNraW5nJTIwYWdhaW4lMjBhbmQlMjBhZ2FpbiUyMHRvJTIwd2F0Y2glMjBpdC4lMjAlM0NiciUyMCUyRiUzRSUzQ2JyJTIwJTJGJTNFSSUyMGhhdmUlMjBlbmpveWVkJTIwd2F0Y2hpbmclMjBpdCUyMGFnYWluJTIwdG9vLiUyMFRob3VnaCUyMEklMjBoYXZlJTIwdG8lMjBhZG1pdCUyMGl0JTIwaXMlMjBub3QlMjBhcyUyMGdvb2QlMjBvbiUyMGElMjBsaXR0bGUlMjBUVi4lM0NiciUyMCUyRiUzRSUzQ2JyJTIwJTJGJTNFSSUyMGRvJTIwbm90JTIwaGF2ZSUyMG9sZGVyJTIwY2hpbGRyZW4lMjBzbyUyMEklMjBkbyUyMG5vdCUyMGtub3clMjB3aGF0JTIwdGhleSUyMHdvdWxkJTIwdGhpbmslMjBvZiUyMGl0LiUyMCUzQ2JyJTIwJTJGJTNFJTNDYnIlMjAlMkYlM0VUaGUlMjBzb25ncyUyMGFyZSUyMHZlcnklMjBjdXRlLiUyME15JTIwZGF1Z2h0ZXIlMjBrZWVwcyUyMHNpbmdpbmclMjB0aGVtJTIwb3ZlciUyMGFuZCUyMG92ZXIuJTNDYnIlMjAlMkYlM0UlM0NiciUyMCUyRiUzRUhvcGUlMjB0aGlzJTIwaGVscHMuJyUwQSclM0UlM0UlM0UlMjBMYWJlbCUzQSUyMDEn",highlighted:` | |
| <span class="hljs-string">'>>> Review: This is your typical Priyadarshan movie--a bunch of loony characters out on some silly mission. His signature climax has the entire cast of the film coming together and fighting each other in some crazy moshpit over hidden money. Whether it is a winning lottery ticket in Malamaal Weekly, black money in Hera Pheri, "kodokoo" in Phir Hera Pheri, etc., etc., the director is becoming ridiculously predictable. Don\\'t get me wrong; as clichéd and preposterous his movies may be, I usually end up enjoying the comedy. However, in most his previous movies there has actually been some good humor, (Hungama and Hera Pheri being noteworthy ones). Now, the hilarity of his films is fading as he is using the same formula over and over again.<br /><br />Songs are good. Tanushree Datta looks awesome. Rajpal Yadav is irritating, and Tusshar is not a whole lot better. Kunal Khemu is OK, and Sharman Joshi is the best.'</span> | |
| <span class="hljs-string">'>>> Label: 0'</span> | |
| <span class="hljs-string">'>>> Review: Okay, the story makes no sense, the characters lack any dimensionally, the best dialogue is ad-libs about the low quality of movie, the cinematography is dismal, and only editing saves a bit of the muddle, but Sam" Peckinpah directed the film. Somehow, his direction is not enough. For those who appreciate Peckinpah and his great work, this movie is a disappointment. Even a great cast cannot redeem the time the viewer wastes with this minimal effort.<br /><br />The proper response to the movie is the contempt that the director San Peckinpah, James Caan, Robert Duvall, Burt Young, Bo Hopkins, Arthur Hill, and even Gig Young bring to their work. Watch the great Peckinpah films. Skip this mess.'</span> | |
| <span class="hljs-string">'>>> Label: 0'</span> | |
| <span class="hljs-string">'>>> Review: I saw this movie at the theaters when I was about 6 or 7 years old. I loved it then, and have recently come to own a VHS version. <br /><br />My 4 and 6 year old children love this movie and have been asking again and again to watch it. <br /><br />I have enjoyed watching it again too. Though I have to admit it is not as good on a little TV.<br /><br />I do not have older children so I do not know what they would think of it. <br /><br />The songs are very cute. My daughter keeps singing them over and over.<br /><br />Hope this helps.'</span> | |
| <span class="hljs-string">'>>> Label: 1'</span>`,wrap:!1}}),We=new Ye({props:{$$slots:{default:[bi]},$$scope:{ctx:I}}}),Ml=new Se({props:{title:"Preprocesarea datelor",local:"preprocessing-the-data",headingTag:"h2"}}),pl=new zs({props:{id:"8PmhEIXhBvI"}}),ol=new h({props:{code:"ZGVmJTIwdG9rZW5pemVfZnVuY3Rpb24oZXhhbXBsZXMpJTNBJTBBJTIwJTIwJTIwJTIwcmVzdWx0JTIwJTNEJTIwdG9rZW5pemVyKGV4YW1wbGVzJTVCJTIydGV4dCUyMiU1RCklMEElMjAlMjAlMjAlMjBpZiUyMHRva2VuaXplci5pc19mYXN0JTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcmVzdWx0JTVCJTIyd29yZF9pZHMlMjIlNUQlMjAlM0QlMjAlNUJyZXN1bHQud29yZF9pZHMoaSklMjBmb3IlMjBpJTIwaW4lMjByYW5nZShsZW4ocmVzdWx0JTVCJTIyaW5wdXRfaWRzJTIyJTVEKSklNUQlMEElMjAlMjAlMjAlMjByZXR1cm4lMjByZXN1bHQlMEElMEElMEElMjMlMjBVdGlsaXphJUM4JTlCaSUyMGJhdGNoZWQlM0RUcnVlJTIwcGVudHJ1JTIwYSUyMGFjdGl2YSUyMG11bHRpdGhyZWFkaW5ndWwhJTBBdG9rZW5pemVkX2RhdGFzZXRzJTIwJTNEJTIwaW1kYl9kYXRhc2V0Lm1hcCglMEElMjAlMjAlMjAlMjB0b2tlbml6ZV9mdW5jdGlvbiUyQyUyMGJhdGNoZWQlM0RUcnVlJTJDJTIwcmVtb3ZlX2NvbHVtbnMlM0QlNUIlMjJ0ZXh0JTIyJTJDJTIwJTIybGFiZWwlMjIlNUQlMEEpJTBBdG9rZW5pemVkX2RhdGFzZXRz",highlighted:`<span class="hljs-keyword">def</span> <span class="hljs-title function_">tokenize_function</span>(<span class="hljs-params">examples</span>): | |
| result = tokenizer(examples[<span class="hljs-string">"text"</span>]) | |
| <span class="hljs-keyword">if</span> tokenizer.is_fast: | |
| result[<span class="hljs-string">"word_ids"</span>] = [result.word_ids(i) <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-built_in">len</span>(result[<span class="hljs-string">"input_ids"</span>]))] | |
| <span class="hljs-keyword">return</span> result | |
| <span class="hljs-comment"># Utilizați batched=True pentru a activa multithreadingul!</span> | |
| tokenized_datasets = imdb_dataset.<span class="hljs-built_in">map</span>( | |
| tokenize_function, batched=<span class="hljs-literal">True</span>, remove_columns=[<span class="hljs-string">"text"</span>, <span class="hljs-string">"label"</span>] | |
| ) | |
| tokenized_datasets`,wrap:!1}}),dl=new h({props:{code:"RGF0YXNldERpY3QoJTdCJTBBJTIwJTIwJTIwJTIwdHJhaW4lM0ElMjBEYXRhc2V0KCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGZlYXR1cmVzJTNBJTIwJTVCJ2F0dGVudGlvbl9tYXNrJyUyQyUyMCdpbnB1dF9pZHMnJTJDJTIwJ3dvcmRfaWRzJyU1RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG51bV9yb3dzJTNBJTIwMjUwMDAlMEElMjAlMjAlMjAlMjAlN0QpJTBBJTIwJTIwJTIwJTIwdGVzdCUzQSUyMERhdGFzZXQoJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZmVhdHVyZXMlM0ElMjAlNUInYXR0ZW50aW9uX21hc2snJTJDJTIwJ2lucHV0X2lkcyclMkMlMjAnd29yZF9pZHMnJTVEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbnVtX3Jvd3MlM0ElMjAyNTAwMCUwQSUyMCUyMCUyMCUyMCU3RCklMEElMjAlMjAlMjAlMjB1bnN1cGVydmlzZWQlM0ElMjBEYXRhc2V0KCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGZlYXR1cmVzJTNBJTIwJTVCJ2F0dGVudGlvbl9tYXNrJyUyQyUyMCdpbnB1dF9pZHMnJTJDJTIwJ3dvcmRfaWRzJyU1RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG51bV9yb3dzJTNBJTIwNTAwMDAlMEElMjAlMjAlMjAlMjAlN0QpJTBBJTdEKQ==",highlighted:`DatasetDict({ | |
| train: Dataset({ | |
| features: [<span class="hljs-string">'attention_mask'</span>, <span class="hljs-string">'input_ids'</span>, <span class="hljs-string">'word_ids'</span>], | |
| num_rows: <span class="hljs-number">25000</span> | |
| }) | |
| test: Dataset({ | |
| features: [<span class="hljs-string">'attention_mask'</span>, <span class="hljs-string">'input_ids'</span>, <span class="hljs-string">'word_ids'</span>], | |
| num_rows: <span class="hljs-number">25000</span> | |
| }) | |
| unsupervised: Dataset({ | |
| features: [<span class="hljs-string">'attention_mask'</span>, <span class="hljs-string">'input_ids'</span>, <span class="hljs-string">'word_ids'</span>], | |
| num_rows: <span class="hljs-number">50000</span> | |
| }) | |
| })`,wrap:!1}}),Jl=new h({props:{code:"dG9rZW5pemVyLm1vZGVsX21heF9sZW5ndGg=",highlighted:"tokenizer.model_max_length",wrap:!1}}),Ul=new h({props:{code:"NTEy",highlighted:'<span class="hljs-number">512</span>',wrap:!1}}),Ve=new Ye({props:{$$slots:{default:[hi]},$$scope:{ctx:I}}}),jl=new h({props:{code:"Y2h1bmtfc2l6ZSUyMCUzRCUyMDEyOA==",highlighted:'chunk_size = <span class="hljs-number">128</span>',wrap:!1}}),xe=new Ye({props:{warning:!0,$$slots:{default:[fi]},$$scope:{ctx:I}}}),hl=new h({props:{code:"JTIzJTIwU2xpY2luZ3VsJTIwcHJvZHVjZSUyMG8lMjBsaXN0JUM0JTgzJTIwZGUlMjBsaXN0ZSUyMHBlbnRydSUyMGZpZWNhcmUlMjBjYXJhY3RlcmlzdGljJUM0JTgzJTBBdG9rZW5pemVkX3NhbXBsZXMlMjAlM0QlMjB0b2tlbml6ZWRfZGF0YXNldHMlNUIlMjJ0cmFpbiUyMiU1RCU1QiUzQTMlNUQlMEElMEFmb3IlMjBpZHglMkMlMjBzYW1wbGUlMjBpbiUyMGVudW1lcmF0ZSh0b2tlbml6ZWRfc2FtcGxlcyU1QiUyMmlucHV0X2lkcyUyMiU1RCklM0ElMEElMjAlMjAlMjAlMjBwcmludChmJTIyJyUzRSUzRSUzRSUyMFJldmlldyUyMCU3QmlkeCU3RCUyMGxlbmd0aCUzQSUyMCU3QmxlbihzYW1wbGUpJTdEJyUyMik=",highlighted:`<span class="hljs-comment"># Slicingul produce o listă de liste pentru fiecare caracteristică</span> | |
| tokenized_samples = tokenized_datasets[<span class="hljs-string">"train"</span>][:<span class="hljs-number">3</span>] | |
| <span class="hljs-keyword">for</span> idx, sample <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(tokenized_samples[<span class="hljs-string">"input_ids"</span>]): | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"'>>> Review <span class="hljs-subst">{idx}</span> length: <span class="hljs-subst">{<span class="hljs-built_in">len</span>(sample)}</span>'"</span>)`,wrap:!1}}),fl=new h({props:{code:"JyUzRSUzRSUzRSUyMFJldmlldyUyMDAlMjBsZW5ndGglM0ElMjAyMDAnJTBBJyUzRSUzRSUzRSUyMFJldmlldyUyMDElMjBsZW5ndGglM0ElMjA1NTknJTBBJyUzRSUzRSUzRSUyMFJldmlldyUyMDIlMjBsZW5ndGglM0ElMjAxOTIn",highlighted:`<span class="hljs-string">'>>> Review 0 length: 200'</span> | |
| <span class="hljs-string">'>>> Review 1 length: 559'</span> | |
| <span class="hljs-string">'>>> Review 2 length: 192'</span>`,wrap:!1}}),Cl=new h({props:{code:"Y29uY2F0ZW5hdGVkX2V4YW1wbGVzJTIwJTNEJTIwJTdCJTBBJTIwJTIwJTIwJTIwayUzQSUyMHN1bSh0b2tlbml6ZWRfc2FtcGxlcyU1QmslNUQlMkMlMjAlNUIlNUQpJTIwZm9yJTIwayUyMGluJTIwdG9rZW5pemVkX3NhbXBsZXMua2V5cygpJTBBJTdEJTBBdG90YWxfbGVuZ3RoJTIwJTNEJTIwbGVuKGNvbmNhdGVuYXRlZF9leGFtcGxlcyU1QiUyMmlucHV0X2lkcyUyMiU1RCklMEFwcmludChmJTIyJyUzRSUzRSUzRSUyMENvbmNhdGVuYXRlZCUyMHJldmlld3MlMjBsZW5ndGglM0ElMjAlN0J0b3RhbF9sZW5ndGglN0QnJTIyKQ==",highlighted:`concatenated_examples = { | |
| k: <span class="hljs-built_in">sum</span>(tokenized_samples[k], []) <span class="hljs-keyword">for</span> k <span class="hljs-keyword">in</span> tokenized_samples.keys() | |
| } | |
| total_length = <span class="hljs-built_in">len</span>(concatenated_examples[<span class="hljs-string">"input_ids"</span>]) | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"'>>> Concatenated reviews length: <span class="hljs-subst">{total_length}</span>'"</span>)`,wrap:!1}}),_l=new h({props:{code:"JyUzRSUzRSUzRSUyMENvbmNhdGVuYXRlZCUyMHJldmlld3MlMjBsZW5ndGglM0ElMjA5NTEn",highlighted:'<span class="hljs-string">'>>> Concatenated reviews length: 951'</span>',wrap:!1}}),Gl=new h({props:{code:"Y2h1bmtzJTIwJTNEJTIwJTdCJTBBJTIwJTIwJTIwJTIwayUzQSUyMCU1QnQlNUJpJTIwJTNBJTIwaSUyMCUyQiUyMGNodW5rX3NpemUlNUQlMjBmb3IlMjBpJTIwaW4lMjByYW5nZSgwJTJDJTIwdG90YWxfbGVuZ3RoJTJDJTIwY2h1bmtfc2l6ZSklNUQlMEElMjAlMjAlMjAlMjBmb3IlMjBrJTJDJTIwdCUyMGluJTIwY29uY2F0ZW5hdGVkX2V4YW1wbGVzLml0ZW1zKCklMEElN0QlMEElMEFmb3IlMjBjaHVuayUyMGluJTIwY2h1bmtzJTVCJTIyaW5wdXRfaWRzJTIyJTVEJTNBJTBBJTIwJTIwJTIwJTIwcHJpbnQoZiUyMiclM0UlM0UlM0UlMjBDaHVuayUyMGxlbmd0aCUzQSUyMCU3QmxlbihjaHVuayklN0QnJTIyKQ==",highlighted:`chunks = { | |
| k: [t[i : i + chunk_size] <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-number">0</span>, total_length, chunk_size)] | |
| <span class="hljs-keyword">for</span> k, t <span class="hljs-keyword">in</span> concatenated_examples.items() | |
| } | |
| <span class="hljs-keyword">for</span> chunk <span class="hljs-keyword">in</span> chunks[<span class="hljs-string">"input_ids"</span>]: | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"'>>> Chunk length: <span class="hljs-subst">{<span class="hljs-built_in">len</span>(chunk)}</span>'"</span>)`,wrap:!1}}),Zl=new h({props:{code:"JyUzRSUzRSUzRSUyMENodW5rJTIwbGVuZ3RoJTNBJTIwMTI4JyUwQSclM0UlM0UlM0UlMjBDaHVuayUyMGxlbmd0aCUzQSUyMDEyOCclMEEnJTNFJTNFJTNFJTIwQ2h1bmslMjBsZW5ndGglM0ElMjAxMjgnJTBBJyUzRSUzRSUzRSUyMENodW5rJTIwbGVuZ3RoJTNBJTIwMTI4JyUwQSclM0UlM0UlM0UlMjBDaHVuayUyMGxlbmd0aCUzQSUyMDEyOCclMEEnJTNFJTNFJTNFJTIwQ2h1bmslMjBsZW5ndGglM0ElMjAxMjgnJTBBJyUzRSUzRSUzRSUyMENodW5rJTIwbGVuZ3RoJTNBJTIwMTI4JyUwQSclM0UlM0UlM0UlMjBDaHVuayUyMGxlbmd0aCUzQSUyMDU1Jw==",highlighted:`<span class="hljs-string">'>>> Chunk length: 128'</span> | |
| <span class="hljs-string">'>>> Chunk length: 128'</span> | |
| <span class="hljs-string">'>>> Chunk length: 128'</span> | |
| <span class="hljs-string">'>>> Chunk length: 128'</span> | |
| <span class="hljs-string">'>>> Chunk length: 128'</span> | |
| <span class="hljs-string">'>>> Chunk length: 128'</span> | |
| <span class="hljs-string">'>>> Chunk length: 128'</span> | |
| <span class="hljs-string">'>>> Chunk length: 55'</span>`,wrap:!1}}),vl=new h({props:{code:"ZGVmJTIwZ3JvdXBfdGV4dHMoZXhhbXBsZXMpJTNBJTBBJTIwJTIwJTIwJTIwJTIzJTIwQ29uY2F0ZW5hcmVhJTIwdHV0dXJvciUyMHRleHRlbG9yJTBBJTIwJTIwJTIwJTIwY29uY2F0ZW5hdGVkX2V4YW1wbGVzJTIwJTNEJTIwJTdCayUzQSUyMHN1bShleGFtcGxlcyU1QmslNUQlMkMlMjAlNUIlNUQpJTIwZm9yJTIwayUyMGluJTIwZXhhbXBsZXMua2V5cygpJTdEJTBBJTIwJTIwJTIwJTIwJTIzJTIwQ2FsY3VsYXJlYSUyMGx1bmdpbWlpJTIwdGV4dGVsb3IlMjBjb25jYXRlbmF0ZSUwQSUyMCUyMCUyMCUyMHRvdGFsX2xlbmd0aCUyMCUzRCUyMGxlbihjb25jYXRlbmF0ZWRfZXhhbXBsZXMlNUJsaXN0KGV4YW1wbGVzLmtleXMoKSklNUIwJTVEJTVEKSUwQSUyMCUyMCUyMCUyMCUyMyUyMFJlbnVuJUM4JTlCJUM0JTgzbSUyMGxhJTIwdWx0aW11bCUyMGNodW5rJTIwZGFjJUM0JTgzJTIwZXN0ZSUyMG1haSUyMG1pYyUyMGRlYyVDMyVBMnQlMjBjaHVua19zaXplJTBBJTIwJTIwJTIwJTIwdG90YWxfbGVuZ3RoJTIwJTNEJTIwKHRvdGFsX2xlbmd0aCUyMCUyRiUyRiUyMGNodW5rX3NpemUpJTIwKiUyMGNodW5rX3NpemUlMEElMjAlMjAlMjAlMjAlMjMlMjAlQzMlOEVtcCVDNCU4M3IlQzglOUJpJUM4JTlCaSUyMHBlJTIwYnVjJUM0JTgzJUM4JTlCaSUyMGRlJTIwbWF4X2xlbiUwQSUyMCUyMCUyMCUyMHJlc3VsdCUyMCUzRCUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGslM0ElMjAlNUJ0JTVCaSUyMCUzQSUyMGklMjAlMkIlMjBjaHVua19zaXplJTVEJTIwZm9yJTIwaSUyMGluJTIwcmFuZ2UoMCUyQyUyMHRvdGFsX2xlbmd0aCUyQyUyMGNodW5rX3NpemUpJTVEJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZm9yJTIwayUyQyUyMHQlMjBpbiUyMGNvbmNhdGVuYXRlZF9leGFtcGxlcy5pdGVtcygpJTBBJTIwJTIwJTIwJTIwJTdEJTBBJTIwJTIwJTIwJTIwJTIzJTIwQ3JlYSVDOCU5QmklMjBvJTIwbm91JUM0JTgzJTIwY29sb2FuJUM0JTgzJTIwZGUlMjBsYWJlbHVyaSUwQSUyMCUyMCUyMCUyMHJlc3VsdCU1QiUyMmxhYmVscyUyMiU1RCUyMCUzRCUyMHJlc3VsdCU1QiUyMmlucHV0X2lkcyUyMiU1RC5jb3B5KCklMEElMjAlMjAlMjAlMjByZXR1cm4lMjByZXN1bHQ=",highlighted:`<span class="hljs-keyword">def</span> <span class="hljs-title function_">group_texts</span>(<span class="hljs-params">examples</span>): | |
| <span class="hljs-comment"># Concatenarea tuturor textelor</span> | |
| concatenated_examples = {k: <span class="hljs-built_in">sum</span>(examples[k], []) <span class="hljs-keyword">for</span> k <span class="hljs-keyword">in</span> examples.keys()} | |
| <span class="hljs-comment"># Calcularea lungimii textelor concatenate</span> | |
| total_length = <span class="hljs-built_in">len</span>(concatenated_examples[<span class="hljs-built_in">list</span>(examples.keys())[<span class="hljs-number">0</span>]]) | |
| <span class="hljs-comment"># Renunțăm la ultimul chunk dacă este mai mic decât chunk_size</span> | |
| total_length = (total_length // chunk_size) * chunk_size | |
| <span class="hljs-comment"># Împărțiți pe bucăți de max_len</span> | |
| result = { | |
| k: [t[i : i + chunk_size] <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-number">0</span>, total_length, chunk_size)] | |
| <span class="hljs-keyword">for</span> k, t <span class="hljs-keyword">in</span> concatenated_examples.items() | |
| } | |
| <span class="hljs-comment"># Creați o nouă coloană de labeluri</span> | |
| result[<span class="hljs-string">"labels"</span>] = result[<span class="hljs-string">"input_ids"</span>].copy() | |
| <span class="hljs-keyword">return</span> result`,wrap:!1}}),$l=new h({props:{code:"bG1fZGF0YXNldHMlMjAlM0QlMjB0b2tlbml6ZWRfZGF0YXNldHMubWFwKGdyb3VwX3RleHRzJTJDJTIwYmF0Y2hlZCUzRFRydWUpJTBBbG1fZGF0YXNldHM=",highlighted:`lm_datasets = tokenized_datasets.<span class="hljs-built_in">map</span>(group_texts, batched=<span class="hljs-literal">True</span>) | |
| lm_datasets`,wrap:!1}}),Wl=new h({props:{code:"RGF0YXNldERpY3QoJTdCJTBBJTIwJTIwJTIwJTIwdHJhaW4lM0ElMjBEYXRhc2V0KCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGZlYXR1cmVzJTNBJTIwJTVCJ2F0dGVudGlvbl9tYXNrJyUyQyUyMCdpbnB1dF9pZHMnJTJDJTIwJ2xhYmVscyclMkMlMjAnd29yZF9pZHMnJTVEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbnVtX3Jvd3MlM0ElMjA2MTI4OSUwQSUyMCUyMCUyMCUyMCU3RCklMEElMjAlMjAlMjAlMjB0ZXN0JTNBJTIwRGF0YXNldCglN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBmZWF0dXJlcyUzQSUyMCU1QidhdHRlbnRpb25fbWFzayclMkMlMjAnaW5wdXRfaWRzJyUyQyUyMCdsYWJlbHMnJTJDJTIwJ3dvcmRfaWRzJyU1RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG51bV9yb3dzJTNBJTIwNTk5MDUlMEElMjAlMjAlMjAlMjAlN0QpJTBBJTIwJTIwJTIwJTIwdW5zdXBlcnZpc2VkJTNBJTIwRGF0YXNldCglN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBmZWF0dXJlcyUzQSUyMCU1QidhdHRlbnRpb25fbWFzayclMkMlMjAnaW5wdXRfaWRzJyUyQyUyMCdsYWJlbHMnJTJDJTIwJ3dvcmRfaWRzJyU1RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG51bV9yb3dzJTNBJTIwMTIyOTYzJTBBJTIwJTIwJTIwJTIwJTdEKSUwQSU3RCk=",highlighted:`DatasetDict({ | |
| train: Dataset({ | |
| features: [<span class="hljs-string">'attention_mask'</span>, <span class="hljs-string">'input_ids'</span>, <span class="hljs-string">'labels'</span>, <span class="hljs-string">'word_ids'</span>], | |
| num_rows: <span class="hljs-number">61289</span> | |
| }) | |
| test: Dataset({ | |
| features: [<span class="hljs-string">'attention_mask'</span>, <span class="hljs-string">'input_ids'</span>, <span class="hljs-string">'labels'</span>, <span class="hljs-string">'word_ids'</span>], | |
| num_rows: <span class="hljs-number">59905</span> | |
| }) | |
| unsupervised: Dataset({ | |
| features: [<span class="hljs-string">'attention_mask'</span>, <span class="hljs-string">'input_ids'</span>, <span class="hljs-string">'labels'</span>, <span class="hljs-string">'word_ids'</span>], | |
| num_rows: <span class="hljs-number">122963</span> | |
| }) | |
| })`,wrap:!1}}),xl=new h({props:{code:"dG9rZW5pemVyLmRlY29kZShsbV9kYXRhc2V0cyU1QiUyMnRyYWluJTIyJTVEJTVCMSU1RCU1QiUyMmlucHV0X2lkcyUyMiU1RCk=",highlighted:'tokenizer.decode(lm_datasets[<span class="hljs-string">"train"</span>][<span class="hljs-number">1</span>][<span class="hljs-string">"input_ids"</span>])',wrap:!1}}),Al=new h({props:{code:"JTIyLi4uLiUyMGF0Li4uLi4uLi4uLiUyMGhpZ2guJTIwYSUyMGNsYXNzaWMlMjBsaW5lJTIwJTNBJTIwaW5zcGVjdG9yJTIwJTNBJTIwaSdtJTIwaGVyZSUyMHRvJTIwc2FjayUyMG9uZSUyMG9mJTIweW91ciUyMHRlYWNoZXJzLiUyMHN0dWRlbnQlMjAlM0ElMjB3ZWxjb21lJTIwdG8lMjBicm9td2VsbCUyMGhpZ2guJTIwaSUyMGV4cGVjdCUyMHRoYXQlMjBtYW55JTIwYWR1bHRzJTIwb2YlMjBteSUyMGFnZSUyMHRoaW5rJTIwdGhhdCUyMGJyb213ZWxsJTIwaGlnaCUyMGlzJTIwZmFyJTIwZmV0Y2hlZC4lMjB3aGF0JTIwYSUyMHBpdHklMjB0aGF0JTIwaXQlMjBpc24ndCElMjAlNUJTRVAlNUQlMjAlNUJDTFMlNUQlMjBob21lbGVzc25lc3MlMjAoJTIwb3IlMjBob3VzZWxlc3NuZXNzJTIwYXMlMjBnZW9yZ2UlMjBjYXJsaW4lMjBzdGF0ZWQlMjApJTIwaGFzJTIwYmVlbiUyMGFuJTIwaXNzdWUlMjBmb3IlMjB5ZWFycyUyMGJ1dCUyMG5ldmVyJTIwYSUyMHBsYW4lMjB0byUyMGhlbHAlMjB0aG9zZSUyMG9uJTIwdGhlJTIwc3RyZWV0JTIwdGhhdCUyMHdlcmUlMjBvbmNlJTIwY29uc2lkZXJlZCUyMGh1bWFuJTIwd2hvJTIwZGlkJTIwZXZlcnl0aGluZyUyMGZyb20lMjBnb2luZyUyMHRvJTIwc2Nob29sJTJDJTIwd29yayUyQyUyMG9yJTIwdm90ZSUyMGZvciUyMHRoZSUyMG1hdHRlci4lMjBtb3N0JTIwcGVvcGxlJTIwdGhpbmslMjBvZiUyMHRoZSUyMGhvbWVsZXNzJTIy",highlighted:'<span class="hljs-string">".... at.......... high. a classic line : inspector : i'm here to sack one of your teachers. student : welcome to bromwell high. i expect that many adults of my age think that bromwell high is far fetched. what a pity that it isn't! [SEP] [CLS] homelessness ( or houselessness as george carlin stated ) has been an issue for years but never a plan to help those on the street that were once considered human who did everything from going to school, work, or vote for the matter. most people think of the homeless"</span>',wrap:!1}}),Ql=new h({props:{code:"dG9rZW5pemVyLmRlY29kZShsbV9kYXRhc2V0cyU1QiUyMnRyYWluJTIyJTVEJTVCMSU1RCU1QiUyMmxhYmVscyUyMiU1RCk=",highlighted:'tokenizer.decode(lm_datasets[<span class="hljs-string">"train"</span>][<span class="hljs-number">1</span>][<span class="hljs-string">"labels"</span>])',wrap:!1}}),Fl=new h({props:{code:"JTIyLi4uLiUyMGF0Li4uLi4uLi4uLiUyMGhpZ2guJTIwYSUyMGNsYXNzaWMlMjBsaW5lJTIwJTNBJTIwaW5zcGVjdG9yJTIwJTNBJTIwaSdtJTIwaGVyZSUyMHRvJTIwc2FjayUyMG9uZSUyMG9mJTIweW91ciUyMHRlYWNoZXJzLiUyMHN0dWRlbnQlMjAlM0ElMjB3ZWxjb21lJTIwdG8lMjBicm9td2VsbCUyMGhpZ2guJTIwaSUyMGV4cGVjdCUyMHRoYXQlMjBtYW55JTIwYWR1bHRzJTIwb2YlMjBteSUyMGFnZSUyMHRoaW5rJTIwdGhhdCUyMGJyb213ZWxsJTIwaGlnaCUyMGlzJTIwZmFyJTIwZmV0Y2hlZC4lMjB3aGF0JTIwYSUyMHBpdHklMjB0aGF0JTIwaXQlMjBpc24ndCElMjAlNUJTRVAlNUQlMjAlNUJDTFMlNUQlMjBob21lbGVzc25lc3MlMjAoJTIwb3IlMjBob3VzZWxlc3NuZXNzJTIwYXMlMjBnZW9yZ2UlMjBjYXJsaW4lMjBzdGF0ZWQlMjApJTIwaGFzJTIwYmVlbiUyMGFuJTIwaXNzdWUlMjBmb3IlMjB5ZWFycyUyMGJ1dCUyMG5ldmVyJTIwYSUyMHBsYW4lMjB0byUyMGhlbHAlMjB0aG9zZSUyMG9uJTIwdGhlJTIwc3RyZWV0JTIwdGhhdCUyMHdlcmUlMjBvbmNlJTIwY29uc2lkZXJlZCUyMGh1bWFuJTIwd2hvJTIwZGlkJTIwZXZlcnl0aGluZyUyMGZyb20lMjBnb2luZyUyMHRvJTIwc2Nob29sJTJDJTIwd29yayUyQyUyMG9yJTIwdm90ZSUyMGZvciUyMHRoZSUyMG1hdHRlci4lMjBtb3N0JTIwcGVvcGxlJTIwdGhpbmslMjBvZiUyMHRoZSUyMGhvbWVsZXNzJTIy",highlighted:'<span class="hljs-string">".... at.......... high. a classic line : inspector : i'm here to sack one of your teachers. student : welcome to bromwell high. i expect that many adults of my age think that bromwell high is far fetched. what a pity that it isn't! [SEP] [CLS] homelessness ( or houselessness as george carlin stated ) has been an issue for years but never a plan to help those on the street that were once considered human who did everything from going to school, work, or vote for the matter. most people think of the homeless"</span>',wrap:!1}}),Sl=new Se({props:{title:"Fine-tuningul asupra DistilBERT cu API-ul Trainer",local:"fine-tuning-distilbert-with-the-trainer-api",headingTag:"h2"}}),Hl=new h({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMERhdGFDb2xsYXRvckZvckxhbmd1YWdlTW9kZWxpbmclMEElMEFkYXRhX2NvbGxhdG9yJTIwJTNEJTIwRGF0YUNvbGxhdG9yRm9yTGFuZ3VhZ2VNb2RlbGluZyh0b2tlbml6ZXIlM0R0b2tlbml6ZXIlMkMlMjBtbG1fcHJvYmFiaWxpdHklM0QwLjE1KQ==",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> DataCollatorForLanguageModeling | |
| data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=<span class="hljs-number">0.15</span>)`,wrap:!1}}),Dl=new h({props:{code:"c2FtcGxlcyUyMCUzRCUyMCU1QmxtX2RhdGFzZXRzJTVCJTIydHJhaW4lMjIlNUQlNUJpJTVEJTIwZm9yJTIwaSUyMGluJTIwcmFuZ2UoMiklNUQlMEFmb3IlMjBzYW1wbGUlMjBpbiUyMHNhbXBsZXMlM0ElMEElMjAlMjAlMjAlMjBfJTIwJTNEJTIwc2FtcGxlLnBvcCglMjJ3b3JkX2lkcyUyMiklMEElMEFmb3IlMjBjaHVuayUyMGluJTIwZGF0YV9jb2xsYXRvcihzYW1wbGVzKSU1QiUyMmlucHV0X2lkcyUyMiU1RCUzQSUwQSUyMCUyMCUyMCUyMHByaW50KGYlMjIlNUNuJyUzRSUzRSUzRSUyMCU3QnRva2VuaXplci5kZWNvZGUoY2h1bmspJTdEJyUyMik=",highlighted:`samples = [lm_datasets[<span class="hljs-string">"train"</span>][i] <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-number">2</span>)] | |
| <span class="hljs-keyword">for</span> sample <span class="hljs-keyword">in</span> samples: | |
| _ = sample.pop(<span class="hljs-string">"word_ids"</span>) | |
| <span class="hljs-keyword">for</span> chunk <span class="hljs-keyword">in</span> data_collator(samples)[<span class="hljs-string">"input_ids"</span>]: | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"\\n'>>> <span class="hljs-subst">{tokenizer.decode(chunk)}</span>'"</span>)`,wrap:!1}}),ql=new h({props:{code:"JyUzRSUzRSUzRSUyMCU1QkNMUyU1RCUyMGJyb213ZWxsJTIwJTVCTUFTSyU1RCUyMGlzJTIwYSUyMGNhcnRvb24lMjBjb21lZHkuJTIwaXQlMjByYW4lMjBhdCUyMHRoZSUyMHNhbWUlMjAlNUJNQVNLJTVEJTIwYXMlMjBzb21lJTIwb3RoZXIlMjAlNUJNQVNLJTVEJTIwYWJvdXQlMjBzY2hvb2wlMjBsaWZlJTJDJTIwJTVCTUFTSyU1RCUyMGFzJTIwJTIyJTIwdGVhY2hlcnMlMjAlMjIuJTIwJTVCTUFTSyU1RCUyMCU1Qk1BU0slNUQlMjAlNUJNQVNLJTVEJTIwaW4lMjB0aGUlMjB0ZWFjaGluZyUyMCU1Qk1BU0slNUQlMjBsZWFkJTIwJTVCTUFTSyU1RCUyMHRvJTIwYmVsaWV2ZSUyMHRoYXQlMjBicm9td2VsbCUyMGhpZ2glNUMnJTVCTUFTSyU1RCUyMHNhdGlyZSUyMGlzJTIwbXVjaCUyMGNsb3NlciUyMHRvJTIwcmVhbGl0eSUyMHRoYW4lMjBpcyUyMCUyMiUyMHRlYWNoZXJzJTIwJTIyLiUyMHRoZSUyMHNjcmFtYmxlJTIwJTVCTUFTSyU1RCUyMCU1Qk1BU0slNUQlMjBmaW5hbmNpYWxseSUyQyUyMHRoZSUyMCU1Qk1BU0slNURmdWwlMjBzdHVkZW50cyUyMHdob2duJTIwJTVCTUFTSyU1RCUyMHJpZ2h0JTIwdGhyb3VnaCUyMCU1Qk1BU0slNUQlMjBwYXRoZXRpYyUyMHRlYWNoZXJzJTVDJ3BvbXAlMkMlMjB0aGUlMjBwZXR0aW5lc3MlMjBvZiUyMHRoZSUyMHdob2xlJTIwc2l0dWF0aW9uJTJDJTIwZGlzdGluY3Rpb24lMjByZW1pbmQlMjBtZSUyMG9mJTIwdGhlJTIwc2Nob29scyUyMGklMjBrbmV3JTIwYW5kJTIwdGhlaXIlMjBzdHVkZW50cy4lMjB3aGVuJTIwaSUyMHNhdyUyMCU1Qk1BU0slNUQlMjBlcGlzb2RlJTIwaW4lMjAlNUJNQVNLJTVEJTIwYSUyMHN0dWRlbnQlMjByZXBlYXRlZGx5JTIwdHJpZWQlMjB0byUyMGJ1cm4lMjBkb3duJTIwdGhlJTIwc2Nob29sJTJDJTIwJTVCTUFTSyU1RCUyMGltbWVkaWF0ZWx5JTIwcmVjYWxsZWQuJTIwJTVCTUFTSyU1RC4uLiclMEElMEEnJTNFJTNFJTNFJTIwLi4uLiUyMGF0Li4lMjAlNUJNQVNLJTVELi4uJTIwJTVCTUFTSyU1RC4uLiUyMGhpZ2guJTIwYSUyMGNsYXNzaWMlMjBsaW5lJTIwcGx1Y2tlZCUyMGluc3BlY3RvciUyMCUzQSUyMGklNUMnJTVCTUFTSyU1RCUyMGhlcmUlMjB0byUyMCU1Qk1BU0slNUQlMjBvbmUlMjBvZiUyMHlvdXIlMjAlNUJNQVNLJTVELiUyMHN0dWRlbnQlMjAlM0ElMjB3ZWxjb21lJTIwdG8lMjBicm9td2VsbCUyMCU1Qk1BU0slNUQuJTIwaSUyMGV4cGVjdCUyMHRoYXQlMjBtYW55JTIwYWR1bHRzJTIwb2YlMjBteSUyMGFnZSUyMHRoaW5rJTIwdGhhdCUyMCU1Qk1BU0slNURtd2VsbCUyMCU1Qk1BU0slNUQlMjBpcyUyMCU1Qk1BU0slNUQlMjBmZXRjaGVkLiUyMHdoYXQlMjBhJTIwcGl0eSUyMHRoYXQlMjBpdCUyMGlzbiU1Qyd0ISUyMCU1QlNFUCU1RCUyMCU1QkNMUyU1RCUyMCU1Qk1BU0slNURuZXNzJTIwKCUyMG9yJTIwJTVCTUFTSyU1RGxlc3NuZXNzJTIwYXMlMjBnZW9yZ2UlMjAlRTUlQUUlODdpbiUyMHN0YXRlZCUyMCklRTUlODUlQUMlMjBiZWVuJTIwYW4lMjBpc3N1ZSUyMGZvciUyMHllYXJzJTIwYnV0JTIwbmV2ZXIlMjAlNUJNQVNLJTVEJTIwcGxhbiUyMHRvJTIwaGVscCUyMHRob3NlJTIwb24lMjB0aGUlMjBzdHJlZXQlMjB0aGF0JTIwd2VyZSUyMG9uY2UlMjBjb25zaWRlcmVkJTIwaHVtYW4lMjAlNUJNQVNLJTVEJTIwZGlkJTIwZXZlcnl0aGluZyUyMGZyb20lMjBnb2luZyUyMHRvJTIwc2Nob29sJTJDJTIwJTVCTUFTSyU1RCUyQyUyMCU1Qk1BU0slNUQlMjB2b3RlJTIwZm9yJTIwdGhlJTIwbWF0dGVyLiUyMG1vc3QlMjBwZW9wbGUlMjB0aGluayUyMCU1Qk1BU0slNUQlMjB0aGUlMjBob21lbGVzcyc=",highlighted:`<span class="hljs-string">'>>> [CLS] bromwell [MASK] is a cartoon comedy. it ran at the same [MASK] as some other [MASK] about school life, [MASK] as " teachers ". [MASK] [MASK] [MASK] in the teaching [MASK] lead [MASK] to believe that bromwell high\\'[MASK] satire is much closer to reality than is " teachers ". the scramble [MASK] [MASK] financially, the [MASK]ful students whogn [MASK] right through [MASK] pathetic teachers\\'pomp, the pettiness of the whole situation, distinction remind me of the schools i knew and their students. when i saw [MASK] episode in [MASK] a student repeatedly tried to burn down the school, [MASK] immediately recalled. [MASK]...'</span> | |
| <span class="hljs-string">'>>> .... at.. [MASK]... [MASK]... high. a classic line plucked inspector : i\\'[MASK] here to [MASK] one of your [MASK]. student : welcome to bromwell [MASK]. i expect that many adults of my age think that [MASK]mwell [MASK] is [MASK] fetched. what a pity that it isn\\'t! [SEP] [CLS] [MASK]ness ( or [MASK]lessness as george 宇in stated )公 been an issue for years but never [MASK] plan to help those on the street that were once considered human [MASK] did everything from going to school, [MASK], [MASK] vote for the matter. most people think [MASK] the homeless'</span>`,wrap:!1}}),Ae=new Ye({props:{$$slots:{default:[Ii]},$$scope:{ctx:I}}});let le=I[0]==="pt"&&li();const $n=[_i,Ci],kt=[];function Wn(e,l){return e[0]==="pt"?0:1}oe=Wn(I),de=kt[oe]=$n[oe](I),et=new h({props:{code:"c2FtcGxlcyUyMCUzRCUyMCU1QmxtX2RhdGFzZXRzJTVCJTIydHJhaW4lMjIlNUQlNUJpJTVEJTIwZm9yJTIwaSUyMGluJTIwcmFuZ2UoMiklNUQlMEFiYXRjaCUyMCUzRCUyMHdob2xlX3dvcmRfbWFza2luZ19kYXRhX2NvbGxhdG9yKHNhbXBsZXMpJTBBJTBBZm9yJTIwY2h1bmslMjBpbiUyMGJhdGNoJTVCJTIyaW5wdXRfaWRzJTIyJTVEJTNBJTBBJTIwJTIwJTIwJTIwcHJpbnQoZiUyMiU1Q24nJTNFJTNFJTNFJTIwJTdCdG9rZW5pemVyLmRlY29kZShjaHVuayklN0QnJTIyKQ==",highlighted:`samples = [lm_datasets[<span class="hljs-string">"train"</span>][i] <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-number">2</span>)] | |
| batch = whole_word_masking_data_collator(samples) | |
| <span class="hljs-keyword">for</span> chunk <span class="hljs-keyword">in</span> batch[<span class="hljs-string">"input_ids"</span>]: | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"\\n'>>> <span class="hljs-subst">{tokenizer.decode(chunk)}</span>'"</span>)`,wrap:!1}}),lt=new h({props:{code:"JyUzRSUzRSUzRSUyMCU1QkNMUyU1RCUyMGJyb213ZWxsJTIwaGlnaCUyMGlzJTIwYSUyMGNhcnRvb24lMjBjb21lZHklMjAlNUJNQVNLJTVEJTIwaXQlMjByYW4lMjBhdCUyMHRoZSUyMHNhbWUlMjB0aW1lJTIwYXMlMjBzb21lJTIwb3RoZXIlMjBwcm9ncmFtcyUyMGFib3V0JTIwc2Nob29sJTIwbGlmZSUyQyUyMHN1Y2glMjBhcyUyMCUyMiUyMHRlYWNoZXJzJTIwJTIyLiUyMG15JTIwMzUlMjB5ZWFycyUyMGluJTIwdGhlJTIwdGVhY2hpbmclMjBwcm9mZXNzaW9uJTIwbGVhZCUyMG1lJTIwdG8lMjBiZWxpZXZlJTIwdGhhdCUyMGJyb213ZWxsJTIwaGlnaCU1QydzJTIwc2F0aXJlJTIwaXMlMjBtdWNoJTIwY2xvc2VyJTIwdG8lMjByZWFsaXR5JTIwdGhhbiUyMGlzJTIwJTIyJTIwdGVhY2hlcnMlMjAlMjIuJTIwdGhlJTIwc2NyYW1ibGUlMjB0byUyMHN1cnZpdmUlMjBmaW5hbmNpYWxseSUyQyUyMHRoZSUyMGluc2lnaHRmdWwlMjBzdHVkZW50cyUyMHdobyUyMGNhbiUyMHNlZSUyMHJpZ2h0JTIwdGhyb3VnaCUyMHRoZWlyJTIwcGF0aGV0aWMlMjB0ZWFjaGVycyU1Qydwb21wJTJDJTIwdGhlJTIwcGV0dGluZXNzJTIwb2YlMjB0aGUlMjB3aG9sZSUyMHNpdHVhdGlvbiUyQyUyMGFsbCUyMHJlbWluZCUyMG1lJTIwb2YlMjB0aGUlMjBzY2hvb2xzJTIwaSUyMGtuZXclMjBhbmQlMjB0aGVpciUyMHN0dWRlbnRzLiUyMHdoZW4lMjBpJTIwc2F3JTIwdGhlJTIwZXBpc29kZSUyMGluJTIwd2hpY2glMjBhJTIwc3R1ZGVudCUyMHJlcGVhdGVkbHklMjB0cmllZCUyMHRvJTIwYnVybiUyMGRvd24lMjB0aGUlMjBzY2hvb2wlMkMlMjBpJTIwaW1tZWRpYXRlbHklMjByZWNhbGxlZC4uLi4uJyUwQSUwQSclM0UlM0UlM0UlMjAuLi4uJTIwJTVCTUFTSyU1RCUyMCU1Qk1BU0slNUQlMjAlNUJNQVNLJTVEJTIwJTVCTUFTSyU1RC4uLi4uLi4lMjBoaWdoLiUyMGElMjBjbGFzc2ljJTIwbGluZSUyMCUzQSUyMGluc3BlY3RvciUyMCUzQSUyMGklNUMnbSUyMGhlcmUlMjB0byUyMHNhY2slMjBvbmUlMjBvZiUyMHlvdXIlMjB0ZWFjaGVycy4lMjBzdHVkZW50JTIwJTNBJTIwd2VsY29tZSUyMHRvJTIwYnJvbXdlbGwlMjBoaWdoLiUyMGklMjBleHBlY3QlMjB0aGF0JTIwbWFueSUyMGFkdWx0cyUyMG9mJTIwbXklMjBhZ2UlMjB0aGluayUyMHRoYXQlMjBicm9td2VsbCUyMGhpZ2glMjBpcyUyMGZhciUyMGZldGNoZWQuJTIwd2hhdCUyMGElMjBwaXR5JTIwdGhhdCUyMGl0JTIwaXNuJTVDJ3QhJTIwJTVCU0VQJTVEJTIwJTVCQ0xTJTVEJTIwaG9tZWxlc3NuZXNzJTIwKCUyMG9yJTIwaG91c2VsZXNzbmVzcyUyMGFzJTIwZ2VvcmdlJTIwY2FybGluJTIwc3RhdGVkJTIwKSUyMGhhcyUyMGJlZW4lMjBhbiUyMGlzc3VlJTIwZm9yJTIweWVhcnMlMjBidXQlMjBuZXZlciUyMGElMjBwbGFuJTIwdG8lMjBoZWxwJTIwdGhvc2UlMjBvbiUyMHRoZSUyMHN0cmVldCUyMHRoYXQlMjB3ZXJlJTIwb25jZSUyMGNvbnNpZGVyZWQlMjBodW1hbiUyMHdobyUyMGRpZCUyMGV2ZXJ5dGhpbmclMjBmcm9tJTIwZ29pbmclMjB0byUyMHNjaG9vbCUyQyUyMHdvcmslMkMlMjBvciUyMHZvdGUlMjBmb3IlMjB0aGUlMjBtYXR0ZXIuJTIwbW9zdCUyMHBlb3BsZSUyMHRoaW5rJTIwb2YlMjB0aGUlMjBob21lbGVzcyc=",highlighted:`<span class="hljs-string">'>>> [CLS] bromwell high is a cartoon comedy [MASK] it ran at the same time as some other programs about school life, such as " teachers ". my 35 years in the teaching profession lead me to believe that bromwell high\\'s satire is much closer to reality than is " teachers ". the scramble to survive financially, the insightful students who can see right through their pathetic teachers\\'pomp, the pettiness of the whole situation, all remind me of the schools i knew and their students. when i saw the episode in which a student repeatedly tried to burn down the school, i immediately recalled.....'</span> | |
| <span class="hljs-string">'>>> .... [MASK] [MASK] [MASK] [MASK]....... high. a classic line : inspector : i\\'m here to sack one of your teachers. student : welcome to bromwell high. i expect that many adults of my age think that bromwell high is far fetched. what a pity that it isn\\'t! [SEP] [CLS] homelessness ( or houselessness as george carlin stated ) has been an issue for years but never a plan to help those on the street that were once considered human who did everything from going to school, work, or vote for the matter. most people think of the homeless'</span>`,wrap:!1}}),Ne=new Ye({props:{$$slots:{default:[gi]},$$scope:{ctx:I}}}),at=new h({props:{code:"dHJhaW5fc2l6ZSUyMCUzRCUyMDEwXzAwMCUwQXRlc3Rfc2l6ZSUyMCUzRCUyMGludCgwLjElMjAqJTIwdHJhaW5fc2l6ZSklMEElMEFkb3duc2FtcGxlZF9kYXRhc2V0JTIwJTNEJTIwbG1fZGF0YXNldHMlNUIlMjJ0cmFpbiUyMiU1RC50cmFpbl90ZXN0X3NwbGl0KCUwQSUyMCUyMCUyMCUyMHRyYWluX3NpemUlM0R0cmFpbl9zaXplJTJDJTIwdGVzdF9zaXplJTNEdGVzdF9zaXplJTJDJTIwc2VlZCUzRDQyJTBBKSUwQWRvd25zYW1wbGVkX2RhdGFzZXQ=",highlighted:`train_size = <span class="hljs-number">10_000</span> | |
| test_size = <span class="hljs-built_in">int</span>(<span class="hljs-number">0.1</span> * train_size) | |
| downsampled_dataset = lm_datasets[<span class="hljs-string">"train"</span>].train_test_split( | |
| train_size=train_size, test_size=test_size, seed=<span class="hljs-number">42</span> | |
| ) | |
| downsampled_dataset`,wrap:!1}}),st=new h({props:{code:"RGF0YXNldERpY3QoJTdCJTBBJTIwJTIwJTIwJTIwdHJhaW4lM0ElMjBEYXRhc2V0KCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGZlYXR1cmVzJTNBJTIwJTVCJ2F0dGVudGlvbl9tYXNrJyUyQyUyMCdpbnB1dF9pZHMnJTJDJTIwJ2xhYmVscyclMkMlMjAnd29yZF9pZHMnJTVEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbnVtX3Jvd3MlM0ElMjAxMDAwMCUwQSUyMCUyMCUyMCUyMCU3RCklMEElMjAlMjAlMjAlMjB0ZXN0JTNBJTIwRGF0YXNldCglN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBmZWF0dXJlcyUzQSUyMCU1QidhdHRlbnRpb25fbWFzayclMkMlMjAnaW5wdXRfaWRzJyUyQyUyMCdsYWJlbHMnJTJDJTIwJ3dvcmRfaWRzJyU1RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG51bV9yb3dzJTNBJTIwMTAwMCUwQSUyMCUyMCUyMCUyMCU3RCklMEElN0Qp",highlighted:`DatasetDict({ | |
| train: Dataset({ | |
| features: [<span class="hljs-string">'attention_mask'</span>, <span class="hljs-string">'input_ids'</span>, <span class="hljs-string">'labels'</span>, <span class="hljs-string">'word_ids'</span>], | |
| num_rows: <span class="hljs-number">10000</span> | |
| }) | |
| test: Dataset({ | |
| features: [<span class="hljs-string">'attention_mask'</span>, <span class="hljs-string">'input_ids'</span>, <span class="hljs-string">'labels'</span>, <span class="hljs-string">'word_ids'</span>], | |
| num_rows: <span class="hljs-number">1000</span> | |
| }) | |
| })`,wrap:!1}}),it=new h({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMG5vdGVib29rX2xvZ2luJTBBJTBBbm90ZWJvb2tfbG9naW4oKQ==",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> notebook_login | |
| notebook_login()`,wrap:!1}}),Mt=new h({props:{code:"aHVnZ2luZ2ZhY2UtY2xpJTIwbG9naW4=",highlighted:'huggingface-<span class="hljs-keyword">cli</span> login',wrap:!1}});const Vn=[Zi,Gi],Rt=[];function xn(e,l){return e[0]==="tf"?0:1}me=xn(I),ye=Rt[me]=Vn[me](I),ct=new Se({props:{title:"Perplexity pentru language models",local:"perplexity-for-language-models",headingTag:"h3"}}),ut=new zs({props:{id:"NURcDHhYe98"}});const An=[ki,Bi],vt=[];function Nn(e,l){return e[0]==="pt"?0:1}Je=Nn(I),Ue=vt[Je]=An[Je](I),dt=new h({props:{code:"UGVycGxleGl0eSUzQSUyMDIxLjc1",highlighted:'<span class="hljs-meta">>>> </span>Perplexity: <span class="hljs-number">21.75</span>',wrap:!1}});const Qn=[vi,Ri],zt=[];function Fn(e,l){return e[0]==="pt"?0:1}Te=Fn(I),we=zt[Te]=Qn[Te](I);const Yn=[Xi,zi],Xt=[];function Sn(e,l){return e[0]==="pt"?0:1}je=Sn(I),be=Xt[je]=Yn[je](I),Jt=new h({props:{code:"UGVycGxleGl0eSUzQSUyMDExLjMy",highlighted:'<span class="hljs-meta">>>> </span>Perplexity: <span class="hljs-number">11.32</span>',wrap:!1}});let A=I[0]==="pt"&&ti();Qe=new Ye({props:{$$slots:{default:[$i]},$$scope:{ctx:I}}});let N=I[0]==="pt"&&ai();return Tt=new Se({props:{title:"Utilizarea modelului fine-tuned",local:"using-our-fine-tuned-model",headingTag:"h2"}}),jt=new h({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMHBpcGVsaW5lJTBBJTBBbWFza19maWxsZXIlMjAlM0QlMjBwaXBlbGluZSglMEElMjAlMjAlMjAlMjAlMjJmaWxsLW1hc2slMjIlMkMlMjBtb2RlbCUzRCUyMmh1Z2dpbmdmYWNlLWNvdXJzZSUyRmRpc3RpbGJlcnQtYmFzZS11bmNhc2VkLWZpbmV0dW5lZC1pbWRiJTIyJTBBKQ==",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline | |
| mask_filler = pipeline( | |
| <span class="hljs-string">"fill-mask"</span>, model=<span class="hljs-string">"huggingface-course/distilbert-base-uncased-finetuned-imdb"</span> | |
| )`,wrap:!1}}),ht=new h({props:{code:"cHJlZHMlMjAlM0QlMjBtYXNrX2ZpbGxlcih0ZXh0KSUwQSUwQWZvciUyMHByZWQlMjBpbiUyMHByZWRzJTNBJTBBJTIwJTIwJTIwJTIwcHJpbnQoZiUyMiUzRSUzRSUzRSUyMCU3QnByZWQlNUInc2VxdWVuY2UnJTVEJTdEJTIyKQ==",highlighted:`preds = mask_filler(text) | |
| <span class="hljs-keyword">for</span> pred <span class="hljs-keyword">in</span> preds: | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f">>> <span class="hljs-subst">{pred[<span class="hljs-string">'sequence'</span>]}</span>"</span>)`,wrap:!1}}),ft=new h({props:{code:"JyUzRSUzRSUzRSUyMHRoaXMlMjBpcyUyMGElMjBncmVhdCUyMG1vdmllLiclMEEnJTNFJTNFJTNFJTIwdGhpcyUyMGlzJTIwYSUyMGdyZWF0JTIwZmlsbS4nJTBBJyUzRSUzRSUzRSUyMHRoaXMlMjBpcyUyMGElMjBncmVhdCUyMHN0b3J5LiclMEEnJTNFJTNFJTNFJTIwdGhpcyUyMGlzJTIwYSUyMGdyZWF0JTIwbW92aWVzLiclMEEnJTNFJTNFJTNFJTIwdGhpcyUyMGlzJTIwYSUyMGdyZWF0JTIwY2hhcmFjdGVyLic=",highlighted:`<span class="hljs-string">'>>> this is a great movie.'</span> | |
| <span class="hljs-string">'>>> this is a great film.'</span> | |
| <span class="hljs-string">'>>> this is a great story.'</span> | |
| <span class="hljs-string">'>>> this is a great movies.'</span> | |
| <span class="hljs-string">'>>> this is a great character.'</span>`,wrap:!1}}),Ct=new zs({props:{id:"0Oxphw4Q9fo"}}),Fe=new Ye({props:{$$slots:{default:[Wi]},$$scope:{ctx:I}}}),gt=new di({props:{source:"https://github.com/huggingface/course/blob/main/chapters/rum/chapter7/3.mdx"}}),{c(){s=U("meta"),o=i(),n=U("p"),u=i(),d(g.$$.fragment),w=i(),d(R.$$.fragment),z=i(),v.c(),k=i(),B=U("p"),B.textContent=b,G=i(),W=U("p"),W.textContent=Q,C=i(),X=U("p"),X.innerHTML=E,Z=i(),$=U("div"),$.innerHTML=fe,Ge=i(),H=U("p"),H.innerHTML=Yt,Ze=i(),V=U("iframe"),ae=i(),Ie=U("p"),Ie.textContent=Le,se=i(),d(ne.$$.fragment),ie=i(),d(ce.$$.fragment),Be=i(),d(L.$$.fragment),ke=i(),D=U("p"),D.innerHTML=St,Re=i(),F=U("div"),F.innerHTML=De,re=i(),Ce=U("p"),Ce.innerHTML=qe,Me=i(),x.c(),ue=i(),q=U("p"),q.textContent=Et,ve=i(),d(K.$$.fragment),ze=i(),P=U("p"),P.innerHTML=Ht,Xe=i(),d(O.$$.fragment),$e=i(),ee=U("p"),ee.textContent=Ke,pe=i(),S.c(),M=i(),d(f.$$.fragment),Dt=i(),Pe=U("p"),Pe.textContent=Ys,qt=i(),d(Oe.$$.fragment),Kt=i(),el=U("p"),el.innerHTML=Ss,Pt=i(),d(ll.$$.fragment),Ot=i(),d(tl.$$.fragment),ea=i(),al=U("p"),al.innerHTML=Es,la=i(),d(sl.$$.fragment),ta=i(),d(nl.$$.fragment),aa=i(),il=U("p"),il.innerHTML=Hs,sa=i(),d(We.$$.fragment),na=i(),rl=U("p"),rl.innerHTML=Ls,ia=i(),d(Ml.$$.fragment),ra=i(),d(pl.$$.fragment),Ma=i(),cl=U("p"),cl.textContent=Ds,pa=i(),ul=U("p"),ul.innerHTML=qs,ca=i(),d(ol.$$.fragment),ua=i(),d(dl.$$.fragment),oa=i(),ml=U("p"),ml.innerHTML=Ks,da=i(),yl=U("p"),yl.innerHTML=Ps,ma=i(),d(Jl.$$.fragment),ya=i(),d(Ul.$$.fragment),Ja=i(),Tl=U("p"),Tl.innerHTML=Os,Ua=i(),d(Ve.$$.fragment),Ta=i(),wl=U("p"),wl.textContent=en,wa=i(),d(jl.$$.fragment),ja=i(),d(xe.$$.fragment),ba=i(),bl=U("p"),bl.textContent=ln,ha=i(),d(hl.$$.fragment),fa=i(),d(fl.$$.fragment),Ia=i(),Il=U("p"),Il.textContent=tn,Ca=i(),d(Cl.$$.fragment),_a=i(),d(_l.$$.fragment),ga=i(),gl=U("p"),gl.innerHTML=an,Ga=i(),d(Gl.$$.fragment),Za=i(),d(Zl.$$.fragment),Ba=i(),Bl=U("p"),Bl.textContent=sn,ka=i(),kl=U("ul"),kl.innerHTML=nn,Ra=i(),Rl=U("p"),Rl.textContent=rn,va=i(),d(vl.$$.fragment),za=i(),zl=U("p"),zl.innerHTML=Mn,Xa=i(),Xl=U("p"),Xl.innerHTML=pn,$a=i(),d($l.$$.fragment),Wa=i(),d(Wl.$$.fragment),Va=i(),Vl=U("p"),Vl.innerHTML=cn,xa=i(),d(xl.$$.fragment),Aa=i(),d(Al.$$.fragment),Na=i(),Nl=U("p"),Nl.textContent=un,Qa=i(),d(Ql.$$.fragment),Fa=i(),d(Fl.$$.fragment),Ya=i(),Yl=U("p"),Yl.innerHTML=on,Sa=i(),d(Sl.$$.fragment),Ea=i(),El=U("p"),El.innerHTML=dn,Ha=i(),d(Hl.$$.fragment),La=i(),Ll=U("p"),Ll.innerHTML=mn,Da=i(),d(Dl.$$.fragment),qa=i(),d(ql.$$.fragment),Ka=i(),Kl=U("p"),Kl.innerHTML=yn,Pa=i(),d(Ae.$$.fragment),Oa=i(),le&&le.c(),$t=i(),Pl=U("p"),Pl.innerHTML=Jn,es=i(),de.c(),Wt=i(),Ol=U("p"),Ol.textContent=Un,ls=i(),d(et.$$.fragment),ts=i(),d(lt.$$.fragment),as=i(),d(Ne.$$.fragment),ss=i(),tt=U("p"),tt.innerHTML=Tn,ns=i(),d(at.$$.fragment),is=i(),d(st.$$.fragment),rs=i(),nt=U("p"),nt.innerHTML=wn,Ms=i(),d(it.$$.fragment),ps=i(),rt=U("p"),rt.textContent=jn,cs=i(),d(Mt.$$.fragment),us=i(),pt=U("p"),pt.textContent=bn,os=i(),ye.c(),Vt=i(),d(ct.$$.fragment),ds=i(),d(ut.$$.fragment),ms=i(),ot=U("p"),ot.textContent=hn,ys=i(),Ue.c(),xt=i(),d(dt.$$.fragment),Js=i(),mt=U("p"),mt.textContent=fn,Us=i(),we.c(),At=i(),yt=U("p"),yt.textContent=In,Ts=i(),be.c(),Nt=i(),d(Jt.$$.fragment),ws=i(),Ut=U("p"),Ut.textContent=Cn,js=i(),A&&A.c(),Qt=i(),d(Qe.$$.fragment),bs=i(),N&&N.c(),Ft=i(),d(Tt.$$.fragment),hs=i(),wt=U("p"),wt.innerHTML=_n,fs=i(),d(jt.$$.fragment),Is=i(),bt=U("p"),bt.textContent=gn,Cs=i(),d(ht.$$.fragment),_s=i(),d(ft.$$.fragment),gs=i(),It=U("p"),It.textContent=Gn,Gs=i(),d(Ct.$$.fragment),Zs=i(),_t=U("p"),_t.innerHTML=Zn,Bs=i(),d(Fe.$$.fragment),ks=i(),d(gt.$$.fragment),Rs=i(),Lt=U("p"),this.h()},l(e){const l=ci("svelte-u9bgzb",document.head);s=T(l,"META",{name:!0,content:!0}),l.forEach(t),o=r(e),n=T(e,"P",{}),Fs(n).forEach(t),u=r(e),m(g.$$.fragment,e),w=r(e),m(R.$$.fragment,e),z=r(e),v.l(e),k=r(e),B=T(e,"P",{"data-svelte-h":!0}),j(B)!=="svelte-1glyo4x"&&(B.textContent=b),G=r(e),W=T(e,"P",{"data-svelte-h":!0}),j(W)!=="svelte-9afh7"&&(W.textContent=Q),C=r(e),X=T(e,"P",{"data-svelte-h":!0}),j(X)!=="svelte-f8z56n"&&(X.innerHTML=E),Z=r(e),$=T(e,"DIV",{class:!0,"data-svelte-h":!0}),j($)!=="svelte-1lvgpuv"&&($.innerHTML=fe),Ge=r(e),H=T(e,"P",{"data-svelte-h":!0}),j(H)!=="svelte-1ks0ykh"&&(H.innerHTML=Yt),Ze=r(e),V=T(e,"IFRAME",{src:!0,frameborder:!0,height:!0,title:!0,class:!0,allow:!0,sandbox:!0}),Fs(V).forEach(t),ae=r(e),Ie=T(e,"P",{"data-svelte-h":!0}),j(Ie)!=="svelte-zrrr7l"&&(Ie.textContent=Le),se=r(e),m(ne.$$.fragment,e),ie=r(e),m(ce.$$.fragment,e),Be=r(e),m(L.$$.fragment,e),ke=r(e),D=T(e,"P",{"data-svelte-h":!0}),j(D)!=="svelte-19tpmhb"&&(D.innerHTML=St),Re=r(e),F=T(e,"DIV",{class:!0,"data-svelte-h":!0}),j(F)!=="svelte-zc90xi"&&(F.innerHTML=De),re=r(e),Ce=T(e,"P",{"data-svelte-h":!0}),j(Ce)!=="svelte-t3m32n"&&(Ce.innerHTML=qe),Me=r(e),x.l(e),ue=r(e),q=T(e,"P",{"data-svelte-h":!0}),j(q)!=="svelte-1uazy4q"&&(q.textContent=Et),ve=r(e),m(K.$$.fragment,e),ze=r(e),P=T(e,"P",{"data-svelte-h":!0}),j(P)!=="svelte-1q0eyba"&&(P.innerHTML=Ht),Xe=r(e),m(O.$$.fragment,e),$e=r(e),ee=T(e,"P",{"data-svelte-h":!0}),j(ee)!=="svelte-1wiyj16"&&(ee.textContent=Ke),pe=r(e),S.l(e),M=r(e),m(f.$$.fragment,e),Dt=r(e),Pe=T(e,"P",{"data-svelte-h":!0}),j(Pe)!=="svelte-1hnwm2v"&&(Pe.textContent=Ys),qt=r(e),m(Oe.$$.fragment,e),Kt=r(e),el=T(e,"P",{"data-svelte-h":!0}),j(el)!=="svelte-12bkabn"&&(el.innerHTML=Ss),Pt=r(e),m(ll.$$.fragment,e),Ot=r(e),m(tl.$$.fragment,e),ea=r(e),al=T(e,"P",{"data-svelte-h":!0}),j(al)!=="svelte-j1i9tv"&&(al.innerHTML=Es),la=r(e),m(sl.$$.fragment,e),ta=r(e),m(nl.$$.fragment,e),aa=r(e),il=T(e,"P",{"data-svelte-h":!0}),j(il)!=="svelte-hkwtqx"&&(il.innerHTML=Hs),sa=r(e),m(We.$$.fragment,e),na=r(e),rl=T(e,"P",{"data-svelte-h":!0}),j(rl)!=="svelte-1m0mhpj"&&(rl.innerHTML=Ls),ia=r(e),m(Ml.$$.fragment,e),ra=r(e),m(pl.$$.fragment,e),Ma=r(e),cl=T(e,"P",{"data-svelte-h":!0}),j(cl)!=="svelte-hdf18f"&&(cl.textContent=Ds),pa=r(e),ul=T(e,"P",{"data-svelte-h":!0}),j(ul)!=="svelte-lnwk3h"&&(ul.innerHTML=qs),ca=r(e),m(ol.$$.fragment,e),ua=r(e),m(dl.$$.fragment,e),oa=r(e),ml=T(e,"P",{"data-svelte-h":!0}),j(ml)!=="svelte-wfbgxq"&&(ml.innerHTML=Ks),da=r(e),yl=T(e,"P",{"data-svelte-h":!0}),j(yl)!=="svelte-17qokwh"&&(yl.innerHTML=Ps),ma=r(e),m(Jl.$$.fragment,e),ya=r(e),m(Ul.$$.fragment,e),Ja=r(e),Tl=T(e,"P",{"data-svelte-h":!0}),j(Tl)!=="svelte-th3ra0"&&(Tl.innerHTML=Os),Ua=r(e),m(Ve.$$.fragment,e),Ta=r(e),wl=T(e,"P",{"data-svelte-h":!0}),j(wl)!=="svelte-1bnf1r0"&&(wl.textContent=en),wa=r(e),m(jl.$$.fragment,e),ja=r(e),m(xe.$$.fragment,e),ba=r(e),bl=T(e,"P",{"data-svelte-h":!0}),j(bl)!=="svelte-y17wu1"&&(bl.textContent=ln),ha=r(e),m(hl.$$.fragment,e),fa=r(e),m(fl.$$.fragment,e),Ia=r(e),Il=T(e,"P",{"data-svelte-h":!0}),j(Il)!=="svelte-cvzlep"&&(Il.textContent=tn),Ca=r(e),m(Cl.$$.fragment,e),_a=r(e),m(_l.$$.fragment,e),ga=r(e),gl=T(e,"P",{"data-svelte-h":!0}),j(gl)!=="svelte-1a13xg3"&&(gl.innerHTML=an),Ga=r(e),m(Gl.$$.fragment,e),Za=r(e),m(Zl.$$.fragment,e),Ba=r(e),Bl=T(e,"P",{"data-svelte-h":!0}),j(Bl)!=="svelte-1a4v5sr"&&(Bl.textContent=sn),ka=r(e),kl=T(e,"UL",{"data-svelte-h":!0}),j(kl)!=="svelte-wr64b9"&&(kl.innerHTML=nn),Ra=r(e),Rl=T(e,"P",{"data-svelte-h":!0}),j(Rl)!=="svelte-162vh3a"&&(Rl.textContent=rn),va=r(e),m(vl.$$.fragment,e),za=r(e),zl=T(e,"P",{"data-svelte-h":!0}),j(zl)!=="svelte-1anfhnt"&&(zl.innerHTML=Mn),Xa=r(e),Xl=T(e,"P",{"data-svelte-h":!0}),j(Xl)!=="svelte-1qifxd"&&(Xl.innerHTML=pn),$a=r(e),m($l.$$.fragment,e),Wa=r(e),m(Wl.$$.fragment,e),Va=r(e),Vl=T(e,"P",{"data-svelte-h":!0}),j(Vl)!=="svelte-3xtpi5"&&(Vl.innerHTML=cn),xa=r(e),m(xl.$$.fragment,e),Aa=r(e),m(Al.$$.fragment,e),Na=r(e),Nl=T(e,"P",{"data-svelte-h":!0}),j(Nl)!=="svelte-1hlwqy6"&&(Nl.textContent=un),Qa=r(e),m(Ql.$$.fragment,e),Fa=r(e),m(Fl.$$.fragment,e),Ya=r(e),Yl=T(e,"P",{"data-svelte-h":!0}),j(Yl)!=="svelte-crcf21"&&(Yl.innerHTML=on),Sa=r(e),m(Sl.$$.fragment,e),Ea=r(e),El=T(e,"P",{"data-svelte-h":!0}),j(El)!=="svelte-166bscs"&&(El.innerHTML=dn),Ha=r(e),m(Hl.$$.fragment,e),La=r(e),Ll=T(e,"P",{"data-svelte-h":!0}),j(Ll)!=="svelte-1pbpata"&&(Ll.innerHTML=mn),Da=r(e),m(Dl.$$.fragment,e),qa=r(e),m(ql.$$.fragment,e),Ka=r(e),Kl=T(e,"P",{"data-svelte-h":!0}),j(Kl)!=="svelte-emlzho"&&(Kl.innerHTML=yn),Pa=r(e),m(Ae.$$.fragment,e),Oa=r(e),le&&le.l(e),$t=r(e),Pl=T(e,"P",{"data-svelte-h":!0}),j(Pl)!=="svelte-6avu40"&&(Pl.innerHTML=Jn),es=r(e),de.l(e),Wt=r(e),Ol=T(e,"P",{"data-svelte-h":!0}),j(Ol)!=="svelte-h1lcm8"&&(Ol.textContent=Un),ls=r(e),m(et.$$.fragment,e),ts=r(e),m(lt.$$.fragment,e),as=r(e),m(Ne.$$.fragment,e),ss=r(e),tt=T(e,"P",{"data-svelte-h":!0}),j(tt)!=="svelte-17aj1q6"&&(tt.innerHTML=Tn),ns=r(e),m(at.$$.fragment,e),is=r(e),m(st.$$.fragment,e),rs=r(e),nt=T(e,"P",{"data-svelte-h":!0}),j(nt)!=="svelte-1wg7gde"&&(nt.innerHTML=wn),Ms=r(e),m(it.$$.fragment,e),ps=r(e),rt=T(e,"P",{"data-svelte-h":!0}),j(rt)!=="svelte-1863eha"&&(rt.textContent=jn),cs=r(e),m(Mt.$$.fragment,e),us=r(e),pt=T(e,"P",{"data-svelte-h":!0}),j(pt)!=="svelte-1sc64uc"&&(pt.textContent=bn),os=r(e),ye.l(e),Vt=r(e),m(ct.$$.fragment,e),ds=r(e),m(ut.$$.fragment,e),ms=r(e),ot=T(e,"P",{"data-svelte-h":!0}),j(ot)!=="svelte-1sjjttb"&&(ot.textContent=hn),ys=r(e),Ue.l(e),xt=r(e),m(dt.$$.fragment,e),Js=r(e),mt=T(e,"P",{"data-svelte-h":!0}),j(mt)!=="svelte-xn33jc"&&(mt.textContent=fn),Us=r(e),we.l(e),At=r(e),yt=T(e,"P",{"data-svelte-h":!0}),j(yt)!=="svelte-tizs3m"&&(yt.textContent=In),Ts=r(e),be.l(e),Nt=r(e),m(Jt.$$.fragment,e),ws=r(e),Ut=T(e,"P",{"data-svelte-h":!0}),j(Ut)!=="svelte-1j9d15c"&&(Ut.textContent=Cn),js=r(e),A&&A.l(e),Qt=r(e),m(Qe.$$.fragment,e),bs=r(e),N&&N.l(e),Ft=r(e),m(Tt.$$.fragment,e),hs=r(e),wt=T(e,"P",{"data-svelte-h":!0}),j(wt)!=="svelte-jjpvfc"&&(wt.innerHTML=_n),fs=r(e),m(jt.$$.fragment,e),Is=r(e),bt=T(e,"P",{"data-svelte-h":!0}),j(bt)!=="svelte-iz3i8z"&&(bt.textContent=gn),Cs=r(e),m(ht.$$.fragment,e),_s=r(e),m(ft.$$.fragment,e),gs=r(e),It=T(e,"P",{"data-svelte-h":!0}),j(It)!=="svelte-d9qtzg"&&(It.textContent=Gn),Gs=r(e),m(Ct.$$.fragment,e),Zs=r(e),_t=T(e,"P",{"data-svelte-h":!0}),j(_t)!=="svelte-dfc1tg"&&(_t.innerHTML=Zn),Bs=r(e),m(Fe.$$.fragment,e),ks=r(e),m(gt.$$.fragment,e),Rs=r(e),Lt=T(e,"P",{}),Fs(Lt).forEach(t),this.h()},h(){he(s,"name","hf:doc:metadata"),he(s,"content",xi),he($,"class","flex justify-center"),ii(V.src,He="https://course-demos-distilbert-base-uncased-finetuned-imdb.hf.space")||he(V,"src",He),he(V,"frameborder","0"),he(V,"height","300"),he(V,"title","Gradio app"),he(V,"class","block dark:hidden container p-0 flex-grow space-iframe"),he(V,"allow","accelerometer; ambient-light-sensor; autoplay; battery; camera; document-domain; encrypted-media; fullscreen; geolocation; gyroscope; layout-animations; legacy-image-formats; magnetometer; microphone; midi; oversized-images; payment; picture-in-picture; publickey-credentials-get; sync-xhr; usb; vr ; wake-lock; xr-spatial-tracking"),he(V,"sandbox","allow-forms allow-modals allow-popups allow-popups-to-escape-sandbox allow-same-origin allow-scripts allow-downloads"),he(F,"class","flex justify-center")},m(e,l){ui(document.head,s),a(e,o,l),a(e,n,l),a(e,u,l),y(g,e,l),a(e,w,l),y(R,e,l),a(e,z,l),Gt[_].m(e,l),a(e,k,l),a(e,B,l),a(e,G,l),a(e,W,l),a(e,C,l),a(e,X,l),a(e,Z,l),a(e,$,l),a(e,Ge,l),a(e,H,l),a(e,Ze,l),a(e,V,l),a(e,ae,l),a(e,Ie,l),a(e,se,l),y(ne,e,l),a(e,ie,l),y(ce,e,l),a(e,Be,l),y(L,e,l),a(e,ke,l),a(e,D,l),a(e,Re,l),a(e,F,l),a(e,re,l),a(e,Ce,l),a(e,Me,l),Zt[Y].m(e,l),a(e,ue,l),a(e,q,l),a(e,ve,l),y(K,e,l),a(e,ze,l),a(e,P,l),a(e,Xe,l),y(O,e,l),a(e,$e,l),a(e,ee,l),a(e,pe,l),Bt[te].m(e,l),a(e,M,l),y(f,e,l),a(e,Dt,l),a(e,Pe,l),a(e,qt,l),y(Oe,e,l),a(e,Kt,l),a(e,el,l),a(e,Pt,l),y(ll,e,l),a(e,Ot,l),y(tl,e,l),a(e,ea,l),a(e,al,l),a(e,la,l),y(sl,e,l),a(e,ta,l),y(nl,e,l),a(e,aa,l),a(e,il,l),a(e,sa,l),y(We,e,l),a(e,na,l),a(e,rl,l),a(e,ia,l),y(Ml,e,l),a(e,ra,l),y(pl,e,l),a(e,Ma,l),a(e,cl,l),a(e,pa,l),a(e,ul,l),a(e,ca,l),y(ol,e,l),a(e,ua,l),y(dl,e,l),a(e,oa,l),a(e,ml,l),a(e,da,l),a(e,yl,l),a(e,ma,l),y(Jl,e,l),a(e,ya,l),y(Ul,e,l),a(e,Ja,l),a(e,Tl,l),a(e,Ua,l),y(Ve,e,l),a(e,Ta,l),a(e,wl,l),a(e,wa,l),y(jl,e,l),a(e,ja,l),y(xe,e,l),a(e,ba,l),a(e,bl,l),a(e,ha,l),y(hl,e,l),a(e,fa,l),y(fl,e,l),a(e,Ia,l),a(e,Il,l),a(e,Ca,l),y(Cl,e,l),a(e,_a,l),y(_l,e,l),a(e,ga,l),a(e,gl,l),a(e,Ga,l),y(Gl,e,l),a(e,Za,l),y(Zl,e,l),a(e,Ba,l),a(e,Bl,l),a(e,ka,l),a(e,kl,l),a(e,Ra,l),a(e,Rl,l),a(e,va,l),y(vl,e,l),a(e,za,l),a(e,zl,l),a(e,Xa,l),a(e,Xl,l),a(e,$a,l),y($l,e,l),a(e,Wa,l),y(Wl,e,l),a(e,Va,l),a(e,Vl,l),a(e,xa,l),y(xl,e,l),a(e,Aa,l),y(Al,e,l),a(e,Na,l),a(e,Nl,l),a(e,Qa,l),y(Ql,e,l),a(e,Fa,l),y(Fl,e,l),a(e,Ya,l),a(e,Yl,l),a(e,Sa,l),y(Sl,e,l),a(e,Ea,l),a(e,El,l),a(e,Ha,l),y(Hl,e,l),a(e,La,l),a(e,Ll,l),a(e,Da,l),y(Dl,e,l),a(e,qa,l),y(ql,e,l),a(e,Ka,l),a(e,Kl,l),a(e,Pa,l),y(Ae,e,l),a(e,Oa,l),le&&le.m(e,l),a(e,$t,l),a(e,Pl,l),a(e,es,l),kt[oe].m(e,l),a(e,Wt,l),a(e,Ol,l),a(e,ls,l),y(et,e,l),a(e,ts,l),y(lt,e,l),a(e,as,l),y(Ne,e,l),a(e,ss,l),a(e,tt,l),a(e,ns,l),y(at,e,l),a(e,is,l),y(st,e,l),a(e,rs,l),a(e,nt,l),a(e,Ms,l),y(it,e,l),a(e,ps,l),a(e,rt,l),a(e,cs,l),y(Mt,e,l),a(e,us,l),a(e,pt,l),a(e,os,l),Rt[me].m(e,l),a(e,Vt,l),y(ct,e,l),a(e,ds,l),y(ut,e,l),a(e,ms,l),a(e,ot,l),a(e,ys,l),vt[Je].m(e,l),a(e,xt,l),y(dt,e,l),a(e,Js,l),a(e,mt,l),a(e,Us,l),zt[Te].m(e,l),a(e,At,l),a(e,yt,l),a(e,Ts,l),Xt[je].m(e,l),a(e,Nt,l),y(Jt,e,l),a(e,ws,l),a(e,Ut,l),a(e,js,l),A&&A.m(e,l),a(e,Qt,l),y(Qe,e,l),a(e,bs,l),N&&N.m(e,l),a(e,Ft,l),y(Tt,e,l),a(e,hs,l),a(e,wt,l),a(e,fs,l),y(jt,e,l),a(e,Is,l),a(e,bt,l),a(e,Cs,l),y(ht,e,l),a(e,_s,l),y(ft,e,l),a(e,gs,l),a(e,It,l),a(e,Gs,l),y(Ct,e,l),a(e,Zs,l),a(e,_t,l),a(e,Bs,l),y(Fe,e,l),a(e,ks,l),y(gt,e,l),a(e,Rs,l),a(e,Lt,l),vs=!0},p(e,[l]){const En={};l&1&&(En.fw=e[0]),g.$set(En);let Xs=_;_=kn(e),_!==Xs&&(ge(),c(Gt[Xs],1,1,()=>{Gt[Xs]=null}),_e(),v=Gt[_],v||(v=Gt[_]=Bn[_](e),v.c()),p(v,1),v.m(k.parentNode,k));const Hn={};l&2&&(Hn.$$scope={dirty:l,ctx:e}),ce.$set(Hn);let $s=Y;Y=vn(e),Y!==$s&&(ge(),c(Zt[$s],1,1,()=>{Zt[$s]=null}),_e(),x=Zt[Y],x||(x=Zt[Y]=Rn[Y](e),x.c()),p(x,1),x.m(ue.parentNode,ue));let Ws=te;te=Xn(e),te!==Ws&&(ge(),c(Bt[Ws],1,1,()=>{Bt[Ws]=null}),_e(),S=Bt[te],S||(S=Bt[te]=zn[te](e),S.c()),p(S,1),S.m(M.parentNode,M));const Ln={};l&2&&(Ln.$$scope={dirty:l,ctx:e}),We.$set(Ln);const Dn={};l&2&&(Dn.$$scope={dirty:l,ctx:e}),Ve.$set(Dn);const qn={};l&2&&(qn.$$scope={dirty:l,ctx:e}),xe.$set(qn);const Kn={};l&2&&(Kn.$$scope={dirty:l,ctx:e}),Ae.$set(Kn),e[0]==="pt"?le||(le=li(),le.c(),le.m($t.parentNode,$t)):le&&(le.d(1),le=null);let Vs=oe;oe=Wn(e),oe!==Vs&&(ge(),c(kt[Vs],1,1,()=>{kt[Vs]=null}),_e(),de=kt[oe],de||(de=kt[oe]=$n[oe](e),de.c()),p(de,1),de.m(Wt.parentNode,Wt));const Pn={};l&2&&(Pn.$$scope={dirty:l,ctx:e}),Ne.$set(Pn);let xs=me;me=xn(e),me!==xs&&(ge(),c(Rt[xs],1,1,()=>{Rt[xs]=null}),_e(),ye=Rt[me],ye||(ye=Rt[me]=Vn[me](e),ye.c()),p(ye,1),ye.m(Vt.parentNode,Vt));let As=Je;Je=Nn(e),Je!==As&&(ge(),c(vt[As],1,1,()=>{vt[As]=null}),_e(),Ue=vt[Je],Ue||(Ue=vt[Je]=An[Je](e),Ue.c()),p(Ue,1),Ue.m(xt.parentNode,xt));let Ns=Te;Te=Fn(e),Te!==Ns&&(ge(),c(zt[Ns],1,1,()=>{zt[Ns]=null}),_e(),we=zt[Te],we||(we=zt[Te]=Qn[Te](e),we.c()),p(we,1),we.m(At.parentNode,At));let Qs=je;je=Sn(e),je!==Qs&&(ge(),c(Xt[Qs],1,1,()=>{Xt[Qs]=null}),_e(),be=Xt[je],be||(be=Xt[je]=Yn[je](e),be.c()),p(be,1),be.m(Nt.parentNode,Nt)),e[0]==="pt"?A?l&1&&p(A,1):(A=ti(),A.c(),p(A,1),A.m(Qt.parentNode,Qt)):A&&(ge(),c(A,1,1,()=>{A=null}),_e());const On={};l&2&&(On.$$scope={dirty:l,ctx:e}),Qe.$set(On),e[0]==="pt"?N?l&1&&p(N,1):(N=ai(),N.c(),p(N,1),N.m(Ft.parentNode,Ft)):N&&(ge(),c(N,1,1,()=>{N=null}),_e());const ei={};l&2&&(ei.$$scope={dirty:l,ctx:e}),Fe.$set(ei)},i(e){vs||(p(g.$$.fragment,e),p(R.$$.fragment,e),p(v),p(ne.$$.fragment,e),p(ce.$$.fragment,e),p(L.$$.fragment,e),p(x),p(K.$$.fragment,e),p(O.$$.fragment,e),p(S),p(f.$$.fragment,e),p(Oe.$$.fragment,e),p(ll.$$.fragment,e),p(tl.$$.fragment,e),p(sl.$$.fragment,e),p(nl.$$.fragment,e),p(We.$$.fragment,e),p(Ml.$$.fragment,e),p(pl.$$.fragment,e),p(ol.$$.fragment,e),p(dl.$$.fragment,e),p(Jl.$$.fragment,e),p(Ul.$$.fragment,e),p(Ve.$$.fragment,e),p(jl.$$.fragment,e),p(xe.$$.fragment,e),p(hl.$$.fragment,e),p(fl.$$.fragment,e),p(Cl.$$.fragment,e),p(_l.$$.fragment,e),p(Gl.$$.fragment,e),p(Zl.$$.fragment,e),p(vl.$$.fragment,e),p($l.$$.fragment,e),p(Wl.$$.fragment,e),p(xl.$$.fragment,e),p(Al.$$.fragment,e),p(Ql.$$.fragment,e),p(Fl.$$.fragment,e),p(Sl.$$.fragment,e),p(Hl.$$.fragment,e),p(Dl.$$.fragment,e),p(ql.$$.fragment,e),p(Ae.$$.fragment,e),p(de),p(et.$$.fragment,e),p(lt.$$.fragment,e),p(Ne.$$.fragment,e),p(at.$$.fragment,e),p(st.$$.fragment,e),p(it.$$.fragment,e),p(Mt.$$.fragment,e),p(ye),p(ct.$$.fragment,e),p(ut.$$.fragment,e),p(Ue),p(dt.$$.fragment,e),p(we),p(be),p(Jt.$$.fragment,e),p(A),p(Qe.$$.fragment,e),p(N),p(Tt.$$.fragment,e),p(jt.$$.fragment,e),p(ht.$$.fragment,e),p(ft.$$.fragment,e),p(Ct.$$.fragment,e),p(Fe.$$.fragment,e),p(gt.$$.fragment,e),vs=!0)},o(e){c(g.$$.fragment,e),c(R.$$.fragment,e),c(v),c(ne.$$.fragment,e),c(ce.$$.fragment,e),c(L.$$.fragment,e),c(x),c(K.$$.fragment,e),c(O.$$.fragment,e),c(S),c(f.$$.fragment,e),c(Oe.$$.fragment,e),c(ll.$$.fragment,e),c(tl.$$.fragment,e),c(sl.$$.fragment,e),c(nl.$$.fragment,e),c(We.$$.fragment,e),c(Ml.$$.fragment,e),c(pl.$$.fragment,e),c(ol.$$.fragment,e),c(dl.$$.fragment,e),c(Jl.$$.fragment,e),c(Ul.$$.fragment,e),c(Ve.$$.fragment,e),c(jl.$$.fragment,e),c(xe.$$.fragment,e),c(hl.$$.fragment,e),c(fl.$$.fragment,e),c(Cl.$$.fragment,e),c(_l.$$.fragment,e),c(Gl.$$.fragment,e),c(Zl.$$.fragment,e),c(vl.$$.fragment,e),c($l.$$.fragment,e),c(Wl.$$.fragment,e),c(xl.$$.fragment,e),c(Al.$$.fragment,e),c(Ql.$$.fragment,e),c(Fl.$$.fragment,e),c(Sl.$$.fragment,e),c(Hl.$$.fragment,e),c(Dl.$$.fragment,e),c(ql.$$.fragment,e),c(Ae.$$.fragment,e),c(de),c(et.$$.fragment,e),c(lt.$$.fragment,e),c(Ne.$$.fragment,e),c(at.$$.fragment,e),c(st.$$.fragment,e),c(it.$$.fragment,e),c(Mt.$$.fragment,e),c(ye),c(ct.$$.fragment,e),c(ut.$$.fragment,e),c(Ue),c(dt.$$.fragment,e),c(we),c(be),c(Jt.$$.fragment,e),c(A),c(Qe.$$.fragment,e),c(N),c(Tt.$$.fragment,e),c(jt.$$.fragment,e),c(ht.$$.fragment,e),c(ft.$$.fragment,e),c(Ct.$$.fragment,e),c(Fe.$$.fragment,e),c(gt.$$.fragment,e),vs=!1},d(e){e&&(t(o),t(n),t(u),t(w),t(z),t(k),t(B),t(G),t(W),t(C),t(X),t(Z),t($),t(Ge),t(H),t(Ze),t(V),t(ae),t(Ie),t(se),t(ie),t(Be),t(ke),t(D),t(Re),t(F),t(re),t(Ce),t(Me),t(ue),t(q),t(ve),t(ze),t(P),t(Xe),t($e),t(ee),t(pe),t(M),t(Dt),t(Pe),t(qt),t(Kt),t(el),t(Pt),t(Ot),t(ea),t(al),t(la),t(ta),t(aa),t(il),t(sa),t(na),t(rl),t(ia),t(ra),t(Ma),t(cl),t(pa),t(ul),t(ca),t(ua),t(oa),t(ml),t(da),t(yl),t(ma),t(ya),t(Ja),t(Tl),t(Ua),t(Ta),t(wl),t(wa),t(ja),t(ba),t(bl),t(ha),t(fa),t(Ia),t(Il),t(Ca),t(_a),t(ga),t(gl),t(Ga),t(Za),t(Ba),t(Bl),t(ka),t(kl),t(Ra),t(Rl),t(va),t(za),t(zl),t(Xa),t(Xl),t($a),t(Wa),t(Va),t(Vl),t(xa),t(Aa),t(Na),t(Nl),t(Qa),t(Fa),t(Ya),t(Yl),t(Sa),t(Ea),t(El),t(Ha),t(La),t(Ll),t(Da),t(qa),t(Ka),t(Kl),t(Pa),t(Oa),t($t),t(Pl),t(es),t(Wt),t(Ol),t(ls),t(ts),t(as),t(ss),t(tt),t(ns),t(is),t(rs),t(nt),t(Ms),t(ps),t(rt),t(cs),t(us),t(pt),t(os),t(Vt),t(ds),t(ms),t(ot),t(ys),t(xt),t(Js),t(mt),t(Us),t(At),t(yt),t(Ts),t(Nt),t(ws),t(Ut),t(js),t(Qt),t(bs),t(Ft),t(hs),t(wt),t(fs),t(Is),t(bt),t(Cs),t(_s),t(gs),t(It),t(Gs),t(Zs),t(_t),t(Bs),t(ks),t(Rs),t(Lt)),t(s),J(g,e),J(R,e),Gt[_].d(e),J(ne,e),J(ce,e),J(L,e),Zt[Y].d(e),J(K,e),J(O,e),Bt[te].d(e),J(f,e),J(Oe,e),J(ll,e),J(tl,e),J(sl,e),J(nl,e),J(We,e),J(Ml,e),J(pl,e),J(ol,e),J(dl,e),J(Jl,e),J(Ul,e),J(Ve,e),J(jl,e),J(xe,e),J(hl,e),J(fl,e),J(Cl,e),J(_l,e),J(Gl,e),J(Zl,e),J(vl,e),J($l,e),J(Wl,e),J(xl,e),J(Al,e),J(Ql,e),J(Fl,e),J(Sl,e),J(Hl,e),J(Dl,e),J(ql,e),J(Ae,e),le&&le.d(e),kt[oe].d(e),J(et,e),J(lt,e),J(Ne,e),J(at,e),J(st,e),J(it,e),J(Mt,e),Rt[me].d(e),J(ct,e),J(ut,e),vt[Je].d(e),J(dt,e),zt[Te].d(e),Xt[je].d(e),J(Jt,e),A&&A.d(e),J(Qe,e),N&&N.d(e),J(Tt,e),J(jt,e),J(ht,e),J(ft,e),J(Ct,e),J(Fe,e),J(gt,e)}}}const xi='{"title":"Fine-tuningul la un masked language model","local":"fine-tuning-a-masked-language-model","sections":[{"title":"Alegerea unui model preantrenat pentru masked language modeling","local":"picking-a-pretrained-model-for-masked-language-modeling","sections":[],"depth":2},{"title":"Datasetul","local":"the-dataset","sections":[],"depth":2},{"title":"Preprocesarea datelor","local":"preprocessing-the-data","sections":[],"depth":2},{"title":"Fine-tuningul asupra DistilBERT cu API-ul Trainer","local":"fine-tuning-distilbert-with-the-trainer-api","sections":[{"title":"Perplexity pentru language models","local":"perplexity-for-language-models","sections":[],"depth":3}],"depth":2},{"title":"Fine-tuningul DistilBERT cu 🤗 Accelerate","local":"fine-tuning-distilbert-with-accelerate","sections":[],"depth":2},{"title":"Utilizarea modelului fine-tuned","local":"using-our-fine-tuned-model","sections":[],"depth":2}],"depth":1}';function Ai(I,s,o){let n="pt";return ri(()=>{const u=new URLSearchParams(window.location.search);o(0,n=u.get("fw")||"pt")}),[n]}class Di extends Mi{constructor(s){super(),pi(this,s,Ai,Vi,ni,{})}}export{Di as component}; | |
Xet Storage Details
- Size:
- 157 kB
- Xet hash:
- c9a4ed6a7967d7c171a4a6f783ace01d14a28e7d198dd6db14d575eaba4e133c
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.