Buckets:

rtrm's picture
download
raw
29.3 kB
import{s as Fe,o as Ee,n as oe}from"../chunks/scheduler.d586627e.js";import{S as He,i as Be,g as w,s as m,r as u,A as qe,h as T,f as l,c,j as Ie,u as f,x as J,k as Ye,y as Ne,a as n,v as $,d as h,t as g,w as M}from"../chunks/index.8589a59c.js";import{T as pe}from"../chunks/Tip.84e2336e.js";import{Y as Qe}from"../chunks/Youtube.49101e7b.js";import{C as z}from"../chunks/CodeBlock.47c46d2c.js";import{F as Ve,M as ke}from"../chunks/Markdown.67fc2fa9.js";import{H as Ue,E as Le}from"../chunks/EditOnGithub.073dfa26.js";function Ae(j){let t,r='Consulte a <a href="https://huggingface.co/tasks/text-classification" rel="nofollow">página de tarefas de classificação de texto</a> para obter mais informações sobre outras formas de classificação de texto e seus modelos, conjuntos de dados e métricas associados.';return{c(){t=w("p"),t.innerHTML=r},l(s){t=T(s,"P",{"data-svelte-h":!0}),J(t)!=="svelte-19mu0yp"&&(t.innerHTML=r)},m(s,p){n(s,t,p)},p:oe,d(s){s&&l(t)}}}function De(j){let t,r;return t=new z({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMERhdGFDb2xsYXRvcldpdGhQYWRkaW5nJTBBJTBBZGF0YV9jb2xsYXRvciUyMCUzRCUyMERhdGFDb2xsYXRvcldpdGhQYWRkaW5nKHRva2VuaXplciUzRHRva2VuaXplcik=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> DataCollatorWithPadding
<span class="hljs-meta">&gt;&gt;&gt; </span>data_collator = DataCollatorWithPadding(tokenizer=tokenizer)`,wrap:!1}}),{c(){u(t.$$.fragment)},l(s){f(t.$$.fragment,s)},m(s,p){$(t,s,p),r=!0},p:oe,i(s){r||(h(t.$$.fragment,s),r=!0)},o(s){g(t.$$.fragment,s),r=!1},d(s){M(t,s)}}}function Se(j){let t,r;return t=new ke({props:{$$slots:{default:[De]},$$scope:{ctx:j}}}),{c(){u(t.$$.fragment)},l(s){f(t.$$.fragment,s)},m(s,p){$(t,s,p),r=!0},p(s,p){const b={};p&2&&(b.$$scope={dirty:p,ctx:s}),t.$set(b)},i(s){r||(h(t.$$.fragment,s),r=!0)},o(s){g(t.$$.fragment,s),r=!1},d(s){M(t,s)}}}function Pe(j){let t,r;return t=new z({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMERhdGFDb2xsYXRvcldpdGhQYWRkaW5nJTBBJTBBZGF0YV9jb2xsYXRvciUyMCUzRCUyMERhdGFDb2xsYXRvcldpdGhQYWRkaW5nKHRva2VuaXplciUzRHRva2VuaXplciUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIydGYlMjIp",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> DataCollatorWithPadding
<span class="hljs-meta">&gt;&gt;&gt; </span>data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors=<span class="hljs-string">&quot;tf&quot;</span>)`,wrap:!1}}),{c(){u(t.$$.fragment)},l(s){f(t.$$.fragment,s)},m(s,p){$(t,s,p),r=!0},p:oe,i(s){r||(h(t.$$.fragment,s),r=!0)},o(s){g(t.$$.fragment,s),r=!1},d(s){M(t,s)}}}function Ke(j){let t,r;return t=new ke({props:{$$slots:{default:[Pe]},$$scope:{ctx:j}}}),{c(){u(t.$$.fragment)},l(s){f(t.$$.fragment,s)},m(s,p){$(t,s,p),r=!0},p(s,p){const b={};p&2&&(b.$$scope={dirty:p,ctx:s}),t.$set(b)},i(s){r||(h(t.$$.fragment,s),r=!0)},o(s){g(t.$$.fragment,s),r=!1},d(s){M(t,s)}}}function Oe(j){let t,r='Se você não estiver familiarizado com o fine-tuning de um modelo com o <code>Trainer</code>, dê uma olhada no tutorial básico <a href="../training#finetune-with-trainer">aqui</a>!';return{c(){t=w("p"),t.innerHTML=r},l(s){t=T(s,"P",{"data-svelte-h":!0}),J(t)!=="svelte-796e4s"&&(t.innerHTML=r)},m(s,p){n(s,t,p)},p:oe,d(s){s&&l(t)}}}function et(j){let t,r="O <code>Trainer</code> aplicará o preenchimento dinâmico por padrão quando você definir o argumento <code>tokenizer</code> dele. Nesse caso, você não precisa especificar um data collator explicitamente.";return{c(){t=w("p"),t.innerHTML=r},l(s){t=T(s,"P",{"data-svelte-h":!0}),J(t)!=="svelte-yhi952"&&(t.innerHTML=r)},m(s,p){n(s,t,p)},p:oe,d(s){s&&l(t)}}}function tt(j){let t,r="Carregue o DistilBERT com <code>AutoModelForSequenceClassification</code> junto com o número de rótulos esperados:",s,p,b,_,C,k,I="Nesse ponto, restam apenas três passos:",Y,v,R="<li>Definir seus hiperparâmetros de treinamento em <code>TrainingArguments</code>.</li> <li>Passar os argumentos de treinamento para o <code>Trainer</code> junto com o modelo, conjunto de dados, tokenizador e o data collator.</li> <li>Chamar a função <code>train()</code> para executar o fine-tuning do seu modelo.</li>",W,G,Z,U,x;return p=new z({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvclNlcXVlbmNlQ2xhc3NpZmljYXRpb24lMkMlMjBUcmFpbmluZ0FyZ3VtZW50cyUyQyUyMFRyYWluZXIlMEElMEFtb2RlbCUyMCUzRCUyMEF1dG9Nb2RlbEZvclNlcXVlbmNlQ2xhc3NpZmljYXRpb24uZnJvbV9wcmV0cmFpbmVkKCUyMmRpc3RpbGJlcnQlMkZkaXN0aWxiZXJ0LWJhc2UtdW5jYXNlZCUyMiUyQyUyMG51bV9sYWJlbHMlM0QyKQ==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForSequenceClassification, TrainingArguments, Trainer
<span class="hljs-meta">&gt;&gt;&gt; </span>model = AutoModelForSequenceClassification.from_pretrained(<span class="hljs-string">&quot;distilbert/distilbert-base-uncased&quot;</span>, num_labels=<span class="hljs-number">2</span>)`,wrap:!1}}),_=new pe({props:{$$slots:{default:[Oe]},$$scope:{ctx:j}}}),G=new z({props:{code:"dHJhaW5pbmdfYXJncyUyMCUzRCUyMFRyYWluaW5nQXJndW1lbnRzKCUwQSUyMCUyMCUyMCUyMG91dHB1dF9kaXIlM0QlMjIuJTJGcmVzdWx0cyUyMiUyQyUwQSUyMCUyMCUyMCUyMGxlYXJuaW5nX3JhdGUlM0QyZS01JTJDJTBBJTIwJTIwJTIwJTIwcGVyX2RldmljZV90cmFpbl9iYXRjaF9zaXplJTNEMTYlMkMlMEElMjAlMjAlMjAlMjBwZXJfZGV2aWNlX2V2YWxfYmF0Y2hfc2l6ZSUzRDE2JTJDJTBBJTIwJTIwJTIwJTIwbnVtX3RyYWluX2Vwb2NocyUzRDUlMkMlMEElMjAlMjAlMjAlMjB3ZWlnaHRfZGVjYXklM0QwLjAxJTJDJTBBKSUwQSUwQXRyYWluZXIlMjAlM0QlMjBUcmFpbmVyKCUwQSUyMCUyMCUyMCUyMG1vZGVsJTNEbW9kZWwlMkMlMEElMjAlMjAlMjAlMjBhcmdzJTNEdHJhaW5pbmdfYXJncyUyQyUwQSUyMCUyMCUyMCUyMHRyYWluX2RhdGFzZXQlM0R0b2tlbml6ZWRfaW1kYiU1QiUyMnRyYWluJTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwZXZhbF9kYXRhc2V0JTNEdG9rZW5pemVkX2ltZGIlNUIlMjJ0ZXN0JTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwdG9rZW5pemVyJTNEdG9rZW5pemVyJTJDJTBBJTIwJTIwJTIwJTIwZGF0YV9jb2xsYXRvciUzRGRhdGFfY29sbGF0b3IlMkMlMEEpJTBBJTBBdHJhaW5lci50cmFpbigp",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>training_args = TrainingArguments(
<span class="hljs-meta">... </span> output_dir=<span class="hljs-string">&quot;./results&quot;</span>,
<span class="hljs-meta">... </span> learning_rate=<span class="hljs-number">2e-5</span>,
<span class="hljs-meta">... </span> per_device_train_batch_size=<span class="hljs-number">16</span>,
<span class="hljs-meta">... </span> per_device_eval_batch_size=<span class="hljs-number">16</span>,
<span class="hljs-meta">... </span> num_train_epochs=<span class="hljs-number">5</span>,
<span class="hljs-meta">... </span> weight_decay=<span class="hljs-number">0.01</span>,
<span class="hljs-meta">... </span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>trainer = Trainer(
<span class="hljs-meta">... </span> model=model,
<span class="hljs-meta">... </span> args=training_args,
<span class="hljs-meta">... </span> train_dataset=tokenized_imdb[<span class="hljs-string">&quot;train&quot;</span>],
<span class="hljs-meta">... </span> eval_dataset=tokenized_imdb[<span class="hljs-string">&quot;test&quot;</span>],
<span class="hljs-meta">... </span> tokenizer=tokenizer,
<span class="hljs-meta">... </span> data_collator=data_collator,
<span class="hljs-meta">... </span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>trainer.train()`,wrap:!1}}),U=new pe({props:{$$slots:{default:[et]},$$scope:{ctx:j}}}),{c(){t=w("p"),t.innerHTML=r,s=m(),u(p.$$.fragment),b=m(),u(_.$$.fragment),C=m(),k=w("p"),k.textContent=I,Y=m(),v=w("ol"),v.innerHTML=R,W=m(),u(G.$$.fragment),Z=m(),u(U.$$.fragment)},l(i){t=T(i,"P",{"data-svelte-h":!0}),J(t)!=="svelte-1u0fbfd"&&(t.innerHTML=r),s=c(i),f(p.$$.fragment,i),b=c(i),f(_.$$.fragment,i),C=c(i),k=T(i,"P",{"data-svelte-h":!0}),J(k)!=="svelte-1dd35iq"&&(k.textContent=I),Y=c(i),v=T(i,"OL",{"data-svelte-h":!0}),J(v)!=="svelte-1aiypan"&&(v.innerHTML=R),W=c(i),f(G.$$.fragment,i),Z=c(i),f(U.$$.fragment,i)},m(i,y){n(i,t,y),n(i,s,y),$(p,i,y),n(i,b,y),$(_,i,y),n(i,C,y),n(i,k,y),n(i,Y,y),n(i,v,y),n(i,W,y),$(G,i,y),n(i,Z,y),$(U,i,y),x=!0},p(i,y){const B={};y&2&&(B.$$scope={dirty:y,ctx:i}),_.$set(B);const X={};y&2&&(X.$$scope={dirty:y,ctx:i}),U.$set(X)},i(i){x||(h(p.$$.fragment,i),h(_.$$.fragment,i),h(G.$$.fragment,i),h(U.$$.fragment,i),x=!0)},o(i){g(p.$$.fragment,i),g(_.$$.fragment,i),g(G.$$.fragment,i),g(U.$$.fragment,i),x=!1},d(i){i&&(l(t),l(s),l(b),l(C),l(k),l(Y),l(v),l(W),l(Z)),M(p,i),M(_,i),M(G,i),M(U,i)}}}function st(j){let t,r;return t=new ke({props:{$$slots:{default:[tt]},$$scope:{ctx:j}}}),{c(){u(t.$$.fragment)},l(s){f(t.$$.fragment,s)},m(s,p){$(t,s,p),r=!0},p(s,p){const b={};p&2&&(b.$$scope={dirty:p,ctx:s}),t.$set(b)},i(s){r||(h(t.$$.fragment,s),r=!0)},o(s){g(t.$$.fragment,s),r=!1},d(s){M(t,s)}}}function at(j){let t,r='Se você não estiver familiarizado com o fine-tuning de um modelo com o Keras, dê uma olhada no tutorial básico <a href="training#finetune-with-keras">aqui</a>!';return{c(){t=w("p"),t.innerHTML=r},l(s){t=T(s,"P",{"data-svelte-h":!0}),J(t)!=="svelte-1vtmrxe"&&(t.innerHTML=r)},m(s,p){n(s,t,p)},p:oe,d(s){s&&l(t)}}}function lt(j){let t,r='Para executar o fine-tuning de um modelo no TensorFlow, comece convertendo seu conjunto de dados para o formato <code>tf.data.Dataset</code> com <a href="https://huggingface.co/docs/datasets/package_reference/main_classes#datasets.Dataset.to_tf_dataset" rel="nofollow"><code>to_tf_dataset</code></a>. Nessa execução você deverá especificar as entradas e rótulos (no parâmetro <code>columns</code>), se deseja embaralhar o conjunto de dados, o tamanho do batch e o data collator:',s,p,b,_,C,k,I="Configure o otimizador e alguns hiperparâmetros de treinamento:",Y,v,R,W,G="Carregue o DistilBERT com <code>TFAutoModelForSequenceClassification</code> junto com o número de rótulos esperados:",Z,U,x,i,y='Configure o modelo para treinamento com o método <a href="https://keras.io/api/models/model_training_apis/#compile-method" rel="nofollow"><code>compile</code></a>:',B,X,V,F,q='Chame o método <a href="https://keras.io/api/models/model_training_apis/#fit-method" rel="nofollow"><code>fit</code></a> para executar o fine-tuning do modelo:',A,E,H;return p=new z({props:{code:"dGZfdHJhaW5fc2V0JTIwJTNEJTIwdG9rZW5pemVkX2ltZGIlNUIlMjJ0cmFpbiUyMiU1RC50b190Zl9kYXRhc2V0KCUwQSUyMCUyMCUyMCUyMGNvbHVtbnMlM0QlNUIlMjJhdHRlbnRpb25fbWFzayUyMiUyQyUyMCUyMmlucHV0X2lkcyUyMiUyQyUyMCUyMmxhYmVsJTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwc2h1ZmZsZSUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjBiYXRjaF9zaXplJTNEMTYlMkMlMEElMjAlMjAlMjAlMjBjb2xsYXRlX2ZuJTNEZGF0YV9jb2xsYXRvciUyQyUwQSklMEElMEF0Zl92YWxpZGF0aW9uX3NldCUyMCUzRCUyMHRva2VuaXplZF9pbWRiJTVCJTIydGVzdCUyMiU1RC50b190Zl9kYXRhc2V0KCUwQSUyMCUyMCUyMCUyMGNvbHVtbnMlM0QlNUIlMjJhdHRlbnRpb25fbWFzayUyMiUyQyUyMCUyMmlucHV0X2lkcyUyMiUyQyUyMCUyMmxhYmVsJTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwc2h1ZmZsZSUzREZhbHNlJTJDJTBBJTIwJTIwJTIwJTIwYmF0Y2hfc2l6ZSUzRDE2JTJDJTBBJTIwJTIwJTIwJTIwY29sbGF0ZV9mbiUzRGRhdGFfY29sbGF0b3IlMkMlMEEp",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>tf_train_set = tokenized_imdb[<span class="hljs-string">&quot;train&quot;</span>].to_tf_dataset(
<span class="hljs-meta">... </span> columns=[<span class="hljs-string">&quot;attention_mask&quot;</span>, <span class="hljs-string">&quot;input_ids&quot;</span>, <span class="hljs-string">&quot;label&quot;</span>],
<span class="hljs-meta">... </span> shuffle=<span class="hljs-literal">True</span>,
<span class="hljs-meta">... </span> batch_size=<span class="hljs-number">16</span>,
<span class="hljs-meta">... </span> collate_fn=data_collator,
<span class="hljs-meta">... </span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>tf_validation_set = tokenized_imdb[<span class="hljs-string">&quot;test&quot;</span>].to_tf_dataset(
<span class="hljs-meta">... </span> columns=[<span class="hljs-string">&quot;attention_mask&quot;</span>, <span class="hljs-string">&quot;input_ids&quot;</span>, <span class="hljs-string">&quot;label&quot;</span>],
<span class="hljs-meta">... </span> shuffle=<span class="hljs-literal">False</span>,
<span class="hljs-meta">... </span> batch_size=<span class="hljs-number">16</span>,
<span class="hljs-meta">... </span> collate_fn=data_collator,
<span class="hljs-meta">... </span>)`,wrap:!1}}),_=new pe({props:{$$slots:{default:[at]},$$scope:{ctx:j}}}),v=new z({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMGNyZWF0ZV9vcHRpbWl6ZXIlMEFpbXBvcnQlMjB0ZW5zb3JmbG93JTIwYXMlMjB0ZiUwQSUwQWJhdGNoX3NpemUlMjAlM0QlMjAxNiUwQW51bV9lcG9jaHMlMjAlM0QlMjA1JTBBYmF0Y2hlc19wZXJfZXBvY2glMjAlM0QlMjBsZW4odG9rZW5pemVkX2ltZGIlNUIlMjJ0cmFpbiUyMiU1RCklMjAlMkYlMkYlMjBiYXRjaF9zaXplJTBBdG90YWxfdHJhaW5fc3RlcHMlMjAlM0QlMjBpbnQoYmF0Y2hlc19wZXJfZXBvY2glMjAqJTIwbnVtX2Vwb2NocyklMEFvcHRpbWl6ZXIlMkMlMjBzY2hlZHVsZSUyMCUzRCUyMGNyZWF0ZV9vcHRpbWl6ZXIoaW5pdF9sciUzRDJlLTUlMkMlMjBudW1fd2FybXVwX3N0ZXBzJTNEMCUyQyUyMG51bV90cmFpbl9zdGVwcyUzRHRvdGFsX3RyYWluX3N0ZXBzKQ==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> create_optimizer
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> tensorflow <span class="hljs-keyword">as</span> tf
<span class="hljs-meta">&gt;&gt;&gt; </span>batch_size = <span class="hljs-number">16</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>num_epochs = <span class="hljs-number">5</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>batches_per_epoch = <span class="hljs-built_in">len</span>(tokenized_imdb[<span class="hljs-string">&quot;train&quot;</span>]) // batch_size
<span class="hljs-meta">&gt;&gt;&gt; </span>total_train_steps = <span class="hljs-built_in">int</span>(batches_per_epoch * num_epochs)
<span class="hljs-meta">&gt;&gt;&gt; </span>optimizer, schedule = create_optimizer(init_lr=<span class="hljs-number">2e-5</span>, num_warmup_steps=<span class="hljs-number">0</span>, num_train_steps=total_train_steps)`,wrap:!1}}),U=new z({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMFRGQXV0b01vZGVsRm9yU2VxdWVuY2VDbGFzc2lmaWNhdGlvbiUwQSUwQW1vZGVsJTIwJTNEJTIwVEZBdXRvTW9kZWxGb3JTZXF1ZW5jZUNsYXNzaWZpY2F0aW9uLmZyb21fcHJldHJhaW5lZCglMjJkaXN0aWxiZXJ0JTJGZGlzdGlsYmVydC1iYXNlLXVuY2FzZWQlMjIlMkMlMjBudW1fbGFiZWxzJTNEMik=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> TFAutoModelForSequenceClassification
<span class="hljs-meta">&gt;&gt;&gt; </span>model = TFAutoModelForSequenceClassification.from_pretrained(<span class="hljs-string">&quot;distilbert/distilbert-base-uncased&quot;</span>, num_labels=<span class="hljs-number">2</span>)`,wrap:!1}}),X=new z({props:{code:"aW1wb3J0JTIwdGVuc29yZmxvdyUyMGFzJTIwdGYlMEElMEFtb2RlbC5jb21waWxlKG9wdGltaXplciUzRG9wdGltaXplcik=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> tensorflow <span class="hljs-keyword">as</span> tf
<span class="hljs-meta">&gt;&gt;&gt; </span>model.<span class="hljs-built_in">compile</span>(optimizer=optimizer)`,wrap:!1}}),E=new z({props:{code:"bW9kZWwuZml0KHglM0R0Zl90cmFpbl9zZXQlMkMlMjB2YWxpZGF0aW9uX2RhdGElM0R0Zl92YWxpZGF0aW9uX3NldCUyQyUyMGVwb2NocyUzRDMp",highlighted:'<span class="hljs-meta">&gt;&gt;&gt; </span>model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=<span class="hljs-number">3</span>)',wrap:!1}}),{c(){t=w("p"),t.innerHTML=r,s=m(),u(p.$$.fragment),b=m(),u(_.$$.fragment),C=m(),k=w("p"),k.textContent=I,Y=m(),u(v.$$.fragment),R=m(),W=w("p"),W.innerHTML=G,Z=m(),u(U.$$.fragment),x=m(),i=w("p"),i.innerHTML=y,B=m(),u(X.$$.fragment),V=m(),F=w("p"),F.innerHTML=q,A=m(),u(E.$$.fragment)},l(a){t=T(a,"P",{"data-svelte-h":!0}),J(t)!=="svelte-lavla8"&&(t.innerHTML=r),s=c(a),f(p.$$.fragment,a),b=c(a),f(_.$$.fragment,a),C=c(a),k=T(a,"P",{"data-svelte-h":!0}),J(k)!=="svelte-xdplem"&&(k.textContent=I),Y=c(a),f(v.$$.fragment,a),R=c(a),W=T(a,"P",{"data-svelte-h":!0}),J(W)!=="svelte-t0i5j"&&(W.innerHTML=G),Z=c(a),f(U.$$.fragment,a),x=c(a),i=T(a,"P",{"data-svelte-h":!0}),J(i)!=="svelte-espbvd"&&(i.innerHTML=y),B=c(a),f(X.$$.fragment,a),V=c(a),F=T(a,"P",{"data-svelte-h":!0}),J(F)!=="svelte-t6x1cw"&&(F.innerHTML=q),A=c(a),f(E.$$.fragment,a)},m(a,d){n(a,t,d),n(a,s,d),$(p,a,d),n(a,b,d),$(_,a,d),n(a,C,d),n(a,k,d),n(a,Y,d),$(v,a,d),n(a,R,d),n(a,W,d),n(a,Z,d),$(U,a,d),n(a,x,d),n(a,i,d),n(a,B,d),$(X,a,d),n(a,V,d),n(a,F,d),n(a,A,d),$(E,a,d),H=!0},p(a,d){const ie={};d&2&&(ie.$$scope={dirty:d,ctx:a}),_.$set(ie)},i(a){H||(h(p.$$.fragment,a),h(_.$$.fragment,a),h(v.$$.fragment,a),h(U.$$.fragment,a),h(X.$$.fragment,a),h(E.$$.fragment,a),H=!0)},o(a){g(p.$$.fragment,a),g(_.$$.fragment,a),g(v.$$.fragment,a),g(U.$$.fragment,a),g(X.$$.fragment,a),g(E.$$.fragment,a),H=!1},d(a){a&&(l(t),l(s),l(b),l(C),l(k),l(Y),l(R),l(W),l(Z),l(x),l(i),l(B),l(V),l(F),l(A)),M(p,a),M(_,a),M(v,a),M(U,a),M(X,a),M(E,a)}}}function nt(j){let t,r;return t=new ke({props:{$$slots:{default:[lt]},$$scope:{ctx:j}}}),{c(){u(t.$$.fragment)},l(s){f(t.$$.fragment,s)},m(s,p){$(t,s,p),r=!0},p(s,p){const b={};p&2&&(b.$$scope={dirty:p,ctx:s}),t.$set(b)},i(s){r||(h(t.$$.fragment,s),r=!0)},o(s){g(t.$$.fragment,s),r=!1},d(s){M(t,s)}}}function ot(j){let t,r='Para obter um exemplo mais aprofundado de como executar o fine-tuning de um modelo para classificação de texto, dê uma olhada nesse <a href="https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/text_classification.ipynb" rel="nofollow">notebook utilizando PyTorch</a> ou nesse <a href="https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/text_classification-tf.ipynb" rel="nofollow">notebook utilizando TensorFlow</a>.';return{c(){t=w("p"),t.innerHTML=r},l(s){t=T(s,"P",{"data-svelte-h":!0}),J(t)!=="svelte-f4msrq"&&(t.innerHTML=r)},m(s,p){n(s,t,p)},p:oe,d(s){s&&l(t)}}}function it(j){let t,r,s,p,b,_,C,k,I,Y="A classificação de texto é uma tarefa comum de NLP que atribui um rótulo ou classe a um texto. Existem muitas aplicações práticas de classificação de texto amplamente utilizadas em produção por algumas das maiores empresas da atualidade. Uma das formas mais populares de classificação de texto é a análise de sentimento, que atribui um rótulo como positivo, negativo ou neutro a um texto.",v,R,W='Este guia mostrará como realizar o fine-tuning do <a href="https://huggingface.co/distilbert/distilbert-base-uncased" rel="nofollow">DistilBERT</a> no conjunto de dados <a href="https://huggingface.co/datasets/imdb" rel="nofollow">IMDb</a> para determinar se a crítica de filme é positiva ou negativa.',G,Z,U,x,i,y,B="Carregue o conjunto de dados IMDb utilizando a biblioteca 🤗 Datasets:",X,V,F,q,A="Em seguida, dê uma olhada em um exemplo:",E,H,a,d,ie="Existem dois campos neste dataset:",me,D,ve="<li><code>text</code>: uma string contendo o texto da crítica do filme.</li> <li><code>label</code>: um valor que pode ser <code>0</code> para uma crítica negativa ou <code>1</code> para uma crítica positiva.</li>",ce,S,de,P,Ze="Carregue o tokenizador do DistilBERT para processar o campo <code>text</code>:",ue,K,fe,O,Ce="Crie uma função de pré-processamento para tokenizar o campo <code>text</code> e truncar as sequências para que não sejam maiores que o comprimento máximo de entrada do DistilBERT:",$e,ee,he,te,xe='Use a função <a href="https://huggingface.co/docs/datasets/process#map" rel="nofollow"><code>map</code></a> do 🤗 Datasets para aplicar a função de pré-processamento em todo o conjunto de dados. Você pode acelerar a função <code>map</code> definindo <code>batched=True</code> para processar vários elementos do conjunto de dados de uma só vez:',ge,se,Me,ae,Re="Use o <code>DataCollatorWithPadding</code> para criar um batch de exemplos. Ele também <em>preencherá dinamicamente</em> seu texto até o comprimento do elemento mais longo em seu batch, para que os exemplos do batch tenham um comprimento uniforme. Embora seja possível preencher seu texto com a função <code>tokenizer</code> definindo <code>padding=True</code>, o preenchimento dinâmico utilizando um data collator é mais eficiente.",be,N,ye,le,je,Q,we,L,Te,ne,Je,re,_e;return b=new Ue({props:{title:"Classificação de texto",local:"classificação-de-texto",headingTag:"h1"}}),C=new Qe({props:{id:"leNG9fN9FQU"}}),Z=new pe({props:{$$slots:{default:[Ae]},$$scope:{ctx:j}}}),x=new Ue({props:{title:"Carregue o conjunto de dados IMDb",local:"carregue-o-conjunto-de-dados-imdb",headingTag:"h2"}}),V=new z({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBaW1kYiUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJpbWRiJTIyKQ==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
<span class="hljs-meta">&gt;&gt;&gt; </span>imdb = load_dataset(<span class="hljs-string">&quot;imdb&quot;</span>)`,wrap:!1}}),H=new z({props:{code:"aW1kYiU1QiUyMnRlc3QlMjIlNUQlNUIwJTVE",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>imdb[<span class="hljs-string">&quot;test&quot;</span>][<span class="hljs-number">0</span>]
{
<span class="hljs-string">&quot;label&quot;</span>: <span class="hljs-number">0</span>,
<span class="hljs-string">&quot;text&quot;</span>: <span class="hljs-string">&quot;I love sci-fi and am willing to put up with a lot. Sci-fi movies/TV are usually underfunded, under-appreciated and misunderstood. I tried to like this, I really did, but it is to good TV sci-fi as Babylon 5 is to Star Trek (the original). Silly prosthetics, cheap cardboard sets, stilted dialogues, CG that doesn&#x27;t match the background, and painfully one-dimensional characters cannot be overcome with a &#x27;sci-fi&#x27; setting. (I&#x27;m sure there are those of you out there who think Babylon 5 is good sci-fi TV. It&#x27;s not. It&#x27;s clichéd and uninspiring.) While US viewers might like emotion and character development, sci-fi is a genre that does not take itself seriously (cf. Star Trek). It may treat important issues, yet not as a serious philosophy. It&#x27;s really difficult to care about the characters here as they are not simply foolish, just missing a spark of life. Their actions and reactions are wooden and predictable, often painful to watch. The makers of Earth KNOW it&#x27;s rubbish as they have to always say \\&quot;Gene Roddenberry&#x27;s Earth...\\&quot; otherwise people would not continue watching. Roddenberry&#x27;s ashes must be turning in their orbit as this dull, cheap, poorly edited (watching it without advert breaks really brings this home) trudging Trabant of a show lumbers into space. Spoiler. So, kill off a main character. And then bring him back as another actor. Jeeez! Dallas all over again.&quot;</span>,
}`,wrap:!1}}),S=new Ue({props:{title:"Pré-processamento dos dados",local:"pré-processamento-dos-dados",headingTag:"h2"}}),K=new z({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMEElMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCglMjJkaXN0aWxiZXJ0JTJGZGlzdGlsYmVydC1iYXNlLXVuY2FzZWQlMjIp",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer
<span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">&quot;distilbert/distilbert-base-uncased&quot;</span>)`,wrap:!1}}),ee=new z({props:{code:"ZGVmJTIwcHJlcHJvY2Vzc19mdW5jdGlvbihleGFtcGxlcyklM0ElMEElMjAlMjAlMjAlMjByZXR1cm4lMjB0b2tlbml6ZXIoZXhhbXBsZXMlNUIlMjJ0ZXh0JTIyJTVEJTJDJTIwdHJ1bmNhdGlvbiUzRFRydWUp",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">preprocess_function</span>(<span class="hljs-params">examples</span>):
<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> tokenizer(examples[<span class="hljs-string">&quot;text&quot;</span>], truncation=<span class="hljs-literal">True</span>)`,wrap:!1}}),se=new z({props:{code:"dG9rZW5pemVkX2ltZGIlMjAlM0QlMjBpbWRiLm1hcChwcmVwcm9jZXNzX2Z1bmN0aW9uJTJDJTIwYmF0Y2hlZCUzRFRydWUp",highlighted:'tokenized_imdb = imdb.<span class="hljs-built_in">map</span>(preprocess_function, batched=<span class="hljs-literal">True</span>)',wrap:!1}}),N=new Ve({props:{pytorch:!0,tensorflow:!0,jax:!1,$$slots:{tensorflow:[Ke],pytorch:[Se]},$$scope:{ctx:j}}}),le=new Ue({props:{title:"Train",local:"train",headingTag:"h2"}}),Q=new Ve({props:{pytorch:!0,tensorflow:!0,jax:!1,$$slots:{tensorflow:[nt],pytorch:[st]},$$scope:{ctx:j}}}),L=new pe({props:{$$slots:{default:[ot]},$$scope:{ctx:j}}}),ne=new Le({props:{source:"https://github.com/huggingface/transformers/blob/main/docs/source/pt/tasks/sequence_classification.md"}}),{c(){t=w("meta"),r=m(),s=w("p"),p=m(),u(b.$$.fragment),_=m(),u(C.$$.fragment),k=m(),I=w("p"),I.textContent=Y,v=m(),R=w("p"),R.innerHTML=W,G=m(),u(Z.$$.fragment),U=m(),u(x.$$.fragment),i=m(),y=w("p"),y.textContent=B,X=m(),u(V.$$.fragment),F=m(),q=w("p"),q.textContent=A,E=m(),u(H.$$.fragment),a=m(),d=w("p"),d.textContent=ie,me=m(),D=w("ul"),D.innerHTML=ve,ce=m(),u(S.$$.fragment),de=m(),P=w("p"),P.innerHTML=Ze,ue=m(),u(K.$$.fragment),fe=m(),O=w("p"),O.innerHTML=Ce,$e=m(),u(ee.$$.fragment),he=m(),te=w("p"),te.innerHTML=xe,ge=m(),u(se.$$.fragment),Me=m(),ae=w("p"),ae.innerHTML=Re,be=m(),u(N.$$.fragment),ye=m(),u(le.$$.fragment),je=m(),u(Q.$$.fragment),we=m(),u(L.$$.fragment),Te=m(),u(ne.$$.fragment),Je=m(),re=w("p"),this.h()},l(e){const o=qe("svelte-u9bgzb",document.head);t=T(o,"META",{name:!0,content:!0}),o.forEach(l),r=c(e),s=T(e,"P",{}),Ie(s).forEach(l),p=c(e),f(b.$$.fragment,e),_=c(e),f(C.$$.fragment,e),k=c(e),I=T(e,"P",{"data-svelte-h":!0}),J(I)!=="svelte-hyu4ic"&&(I.textContent=Y),v=c(e),R=T(e,"P",{"data-svelte-h":!0}),J(R)!=="svelte-5coszk"&&(R.innerHTML=W),G=c(e),f(Z.$$.fragment,e),U=c(e),f(x.$$.fragment,e),i=c(e),y=T(e,"P",{"data-svelte-h":!0}),J(y)!=="svelte-2grkld"&&(y.textContent=B),X=c(e),f(V.$$.fragment,e),F=c(e),q=T(e,"P",{"data-svelte-h":!0}),J(q)!=="svelte-wv20hq"&&(q.textContent=A),E=c(e),f(H.$$.fragment,e),a=c(e),d=T(e,"P",{"data-svelte-h":!0}),J(d)!=="svelte-a71bu2"&&(d.textContent=ie),me=c(e),D=T(e,"UL",{"data-svelte-h":!0}),J(D)!=="svelte-cjdh2k"&&(D.innerHTML=ve),ce=c(e),f(S.$$.fragment,e),de=c(e),P=T(e,"P",{"data-svelte-h":!0}),J(P)!=="svelte-z180i9"&&(P.innerHTML=Ze),ue=c(e),f(K.$$.fragment,e),fe=c(e),O=T(e,"P",{"data-svelte-h":!0}),J(O)!=="svelte-3rjb3c"&&(O.innerHTML=Ce),$e=c(e),f(ee.$$.fragment,e),he=c(e),te=T(e,"P",{"data-svelte-h":!0}),J(te)!=="svelte-1uox5bf"&&(te.innerHTML=xe),ge=c(e),f(se.$$.fragment,e),Me=c(e),ae=T(e,"P",{"data-svelte-h":!0}),J(ae)!=="svelte-purifw"&&(ae.innerHTML=Re),be=c(e),f(N.$$.fragment,e),ye=c(e),f(le.$$.fragment,e),je=c(e),f(Q.$$.fragment,e),we=c(e),f(L.$$.fragment,e),Te=c(e),f(ne.$$.fragment,e),Je=c(e),re=T(e,"P",{}),Ie(re).forEach(l),this.h()},h(){Ye(t,"name","hf:doc:metadata"),Ye(t,"content",rt)},m(e,o){Ne(document.head,t),n(e,r,o),n(e,s,o),n(e,p,o),$(b,e,o),n(e,_,o),$(C,e,o),n(e,k,o),n(e,I,o),n(e,v,o),n(e,R,o),n(e,G,o),$(Z,e,o),n(e,U,o),$(x,e,o),n(e,i,o),n(e,y,o),n(e,X,o),$(V,e,o),n(e,F,o),n(e,q,o),n(e,E,o),$(H,e,o),n(e,a,o),n(e,d,o),n(e,me,o),n(e,D,o),n(e,ce,o),$(S,e,o),n(e,de,o),n(e,P,o),n(e,ue,o),$(K,e,o),n(e,fe,o),n(e,O,o),n(e,$e,o),$(ee,e,o),n(e,he,o),n(e,te,o),n(e,ge,o),$(se,e,o),n(e,Me,o),n(e,ae,o),n(e,be,o),$(N,e,o),n(e,ye,o),$(le,e,o),n(e,je,o),$(Q,e,o),n(e,we,o),$(L,e,o),n(e,Te,o),$(ne,e,o),n(e,Je,o),n(e,re,o),_e=!0},p(e,[o]){const We={};o&2&&(We.$$scope={dirty:o,ctx:e}),Z.$set(We);const Ge={};o&2&&(Ge.$$scope={dirty:o,ctx:e}),N.$set(Ge);const Xe={};o&2&&(Xe.$$scope={dirty:o,ctx:e}),Q.$set(Xe);const ze={};o&2&&(ze.$$scope={dirty:o,ctx:e}),L.$set(ze)},i(e){_e||(h(b.$$.fragment,e),h(C.$$.fragment,e),h(Z.$$.fragment,e),h(x.$$.fragment,e),h(V.$$.fragment,e),h(H.$$.fragment,e),h(S.$$.fragment,e),h(K.$$.fragment,e),h(ee.$$.fragment,e),h(se.$$.fragment,e),h(N.$$.fragment,e),h(le.$$.fragment,e),h(Q.$$.fragment,e),h(L.$$.fragment,e),h(ne.$$.fragment,e),_e=!0)},o(e){g(b.$$.fragment,e),g(C.$$.fragment,e),g(Z.$$.fragment,e),g(x.$$.fragment,e),g(V.$$.fragment,e),g(H.$$.fragment,e),g(S.$$.fragment,e),g(K.$$.fragment,e),g(ee.$$.fragment,e),g(se.$$.fragment,e),g(N.$$.fragment,e),g(le.$$.fragment,e),g(Q.$$.fragment,e),g(L.$$.fragment,e),g(ne.$$.fragment,e),_e=!1},d(e){e&&(l(r),l(s),l(p),l(_),l(k),l(I),l(v),l(R),l(G),l(U),l(i),l(y),l(X),l(F),l(q),l(E),l(a),l(d),l(me),l(D),l(ce),l(de),l(P),l(ue),l(fe),l(O),l($e),l(he),l(te),l(ge),l(Me),l(ae),l(be),l(ye),l(je),l(we),l(Te),l(Je),l(re)),l(t),M(b,e),M(C,e),M(Z,e),M(x,e),M(V,e),M(H,e),M(S,e),M(K,e),M(ee,e),M(se,e),M(N,e),M(le,e),M(Q,e),M(L,e),M(ne,e)}}}const rt='{"title":"Classificação de texto","local":"classificação-de-texto","sections":[{"title":"Carregue o conjunto de dados IMDb","local":"carregue-o-conjunto-de-dados-imdb","sections":[],"depth":2},{"title":"Pré-processamento dos dados","local":"pré-processamento-dos-dados","sections":[],"depth":2},{"title":"Train","local":"train","sections":[],"depth":2}],"depth":1}';function pt(j){return Ee(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class gt extends He{constructor(t){super(),Be(this,t,pt,it,Fe,{})}}export{gt as component};

Xet Storage Details

Size:
29.3 kB
·
Xet hash:
1cd5bb068584816039caf854f954ac1fc49b789bdfe67a990754d3e85b4d1d0f

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.