Buckets:
| import{s as Fe,o as Ee,n as oe}from"../chunks/scheduler.d586627e.js";import{S as He,i as Be,g as w,s as m,r as u,A as qe,h as T,f as l,c,j as Ie,u as f,x as J,k as Ye,y as Ne,a as n,v as $,d as h,t as g,w as M}from"../chunks/index.8589a59c.js";import{T as pe}from"../chunks/Tip.84e2336e.js";import{Y as Qe}from"../chunks/Youtube.49101e7b.js";import{C as z}from"../chunks/CodeBlock.47c46d2c.js";import{F as Ve,M as ke}from"../chunks/Markdown.67fc2fa9.js";import{H as Ue,E as Le}from"../chunks/EditOnGithub.073dfa26.js";function Ae(j){let t,r='Consulte a <a href="https://huggingface.co/tasks/text-classification" rel="nofollow">página de tarefas de classificação de texto</a> para obter mais informações sobre outras formas de classificação de texto e seus modelos, conjuntos de dados e métricas associados.';return{c(){t=w("p"),t.innerHTML=r},l(s){t=T(s,"P",{"data-svelte-h":!0}),J(t)!=="svelte-19mu0yp"&&(t.innerHTML=r)},m(s,p){n(s,t,p)},p:oe,d(s){s&&l(t)}}}function De(j){let t,r;return t=new z({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMERhdGFDb2xsYXRvcldpdGhQYWRkaW5nJTBBJTBBZGF0YV9jb2xsYXRvciUyMCUzRCUyMERhdGFDb2xsYXRvcldpdGhQYWRkaW5nKHRva2VuaXplciUzRHRva2VuaXplcik=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> DataCollatorWithPadding | |
| <span class="hljs-meta">>>> </span>data_collator = DataCollatorWithPadding(tokenizer=tokenizer)`,wrap:!1}}),{c(){u(t.$$.fragment)},l(s){f(t.$$.fragment,s)},m(s,p){$(t,s,p),r=!0},p:oe,i(s){r||(h(t.$$.fragment,s),r=!0)},o(s){g(t.$$.fragment,s),r=!1},d(s){M(t,s)}}}function Se(j){let t,r;return t=new ke({props:{$$slots:{default:[De]},$$scope:{ctx:j}}}),{c(){u(t.$$.fragment)},l(s){f(t.$$.fragment,s)},m(s,p){$(t,s,p),r=!0},p(s,p){const b={};p&2&&(b.$$scope={dirty:p,ctx:s}),t.$set(b)},i(s){r||(h(t.$$.fragment,s),r=!0)},o(s){g(t.$$.fragment,s),r=!1},d(s){M(t,s)}}}function Pe(j){let t,r;return t=new z({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMERhdGFDb2xsYXRvcldpdGhQYWRkaW5nJTBBJTBBZGF0YV9jb2xsYXRvciUyMCUzRCUyMERhdGFDb2xsYXRvcldpdGhQYWRkaW5nKHRva2VuaXplciUzRHRva2VuaXplciUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIydGYlMjIp",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> DataCollatorWithPadding | |
| <span class="hljs-meta">>>> </span>data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors=<span class="hljs-string">"tf"</span>)`,wrap:!1}}),{c(){u(t.$$.fragment)},l(s){f(t.$$.fragment,s)},m(s,p){$(t,s,p),r=!0},p:oe,i(s){r||(h(t.$$.fragment,s),r=!0)},o(s){g(t.$$.fragment,s),r=!1},d(s){M(t,s)}}}function Ke(j){let t,r;return t=new ke({props:{$$slots:{default:[Pe]},$$scope:{ctx:j}}}),{c(){u(t.$$.fragment)},l(s){f(t.$$.fragment,s)},m(s,p){$(t,s,p),r=!0},p(s,p){const b={};p&2&&(b.$$scope={dirty:p,ctx:s}),t.$set(b)},i(s){r||(h(t.$$.fragment,s),r=!0)},o(s){g(t.$$.fragment,s),r=!1},d(s){M(t,s)}}}function Oe(j){let t,r='Se você não estiver familiarizado com o fine-tuning de um modelo com o <code>Trainer</code>, dê uma olhada no tutorial básico <a href="../training#finetune-with-trainer">aqui</a>!';return{c(){t=w("p"),t.innerHTML=r},l(s){t=T(s,"P",{"data-svelte-h":!0}),J(t)!=="svelte-796e4s"&&(t.innerHTML=r)},m(s,p){n(s,t,p)},p:oe,d(s){s&&l(t)}}}function et(j){let t,r="O <code>Trainer</code> aplicará o preenchimento dinâmico por padrão quando você definir o argumento <code>tokenizer</code> dele. Nesse caso, você não precisa especificar um data collator explicitamente.";return{c(){t=w("p"),t.innerHTML=r},l(s){t=T(s,"P",{"data-svelte-h":!0}),J(t)!=="svelte-yhi952"&&(t.innerHTML=r)},m(s,p){n(s,t,p)},p:oe,d(s){s&&l(t)}}}function tt(j){let t,r="Carregue o DistilBERT com <code>AutoModelForSequenceClassification</code> junto com o número de rótulos esperados:",s,p,b,_,C,k,I="Nesse ponto, restam apenas três passos:",Y,v,R="<li>Definir seus hiperparâmetros de treinamento em <code>TrainingArguments</code>.</li> <li>Passar os argumentos de treinamento para o <code>Trainer</code> junto com o modelo, conjunto de dados, tokenizador e o data collator.</li> <li>Chamar a função <code>train()</code> para executar o fine-tuning do seu modelo.</li>",W,G,Z,U,x;return p=new z({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvclNlcXVlbmNlQ2xhc3NpZmljYXRpb24lMkMlMjBUcmFpbmluZ0FyZ3VtZW50cyUyQyUyMFRyYWluZXIlMEElMEFtb2RlbCUyMCUzRCUyMEF1dG9Nb2RlbEZvclNlcXVlbmNlQ2xhc3NpZmljYXRpb24uZnJvbV9wcmV0cmFpbmVkKCUyMmRpc3RpbGJlcnQlMkZkaXN0aWxiZXJ0LWJhc2UtdW5jYXNlZCUyMiUyQyUyMG51bV9sYWJlbHMlM0QyKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForSequenceClassification, TrainingArguments, Trainer | |
| <span class="hljs-meta">>>> </span>model = AutoModelForSequenceClassification.from_pretrained(<span class="hljs-string">"distilbert/distilbert-base-uncased"</span>, num_labels=<span class="hljs-number">2</span>)`,wrap:!1}}),_=new pe({props:{$$slots:{default:[Oe]},$$scope:{ctx:j}}}),G=new z({props:{code:"dHJhaW5pbmdfYXJncyUyMCUzRCUyMFRyYWluaW5nQXJndW1lbnRzKCUwQSUyMCUyMCUyMCUyMG91dHB1dF9kaXIlM0QlMjIuJTJGcmVzdWx0cyUyMiUyQyUwQSUyMCUyMCUyMCUyMGxlYXJuaW5nX3JhdGUlM0QyZS01JTJDJTBBJTIwJTIwJTIwJTIwcGVyX2RldmljZV90cmFpbl9iYXRjaF9zaXplJTNEMTYlMkMlMEElMjAlMjAlMjAlMjBwZXJfZGV2aWNlX2V2YWxfYmF0Y2hfc2l6ZSUzRDE2JTJDJTBBJTIwJTIwJTIwJTIwbnVtX3RyYWluX2Vwb2NocyUzRDUlMkMlMEElMjAlMjAlMjAlMjB3ZWlnaHRfZGVjYXklM0QwLjAxJTJDJTBBKSUwQSUwQXRyYWluZXIlMjAlM0QlMjBUcmFpbmVyKCUwQSUyMCUyMCUyMCUyMG1vZGVsJTNEbW9kZWwlMkMlMEElMjAlMjAlMjAlMjBhcmdzJTNEdHJhaW5pbmdfYXJncyUyQyUwQSUyMCUyMCUyMCUyMHRyYWluX2RhdGFzZXQlM0R0b2tlbml6ZWRfaW1kYiU1QiUyMnRyYWluJTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwZXZhbF9kYXRhc2V0JTNEdG9rZW5pemVkX2ltZGIlNUIlMjJ0ZXN0JTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwdG9rZW5pemVyJTNEdG9rZW5pemVyJTJDJTBBJTIwJTIwJTIwJTIwZGF0YV9jb2xsYXRvciUzRGRhdGFfY29sbGF0b3IlMkMlMEEpJTBBJTBBdHJhaW5lci50cmFpbigp",highlighted:`<span class="hljs-meta">>>> </span>training_args = TrainingArguments( | |
| <span class="hljs-meta">... </span> output_dir=<span class="hljs-string">"./results"</span>, | |
| <span class="hljs-meta">... </span> learning_rate=<span class="hljs-number">2e-5</span>, | |
| <span class="hljs-meta">... </span> per_device_train_batch_size=<span class="hljs-number">16</span>, | |
| <span class="hljs-meta">... </span> per_device_eval_batch_size=<span class="hljs-number">16</span>, | |
| <span class="hljs-meta">... </span> num_train_epochs=<span class="hljs-number">5</span>, | |
| <span class="hljs-meta">... </span> weight_decay=<span class="hljs-number">0.01</span>, | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>trainer = Trainer( | |
| <span class="hljs-meta">... </span> model=model, | |
| <span class="hljs-meta">... </span> args=training_args, | |
| <span class="hljs-meta">... </span> train_dataset=tokenized_imdb[<span class="hljs-string">"train"</span>], | |
| <span class="hljs-meta">... </span> eval_dataset=tokenized_imdb[<span class="hljs-string">"test"</span>], | |
| <span class="hljs-meta">... </span> tokenizer=tokenizer, | |
| <span class="hljs-meta">... </span> data_collator=data_collator, | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>trainer.train()`,wrap:!1}}),U=new pe({props:{$$slots:{default:[et]},$$scope:{ctx:j}}}),{c(){t=w("p"),t.innerHTML=r,s=m(),u(p.$$.fragment),b=m(),u(_.$$.fragment),C=m(),k=w("p"),k.textContent=I,Y=m(),v=w("ol"),v.innerHTML=R,W=m(),u(G.$$.fragment),Z=m(),u(U.$$.fragment)},l(i){t=T(i,"P",{"data-svelte-h":!0}),J(t)!=="svelte-1u0fbfd"&&(t.innerHTML=r),s=c(i),f(p.$$.fragment,i),b=c(i),f(_.$$.fragment,i),C=c(i),k=T(i,"P",{"data-svelte-h":!0}),J(k)!=="svelte-1dd35iq"&&(k.textContent=I),Y=c(i),v=T(i,"OL",{"data-svelte-h":!0}),J(v)!=="svelte-1aiypan"&&(v.innerHTML=R),W=c(i),f(G.$$.fragment,i),Z=c(i),f(U.$$.fragment,i)},m(i,y){n(i,t,y),n(i,s,y),$(p,i,y),n(i,b,y),$(_,i,y),n(i,C,y),n(i,k,y),n(i,Y,y),n(i,v,y),n(i,W,y),$(G,i,y),n(i,Z,y),$(U,i,y),x=!0},p(i,y){const B={};y&2&&(B.$$scope={dirty:y,ctx:i}),_.$set(B);const X={};y&2&&(X.$$scope={dirty:y,ctx:i}),U.$set(X)},i(i){x||(h(p.$$.fragment,i),h(_.$$.fragment,i),h(G.$$.fragment,i),h(U.$$.fragment,i),x=!0)},o(i){g(p.$$.fragment,i),g(_.$$.fragment,i),g(G.$$.fragment,i),g(U.$$.fragment,i),x=!1},d(i){i&&(l(t),l(s),l(b),l(C),l(k),l(Y),l(v),l(W),l(Z)),M(p,i),M(_,i),M(G,i),M(U,i)}}}function st(j){let t,r;return t=new ke({props:{$$slots:{default:[tt]},$$scope:{ctx:j}}}),{c(){u(t.$$.fragment)},l(s){f(t.$$.fragment,s)},m(s,p){$(t,s,p),r=!0},p(s,p){const b={};p&2&&(b.$$scope={dirty:p,ctx:s}),t.$set(b)},i(s){r||(h(t.$$.fragment,s),r=!0)},o(s){g(t.$$.fragment,s),r=!1},d(s){M(t,s)}}}function at(j){let t,r='Se você não estiver familiarizado com o fine-tuning de um modelo com o Keras, dê uma olhada no tutorial básico <a href="training#finetune-with-keras">aqui</a>!';return{c(){t=w("p"),t.innerHTML=r},l(s){t=T(s,"P",{"data-svelte-h":!0}),J(t)!=="svelte-1vtmrxe"&&(t.innerHTML=r)},m(s,p){n(s,t,p)},p:oe,d(s){s&&l(t)}}}function lt(j){let t,r='Para executar o fine-tuning de um modelo no TensorFlow, comece convertendo seu conjunto de dados para o formato <code>tf.data.Dataset</code> com <a href="https://huggingface.co/docs/datasets/package_reference/main_classes#datasets.Dataset.to_tf_dataset" rel="nofollow"><code>to_tf_dataset</code></a>. Nessa execução você deverá especificar as entradas e rótulos (no parâmetro <code>columns</code>), se deseja embaralhar o conjunto de dados, o tamanho do batch e o data collator:',s,p,b,_,C,k,I="Configure o otimizador e alguns hiperparâmetros de treinamento:",Y,v,R,W,G="Carregue o DistilBERT com <code>TFAutoModelForSequenceClassification</code> junto com o número de rótulos esperados:",Z,U,x,i,y='Configure o modelo para treinamento com o método <a href="https://keras.io/api/models/model_training_apis/#compile-method" rel="nofollow"><code>compile</code></a>:',B,X,V,F,q='Chame o método <a href="https://keras.io/api/models/model_training_apis/#fit-method" rel="nofollow"><code>fit</code></a> para executar o fine-tuning do modelo:',A,E,H;return p=new z({props:{code:"dGZfdHJhaW5fc2V0JTIwJTNEJTIwdG9rZW5pemVkX2ltZGIlNUIlMjJ0cmFpbiUyMiU1RC50b190Zl9kYXRhc2V0KCUwQSUyMCUyMCUyMCUyMGNvbHVtbnMlM0QlNUIlMjJhdHRlbnRpb25fbWFzayUyMiUyQyUyMCUyMmlucHV0X2lkcyUyMiUyQyUyMCUyMmxhYmVsJTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwc2h1ZmZsZSUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjBiYXRjaF9zaXplJTNEMTYlMkMlMEElMjAlMjAlMjAlMjBjb2xsYXRlX2ZuJTNEZGF0YV9jb2xsYXRvciUyQyUwQSklMEElMEF0Zl92YWxpZGF0aW9uX3NldCUyMCUzRCUyMHRva2VuaXplZF9pbWRiJTVCJTIydGVzdCUyMiU1RC50b190Zl9kYXRhc2V0KCUwQSUyMCUyMCUyMCUyMGNvbHVtbnMlM0QlNUIlMjJhdHRlbnRpb25fbWFzayUyMiUyQyUyMCUyMmlucHV0X2lkcyUyMiUyQyUyMCUyMmxhYmVsJTIyJTVEJTJDJTBBJTIwJTIwJTIwJTIwc2h1ZmZsZSUzREZhbHNlJTJDJTBBJTIwJTIwJTIwJTIwYmF0Y2hfc2l6ZSUzRDE2JTJDJTBBJTIwJTIwJTIwJTIwY29sbGF0ZV9mbiUzRGRhdGFfY29sbGF0b3IlMkMlMEEp",highlighted:`<span class="hljs-meta">>>> </span>tf_train_set = tokenized_imdb[<span class="hljs-string">"train"</span>].to_tf_dataset( | |
| <span class="hljs-meta">... </span> columns=[<span class="hljs-string">"attention_mask"</span>, <span class="hljs-string">"input_ids"</span>, <span class="hljs-string">"label"</span>], | |
| <span class="hljs-meta">... </span> shuffle=<span class="hljs-literal">True</span>, | |
| <span class="hljs-meta">... </span> batch_size=<span class="hljs-number">16</span>, | |
| <span class="hljs-meta">... </span> collate_fn=data_collator, | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>tf_validation_set = tokenized_imdb[<span class="hljs-string">"test"</span>].to_tf_dataset( | |
| <span class="hljs-meta">... </span> columns=[<span class="hljs-string">"attention_mask"</span>, <span class="hljs-string">"input_ids"</span>, <span class="hljs-string">"label"</span>], | |
| <span class="hljs-meta">... </span> shuffle=<span class="hljs-literal">False</span>, | |
| <span class="hljs-meta">... </span> batch_size=<span class="hljs-number">16</span>, | |
| <span class="hljs-meta">... </span> collate_fn=data_collator, | |
| <span class="hljs-meta">... </span>)`,wrap:!1}}),_=new pe({props:{$$slots:{default:[at]},$$scope:{ctx:j}}}),v=new z({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMGNyZWF0ZV9vcHRpbWl6ZXIlMEFpbXBvcnQlMjB0ZW5zb3JmbG93JTIwYXMlMjB0ZiUwQSUwQWJhdGNoX3NpemUlMjAlM0QlMjAxNiUwQW51bV9lcG9jaHMlMjAlM0QlMjA1JTBBYmF0Y2hlc19wZXJfZXBvY2glMjAlM0QlMjBsZW4odG9rZW5pemVkX2ltZGIlNUIlMjJ0cmFpbiUyMiU1RCklMjAlMkYlMkYlMjBiYXRjaF9zaXplJTBBdG90YWxfdHJhaW5fc3RlcHMlMjAlM0QlMjBpbnQoYmF0Y2hlc19wZXJfZXBvY2glMjAqJTIwbnVtX2Vwb2NocyklMEFvcHRpbWl6ZXIlMkMlMjBzY2hlZHVsZSUyMCUzRCUyMGNyZWF0ZV9vcHRpbWl6ZXIoaW5pdF9sciUzRDJlLTUlMkMlMjBudW1fd2FybXVwX3N0ZXBzJTNEMCUyQyUyMG51bV90cmFpbl9zdGVwcyUzRHRvdGFsX3RyYWluX3N0ZXBzKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> create_optimizer | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> tensorflow <span class="hljs-keyword">as</span> tf | |
| <span class="hljs-meta">>>> </span>batch_size = <span class="hljs-number">16</span> | |
| <span class="hljs-meta">>>> </span>num_epochs = <span class="hljs-number">5</span> | |
| <span class="hljs-meta">>>> </span>batches_per_epoch = <span class="hljs-built_in">len</span>(tokenized_imdb[<span class="hljs-string">"train"</span>]) // batch_size | |
| <span class="hljs-meta">>>> </span>total_train_steps = <span class="hljs-built_in">int</span>(batches_per_epoch * num_epochs) | |
| <span class="hljs-meta">>>> </span>optimizer, schedule = create_optimizer(init_lr=<span class="hljs-number">2e-5</span>, num_warmup_steps=<span class="hljs-number">0</span>, num_train_steps=total_train_steps)`,wrap:!1}}),U=new z({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMFRGQXV0b01vZGVsRm9yU2VxdWVuY2VDbGFzc2lmaWNhdGlvbiUwQSUwQW1vZGVsJTIwJTNEJTIwVEZBdXRvTW9kZWxGb3JTZXF1ZW5jZUNsYXNzaWZpY2F0aW9uLmZyb21fcHJldHJhaW5lZCglMjJkaXN0aWxiZXJ0JTJGZGlzdGlsYmVydC1iYXNlLXVuY2FzZWQlMjIlMkMlMjBudW1fbGFiZWxzJTNEMik=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> TFAutoModelForSequenceClassification | |
| <span class="hljs-meta">>>> </span>model = TFAutoModelForSequenceClassification.from_pretrained(<span class="hljs-string">"distilbert/distilbert-base-uncased"</span>, num_labels=<span class="hljs-number">2</span>)`,wrap:!1}}),X=new z({props:{code:"aW1wb3J0JTIwdGVuc29yZmxvdyUyMGFzJTIwdGYlMEElMEFtb2RlbC5jb21waWxlKG9wdGltaXplciUzRG9wdGltaXplcik=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> tensorflow <span class="hljs-keyword">as</span> tf | |
| <span class="hljs-meta">>>> </span>model.<span class="hljs-built_in">compile</span>(optimizer=optimizer)`,wrap:!1}}),E=new z({props:{code:"bW9kZWwuZml0KHglM0R0Zl90cmFpbl9zZXQlMkMlMjB2YWxpZGF0aW9uX2RhdGElM0R0Zl92YWxpZGF0aW9uX3NldCUyQyUyMGVwb2NocyUzRDMp",highlighted:'<span class="hljs-meta">>>> </span>model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=<span class="hljs-number">3</span>)',wrap:!1}}),{c(){t=w("p"),t.innerHTML=r,s=m(),u(p.$$.fragment),b=m(),u(_.$$.fragment),C=m(),k=w("p"),k.textContent=I,Y=m(),u(v.$$.fragment),R=m(),W=w("p"),W.innerHTML=G,Z=m(),u(U.$$.fragment),x=m(),i=w("p"),i.innerHTML=y,B=m(),u(X.$$.fragment),V=m(),F=w("p"),F.innerHTML=q,A=m(),u(E.$$.fragment)},l(a){t=T(a,"P",{"data-svelte-h":!0}),J(t)!=="svelte-lavla8"&&(t.innerHTML=r),s=c(a),f(p.$$.fragment,a),b=c(a),f(_.$$.fragment,a),C=c(a),k=T(a,"P",{"data-svelte-h":!0}),J(k)!=="svelte-xdplem"&&(k.textContent=I),Y=c(a),f(v.$$.fragment,a),R=c(a),W=T(a,"P",{"data-svelte-h":!0}),J(W)!=="svelte-t0i5j"&&(W.innerHTML=G),Z=c(a),f(U.$$.fragment,a),x=c(a),i=T(a,"P",{"data-svelte-h":!0}),J(i)!=="svelte-espbvd"&&(i.innerHTML=y),B=c(a),f(X.$$.fragment,a),V=c(a),F=T(a,"P",{"data-svelte-h":!0}),J(F)!=="svelte-t6x1cw"&&(F.innerHTML=q),A=c(a),f(E.$$.fragment,a)},m(a,d){n(a,t,d),n(a,s,d),$(p,a,d),n(a,b,d),$(_,a,d),n(a,C,d),n(a,k,d),n(a,Y,d),$(v,a,d),n(a,R,d),n(a,W,d),n(a,Z,d),$(U,a,d),n(a,x,d),n(a,i,d),n(a,B,d),$(X,a,d),n(a,V,d),n(a,F,d),n(a,A,d),$(E,a,d),H=!0},p(a,d){const ie={};d&2&&(ie.$$scope={dirty:d,ctx:a}),_.$set(ie)},i(a){H||(h(p.$$.fragment,a),h(_.$$.fragment,a),h(v.$$.fragment,a),h(U.$$.fragment,a),h(X.$$.fragment,a),h(E.$$.fragment,a),H=!0)},o(a){g(p.$$.fragment,a),g(_.$$.fragment,a),g(v.$$.fragment,a),g(U.$$.fragment,a),g(X.$$.fragment,a),g(E.$$.fragment,a),H=!1},d(a){a&&(l(t),l(s),l(b),l(C),l(k),l(Y),l(R),l(W),l(Z),l(x),l(i),l(B),l(V),l(F),l(A)),M(p,a),M(_,a),M(v,a),M(U,a),M(X,a),M(E,a)}}}function nt(j){let t,r;return t=new ke({props:{$$slots:{default:[lt]},$$scope:{ctx:j}}}),{c(){u(t.$$.fragment)},l(s){f(t.$$.fragment,s)},m(s,p){$(t,s,p),r=!0},p(s,p){const b={};p&2&&(b.$$scope={dirty:p,ctx:s}),t.$set(b)},i(s){r||(h(t.$$.fragment,s),r=!0)},o(s){g(t.$$.fragment,s),r=!1},d(s){M(t,s)}}}function ot(j){let t,r='Para obter um exemplo mais aprofundado de como executar o fine-tuning de um modelo para classificação de texto, dê uma olhada nesse <a href="https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/text_classification.ipynb" rel="nofollow">notebook utilizando PyTorch</a> ou nesse <a href="https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/text_classification-tf.ipynb" rel="nofollow">notebook utilizando TensorFlow</a>.';return{c(){t=w("p"),t.innerHTML=r},l(s){t=T(s,"P",{"data-svelte-h":!0}),J(t)!=="svelte-f4msrq"&&(t.innerHTML=r)},m(s,p){n(s,t,p)},p:oe,d(s){s&&l(t)}}}function it(j){let t,r,s,p,b,_,C,k,I,Y="A classificação de texto é uma tarefa comum de NLP que atribui um rótulo ou classe a um texto. Existem muitas aplicações práticas de classificação de texto amplamente utilizadas em produção por algumas das maiores empresas da atualidade. Uma das formas mais populares de classificação de texto é a análise de sentimento, que atribui um rótulo como positivo, negativo ou neutro a um texto.",v,R,W='Este guia mostrará como realizar o fine-tuning do <a href="https://huggingface.co/distilbert/distilbert-base-uncased" rel="nofollow">DistilBERT</a> no conjunto de dados <a href="https://huggingface.co/datasets/imdb" rel="nofollow">IMDb</a> para determinar se a crítica de filme é positiva ou negativa.',G,Z,U,x,i,y,B="Carregue o conjunto de dados IMDb utilizando a biblioteca 🤗 Datasets:",X,V,F,q,A="Em seguida, dê uma olhada em um exemplo:",E,H,a,d,ie="Existem dois campos neste dataset:",me,D,ve="<li><code>text</code>: uma string contendo o texto da crítica do filme.</li> <li><code>label</code>: um valor que pode ser <code>0</code> para uma crítica negativa ou <code>1</code> para uma crítica positiva.</li>",ce,S,de,P,Ze="Carregue o tokenizador do DistilBERT para processar o campo <code>text</code>:",ue,K,fe,O,Ce="Crie uma função de pré-processamento para tokenizar o campo <code>text</code> e truncar as sequências para que não sejam maiores que o comprimento máximo de entrada do DistilBERT:",$e,ee,he,te,xe='Use a função <a href="https://huggingface.co/docs/datasets/process#map" rel="nofollow"><code>map</code></a> do 🤗 Datasets para aplicar a função de pré-processamento em todo o conjunto de dados. Você pode acelerar a função <code>map</code> definindo <code>batched=True</code> para processar vários elementos do conjunto de dados de uma só vez:',ge,se,Me,ae,Re="Use o <code>DataCollatorWithPadding</code> para criar um batch de exemplos. Ele também <em>preencherá dinamicamente</em> seu texto até o comprimento do elemento mais longo em seu batch, para que os exemplos do batch tenham um comprimento uniforme. Embora seja possível preencher seu texto com a função <code>tokenizer</code> definindo <code>padding=True</code>, o preenchimento dinâmico utilizando um data collator é mais eficiente.",be,N,ye,le,je,Q,we,L,Te,ne,Je,re,_e;return b=new Ue({props:{title:"Classificação de texto",local:"classificação-de-texto",headingTag:"h1"}}),C=new Qe({props:{id:"leNG9fN9FQU"}}),Z=new pe({props:{$$slots:{default:[Ae]},$$scope:{ctx:j}}}),x=new Ue({props:{title:"Carregue o conjunto de dados IMDb",local:"carregue-o-conjunto-de-dados-imdb",headingTag:"h2"}}),V=new z({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBaW1kYiUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJpbWRiJTIyKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-meta">>>> </span>imdb = load_dataset(<span class="hljs-string">"imdb"</span>)`,wrap:!1}}),H=new z({props:{code:"aW1kYiU1QiUyMnRlc3QlMjIlNUQlNUIwJTVE",highlighted:`<span class="hljs-meta">>>> </span>imdb[<span class="hljs-string">"test"</span>][<span class="hljs-number">0</span>] | |
| { | |
| <span class="hljs-string">"label"</span>: <span class="hljs-number">0</span>, | |
| <span class="hljs-string">"text"</span>: <span class="hljs-string">"I love sci-fi and am willing to put up with a lot. Sci-fi movies/TV are usually underfunded, under-appreciated and misunderstood. I tried to like this, I really did, but it is to good TV sci-fi as Babylon 5 is to Star Trek (the original). Silly prosthetics, cheap cardboard sets, stilted dialogues, CG that doesn't match the background, and painfully one-dimensional characters cannot be overcome with a 'sci-fi' setting. (I'm sure there are those of you out there who think Babylon 5 is good sci-fi TV. It's not. It's clichéd and uninspiring.) While US viewers might like emotion and character development, sci-fi is a genre that does not take itself seriously (cf. Star Trek). It may treat important issues, yet not as a serious philosophy. It's really difficult to care about the characters here as they are not simply foolish, just missing a spark of life. Their actions and reactions are wooden and predictable, often painful to watch. The makers of Earth KNOW it's rubbish as they have to always say \\"Gene Roddenberry's Earth...\\" otherwise people would not continue watching. Roddenberry's ashes must be turning in their orbit as this dull, cheap, poorly edited (watching it without advert breaks really brings this home) trudging Trabant of a show lumbers into space. Spoiler. So, kill off a main character. And then bring him back as another actor. Jeeez! Dallas all over again."</span>, | |
| }`,wrap:!1}}),S=new Ue({props:{title:"Pré-processamento dos dados",local:"pré-processamento-dos-dados",headingTag:"h2"}}),K=new z({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMEElMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCglMjJkaXN0aWxiZXJ0JTJGZGlzdGlsYmVydC1iYXNlLXVuY2FzZWQlMjIp",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer | |
| <span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"distilbert/distilbert-base-uncased"</span>)`,wrap:!1}}),ee=new z({props:{code:"ZGVmJTIwcHJlcHJvY2Vzc19mdW5jdGlvbihleGFtcGxlcyklM0ElMEElMjAlMjAlMjAlMjByZXR1cm4lMjB0b2tlbml6ZXIoZXhhbXBsZXMlNUIlMjJ0ZXh0JTIyJTVEJTJDJTIwdHJ1bmNhdGlvbiUzRFRydWUp",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">preprocess_function</span>(<span class="hljs-params">examples</span>): | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> tokenizer(examples[<span class="hljs-string">"text"</span>], truncation=<span class="hljs-literal">True</span>)`,wrap:!1}}),se=new z({props:{code:"dG9rZW5pemVkX2ltZGIlMjAlM0QlMjBpbWRiLm1hcChwcmVwcm9jZXNzX2Z1bmN0aW9uJTJDJTIwYmF0Y2hlZCUzRFRydWUp",highlighted:'tokenized_imdb = imdb.<span class="hljs-built_in">map</span>(preprocess_function, batched=<span class="hljs-literal">True</span>)',wrap:!1}}),N=new Ve({props:{pytorch:!0,tensorflow:!0,jax:!1,$$slots:{tensorflow:[Ke],pytorch:[Se]},$$scope:{ctx:j}}}),le=new Ue({props:{title:"Train",local:"train",headingTag:"h2"}}),Q=new Ve({props:{pytorch:!0,tensorflow:!0,jax:!1,$$slots:{tensorflow:[nt],pytorch:[st]},$$scope:{ctx:j}}}),L=new pe({props:{$$slots:{default:[ot]},$$scope:{ctx:j}}}),ne=new Le({props:{source:"https://github.com/huggingface/transformers/blob/main/docs/source/pt/tasks/sequence_classification.md"}}),{c(){t=w("meta"),r=m(),s=w("p"),p=m(),u(b.$$.fragment),_=m(),u(C.$$.fragment),k=m(),I=w("p"),I.textContent=Y,v=m(),R=w("p"),R.innerHTML=W,G=m(),u(Z.$$.fragment),U=m(),u(x.$$.fragment),i=m(),y=w("p"),y.textContent=B,X=m(),u(V.$$.fragment),F=m(),q=w("p"),q.textContent=A,E=m(),u(H.$$.fragment),a=m(),d=w("p"),d.textContent=ie,me=m(),D=w("ul"),D.innerHTML=ve,ce=m(),u(S.$$.fragment),de=m(),P=w("p"),P.innerHTML=Ze,ue=m(),u(K.$$.fragment),fe=m(),O=w("p"),O.innerHTML=Ce,$e=m(),u(ee.$$.fragment),he=m(),te=w("p"),te.innerHTML=xe,ge=m(),u(se.$$.fragment),Me=m(),ae=w("p"),ae.innerHTML=Re,be=m(),u(N.$$.fragment),ye=m(),u(le.$$.fragment),je=m(),u(Q.$$.fragment),we=m(),u(L.$$.fragment),Te=m(),u(ne.$$.fragment),Je=m(),re=w("p"),this.h()},l(e){const o=qe("svelte-u9bgzb",document.head);t=T(o,"META",{name:!0,content:!0}),o.forEach(l),r=c(e),s=T(e,"P",{}),Ie(s).forEach(l),p=c(e),f(b.$$.fragment,e),_=c(e),f(C.$$.fragment,e),k=c(e),I=T(e,"P",{"data-svelte-h":!0}),J(I)!=="svelte-hyu4ic"&&(I.textContent=Y),v=c(e),R=T(e,"P",{"data-svelte-h":!0}),J(R)!=="svelte-5coszk"&&(R.innerHTML=W),G=c(e),f(Z.$$.fragment,e),U=c(e),f(x.$$.fragment,e),i=c(e),y=T(e,"P",{"data-svelte-h":!0}),J(y)!=="svelte-2grkld"&&(y.textContent=B),X=c(e),f(V.$$.fragment,e),F=c(e),q=T(e,"P",{"data-svelte-h":!0}),J(q)!=="svelte-wv20hq"&&(q.textContent=A),E=c(e),f(H.$$.fragment,e),a=c(e),d=T(e,"P",{"data-svelte-h":!0}),J(d)!=="svelte-a71bu2"&&(d.textContent=ie),me=c(e),D=T(e,"UL",{"data-svelte-h":!0}),J(D)!=="svelte-cjdh2k"&&(D.innerHTML=ve),ce=c(e),f(S.$$.fragment,e),de=c(e),P=T(e,"P",{"data-svelte-h":!0}),J(P)!=="svelte-z180i9"&&(P.innerHTML=Ze),ue=c(e),f(K.$$.fragment,e),fe=c(e),O=T(e,"P",{"data-svelte-h":!0}),J(O)!=="svelte-3rjb3c"&&(O.innerHTML=Ce),$e=c(e),f(ee.$$.fragment,e),he=c(e),te=T(e,"P",{"data-svelte-h":!0}),J(te)!=="svelte-1uox5bf"&&(te.innerHTML=xe),ge=c(e),f(se.$$.fragment,e),Me=c(e),ae=T(e,"P",{"data-svelte-h":!0}),J(ae)!=="svelte-purifw"&&(ae.innerHTML=Re),be=c(e),f(N.$$.fragment,e),ye=c(e),f(le.$$.fragment,e),je=c(e),f(Q.$$.fragment,e),we=c(e),f(L.$$.fragment,e),Te=c(e),f(ne.$$.fragment,e),Je=c(e),re=T(e,"P",{}),Ie(re).forEach(l),this.h()},h(){Ye(t,"name","hf:doc:metadata"),Ye(t,"content",rt)},m(e,o){Ne(document.head,t),n(e,r,o),n(e,s,o),n(e,p,o),$(b,e,o),n(e,_,o),$(C,e,o),n(e,k,o),n(e,I,o),n(e,v,o),n(e,R,o),n(e,G,o),$(Z,e,o),n(e,U,o),$(x,e,o),n(e,i,o),n(e,y,o),n(e,X,o),$(V,e,o),n(e,F,o),n(e,q,o),n(e,E,o),$(H,e,o),n(e,a,o),n(e,d,o),n(e,me,o),n(e,D,o),n(e,ce,o),$(S,e,o),n(e,de,o),n(e,P,o),n(e,ue,o),$(K,e,o),n(e,fe,o),n(e,O,o),n(e,$e,o),$(ee,e,o),n(e,he,o),n(e,te,o),n(e,ge,o),$(se,e,o),n(e,Me,o),n(e,ae,o),n(e,be,o),$(N,e,o),n(e,ye,o),$(le,e,o),n(e,je,o),$(Q,e,o),n(e,we,o),$(L,e,o),n(e,Te,o),$(ne,e,o),n(e,Je,o),n(e,re,o),_e=!0},p(e,[o]){const We={};o&2&&(We.$$scope={dirty:o,ctx:e}),Z.$set(We);const Ge={};o&2&&(Ge.$$scope={dirty:o,ctx:e}),N.$set(Ge);const Xe={};o&2&&(Xe.$$scope={dirty:o,ctx:e}),Q.$set(Xe);const ze={};o&2&&(ze.$$scope={dirty:o,ctx:e}),L.$set(ze)},i(e){_e||(h(b.$$.fragment,e),h(C.$$.fragment,e),h(Z.$$.fragment,e),h(x.$$.fragment,e),h(V.$$.fragment,e),h(H.$$.fragment,e),h(S.$$.fragment,e),h(K.$$.fragment,e),h(ee.$$.fragment,e),h(se.$$.fragment,e),h(N.$$.fragment,e),h(le.$$.fragment,e),h(Q.$$.fragment,e),h(L.$$.fragment,e),h(ne.$$.fragment,e),_e=!0)},o(e){g(b.$$.fragment,e),g(C.$$.fragment,e),g(Z.$$.fragment,e),g(x.$$.fragment,e),g(V.$$.fragment,e),g(H.$$.fragment,e),g(S.$$.fragment,e),g(K.$$.fragment,e),g(ee.$$.fragment,e),g(se.$$.fragment,e),g(N.$$.fragment,e),g(le.$$.fragment,e),g(Q.$$.fragment,e),g(L.$$.fragment,e),g(ne.$$.fragment,e),_e=!1},d(e){e&&(l(r),l(s),l(p),l(_),l(k),l(I),l(v),l(R),l(G),l(U),l(i),l(y),l(X),l(F),l(q),l(E),l(a),l(d),l(me),l(D),l(ce),l(de),l(P),l(ue),l(fe),l(O),l($e),l(he),l(te),l(ge),l(Me),l(ae),l(be),l(ye),l(je),l(we),l(Te),l(Je),l(re)),l(t),M(b,e),M(C,e),M(Z,e),M(x,e),M(V,e),M(H,e),M(S,e),M(K,e),M(ee,e),M(se,e),M(N,e),M(le,e),M(Q,e),M(L,e),M(ne,e)}}}const rt='{"title":"Classificação de texto","local":"classificação-de-texto","sections":[{"title":"Carregue o conjunto de dados IMDb","local":"carregue-o-conjunto-de-dados-imdb","sections":[],"depth":2},{"title":"Pré-processamento dos dados","local":"pré-processamento-dos-dados","sections":[],"depth":2},{"title":"Train","local":"train","sections":[],"depth":2}],"depth":1}';function pt(j){return Ee(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class gt extends He{constructor(t){super(),Be(this,t,pt,it,Fe,{})}}export{gt as component}; | |
Xet Storage Details
- Size:
- 29.3 kB
- Xet hash:
- 1cd5bb068584816039caf854f954ac1fc49b789bdfe67a990754d3e85b4d1d0f
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.