Buckets:
| import{s as Bl,o as Nl,n as g}from"../chunks/scheduler.37c15a92.js";import{S as Vl,i as Gl,g as u,s as n,r as i,A as Hl,h as J,f as t,c as a,j as Zl,u as r,x as o,k as vl,y as Xl,a as l,v as c,d as m,t as y,w as j}from"../chunks/index.2bf4358c.js";import{T as x}from"../chunks/Tip.363c041f.js";import{Y as Xt}from"../chunks/Youtube.1e50a667.js";import{C as T}from"../chunks/CodeBlock.4e987730.js";import{C as Wl}from"../chunks/CourseFloatingBanner.9ff4c771.js";import{Q as pe}from"../chunks/Question.668688bc.js";import{H as b,E as zl}from"../chunks/getInferenceSnippets.24b50994.js";function El(d){let p,$='π‘ <strong>μΆκ° μλ£</strong>: λ λ§μ λ°μ΄ν° μΈνΈ λ‘λ© κΈ°λ²κ³Ό μμ λ₯Ό λ³΄λ €λ©΄ <a href="https://huggingface.co/docs/datasets/" rel="nofollow">π€ Datasets λ¬Έμ</a>λ₯Ό νμΈνμΈμ.';return{c(){p=u("p"),p.innerHTML=$},l(M){p=J(M,"P",{"data-svelte-h":!0}),o(p)!=="svelte-qls13x"&&(p.innerHTML=$)},m(M,U){l(M,p,U)},p:g,d(M){M&&t(p)}}}function Dl(d){let p,$="μ΄ λͺ λ Ήμ κΈ°λ³Έμ μΌλ‘ <em>~/.cache/huggingface/datasets</em>μ λ°μ΄ν° μΈνΈλ₯Ό λ€μ΄λ‘λνκ³ μΊμν©λλ€. 2μ₯μμ μΈκΈνλ―μ΄ <code>HF_HOME</code> νκ²½ λ³μλ₯Ό μ€μ νμ¬ μΊμ ν΄λλ₯Ό λ§μΆ€ μ€μ ν μ μμ΅λλ€.";return{c(){p=u("p"),p.innerHTML=$},l(M){p=J(M,"P",{"data-svelte-h":!0}),o(p)!=="svelte-1t8miz5"&&(p.innerHTML=$)},m(M,U){l(M,p,U)},p:g,d(M){M&&t(p)}}}function Sl(d){let p,$="βοΈ <strong>μ§μ ν΄λ³΄κΈ°!</strong> νλ ¨ μΈνΈμ 15λ²μ§Έ μμμ κ²μ¦ μΈνΈμ 87λ²μ§Έ μμλ₯Ό μ΄ν΄λ³΄μΈμ. κ·Έλ€μ λ μ΄λΈμ 무μμΈκ°μ?";return{c(){p=u("p"),p.innerHTML=$},l(M){p=J(M,"P",{"data-svelte-h":!0}),o(p)!=="svelte-hik0zm"&&(p.innerHTML=$)},m(M,U){l(M,p,U)},p:g,d(M){M&&t(p)}}}function Ll(d){let p,$='π‘ <strong>μ¬ν νμ΅</strong>: λ κ³ κΈ ν ν°ν κΈ°λ²κ³Ό λ€μν ν ν¬λμ΄μ κ° μλνλ λ°©μμ μ΄ν΄νλ €λ©΄ <a href="https://huggingface.co/docs/transformers/main/en/tokenizer_summary" rel="nofollow">π€ Tokenizers λ¬Έμ</a>μ <a href="https://huggingface.co/learn/cookbook/en/advanced_rag#tokenization-strategies" rel="nofollow">μΏ‘λΆμ ν ν°ν κ°μ΄λ</a>λ₯Ό μ΄ν΄λ³΄μΈμ.';return{c(){p=u("p"),p.innerHTML=$},l(M){p=J(M,"P",{"data-svelte-h":!0}),o(p)!=="svelte-wel7bo"&&(p.innerHTML=$)},m(M,U){l(M,p,U)},p:g,d(M){M&&t(p)}}}function Yl(d){let p,$="βοΈ <strong>μ§μ ν΄λ³΄κΈ°!</strong> νλ ¨ μΈνΈμ 15λ²μ§Έ μμλ₯Ό κ°μ Έμμ λ λ¬Έμ₯μ λ°λ‘λ°λ‘ ν ν°ννκ³ μμΌλ‘λ ν ν°νν΄λ³΄μΈμ. λ κ²°κ³Όμ μ°¨μ΄μ μ 무μμΈκ°μ?";return{c(){p=u("p"),p.innerHTML=$},l(M){p=J(M,"P",{"data-svelte-h":!0}),o(p)!=="svelte-xfqr76"&&(p.innerHTML=$)},m(M,U){l(M,p,U)},p:g,d(M){M&&t(p)}}}function Fl(d){let p,$='π <strong>μ±λ₯ ν</strong>: ν¨μ¨μ μΈ λ°μ΄ν° μ²λ¦¬ κΈ°λ²μ λν μμΈν λ΄μ©μ <a href="https://huggingface.co/docs/datasets/about_arrow" rel="nofollow">π€ Datasets μ±λ₯ κ°μ΄λ</a>μμ λ°°μΈ μ μμ΅λλ€.';return{c(){p=u("p"),p.innerHTML=$},l(M){p=J(M,"P",{"data-svelte-h":!0}),o(p)!=="svelte-4fzn75"&&(p.innerHTML=$)},m(M,U){l(M,p,U)},p:g,d(M){M&&t(p)}}}function ql(d){let p,$='π <strong>μ΅μ ν κ°μ΄λ</strong>: ν¨λ© μ λ΅κ³Ό TPU κ³ λ €μ¬νμ ν¬ν¨ν νλ ¨ μ±λ₯ μ΅μ νμ λν μμΈν λ΄μ©μ <a href="https://huggingface.co/docs/transformers/main/en/performance" rel="nofollow">π€ Transformers μ±λ₯ λ¬Έμ</a>λ₯Ό μ°Έμ‘°νμΈμ.';return{c(){p=u("p"),p.innerHTML=$},l(M){p=J(M,"P",{"data-svelte-h":!0}),o(p)!=="svelte-15yxb2i"&&(p.innerHTML=$)},m(M,U){l(M,p,U)},p:g,d(M){M&&t(p)}}}function Pl(d){let p,$="βοΈ <strong>μ§μ ν΄λ³΄κΈ°!</strong> GLUE SST-2 λ°μ΄ν° μΈνΈμμ μ μ²λ¦¬λ₯Ό 볡μ ν΄λ³΄μΈμ. μμ΄ μλ λ¨μΌ λ¬Έμ₯μΌλ‘ ꡬμ±λμ΄ μμ΄ μ½κ° λ€λ₯΄μ§λ§, λλ¨Έμ§λ λμΌνκ² λ³΄μΌ κ²μ λλ€. λ μ΄λ €μ΄ λμ μ μν΄μλ GLUE μμ μ€ μ΄λ€ κ²μμλ μλνλ μ μ²λ¦¬ ν¨μλ₯Ό μμ±ν΄λ³΄μΈμ.",M,U,w='π <strong>μΆκ° μ°μ΅</strong>: <a href="https://huggingface.co/docs/transformers/main/en/notebooks" rel="nofollow">π€ Transformers μμ </a>μμ μ΄λ¬ν μ€μ΅ μμ λ€μ νμΈν΄λ³΄μΈμ.';return{c(){p=u("p"),p.innerHTML=$,M=n(),U=u("p"),U.innerHTML=w},l(f){p=J(f,"P",{"data-svelte-h":!0}),o(p)!=="svelte-1qahcvz"&&(p.innerHTML=$),M=a(f),U=J(f,"P",{"data-svelte-h":!0}),o(U)!=="svelte-p3aocu"&&(U.innerHTML=w)},m(f,h){l(f,p,h),l(f,M,h),l(f,U,h)},p:g,d(f){f&&(t(p),t(M),t(U))}}}function Kl(d){let p,$="π‘ <strong>ν΅μ¬ μμ </strong>",M,U,w="<li>μ μ²λ¦¬λ₯Ό ν¨μ¬ λΉ λ₯΄κ² νλ €λ©΄ <code>Dataset.map()</code>μμ <code>batched=True</code>λ₯Ό μ¬μ©νμΈμ</li> <li><code>DataCollatorWithPadding</code>μ μ¬μ©ν λμ ν¨λ©μ΄ κ³ μ κΈΈμ΄ ν¨λ©λ³΄λ€ ν¨μ¨μ μ λλ€</li> <li>λͺ¨λΈμ μΆλ‘ κ²°κ³Όλ¬Ό(μμΉμ ν μ, μ¬λ°λ₯Έ μ΄ μ΄λ¦)μ λ§κ² νμ λ°μ΄ν°λ₯Ό μ μ²λ¦¬νμΈμ</li> <li>π€ Datasets λΌμ΄λΈλ¬λ¦¬λ λκ·λͺ¨ ν¨μ¨μ μΈ λ°μ΄ν° μ²λ¦¬λ₯Ό μν κ°λ ₯ν λꡬλ₯Ό μ 곡ν©λλ€</li>";return{c(){p=u("p"),p.innerHTML=$,M=n(),U=u("ul"),U.innerHTML=w},l(f){p=J(f,"P",{"data-svelte-h":!0}),o(p)!=="svelte-17xh5q0"&&(p.innerHTML=$),M=a(f),U=J(f,"UL",{"data-svelte-h":!0}),o(U)!=="svelte-mib1et"&&(U.innerHTML=w)},m(f,h){l(f,p,h),l(f,M,h),l(f,U,h)},p:g,d(f){f&&(t(p),t(M),t(U))}}}function Ol(d){let p,$,M,U,w,f,h,Me,B,Wt='<a href="/course/chapter2">μ΄μ μ±ν°</a>μ μμ μ μ΄μ΄μ, ν λ°°μΉμμ μνμ€ λΆλ₯κΈ°λ₯Ό νλ ¨νλ λ°©λ²μ λ€μκ³Ό κ°μ΅λλ€.',ie,N,re,V,zt="λ¬Όλ‘ λ λ¬Έμ₯λ§μΌλ‘ λͺ¨λΈμ νλ ¨νλ κ²μΌλ‘λ λ§€μ° μ’μ κ²°κ³Όλ₯Ό μ»μ μ μμ΅λλ€. λ λμ κ²°κ³Όλ₯Ό μ»μΌλ €λ©΄ λ ν° λ°μ΄ν° μΈνΈλ₯Ό μ€λΉν΄μΌ ν©λλ€.",ce,G,Et='μ΄ μΉμ μμλ William B. Dolanκ³Ό Chris Brockettμ <a href="https://www.aclweb.org/anthology/I05-5002.pdf" rel="nofollow">λ Όλ¬Έ</a>μμ μκ°λ MRPC(Microsoft Research Paraphrase Corpus) λ°μ΄ν° μΈνΈλ₯Ό μμ λ‘ μ¬μ©νκ² μ΅λλ€. μ΄ λ°μ΄ν° μΈνΈλ 5,801κ°μ λ¬Έμ₯ μμΌλ‘ ꡬμ±λμ΄ μμΌλ©°, κ° μμ΄ ν¨λ¬νλ μ΄μ¦μΈμ§ μλμ§λ₯Ό λνλ΄λ λ μ΄λΈμ΄ μμ΅λλ€(μ¦, λ λ¬Έμ₯μ΄ κ°μ μλ―ΈμΈμ§). μ΄ μ±ν°μμ μ΄ λ°μ΄ν° μΈνΈλ₯Ό μ νν μ΄μ λ μμ λ°μ΄ν° μΈνΈμ΄λ―λ‘ νλ ¨ μ€νμ νκΈ°μ μ½κΈ° λλ¬Έμ λλ€.',me,H,ye,X,je,W,Dt='Hubμλ λͺ¨λΈλΏλ§ μλλΌ λ€μν μΈμ΄λ‘ λ μ¬λ¬ λ°μ΄ν° μΈνΈλ μμ΅λλ€. <a href="https://huggingface.co/datasets" rel="nofollow">μ¬κΈ°</a>μμ λ°μ΄ν° μΈνΈλ₯Ό μ°Ύμλ³Ό μ μμΌλ©°, μ΄ μΉμ μ μλ£ν νμλ μλ‘μ΄ λ°μ΄ν° μΈνΈλ₯Ό λ‘λνκ³ μ²λ¦¬ν΄λ³΄λ κ²μ κΆμ₯ν©λλ€(<a href="https://huggingface.co/docs/datasets/loading" rel="nofollow">μ¬κΈ°</a>μμ μΌλ°μ μΈ λ¬Έμλ₯Ό μ°Έμ‘°νμΈμ). νμ§λ§ μ§κΈμ MRPC λ°μ΄ν° μΈνΈμ μ§μ€ν΄λ³΄κ² μ΅λλ€! μ΄κ²μ <a href="https://gluebenchmark.com/" rel="nofollow">GLUE λ²€μΉλ§ν¬</a>λ₯Ό ꡬμ±νλ 10κ° λ°μ΄ν° μΈνΈ μ€ νλλ‘, 10κ°μ μλ‘ λ€λ₯Έ ν μ€νΈ λΆλ₯ μμ μ κ±Έμ³ ML λͺ¨λΈμ μ±λ₯μ μΈ‘μ νλ λ° μ¬μ©λλ νμ μ λ²€μΉλ§ν¬μ λλ€.',ue,z,St="π€ Datasets λΌμ΄λΈλ¬λ¦¬λ Hubμμ λ°μ΄ν° μΈνΈλ₯Ό λ€μ΄λ‘λνκ³ μΊμνλ λ§€μ° κ°λ¨ν λͺ λ Ήμ μ 곡ν©λλ€. MRPC λ°μ΄ν° μΈνΈλ₯Ό λ€μκ³Ό κ°μ΄ λ€μ΄λ‘λν μ μμ΅λλ€.",Je,C,oe,E,Ue,D,$e,S,Lt="보μλ€μνΌ, νλ ¨ μΈνΈ, κ²μ¦ μΈνΈ, ν μ€νΈ μΈνΈκ° ν¬ν¨λ <code>DatasetDict</code> κ°μ²΄λ₯Ό μ»μ΅λλ€. κ°κ°μ μ¬λ¬ μ΄(<code>sentence1</code>, <code>sentence2</code>, <code>label</code>, <code>idx</code>)κ³Ό κ°λ³μ μΈ ν μλ₯Ό ν¬ν¨νλ©°, μ΄λ κ° μΈνΈμ μμ μμ λλ€(λ°λΌμ νλ ¨ μΈνΈμλ 3,668κ°μ λ¬Έμ₯ μμ΄, κ²μ¦ μΈνΈμλ 408κ°κ°, ν μ€νΈ μΈνΈμλ 1,725κ°κ° μμ΅λλ€).",fe,k,Te,L,Yt="λμ λ리μ²λΌ μΈλ±μ±νμ¬ <code>raw_datasets</code> κ°μ²΄μ κ° λ¬Έμ₯ μμ μ κ·Όν μ μμ΅λλ€.",de,Y,he,F,we,q,Ft="λ μ΄λΈμ΄ μ΄λ―Έ μ μλ‘ λμ΄ μμΌλ―λ‘ μ¬κΈ°μ μ μ²λ¦¬λ₯Ό ν νμκ° μμ΅λλ€. μ΄λ€ μ μκ° μ΄λ€ λ μ΄λΈμ ν΄λΉνλμ§ μμλ³΄λ €λ©΄ <code>raw_train_dataset</code>μ <code>features</code>λ₯Ό κ²μ¬νλ©΄ λ©λλ€. μ΄κ²μ κ° μ΄μ νμ μ μλ €μ€λλ€.",be,P,xe,K,ge,O,qt="λ΄λΆμ μΌλ‘ <code>label</code>μ <code>ClassLabel</code> νμ μ΄λ©°, μ μμ λ μ΄λΈ μ΄λ¦μ λ§€νμ΄ <em>names</em> ν΄λμ μ μ₯λμ΄ μμ΅λλ€. <code>0</code>μ <code>not_equivalent</code>μ, <code>1</code>μ <code>equivalent</code>μ ν΄λΉν©λλ€.",Ce,I,ke,ss,Ie,es,_e,ts,Pt='λ°μ΄ν° μΈνΈλ₯Ό μ μ²λ¦¬νλ €λ©΄ ν μ€νΈλ₯Ό λͺ¨λΈμ΄ μ΄ν΄ν μ μλ μ«μλ‘ λ³νν΄μΌ ν©λλ€. <a href="/course/chapter2">μ΄μ μ±ν°</a>μμ 보μλ―μ΄, μ΄λ ν ν¬λμ΄μ λ‘ μνλ©λλ€. ν ν¬λμ΄μ μ ν λ¬Έμ₯μ΄λ λ¬Έμ₯ λͺ©λ‘μ μ λ ₯ν μ μμΌλ―λ‘, λ€μκ³Ό κ°μ΄ κ° μμ λͺ¨λ 첫 λ²μ§Έ λ¬Έμ₯κ³Ό λͺ¨λ λ λ²μ§Έ λ¬Έμ₯μ μ§μ ν ν°νν μ μμ΅λλ€.',Ae,ls,Qe,_,Re,ns,Kt="νμ§λ§ λ μνμ€λ₯Ό λͺ¨λΈμ μ λ¬νκΈ°λ§ ν΄μλ λ λ¬Έμ₯μ΄ ν¨λ¬νλ μ΄μ¦μΈμ§ μλμ§ μμΈ‘ν μ μμ΅λλ€. λ μνμ€λ₯Ό μμΌλ‘ μ²λ¦¬νκ³ μ μ ν μ μ²λ¦¬λ₯Ό μ μ©ν΄μΌ ν©λλ€. λ€νν ν ν¬λμ΄μ λ ν μμ μνμ€λ₯Ό λ°μμ BERT λͺ¨λΈμ΄ κΈ°λνλ λ°©μμΌλ‘ μ€λΉν μλ μμ΅λλ€.",Ze,as,ve,ps,Be,Ms,Ot='<a href="/course/chapter2">2μ₯</a>μμ <code>input_ids</code>μ <code>attention_mask</code> ν€μ λν΄ λ Όμνμ§λ§, <code>token_type_ids</code>μ λν μ΄μΌκΈ°λ λ―Έλ€λμμ΅λλ€. μ΄ μμ μμ μ΄κ²μ μ λ ₯μ μ΄λ λΆλΆμ΄ 첫 λ²μ§Έ λ¬Έμ₯μ΄κ³ μ΄λ λΆλΆμ΄ λ λ²μ§Έ λ¬Έμ₯μΈμ§ λͺ¨λΈμ μλ €μ£Όλ μν μ ν©λλ€.',Ne,A,Ve,is,sl="<code>input_ids</code> μμ IDλ₯Ό λ€μ λ¨μ΄λ‘ λμ½λ©νλ©΄",Ge,rs,He,cs,el="λ€μμ μ»μ΅λλ€.",Xe,ms,We,ys,tl="λ°λΌμ λͺ¨λΈμ λ λ¬Έμ₯μ΄ μμ λ μ λ ₯μ΄ <code>[CLS] sentence1 [SEP] sentence2 [SEP]</code> ννμ΄κΈ°λ₯Ό κΈ°λνλ€λ κ²μ μ μ μμ΅λλ€. μ΄λ₯Ό <code>token_type_ids</code>μ λ§μΆ°λ³΄λ©΄",ze,js,Ee,us,ll="보μλ€μνΌ, <code>[CLS] sentence1 [SEP]</code>μ ν΄λΉνλ μ λ ₯ λΆλΆμ λͺ¨λ ν ν° νμ IDκ° <code>0</code>μ΄κ³ , <code>sentence2 [SEP]</code>μ ν΄λΉνλ λ€λ₯Έ λΆλΆλ€μ λͺ¨λ ν ν° νμ IDκ° <code>1</code>μ λλ€.",De,Js,nl="λ€λ₯Έ 체ν¬ν¬μΈνΈ(checkpoint)λ₯Ό μ ννλ©΄ ν ν°νλ μ λ ₯μ <code>token_type_ids</code>κ° λ°λμ μμ§λ μλ€λ μ μ μ£ΌμνμΈμ(μλ₯Ό λ€μ΄, DistilBERT λͺ¨λΈμ μ¬μ©νλ©΄ λ°νλμ§ μμ΅λλ€). λͺ¨λΈμ΄ μ¬μ νλ ¨ μ€μ μ΄λ₯Ό λ³Έ μ μ΄ μμ΄μ 무μμ ν΄μΌ ν μ§ μ λλ§ λ°νλ©λλ€.",Se,os,al='μ¬κΈ°μ BERTλ ν ν° νμ IDλ‘ μ¬μ νλ ¨λμμΌλ©°, <a href="/course/chapter1">1μ₯</a>μμ μ΄μΌκΈ°ν λ§μ€ν¬λ μΈμ΄ λͺ¨λΈλ§ λͺ©ν μΈμλ <em>λ€μ λ¬Έμ₯ μμΈ‘</em>μ΄λΌλ μΆκ° λͺ©νλ₯Ό κ°μ§κ³ μμ΅λλ€. μ΄ μμ μ λͺ©νλ λ¬Έμ₯ μ κ°μ κ΄κ³λ₯Ό λͺ¨λΈλ§νλ κ²μ λλ€.',Le,Us,pl="λ€μ λ¬Έμ₯ μμΈ‘μμλ λͺ¨λΈμ λ¬Έμ₯ μ(무μμλ‘ λ§μ€νΉλ ν ν°κ³Ό ν¨κ»)μ΄ μ 곡λκ³ λ λ²μ§Έ λ¬Έμ₯μ΄ μ²« λ²μ§Έ λ¬Έμ₯μ λ°λ₯΄λμ§ μμΈ‘νλλ‘ μμ²λ°μ΅λλ€. μμ μ μ½μ§ μκ² λ§λ€κΈ° μν΄, μ λ°μ κ²½μ°μλ λ¬Έμ₯λ€μ΄ μΆμΆλ μλ³Έ λ¬Έμμμ μλ‘λ₯Ό λ°λ₯΄κ³ , λλ¨Έμ§ μ λ°μ κ²½μ°μλ λ λ¬Έμ₯μ΄ μλ‘ λ€λ₯Έ λ¬Έμμμ λμ΅λλ€.",Ye,$s,Ml="μΌλ°μ μΌλ‘ ν ν°νλ μ λ ₯μ <code>token_type_ids</code>κ° μλμ§ μ¬λΆμ λν΄ κ±±μ ν νμλ μμ΅λλ€. ν ν¬λμ΄μ μ λͺ¨λΈμ λμΌν 체ν¬ν¬μΈνΈ(checkpoint)λ₯Ό μ¬μ©νλ ν, ν ν¬λμ΄μ κ° λͺ¨λΈμ μ 곡ν΄μΌ ν κ²μ μκ³ μμΌλ―λ‘ λͺ¨λ κ²μ΄ μ λ κ²μ λλ€.",Fe,fs,il='μ΄μ ν ν¬λμ΄μ κ° ν μμ λ¬Έμ₯μ μ΄λ»κ² μ²λ¦¬ν μ μλμ§ λ³΄μμΌλ―λ‘, μ΄λ₯Ό μ¬μ©νμ¬ μ 체 λ°μ΄ν° μΈνΈλ₯Ό ν ν°νν μ μμ΅λλ€: <a href="/course/chapter2">μ΄μ μ±ν°</a>μμμ²λΌ, 첫 λ²μ§Έ λ¬Έμ₯ λͺ©λ‘κ³Ό λ λ²μ§Έ λ¬Έμ₯ λͺ©λ‘μ μ 곡νμ¬ ν ν¬λμ΄μ μ λ¬Έμ₯ μ λͺ©λ‘μ μ λ ₯ν μ μμ΅λλ€. μ΄λ <a href="/course/chapter2">2μ₯</a>μμ λ³Έ ν¨λ©κ³Ό μλ΅ μ΅μ κ³Όλ νΈνλ©λλ€. λ°λΌμ νλ ¨ λ°μ΄ν° μΈνΈλ₯Ό μ μ²λ¦¬νλ ν κ°μ§ λ°©λ²μ',qe,Ts,Pe,ds,rl='μ΄κ²μ μ μλνμ§λ§, λμ λ리(ν€λ <code>input_ids</code>, <code>attention_mask</code>, <code>token_type_ids</code>μ΄κ³ κ°μ λͺ©λ‘μ λͺ©λ‘)λ₯Ό λ°ννλ€λ λ¨μ μ΄ μμ΅λλ€. λν ν ν°ν μ€μ μ 체 λ°μ΄ν° μΈνΈλ₯Ό λ©λͺ¨λ¦¬μ μ μ₯ν μ μλ μΆ©λΆν RAMμ΄ μλ κ²½μ°μλ§ μλν©λλ€(π€ Datasets λΌμ΄λΈλ¬λ¦¬μ λ°μ΄ν° μΈνΈλ λμ€ν¬μ μ μ₯λ <a href="https://arrow.apache.org/" rel="nofollow">Apache Arrow</a> νμΌμ΄λ―λ‘, μμ²ν μνλ§ λ©λͺ¨λ¦¬μ λ‘λλ©λλ€).',Ke,hs,cl='λ°μ΄ν°λ₯Ό λ°μ΄ν° μΈνΈλ‘ μ μ§νλ €λ©΄ <a href="https://huggingface.co/docs/datasets/package_reference/main_classes#datasets.Dataset.map" rel="nofollow"><code>Dataset.map()</code></a> λ©μλλ₯Ό μ¬μ©νκ² μ΅λλ€. μ΄λ ν ν°ν μ΄μμ μ μ²λ¦¬κ° νμν κ²½μ° μΆκ°μ μΈ μ μ°μ±λ μ 곡ν©λλ€. <code>map()</code> λ©μλλ λ°μ΄ν° μΈνΈμ κ° μμμ ν¨μλ₯Ό μ μ©νμ¬ μλνλ―λ‘, μ λ ₯μ ν ν°ννλ ν¨μλ₯Ό μ μν΄λ³΄κ² μ΅λλ€.',Oe,ws,st,bs,ml='μ΄ ν¨μλ λμ λ리(λ°μ΄ν° μΈνΈμ νλͺ©κ³Ό κ°μ)λ₯Ό λ°μμ <code>input_ids</code>, <code>attention_mask</code>, <code>token_type_ids</code> ν€κ° μλ μ λμ λ리λ₯Ό λ°νν©λλ€. <code>example</code> λμ λ리μ μ¬λ¬ μνμ΄ ν¬ν¨λμ΄ μμ΄λ(κ° ν€κ° λ¬Έμ₯ λͺ©λ‘μΌλ‘) μλνλ€λ μ μ μ£Όλͺ©νμΈμ. μμ λ³Έ κ²μ²λΌ <code>tokenizer</code>λ λ¬Έμ₯ μμ λͺ©λ‘μμ μλνκΈ° λλ¬Έμ λλ€. μ΄λ₯Ό ν΅ν΄ <code>map()</code> νΈμΆμμ <code>batched=True</code> μ΅μ μ μ¬μ©ν μ μμΌλ©°, μ΄λ ν ν°νλ₯Ό ν¬κ² κ°μνν κ²μ λλ€. <code>tokenizer</code>λ <a href="https://github.com/huggingface/tokenizers" rel="nofollow">π€ Tokenizers</a> λΌμ΄λΈλ¬λ¦¬μ Rustλ‘ μμ±λ ν ν¬λμ΄μ λ‘ λ·λ°μΉ¨λ©λλ€. μ΄ ν ν¬λμ΄μ λ λ§€μ° λΉ λ₯Ό μ μμ§λ§, ν λ²μ λ§μ μ λ ₯μ μ 곡ν΄μΌλ§ κ·Έλ μ΅λλ€.',et,xs,yl="μ§κΈμ ν ν°ν ν¨μμμ <code>padding</code> μΈμλ₯Ό λΉΌλ κ²μ μ£Όλͺ©νμΈμ. λͺ¨λ μνμ μ΅λ κΈΈμ΄λ‘ ν¨λ©νλ κ²μ ν¨μ¨μ μ΄μ§ μκΈ° λλ¬Έμ λλ€. λ°°μΉλ₯Ό λ§λ€ λ μνμ ν¨λ©νλ κ²μ΄ λ μ’μ΅λλ€. κ·Έλ¬λ©΄ ν΄λΉ λ°°μΉμ μ΅λ κΈΈμ΄κΉμ§λ§ ν¨λ©νλ©΄ λκ³ , μ 체 λ°μ΄ν° μΈνΈμ μ΅λ κΈΈμ΄κΉμ§ ν¨λ©ν νμκ° μκΈ° λλ¬Έμ λλ€. μ λ ₯μ κΈΈμ΄κ° λ§€μ° λ€μν λ λ§μ μκ°κ³Ό μ²λ¦¬ λ₯λ ₯μ μ μ½ν μ μμ΅λλ€!",tt,Q,lt,gs,jl="λ€μμ λͺ¨λ λ°μ΄ν° μΈνΈμ ν ν°ν ν¨μλ₯Ό ν λ²μ μ μ©νλ λ°©λ²μ λλ€. <code>map</code> νΈμΆμμ <code>batched=True</code>λ₯Ό μ¬μ©νλ―λ‘ ν¨μκ° λ°μ΄ν° μΈνΈμ κ° μμμ κ°λ³μ μΌλ‘κ° μλλΌ μ¬λ¬ μμμ ν λ²μ μ μ©λ©λλ€. μ΄λ₯Ό ν΅ν΄ λ λΉ λ₯Έ μ μ²λ¦¬κ° κ°λ₯ν©λλ€.",nt,Cs,at,ks,ul="π€ Datasets λΌμ΄λΈλ¬λ¦¬κ° μ΄ μ²λ¦¬λ₯Ό μ μ©νλ λ°©μμ μ μ²λ¦¬ ν¨μκ° λ°ννλ λμ λ리μ κ° ν€μ λν΄ λ°μ΄ν° μΈνΈμ μ νλλ₯Ό μΆκ°νλ κ²μ λλ€.",pt,Is,Mt,_s,Jl="<code>num_proc</code> μΈμλ₯Ό μ λ¬νμ¬ <code>map()</code>μΌλ‘ μ μ²λ¦¬ ν¨μλ₯Ό μ μ©ν λ λ©ν°νλ‘μΈμ±μ μ¬μ©ν μλ μμ΅λλ€. π€ Tokenizers λΌμ΄λΈλ¬λ¦¬κ° μ΄λ―Έ μ¬λ¬ μ€λ λλ₯Ό μ¬μ©νμ¬ μνμ λ λΉ λ₯΄κ² ν ν°ννλ―λ‘ μ¬κΈ°μλ μ΄λ₯Ό μ¬μ©νμ§ μμμ§λ§, μ΄ λΌμ΄λΈλ¬λ¦¬κ° λ·λ°μΉ¨νλ λΉ λ₯Έ ν ν¬λμ΄μ λ₯Ό μ¬μ©νμ§ μλλ€λ©΄ μ μ²λ¦¬ μλλ₯Ό λμΌ μ μμ΅λλ€.",it,As,ol="μ°λ¦¬μ <code>tokenize_function</code>μ <code>input_ids</code>, <code>attention_mask</code>, <code>token_type_ids</code> ν€κ° μλ λμ λ리λ₯Ό λ°ννλ―λ‘, μ΄ μΈ νλκ° λ°μ΄ν° μΈνΈμ λͺ¨λ λΆν μ μΆκ°λ©λλ€. μ μ²λ¦¬ ν¨μκ° <code>map()</code>μ μ μ©ν λ°μ΄ν° μΈνΈμ κΈ°μ‘΄ ν€μ λν μ κ°μ λ°ννλ€λ©΄ κΈ°μ‘΄ νλλ₯Ό λ³κ²½ν μλ μμμ κ²μ λλ€.",rt,Qs,Ul="λ§μ§λ§μΌλ‘ ν΄μΌ ν μΌμ μμλ€μ λ°°μΉλ‘ λ¬Άμ λ λͺ¨λ μμ λ₯Ό κ°μ₯ κΈ΄ μμμ κΈΈμ΄λ‘ ν¨λ©νλ κ²μ λλ€ β μ΄ κΈ°λ²μ <em>λμ ν¨λ©</em>μ΄λΌκ³ ν©λλ€.",ct,Rs,mt,Zs,yt,vs,$l="λ°°μΉ λ΄μμ μνλ€μ ν¨κ» λ°°μΉνλ μν μ νλ ν¨μλ₯Ό <em>collate function</em>μ΄λΌκ³ ν©λλ€. μ΄λ <code>DataLoader</code>λ₯Ό ꡬμΆν λ μ λ¬ν μ μλ μΈμλ‘, κΈ°λ³Έκ°μ μνμ PyTorch ν μλ‘ λ³ννκ³ μ°κ²°νλ ν¨μμ λλ€(μμκ° λͺ©λ‘, νν λλ λμ λλ¦¬μΈ κ²½μ° μ¬κ·μ μΌλ‘). μ°λ¦¬μ κ²½μ° μ λ ₯μ΄ λͺ¨λ κ°μ ν¬κΈ°κ° μλλ―λ‘ μ΄κ²μ λΆκ°λ₯ν κ²μ λλ€. μ°λ¦¬λ μλμ μΌλ‘ ν¨λ©μ μ°κΈ°νμ¬ κ° λ°°μΉμμλ§ νμμ λ°λΌ μ μ©νκ³ λ§μ ν¨λ©μ΄ μλ μ§λμΉκ² κΈ΄ μ λ ₯μ νΌνμ΅λλ€. μ΄κ²μ νλ ¨μ μλΉν κ°μνν κ²μ΄μ§λ§, TPUμμ νλ ¨νλ κ²½μ° λ¬Έμ λ₯Ό μΌμΌν¬ μ μλ€λ μ μ μ£ΌμνμΈμ β TPUλ μΆκ° ν¨λ©μ΄ νμνλλΌλ κ³ μ λ λͺ¨μμ μ νΈν©λλ€.",jt,R,ut,Bs,fl="μ€μ λ‘ μ΄λ₯Ό μννλ €λ©΄ ν¨κ» λ°°μΉνλ €λ λ°μ΄ν° μΈνΈ νλͺ©μ μ μ ν μμ ν¨λ©μ μ μ©ν collate functionμ μ μν΄μΌ ν©λλ€. λ€νν π€ Transformers λΌμ΄λΈλ¬λ¦¬λ <code>DataCollatorWithPadding</code>μ ν΅ν΄ μ΄λ¬ν ν¨μλ₯Ό μ 곡ν©λλ€. μΈμ€ν΄μ€νν λ ν ν¬λμ΄μ λ₯Ό λ°μμ(μ΄λ€ ν¨λ© ν ν°μ μ¬μ©ν μ§, λͺ¨λΈμ΄ μ λ ₯μ μΌμͺ½ λλ μ€λ₯Έμͺ½μ ν¨λ©μ κΈ°λνλμ§ μκΈ° μν΄) νμν λͺ¨λ κ²μ μνν©λλ€.",Jt,Ns,ot,Vs,Tl="μ΄ μλ‘μ΄ λꡬλ₯Ό ν μ€νΈνκΈ° μν΄, ν¨κ» λ°°μΉνκ³ μΆμ νλ ¨ μΈνΈμμ λͺ κ°μ μνμ κ°μ Έμλ³΄κ² μ΅λλ€. μ¬κΈ°μλ <code>idx</code>, <code>sentence1</code>, <code>sentence2</code> μ΄μ μ κ±°ν©λλ€. μ΄λ€μ νμνμ§ μκ³ λ¬Έμμ΄μ ν¬ν¨νκ³ μμΌλ©°(λ¬Έμμ΄λ‘λ ν μλ₯Ό λ§λ€ μ μμ), λ°°μΉμ κ° νλͺ© κΈΈμ΄λ₯Ό μ΄ν΄λ³΄κ² μ΅λλ€.",Ut,Gs,$t,Hs,ft,Xs,dl="λΉμ°ν 32λΆν° 67κΉμ§ λ€μν κΈΈμ΄μ μνμ μ»μ΅λλ€. λμ ν¨λ©μ μ΄ λ°°μΉμ μνλ€μ΄ λͺ¨λ λ°°μΉ λ΄ μ΅λ κΈΈμ΄μΈ 67λ‘ ν¨λ©λμ΄μΌ ν¨μ μλ―Έν©λλ€. λμ ν¨λ©μ΄ μλ€λ©΄, λͺ¨λ μνμ΄ μ 체 λ°μ΄ν° μΈνΈμ μ΅λ κΈΈμ΄λ λͺ¨λΈμ΄ λ°μ μ μλ μ΅λ κΈΈμ΄λ‘ ν¨λ©λμ΄μΌ ν κ²μ λλ€. <code>data_collator</code>κ° λ°°μΉλ₯Ό λμ μΌλ‘ μ¬λ°λ₯΄κ² ν¨λ©νλμ§ λ€μ νμΈν΄λ³΄κ² μ΅λλ€.",Tt,Ws,dt,zs,ht,Es,hl="μ’μ 보μ λλ€! μ΄μ μμ ν μ€νΈμμ λͺ¨λΈμ΄ μ²λ¦¬ν μ μλ λ°°μΉκΉμ§ λ§λ€μμΌλ―λ‘, λ―ΈμΈ μ‘°μ ν μ€λΉκ° λμμ΅λλ€!",wt,Z,bt,Ds,wl="μλ²½ν©λλ€! μ΄μ π€ Datasets λΌμ΄λΈλ¬λ¦¬μ μ΅μ λͺ¨λ² μ¬λ‘λ‘ λ°μ΄ν°λ₯Ό μ μ²λ¦¬νμΌλ―λ‘, μ΅μ Trainer APIλ₯Ό μ¬μ©νμ¬ λͺ¨λΈμ νλ ¨ν μ€λΉκ° λμμ΅λλ€. λ€μ μΉμ μμλ Hugging Face μνκ³μμ μ¬μ©ν μ μλ μ΅μ κΈ°λ₯κ³Ό μ΅μ νλ₯Ό μ¬μ©νμ¬ λͺ¨λΈμ ν¨κ³Όμ μΌλ‘ λ―ΈμΈ μ‘°μ νλ λ°©λ²μ 보μ¬λλ¦¬κ² μ΅λλ€.",xt,Ss,gt,Ls,bl="λ°μ΄ν° μ²λ¦¬ κ°λ μ λν μ΄ν΄λλ₯Ό ν μ€νΈν΄λ³΄μΈμ.",Ct,Ys,kt,Fs,It,qs,_t,Ps,At,Ks,Qt,Os,Rt,se,Zt,ee,vt,te,Bt,le,Nt,v,Vt,ne,Gt,ae,Ht;return w=new b({props:{title:"λ°μ΄ν° μ²λ¦¬",local:"processing-the-data",headingTag:"h1"}}),h=new Wl({props:{chapter:3,classNames:"absolute z-10 right-0 top-0",notebooks:[{label:"Google Colab",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/en/chapter3/section2.ipynb"},{label:"Aws Studio",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/en/chapter3/section2.ipynb"}]}}),N=new T({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdG9yY2gub3B0aW0lMjBpbXBvcnQlMjBBZGFtVyUwQWZyb20lMjB0cmFuc2Zvcm1lcnMlMjBpbXBvcnQlMjBBdXRvVG9rZW5pemVyJTJDJTIwQXV0b01vZGVsRm9yU2VxdWVuY2VDbGFzc2lmaWNhdGlvbiUwQSUwQSUyMyUyMCVFQyU5RCVCNCVFQyVBMCU4NCVFQSVCMyVCQyUyMCVFQiU4RiU5OSVFQyU5RCVCQyUwQWNoZWNrcG9pbnQlMjAlM0QlMjAlMjJiZXJ0LWJhc2UtdW5jYXNlZCUyMiUwQXRva2VuaXplciUyMCUzRCUyMEF1dG9Ub2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKGNoZWNrcG9pbnQpJTBBbW9kZWwlMjAlM0QlMjBBdXRvTW9kZWxGb3JTZXF1ZW5jZUNsYXNzaWZpY2F0aW9uLmZyb21fcHJldHJhaW5lZChjaGVja3BvaW50KSUwQXNlcXVlbmNlcyUyMCUzRCUyMCU1QiUwQSUyMCUyMCUyMCUyMCUyMkkndmUlMjBiZWVuJTIwd2FpdGluZyUyMGZvciUyMGElMjBIdWdnaW5nRmFjZSUyMGNvdXJzZSUyMG15JTIwd2hvbGUlMjBsaWZlLiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMlRoaXMlMjBjb3Vyc2UlMjBpcyUyMGFtYXppbmchJTIyJTJDJTBBJTVEJTBBYmF0Y2glMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzJTJDJTIwcGFkZGluZyUzRFRydWUlMkMlMjB0cnVuY2F0aW9uJTNEVHJ1ZSUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIpJTBBJTBBJTIzJTIwJUVDJTk3JUFDJUVBJUI4JUIwJUVBJUIwJTgwJTIwJUVDJTgzJTg4JUVCJUExJTlDJUVDJTlBJUI0JTIwJUVCJUI2JTgwJUVCJUI2JTg0JTBBYmF0Y2glNUIlMjJsYWJlbHMlMjIlNUQlMjAlM0QlMjB0b3JjaC50ZW5zb3IoJTVCMSUyQyUyMDElNUQpJTBBJTBBb3B0aW1pemVyJTIwJTNEJTIwQWRhbVcobW9kZWwucGFyYW1ldGVycygpKSUwQWxvc3MlMjAlM0QlMjBtb2RlbCgqKmJhdGNoKS5sb3NzJTBBbG9zcy5iYWNrd2FyZCgpJTBBb3B0aW1pemVyLnN0ZXAoKQ==",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> torch.optim <span class="hljs-keyword">import</span> AdamW | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, AutoModelForSequenceClassification | |
| <span class="hljs-comment"># μ΄μ κ³Ό λμΌ</span> | |
| checkpoint = <span class="hljs-string">"bert-base-uncased"</span> | |
| tokenizer = AutoTokenizer.from_pretrained(checkpoint) | |
| model = AutoModelForSequenceClassification.from_pretrained(checkpoint) | |
| sequences = [ | |
| <span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span>, | |
| <span class="hljs-string">"This course is amazing!"</span>, | |
| ] | |
| batch = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"pt"</span>) | |
| <span class="hljs-comment"># μ¬κΈ°κ° μλ‘μ΄ λΆλΆ</span> | |
| batch[<span class="hljs-string">"labels"</span>] = torch.tensor([<span class="hljs-number">1</span>, <span class="hljs-number">1</span>]) | |
| optimizer = AdamW(model.parameters()) | |
| loss = model(**batch).loss | |
| loss.backward() | |
| optimizer.step()`,wrap:!1}}),H=new b({props:{title:"Hubμμ λ°μ΄ν° μΈνΈ κ°μ Έμ€κΈ°",local:"loading-a-dataset-from-the-hub",headingTag:"h3"}}),X=new Xt({props:{id:"_BZearw7f0w"}}),C=new x({props:{$$slots:{default:[El]},$$scope:{ctx:d}}}),E=new T({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBcmF3X2RhdGFzZXRzJTIwJTNEJTIwbG9hZF9kYXRhc2V0KCUyMmdsdWUlMjIlMkMlMjAlMjJtcnBjJTIyKSUwQXJhd19kYXRhc2V0cw==",highlighted:`<span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| raw_datasets = load_dataset(<span class="hljs-string">"glue"</span>, <span class="hljs-string">"mrpc"</span>) | |
| raw_datasets`,wrap:!1}}),D=new T({props:{code:"RGF0YXNldERpY3QoJTdCJTBBJTIwJTIwJTIwJTIwdHJhaW4lM0ElMjBEYXRhc2V0KCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGZlYXR1cmVzJTNBJTIwJTVCJ3NlbnRlbmNlMSclMkMlMjAnc2VudGVuY2UyJyUyQyUyMCdsYWJlbCclMkMlMjAnaWR4JyU1RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG51bV9yb3dzJTNBJTIwMzY2OCUwQSUyMCUyMCUyMCUyMCU3RCklMEElMjAlMjAlMjAlMjB2YWxpZGF0aW9uJTNBJTIwRGF0YXNldCglN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBmZWF0dXJlcyUzQSUyMCU1QidzZW50ZW5jZTEnJTJDJTIwJ3NlbnRlbmNlMiclMkMlMjAnbGFiZWwnJTJDJTIwJ2lkeCclNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBudW1fcm93cyUzQSUyMDQwOCUwQSUyMCUyMCUyMCUyMCU3RCklMEElMjAlMjAlMjAlMjB0ZXN0JTNBJTIwRGF0YXNldCglN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBmZWF0dXJlcyUzQSUyMCU1QidzZW50ZW5jZTEnJTJDJTIwJ3NlbnRlbmNlMiclMkMlMjAnbGFiZWwnJTJDJTIwJ2lkeCclNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBudW1fcm93cyUzQSUyMDE3MjUlMEElMjAlMjAlMjAlMjAlN0QpJTBBJTdEKQ==",highlighted:`DatasetDict({ | |
| train: Dataset({ | |
| features: [<span class="hljs-string">'sentence1'</span>, <span class="hljs-string">'sentence2'</span>, <span class="hljs-string">'label'</span>, <span class="hljs-string">'idx'</span>], | |
| num_rows: <span class="hljs-number">3668</span> | |
| }) | |
| validation: Dataset({ | |
| features: [<span class="hljs-string">'sentence1'</span>, <span class="hljs-string">'sentence2'</span>, <span class="hljs-string">'label'</span>, <span class="hljs-string">'idx'</span>], | |
| num_rows: <span class="hljs-number">408</span> | |
| }) | |
| test: Dataset({ | |
| features: [<span class="hljs-string">'sentence1'</span>, <span class="hljs-string">'sentence2'</span>, <span class="hljs-string">'label'</span>, <span class="hljs-string">'idx'</span>], | |
| num_rows: <span class="hljs-number">1725</span> | |
| }) | |
| })`,wrap:!1}}),k=new x({props:{$$slots:{default:[Dl]},$$scope:{ctx:d}}}),Y=new T({props:{code:"cmF3X3RyYWluX2RhdGFzZXQlMjAlM0QlMjByYXdfZGF0YXNldHMlNUIlMjJ0cmFpbiUyMiU1RCUwQXJhd190cmFpbl9kYXRhc2V0JTVCMCU1RA==",highlighted:`raw_train_dataset = raw_datasets[<span class="hljs-string">"train"</span>] | |
| raw_train_dataset[<span class="hljs-number">0</span>]`,wrap:!1}}),F=new T({props:{code:"JTdCJ2lkeCclM0ElMjAwJTJDJTBBJTIwJ2xhYmVsJyUzQSUyMDElMkMlMEElMjAnc2VudGVuY2UxJyUzQSUyMCdBbXJvemklMjBhY2N1c2VkJTIwaGlzJTIwYnJvdGhlciUyMCUyQyUyMHdob20lMjBoZSUyMGNhbGxlZCUyMCUyMiUyMHRoZSUyMHdpdG5lc3MlMjAlMjIlMjAlMkMlMjBvZiUyMGRlbGliZXJhdGVseSUyMGRpc3RvcnRpbmclMjBoaXMlMjBldmlkZW5jZSUyMC4nJTJDJTBBJTIwJ3NlbnRlbmNlMiclM0ElMjAnUmVmZXJyaW5nJTIwdG8lMjBoaW0lMjBhcyUyMG9ubHklMjAlMjIlMjB0aGUlMjB3aXRuZXNzJTIwJTIyJTIwJTJDJTIwQW1yb3ppJTIwYWNjdXNlZCUyMGhpcyUyMGJyb3RoZXIlMjBvZiUyMGRlbGliZXJhdGVseSUyMGRpc3RvcnRpbmclMjBoaXMlMjBldmlkZW5jZSUyMC4nJTdE",highlighted:`{<span class="hljs-string">'idx'</span>: <span class="hljs-number">0</span>, | |
| <span class="hljs-string">'label'</span>: <span class="hljs-number">1</span>, | |
| <span class="hljs-string">'sentence1'</span>: <span class="hljs-string">'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .'</span>, | |
| <span class="hljs-string">'sentence2'</span>: <span class="hljs-string">'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .'</span>}`,wrap:!1}}),P=new T({props:{code:"cmF3X3RyYWluX2RhdGFzZXQuZmVhdHVyZXM=",highlighted:"raw_train_dataset.features",wrap:!1}}),K=new T({props:{code:"JTdCJ3NlbnRlbmNlMSclM0ElMjBWYWx1ZShkdHlwZSUzRCdzdHJpbmcnJTJDJTIwaWQlM0ROb25lKSUyQyUwQSUyMCdzZW50ZW5jZTInJTNBJTIwVmFsdWUoZHR5cGUlM0Qnc3RyaW5nJyUyQyUyMGlkJTNETm9uZSklMkMlMEElMjAnbGFiZWwnJTNBJTIwQ2xhc3NMYWJlbChudW1fY2xhc3NlcyUzRDIlMkMlMjBuYW1lcyUzRCU1Qidub3RfZXF1aXZhbGVudCclMkMlMjAnZXF1aXZhbGVudCclNUQlMkMlMjBuYW1lc19maWxlJTNETm9uZSUyQyUyMGlkJTNETm9uZSklMkMlMEElMjAnaWR4JyUzQSUyMFZhbHVlKGR0eXBlJTNEJ2ludDMyJyUyQyUyMGlkJTNETm9uZSklN0Q=",highlighted:`{<span class="hljs-string">'sentence1'</span>: Value(dtype=<span class="hljs-string">'string'</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>), | |
| <span class="hljs-string">'sentence2'</span>: Value(dtype=<span class="hljs-string">'string'</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>), | |
| <span class="hljs-string">'label'</span>: ClassLabel(num_classes=<span class="hljs-number">2</span>, names=[<span class="hljs-string">'not_equivalent'</span>, <span class="hljs-string">'equivalent'</span>], names_file=<span class="hljs-literal">None</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>), | |
| <span class="hljs-string">'idx'</span>: Value(dtype=<span class="hljs-string">'int32'</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>)}`,wrap:!1}}),I=new x({props:{$$slots:{default:[Sl]},$$scope:{ctx:d}}}),ss=new b({props:{title:"λ°μ΄ν° μΈνΈ μ μ²λ¦¬",local:"preprocessing-a-dataset",headingTag:"h3"}}),es=new Xt({props:{id:"0u3ioSwev3s"}}),ls=new T({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMEElMEFjaGVja3BvaW50JTIwJTNEJTIwJTIyYmVydC1iYXNlLXVuY2FzZWQlMjIlMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZChjaGVja3BvaW50KSUwQXRva2VuaXplZF9zZW50ZW5jZXNfMSUyMCUzRCUyMHRva2VuaXplcihyYXdfZGF0YXNldHMlNUIlMjJ0cmFpbiUyMiU1RCU1QiUyMnNlbnRlbmNlMSUyMiU1RCklMEF0b2tlbml6ZWRfc2VudGVuY2VzXzIlMjAlM0QlMjB0b2tlbml6ZXIocmF3X2RhdGFzZXRzJTVCJTIydHJhaW4lMjIlNUQlNUIlMjJzZW50ZW5jZTIlMjIlNUQp",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer | |
| checkpoint = <span class="hljs-string">"bert-base-uncased"</span> | |
| tokenizer = AutoTokenizer.from_pretrained(checkpoint) | |
| tokenized_sentences_1 = tokenizer(raw_datasets[<span class="hljs-string">"train"</span>][<span class="hljs-string">"sentence1"</span>]) | |
| tokenized_sentences_2 = tokenizer(raw_datasets[<span class="hljs-string">"train"</span>][<span class="hljs-string">"sentence2"</span>])`,wrap:!1}}),_=new x({props:{$$slots:{default:[Ll]},$$scope:{ctx:d}}}),as=new T({props:{code:"aW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyKCUyMlRoaXMlMjBpcyUyMHRoZSUyMGZpcnN0JTIwc2VudGVuY2UuJTIyJTJDJTIwJTIyVGhpcyUyMGlzJTIwdGhlJTIwc2Vjb25kJTIwb25lLiUyMiklMEFpbnB1dHM=",highlighted:`inputs = tokenizer(<span class="hljs-string">"This is the first sentence."</span>, <span class="hljs-string">"This is the second one."</span>) | |
| inputs`,wrap:!1}}),ps=new T({props:{code:"JTdCJTIwJTBBJTIwJTIwJ2lucHV0X2lkcyclM0ElMjAlNUIxMDElMkMlMjAyMDIzJTJDJTIwMjAwMyUyQyUyMDE5OTYlMkMlMjAyMDM0JTJDJTIwNjI1MSUyQyUyMDEwMTIlMkMlMjAxMDIlMkMlMjAyMDIzJTJDJTIwMjAwMyUyQyUyMDE5OTYlMkMlMjAyMTE3JTJDJTIwMjAyOCUyQyUyMDEwMTIlMkMlMjAxMDIlNUQlMkMlMEElMjAlMjAndG9rZW5fdHlwZV9pZHMnJTNBJTIwJTVCMCUyQyUyMDAlMkMlMjAwJTJDJTIwMCUyQyUyMDAlMkMlMjAwJTJDJTIwMCUyQyUyMDAlMkMlMjAxJTJDJTIwMSUyQyUyMDElMkMlMjAxJTJDJTIwMSUyQyUyMDElMkMlMjAxJTVEJTJDJTBBJTIwJTIwJ2F0dGVudGlvbl9tYXNrJyUzQSUyMCU1QjElMkMlMjAxJTJDJTIwMSUyQyUyMDElMkMlMjAxJTJDJTIwMSUyQyUyMDElMkMlMjAxJTJDJTIwMSUyQyUyMDElMkMlMjAxJTJDJTIwMSUyQyUyMDElMkMlMjAxJTJDJTIwMSU1RCUwQSU3RA==",highlighted:`{ | |
| <span class="hljs-string">'input_ids'</span>: [<span class="hljs-number">101</span>, <span class="hljs-number">2023</span>, <span class="hljs-number">2003</span>, <span class="hljs-number">1996</span>, <span class="hljs-number">2034</span>, <span class="hljs-number">6251</span>, <span class="hljs-number">1012</span>, <span class="hljs-number">102</span>, <span class="hljs-number">2023</span>, <span class="hljs-number">2003</span>, <span class="hljs-number">1996</span>, <span class="hljs-number">2117</span>, <span class="hljs-number">2028</span>, <span class="hljs-number">1012</span>, <span class="hljs-number">102</span>], | |
| <span class="hljs-string">'token_type_ids'</span>: [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>], | |
| <span class="hljs-string">'attention_mask'</span>: [<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>] | |
| }`,wrap:!1}}),A=new x({props:{$$slots:{default:[Yl]},$$scope:{ctx:d}}}),rs=new T({props:{code:"dG9rZW5pemVyLmNvbnZlcnRfaWRzX3RvX3Rva2VucyhpbnB1dHMlNUIlMjJpbnB1dF9pZHMlMjIlNUQp",highlighted:'tokenizer.convert_ids_to_tokens(inputs[<span class="hljs-string">"input_ids"</span>])',wrap:!1}}),ms=new T({props:{code:"JTVCJyU1QkNMUyU1RCclMkMlMjAndGhpcyclMkMlMjAnaXMnJTJDJTIwJ3RoZSclMkMlMjAnZmlyc3QnJTJDJTIwJ3NlbnRlbmNlJyUyQyUyMCcuJyUyQyUyMCclNUJTRVAlNUQnJTJDJTIwJ3RoaXMnJTJDJTIwJ2lzJyUyQyUyMCd0aGUnJTJDJTIwJ3NlY29uZCclMkMlMjAnb25lJyUyQyUyMCcuJyUyQyUyMCclNUJTRVAlNUQnJTVE",highlighted:'[<span class="hljs-string">'[CLS]'</span>, <span class="hljs-string">'this'</span>, <span class="hljs-string">'is'</span>, <span class="hljs-string">'the'</span>, <span class="hljs-string">'first'</span>, <span class="hljs-string">'sentence'</span>, <span class="hljs-string">'.'</span>, <span class="hljs-string">'[SEP]'</span>, <span class="hljs-string">'this'</span>, <span class="hljs-string">'is'</span>, <span class="hljs-string">'the'</span>, <span class="hljs-string">'second'</span>, <span class="hljs-string">'one'</span>, <span class="hljs-string">'.'</span>, <span class="hljs-string">'[SEP]'</span>]',wrap:!1}}),js=new T({props:{code:"JTVCJyU1QkNMUyU1RCclMkMlMjAndGhpcyclMkMlMjAnaXMnJTJDJTIwJ3RoZSclMkMlMjAnZmlyc3QnJTJDJTIwJ3NlbnRlbmNlJyUyQyUyMCcuJyUyQyUyMCclNUJTRVAlNUQnJTJDJTIwJ3RoaXMnJTJDJTIwJ2lzJyUyQyUyMCd0aGUnJTJDJTIwJ3NlY29uZCclMkMlMjAnb25lJyUyQyUyMCcuJyUyQyUyMCclNUJTRVAlNUQnJTVEJTBBJTVCJTIwJTIwJTIwJTIwJTIwJTIwMCUyQyUyMCUyMCUyMCUyMCUyMCUyMDAlMkMlMjAlMjAlMjAlMjAwJTJDJTIwJTIwJTIwJTIwJTIwMCUyQyUyMCUyMCUyMCUyMCUyMCUyMCUyMDAlMkMlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAwJTJDJTIwJTIwJTIwMCUyQyUyMCUyMCUyMCUyMCUyMCUyMCUyMDAlMkMlMjAlMjAlMjAlMjAlMjAlMjAxJTJDJTIwJTIwJTIwJTIwMSUyQyUyMCUyMCUyMCUyMCUyMDElMkMlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAxJTJDJTIwJTIwJTIwJTIwJTIwMSUyQyUyMCUyMCUyMDElMkMlMjAlMjAlMjAlMjAlMjAlMjAlMjAxJTVE",highlighted:`[<span class="hljs-string">'[CLS]'</span>, <span class="hljs-string">'this'</span>, <span class="hljs-string">'is'</span>, <span class="hljs-string">'the'</span>, <span class="hljs-string">'first'</span>, <span class="hljs-string">'sentence'</span>, <span class="hljs-string">'.'</span>, <span class="hljs-string">'[SEP]'</span>, <span class="hljs-string">'this'</span>, <span class="hljs-string">'is'</span>, <span class="hljs-string">'the'</span>, <span class="hljs-string">'second'</span>, <span class="hljs-string">'one'</span>, <span class="hljs-string">'.'</span>, <span class="hljs-string">'[SEP]'</span>] | |
| [ <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>]`,wrap:!1}}),Ts=new T({props:{code:"dG9rZW5pemVkX2RhdGFzZXQlMjAlM0QlMjB0b2tlbml6ZXIoJTBBJTIwJTIwJTIwJTIwcmF3X2RhdGFzZXRzJTVCJTIydHJhaW4lMjIlNUQlNUIlMjJzZW50ZW5jZTElMjIlNUQlMkMlMEElMjAlMjAlMjAlMjByYXdfZGF0YXNldHMlNUIlMjJ0cmFpbiUyMiU1RCU1QiUyMnNlbnRlbmNlMiUyMiU1RCUyQyUwQSUyMCUyMCUyMCUyMHBhZGRpbmclM0RUcnVlJTJDJTBBJTIwJTIwJTIwJTIwdHJ1bmNhdGlvbiUzRFRydWUlMkMlMEEp",highlighted:`tokenized_dataset = tokenizer( | |
| raw_datasets[<span class="hljs-string">"train"</span>][<span class="hljs-string">"sentence1"</span>], | |
| raw_datasets[<span class="hljs-string">"train"</span>][<span class="hljs-string">"sentence2"</span>], | |
| padding=<span class="hljs-literal">True</span>, | |
| truncation=<span class="hljs-literal">True</span>, | |
| )`,wrap:!1}}),ws=new T({props:{code:"ZGVmJTIwdG9rZW5pemVfZnVuY3Rpb24oZXhhbXBsZSklM0ElMEElMjAlMjAlMjAlMjByZXR1cm4lMjB0b2tlbml6ZXIoZXhhbXBsZSU1QiUyMnNlbnRlbmNlMSUyMiU1RCUyQyUyMGV4YW1wbGUlNUIlMjJzZW50ZW5jZTIlMjIlNUQlMkMlMjB0cnVuY2F0aW9uJTNEVHJ1ZSk=",highlighted:`<span class="hljs-keyword">def</span> <span class="hljs-title function_">tokenize_function</span>(<span class="hljs-params">example</span>): | |
| <span class="hljs-keyword">return</span> tokenizer(example[<span class="hljs-string">"sentence1"</span>], example[<span class="hljs-string">"sentence2"</span>], truncation=<span class="hljs-literal">True</span>)`,wrap:!1}}),Q=new x({props:{$$slots:{default:[Fl]},$$scope:{ctx:d}}}),Cs=new T({props:{code:"dG9rZW5pemVkX2RhdGFzZXRzJTIwJTNEJTIwcmF3X2RhdGFzZXRzLm1hcCh0b2tlbml6ZV9mdW5jdGlvbiUyQyUyMGJhdGNoZWQlM0RUcnVlKSUwQXRva2VuaXplZF9kYXRhc2V0cw==",highlighted:`tokenized_datasets = raw_datasets.<span class="hljs-built_in">map</span>(tokenize_function, batched=<span class="hljs-literal">True</span>) | |
| tokenized_datasets`,wrap:!1}}),Is=new T({props:{code:"RGF0YXNldERpY3QoJTdCJTBBJTIwJTIwJTIwJTIwdHJhaW4lM0ElMjBEYXRhc2V0KCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGZlYXR1cmVzJTNBJTIwJTVCJ2F0dGVudGlvbl9tYXNrJyUyQyUyMCdpZHgnJTJDJTIwJ2lucHV0X2lkcyclMkMlMjAnbGFiZWwnJTJDJTIwJ3NlbnRlbmNlMSclMkMlMjAnc2VudGVuY2UyJyUyQyUyMCd0b2tlbl90eXBlX2lkcyclNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBudW1fcm93cyUzQSUyMDM2NjglMEElMjAlMjAlMjAlMjAlN0QpJTBBJTIwJTIwJTIwJTIwdmFsaWRhdGlvbiUzQSUyMERhdGFzZXQoJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZmVhdHVyZXMlM0ElMjAlNUInYXR0ZW50aW9uX21hc2snJTJDJTIwJ2lkeCclMkMlMjAnaW5wdXRfaWRzJyUyQyUyMCdsYWJlbCclMkMlMjAnc2VudGVuY2UxJyUyQyUyMCdzZW50ZW5jZTInJTJDJTIwJ3Rva2VuX3R5cGVfaWRzJyU1RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG51bV9yb3dzJTNBJTIwNDA4JTBBJTIwJTIwJTIwJTIwJTdEKSUwQSUyMCUyMCUyMCUyMHRlc3QlM0ElMjBEYXRhc2V0KCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGZlYXR1cmVzJTNBJTIwJTVCJ2F0dGVudGlvbl9tYXNrJyUyQyUyMCdpZHgnJTJDJTIwJ2lucHV0X2lkcyclMkMlMjAnbGFiZWwnJTJDJTIwJ3NlbnRlbmNlMSclMkMlMjAnc2VudGVuY2UyJyUyQyUyMCd0b2tlbl90eXBlX2lkcyclNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBudW1fcm93cyUzQSUyMDE3MjUlMEElMjAlMjAlMjAlMjAlN0QpJTBBJTdEKQ==",highlighted:`DatasetDict({ | |
| train: Dataset({ | |
| features: [<span class="hljs-string">'attention_mask'</span>, <span class="hljs-string">'idx'</span>, <span class="hljs-string">'input_ids'</span>, <span class="hljs-string">'label'</span>, <span class="hljs-string">'sentence1'</span>, <span class="hljs-string">'sentence2'</span>, <span class="hljs-string">'token_type_ids'</span>], | |
| num_rows: <span class="hljs-number">3668</span> | |
| }) | |
| validation: Dataset({ | |
| features: [<span class="hljs-string">'attention_mask'</span>, <span class="hljs-string">'idx'</span>, <span class="hljs-string">'input_ids'</span>, <span class="hljs-string">'label'</span>, <span class="hljs-string">'sentence1'</span>, <span class="hljs-string">'sentence2'</span>, <span class="hljs-string">'token_type_ids'</span>], | |
| num_rows: <span class="hljs-number">408</span> | |
| }) | |
| test: Dataset({ | |
| features: [<span class="hljs-string">'attention_mask'</span>, <span class="hljs-string">'idx'</span>, <span class="hljs-string">'input_ids'</span>, <span class="hljs-string">'label'</span>, <span class="hljs-string">'sentence1'</span>, <span class="hljs-string">'sentence2'</span>, <span class="hljs-string">'token_type_ids'</span>], | |
| num_rows: <span class="hljs-number">1725</span> | |
| }) | |
| })`,wrap:!1}}),Rs=new b({props:{title:"λμ ν¨λ©",local:"dynamic-padding",headingTag:"h5"}}),Zs=new Xt({props:{id:"7q5NyFT8REg"}}),R=new x({props:{$$slots:{default:[ql]},$$scope:{ctx:d}}}),Ns=new T({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMERhdGFDb2xsYXRvcldpdGhQYWRkaW5nJTBBJTBBZGF0YV9jb2xsYXRvciUyMCUzRCUyMERhdGFDb2xsYXRvcldpdGhQYWRkaW5nKHRva2VuaXplciUzRHRva2VuaXplcik=",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> DataCollatorWithPadding | |
| data_collator = DataCollatorWithPadding(tokenizer=tokenizer)`,wrap:!1}}),Gs=new T({props:{code:"c2FtcGxlcyUyMCUzRCUyMHRva2VuaXplZF9kYXRhc2V0cyU1QiUyMnRyYWluJTIyJTVEJTVCJTNBOCU1RCUwQXNhbXBsZXMlMjAlM0QlMjAlN0JrJTNBJTIwdiUyMGZvciUyMGslMkMlMjB2JTIwaW4lMjBzYW1wbGVzLml0ZW1zKCklMjBpZiUyMGslMjBub3QlMjBpbiUyMCU1QiUyMmlkeCUyMiUyQyUyMCUyMnNlbnRlbmNlMSUyMiUyQyUyMCUyMnNlbnRlbmNlMiUyMiU1RCU3RCUwQSU1Qmxlbih4KSUyMGZvciUyMHglMjBpbiUyMHNhbXBsZXMlNUIlMjJpbnB1dF9pZHMlMjIlNUQlNUQ=",highlighted:`samples = tokenized_datasets[<span class="hljs-string">"train"</span>][:<span class="hljs-number">8</span>] | |
| samples = {k: v <span class="hljs-keyword">for</span> k, v <span class="hljs-keyword">in</span> samples.items() <span class="hljs-keyword">if</span> k <span class="hljs-keyword">not</span> <span class="hljs-keyword">in</span> [<span class="hljs-string">"idx"</span>, <span class="hljs-string">"sentence1"</span>, <span class="hljs-string">"sentence2"</span>]} | |
| [<span class="hljs-built_in">len</span>(x) <span class="hljs-keyword">for</span> x <span class="hljs-keyword">in</span> samples[<span class="hljs-string">"input_ids"</span>]]`,wrap:!1}}),Hs=new T({props:{code:"JTVCNTAlMkMlMjA1OSUyQyUyMDQ3JTJDJTIwNjclMkMlMjA1OSUyQyUyMDUwJTJDJTIwNjIlMkMlMjAzMiU1RA==",highlighted:'[<span class="hljs-number">50</span>, <span class="hljs-number">59</span>, <span class="hljs-number">47</span>, <span class="hljs-number">67</span>, <span class="hljs-number">59</span>, <span class="hljs-number">50</span>, <span class="hljs-number">62</span>, <span class="hljs-number">32</span>]',wrap:!1}}),Ws=new T({props:{code:"YmF0Y2glMjAlM0QlMjBkYXRhX2NvbGxhdG9yKHNhbXBsZXMpJTBBJTdCayUzQSUyMHYuc2hhcGUlMjBmb3IlMjBrJTJDJTIwdiUyMGluJTIwYmF0Y2guaXRlbXMoKSU3RA==",highlighted:`batch = data_collator(samples) | |
| {k: v.shape <span class="hljs-keyword">for</span> k, v <span class="hljs-keyword">in</span> batch.items()}`,wrap:!1}}),zs=new T({props:{code:"JTdCJ2F0dGVudGlvbl9tYXNrJyUzQSUyMHRvcmNoLlNpemUoJTVCOCUyQyUyMDY3JTVEKSUyQyUwQSUyMCdpbnB1dF9pZHMnJTNBJTIwdG9yY2guU2l6ZSglNUI4JTJDJTIwNjclNUQpJTJDJTBBJTIwJ3Rva2VuX3R5cGVfaWRzJyUzQSUyMHRvcmNoLlNpemUoJTVCOCUyQyUyMDY3JTVEKSUyQyUwQSUyMCdsYWJlbHMnJTNBJTIwdG9yY2guU2l6ZSglNUI4JTVEKSU3RA==",highlighted:`{<span class="hljs-string">'attention_mask'</span>: torch.Size([<span class="hljs-number">8</span>, <span class="hljs-number">67</span>]), | |
| <span class="hljs-string">'input_ids'</span>: torch.Size([<span class="hljs-number">8</span>, <span class="hljs-number">67</span>]), | |
| <span class="hljs-string">'token_type_ids'</span>: torch.Size([<span class="hljs-number">8</span>, <span class="hljs-number">67</span>]), | |
| <span class="hljs-string">'labels'</span>: torch.Size([<span class="hljs-number">8</span>])}`,wrap:!1}}),Z=new x({props:{$$slots:{default:[Pl]},$$scope:{ctx:d}}}),Ss=new b({props:{title:"μΉμ ν΄μ¦",local:"section-quiz",headingTag:"h2"}}),Ys=new b({props:{title:"1. batched=True μ ν¨κ» Dataset.map() μ μ¬μ©νλ μ£Όμ μ₯μ μ 무μμΈκ°μ?",local:"1-batchedtrue-μ-ν¨κ»-datasetmap-μ-μ¬μ©νλ-μ£Όμ-μ₯μ μ-무μμΈκ°μ",headingTag:"h3"}}),Fs=new pe({props:{choices:[{text:"λ©λͺ¨λ¦¬λ₯Ό λ μ¬μ©ν©λλ€.",explain:"λ λ©λͺ¨λ¦¬ ν¨μ¨μ μΌ μ μμ§λ§, μ΄κ²μ΄ μ£Όμ μ₯μ μ μλλλ€."},{text:"μ¬λ¬ μμ λ₯Ό ν λ²μ μ²λ¦¬νμ¬ ν ν°νλ₯Ό ν¨μ¬ λΉ λ₯΄κ² λ§λλλ€.",explain:"μ λ΅μ λλ€! λ°°μΉλ‘ μ²λ¦¬νλ©΄ λΉ λ₯Έ ν ν¬λμ΄μ κ° μ¬λ¬ μμ λ₯Ό λμμ μμ ν μ μμ΄ μλκ° ν¬κ² ν₯μλ©λλ€.",correct:!0},{text:"μλμΌλ‘ ν¨λ©μ μ²λ¦¬ν΄μ€λλ€.",explain:"λ°°μΉ μ²λ¦¬κ° μλμΌλ‘ ν¨λ©μ μ²λ¦¬νμ§λ μμ΅λλ€ - κ·Έκ²μ λ°μ΄ν° μ½λ μ΄ν°κ° μνν©λλ€."},{text:"λ°μ΄ν°λ₯Ό PyTorch ν μλ‘ λ³νν©λλ€.",explain:"ν μ λ³νμ νμμ μ€μ ν λ λ°μνλ©°, λ°°μΉ λ§€ν μ€μλ λ°μνμ§ μμ΅λλ€."}]}}),qs=new b({props:{title:"2. λ°μ΄ν° μΈνΈμ μ΅λ κΈΈμ΄λ‘ λͺ¨λ μνμ€λ₯Ό ν¨λ©νλ λμ λμ ν¨λ©μ μ¬μ©νλ μ΄μ λ 무μμΈκ°μ?",local:"2-λ°μ΄ν°-μΈνΈμ-μ΅λ-κΈΈμ΄λ‘-λͺ¨λ -μνμ€λ₯Ό-ν¨λ©νλ-λμ -λμ -ν¨λ©μ-μ¬μ©νλ-μ΄μ λ-무μμΈκ°μ",headingTag:"h3"}}),Ps=new pe({props:{choices:[{text:"λμ ν¨λ©μ΄ λͺ¨λΈ μν€ν μ²μ μν΄ μꡬλ©λλ€.",explain:"μλμ, λͺ¨λΈμ κ³ μ ν¨λ©κ³Ό λμ ν¨λ©μ λͺ¨λ μ²λ¦¬ν μ μμ΅λλ€."},{text:"κ° λ°°μΉμ μ΅λ κΈΈμ΄κΉμ§λ§ ν¨λ©νμ¬ κ³μ° μ€λ²ν€λλ₯Ό μ€μ λλ€.",explain:"μ λ΅μ λλ€! λμ ν¨λ©μ λ°μ΄ν° μΈνΈ μ΅λκ°μ΄ μλ λ°°μΉ μ΅λκ°κΉμ§λ§ ν¨λ©νμ¬ ν¨λ© ν ν°μ λν λΆνμν κ³μ°μ νΌν©λλ€.",correct:!0},{text:"λͺ¨λΈ μ νλλ₯Ό ν₯μμν΅λλ€.",explain:"ν¨λ© μ λ΅μ λͺ¨λΈ μ νλμ μ§μ μ μΈ μν₯μ μ£Όμ§ μμ΅λλ€."},{text:"DataCollatorWithPaddingμ μ¬μ©ν λ νμμ λλ€.",explain:"DataCollatorWithPaddingμ΄ λμ ν¨λ©μ κ°λ₯νκ² νμ§λ§, μνλ€λ©΄ μ¬μ ν κ³ μ ν¨λ©μ μ¬μ©ν μ μμ΅λλ€."}]}}),Ks=new b({props:{title:"3. BERT ν ν°νμμ token_type_ids νλλ 무μμ λνλ΄λμ?",local:"3-bert-ν ν°νμμ-tokentypeids-νλλ-무μμ-λνλ΄λμ",headingTag:"h3"}}),Os=new pe({props:{choices:[{text:"μνμ€μμ κ° ν ν°μ μμΉμ λλ€.",explain:"κ·Έκ²μ μμΉ μλ² λ©μ΄μ§, token_type_idsκ° μλλλ€."},{text:"λ¬Έμ₯ μμ μ²λ¦¬ν λ κ° ν ν°μ΄ μ΄λ λ¬Έμ₯μ μνλμ§λ₯Ό λνλ λλ€.",explain:"μ λ΅μ λλ€! token_type_idsλ λ¬Έμ₯ μ μμ μμ 첫 λ²μ§Έ λ¬Έμ₯(0)κ³Ό λ λ²μ§Έ λ¬Έμ₯(1)μ ꡬλΆν©λλ€.",correct:!0},{text:"κ° ν ν°μ μ΄ν μ λ§μ€ν¬μ λλ€.",explain:"μ΄ν μ λ§μ€ν¬λ μ΄λ€ ν ν°μ μ£Όμλ₯Ό κΈ°μΈμΌμ§ λνλ΄λ λ³λμ νλμ λλ€."},{text:"κ° ν ν°μ μ΄ν IDμ λλ€.",explain:"κ·Έκ²μ token_type_idsκ° μλ input_ids νλμ λλ€."}]}}),se=new b({props:{title:"4. load_dataset('glue', 'mrpc') λ‘ λ°μ΄ν° μΈνΈλ₯Ό λ‘λ©ν λ λ λ²μ§Έ μΈμλ 무μμ μ§μ νλμ?",local:"4-loaddatasetglue-mrpc-λ‘-λ°μ΄ν°-μΈνΈλ₯Ό-λ‘λ©ν -λ-λ-λ²μ§Έ-μΈμλ-무μμ-μ§μ νλμ",headingTag:"h3"}}),ee=new pe({props:{choices:[{text:"λ‘λ©ν λ°μ΄ν° μΈνΈμ λ²μ μ λλ€.",explain:"λ²μ μ§μ μ λ€λ₯Έ λ§€κ°λ³μλ₯Ό μ¬μ©ν©λλ€."},{text:"GLUE λ²€μΉλ§ν¬ λ΄μ νΉμ μμ λλ νμ μ§ν©μ λλ€.",explain:"μ λ΅μ λλ€! MRPCλ λ ν° GLUE λ²€μΉλ§ν¬ 컬λ μ λ΄μ νΉμ μμ μ€ νλμ λλ€.",correct:!0},{text:"λ°μ΄ν° μΈνΈμ λΆν (train/validation/test)μ λλ€.",explain:"λΆν μ λ‘λ© νμ μ κ·Όνλ©°, load_dataset νΈμΆμμ μ§μ νμ§ μμ΅λλ€."},{text:"λ°μ΄ν°λ₯Ό λ°νν νμμ λλ€.",explain:"νμμ λ‘λ© ν set_format() λ©μλλ₯Ό μ¬μ©νμ¬ μ€μ ν©λλ€."}]}}),te=new b({props:{title:"5. νλ ¨ μ μ βsentence1βκ³Ό βsentence2β κ°μ μ΄μ μ κ±°νλ λͺ©μ μ 무μμΈκ°μ?",local:"5-νλ ¨-μ μ-sentence1κ³Ό-sentence2-κ°μ-μ΄μ-μ κ±°νλ-λͺ©μ μ-무μμΈκ°μ",headingTag:"h3"}}),le=new pe({props:{choices:[{text:"νλ ¨ μ€ λ©λͺ¨λ¦¬λ₯Ό μ μ½νκΈ° μν΄μμ λλ€.",explain:"μ½κ°μ λ©λͺ¨λ¦¬λ μ μ½λμ§λ§, μ΄κ²μ΄ μ£Όλ μ΄μ λ μλλλ€."},{text:"λͺ¨λΈμ΄ μ΄λ¬ν μμ ν μ€νΈ μ΄μ μμνμ§ μκ³ μ€λ₯κ° λ°μν μ μκΈ° λλ¬Έμ λλ€.",explain:"μ λ΅μ λλ€! λͺ¨λΈμ μμ ν μ€νΈ λ¬Έμμ΄μ΄ μλ μμΉμ ν μλ₯Ό μμν©λλ€. ν μ€νΈ μ΄μ μ μ§νλ©΄ μ€λ₯κ° λ°μν κ²μ λλ€.",correct:!0},{text:"μ΄λ¬ν μ΄λ€μ΄ νκ°μ νμνμ§ μκΈ° λλ¬Έμ λλ€.",explain:"μ¬μ€μ΄κΈ΄ νμ§λ§, μ£Όλ μ΄μ λ λͺ¨λΈμ΄ μμ ν μ€νΈλ₯Ό μ²λ¦¬ν μ μλ€λ κ²μ λλ€."},{text:"νλ ¨ μλλ₯Ό ν¬κ² ν₯μμν€κΈ° λλ¬Έμ λλ€.",explain:"νΈνλμ§ μλ λ°μ΄ν° νμ μΌλ‘ μΈν μ€λ₯ λ°©μ§μ λΉν΄ μλ ν₯μμ λ―Έλ―Έν©λλ€."}]}}),v=new x({props:{$$slots:{default:[Kl]},$$scope:{ctx:d}}}),ne=new zl({props:{source:"https://github.com/huggingface/course/blob/main/chapters/ko/chapter3/2.mdx"}}),{c(){p=u("meta"),$=n(),M=u("p"),U=n(),i(w.$$.fragment),f=n(),i(h.$$.fragment),Me=n(),B=u("p"),B.innerHTML=Wt,ie=n(),i(N.$$.fragment),re=n(),V=u("p"),V.textContent=zt,ce=n(),G=u("p"),G.innerHTML=Et,me=n(),i(H.$$.fragment),ye=n(),i(X.$$.fragment),je=n(),W=u("p"),W.innerHTML=Dt,ue=n(),z=u("p"),z.textContent=St,Je=n(),i(C.$$.fragment),oe=n(),i(E.$$.fragment),Ue=n(),i(D.$$.fragment),$e=n(),S=u("p"),S.innerHTML=Lt,fe=n(),i(k.$$.fragment),Te=n(),L=u("p"),L.innerHTML=Yt,de=n(),i(Y.$$.fragment),he=n(),i(F.$$.fragment),we=n(),q=u("p"),q.innerHTML=Ft,be=n(),i(P.$$.fragment),xe=n(),i(K.$$.fragment),ge=n(),O=u("p"),O.innerHTML=qt,Ce=n(),i(I.$$.fragment),ke=n(),i(ss.$$.fragment),Ie=n(),i(es.$$.fragment),_e=n(),ts=u("p"),ts.innerHTML=Pt,Ae=n(),i(ls.$$.fragment),Qe=n(),i(_.$$.fragment),Re=n(),ns=u("p"),ns.textContent=Kt,Ze=n(),i(as.$$.fragment),ve=n(),i(ps.$$.fragment),Be=n(),Ms=u("p"),Ms.innerHTML=Ot,Ne=n(),i(A.$$.fragment),Ve=n(),is=u("p"),is.innerHTML=sl,Ge=n(),i(rs.$$.fragment),He=n(),cs=u("p"),cs.textContent=el,Xe=n(),i(ms.$$.fragment),We=n(),ys=u("p"),ys.innerHTML=tl,ze=n(),i(js.$$.fragment),Ee=n(),us=u("p"),us.innerHTML=ll,De=n(),Js=u("p"),Js.innerHTML=nl,Se=n(),os=u("p"),os.innerHTML=al,Le=n(),Us=u("p"),Us.textContent=pl,Ye=n(),$s=u("p"),$s.innerHTML=Ml,Fe=n(),fs=u("p"),fs.innerHTML=il,qe=n(),i(Ts.$$.fragment),Pe=n(),ds=u("p"),ds.innerHTML=rl,Ke=n(),hs=u("p"),hs.innerHTML=cl,Oe=n(),i(ws.$$.fragment),st=n(),bs=u("p"),bs.innerHTML=ml,et=n(),xs=u("p"),xs.innerHTML=yl,tt=n(),i(Q.$$.fragment),lt=n(),gs=u("p"),gs.innerHTML=jl,nt=n(),i(Cs.$$.fragment),at=n(),ks=u("p"),ks.textContent=ul,pt=n(),i(Is.$$.fragment),Mt=n(),_s=u("p"),_s.innerHTML=Jl,it=n(),As=u("p"),As.innerHTML=ol,rt=n(),Qs=u("p"),Qs.innerHTML=Ul,ct=n(),i(Rs.$$.fragment),mt=n(),i(Zs.$$.fragment),yt=n(),vs=u("p"),vs.innerHTML=$l,jt=n(),i(R.$$.fragment),ut=n(),Bs=u("p"),Bs.innerHTML=fl,Jt=n(),i(Ns.$$.fragment),ot=n(),Vs=u("p"),Vs.innerHTML=Tl,Ut=n(),i(Gs.$$.fragment),$t=n(),i(Hs.$$.fragment),ft=n(),Xs=u("p"),Xs.innerHTML=dl,Tt=n(),i(Ws.$$.fragment),dt=n(),i(zs.$$.fragment),ht=n(),Es=u("p"),Es.textContent=hl,wt=n(),i(Z.$$.fragment),bt=n(),Ds=u("p"),Ds.textContent=wl,xt=n(),i(Ss.$$.fragment),gt=n(),Ls=u("p"),Ls.textContent=bl,Ct=n(),i(Ys.$$.fragment),kt=n(),i(Fs.$$.fragment),It=n(),i(qs.$$.fragment),_t=n(),i(Ps.$$.fragment),At=n(),i(Ks.$$.fragment),Qt=n(),i(Os.$$.fragment),Rt=n(),i(se.$$.fragment),Zt=n(),i(ee.$$.fragment),vt=n(),i(te.$$.fragment),Bt=n(),i(le.$$.fragment),Nt=n(),i(v.$$.fragment),Vt=n(),i(ne.$$.fragment),Gt=n(),ae=u("p"),this.h()},l(s){const e=Hl("svelte-u9bgzb",document.head);p=J(e,"META",{name:!0,content:!0}),e.forEach(t),$=a(s),M=J(s,"P",{}),Zl(M).forEach(t),U=a(s),r(w.$$.fragment,s),f=a(s),r(h.$$.fragment,s),Me=a(s),B=J(s,"P",{"data-svelte-h":!0}),o(B)!=="svelte-18n4e1w"&&(B.innerHTML=Wt),ie=a(s),r(N.$$.fragment,s),re=a(s),V=J(s,"P",{"data-svelte-h":!0}),o(V)!=="svelte-1yvacnx"&&(V.textContent=zt),ce=a(s),G=J(s,"P",{"data-svelte-h":!0}),o(G)!=="svelte-6tmng3"&&(G.innerHTML=Et),me=a(s),r(H.$$.fragment,s),ye=a(s),r(X.$$.fragment,s),je=a(s),W=J(s,"P",{"data-svelte-h":!0}),o(W)!=="svelte-1quhi9e"&&(W.innerHTML=Dt),ue=a(s),z=J(s,"P",{"data-svelte-h":!0}),o(z)!=="svelte-1b27c1z"&&(z.textContent=St),Je=a(s),r(C.$$.fragment,s),oe=a(s),r(E.$$.fragment,s),Ue=a(s),r(D.$$.fragment,s),$e=a(s),S=J(s,"P",{"data-svelte-h":!0}),o(S)!=="svelte-1f0x3aj"&&(S.innerHTML=Lt),fe=a(s),r(k.$$.fragment,s),Te=a(s),L=J(s,"P",{"data-svelte-h":!0}),o(L)!=="svelte-nbm9uo"&&(L.innerHTML=Yt),de=a(s),r(Y.$$.fragment,s),he=a(s),r(F.$$.fragment,s),we=a(s),q=J(s,"P",{"data-svelte-h":!0}),o(q)!=="svelte-11hwp3u"&&(q.innerHTML=Ft),be=a(s),r(P.$$.fragment,s),xe=a(s),r(K.$$.fragment,s),ge=a(s),O=J(s,"P",{"data-svelte-h":!0}),o(O)!=="svelte-1i632h4"&&(O.innerHTML=qt),Ce=a(s),r(I.$$.fragment,s),ke=a(s),r(ss.$$.fragment,s),Ie=a(s),r(es.$$.fragment,s),_e=a(s),ts=J(s,"P",{"data-svelte-h":!0}),o(ts)!=="svelte-72w04v"&&(ts.innerHTML=Pt),Ae=a(s),r(ls.$$.fragment,s),Qe=a(s),r(_.$$.fragment,s),Re=a(s),ns=J(s,"P",{"data-svelte-h":!0}),o(ns)!=="svelte-1gliuzv"&&(ns.textContent=Kt),Ze=a(s),r(as.$$.fragment,s),ve=a(s),r(ps.$$.fragment,s),Be=a(s),Ms=J(s,"P",{"data-svelte-h":!0}),o(Ms)!=="svelte-1dayjrj"&&(Ms.innerHTML=Ot),Ne=a(s),r(A.$$.fragment,s),Ve=a(s),is=J(s,"P",{"data-svelte-h":!0}),o(is)!=="svelte-fp8ycy"&&(is.innerHTML=sl),Ge=a(s),r(rs.$$.fragment,s),He=a(s),cs=J(s,"P",{"data-svelte-h":!0}),o(cs)!=="svelte-1a5aa9u"&&(cs.textContent=el),Xe=a(s),r(ms.$$.fragment,s),We=a(s),ys=J(s,"P",{"data-svelte-h":!0}),o(ys)!=="svelte-tqglnc"&&(ys.innerHTML=tl),ze=a(s),r(js.$$.fragment,s),Ee=a(s),us=J(s,"P",{"data-svelte-h":!0}),o(us)!=="svelte-13qt3k3"&&(us.innerHTML=ll),De=a(s),Js=J(s,"P",{"data-svelte-h":!0}),o(Js)!=="svelte-1x3j9xz"&&(Js.innerHTML=nl),Se=a(s),os=J(s,"P",{"data-svelte-h":!0}),o(os)!=="svelte-16t7d1w"&&(os.innerHTML=al),Le=a(s),Us=J(s,"P",{"data-svelte-h":!0}),o(Us)!=="svelte-mmugyu"&&(Us.textContent=pl),Ye=a(s),$s=J(s,"P",{"data-svelte-h":!0}),o($s)!=="svelte-9ytk6a"&&($s.innerHTML=Ml),Fe=a(s),fs=J(s,"P",{"data-svelte-h":!0}),o(fs)!=="svelte-1lt9uol"&&(fs.innerHTML=il),qe=a(s),r(Ts.$$.fragment,s),Pe=a(s),ds=J(s,"P",{"data-svelte-h":!0}),o(ds)!=="svelte-1qzyd20"&&(ds.innerHTML=rl),Ke=a(s),hs=J(s,"P",{"data-svelte-h":!0}),o(hs)!=="svelte-rdvnbx"&&(hs.innerHTML=cl),Oe=a(s),r(ws.$$.fragment,s),st=a(s),bs=J(s,"P",{"data-svelte-h":!0}),o(bs)!=="svelte-f19j5a"&&(bs.innerHTML=ml),et=a(s),xs=J(s,"P",{"data-svelte-h":!0}),o(xs)!=="svelte-1if9us4"&&(xs.innerHTML=yl),tt=a(s),r(Q.$$.fragment,s),lt=a(s),gs=J(s,"P",{"data-svelte-h":!0}),o(gs)!=="svelte-1qq8yfu"&&(gs.innerHTML=jl),nt=a(s),r(Cs.$$.fragment,s),at=a(s),ks=J(s,"P",{"data-svelte-h":!0}),o(ks)!=="svelte-1uiluni"&&(ks.textContent=ul),pt=a(s),r(Is.$$.fragment,s),Mt=a(s),_s=J(s,"P",{"data-svelte-h":!0}),o(_s)!=="svelte-1irxq8x"&&(_s.innerHTML=Jl),it=a(s),As=J(s,"P",{"data-svelte-h":!0}),o(As)!=="svelte-15wpq8c"&&(As.innerHTML=ol),rt=a(s),Qs=J(s,"P",{"data-svelte-h":!0}),o(Qs)!=="svelte-1qi6gn3"&&(Qs.innerHTML=Ul),ct=a(s),r(Rs.$$.fragment,s),mt=a(s),r(Zs.$$.fragment,s),yt=a(s),vs=J(s,"P",{"data-svelte-h":!0}),o(vs)!=="svelte-1yf9txj"&&(vs.innerHTML=$l),jt=a(s),r(R.$$.fragment,s),ut=a(s),Bs=J(s,"P",{"data-svelte-h":!0}),o(Bs)!=="svelte-ae4z87"&&(Bs.innerHTML=fl),Jt=a(s),r(Ns.$$.fragment,s),ot=a(s),Vs=J(s,"P",{"data-svelte-h":!0}),o(Vs)!=="svelte-1r7taxf"&&(Vs.innerHTML=Tl),Ut=a(s),r(Gs.$$.fragment,s),$t=a(s),r(Hs.$$.fragment,s),ft=a(s),Xs=J(s,"P",{"data-svelte-h":!0}),o(Xs)!=="svelte-rpp9y6"&&(Xs.innerHTML=dl),Tt=a(s),r(Ws.$$.fragment,s),dt=a(s),r(zs.$$.fragment,s),ht=a(s),Es=J(s,"P",{"data-svelte-h":!0}),o(Es)!=="svelte-1ccey19"&&(Es.textContent=hl),wt=a(s),r(Z.$$.fragment,s),bt=a(s),Ds=J(s,"P",{"data-svelte-h":!0}),o(Ds)!=="svelte-furr9b"&&(Ds.textContent=wl),xt=a(s),r(Ss.$$.fragment,s),gt=a(s),Ls=J(s,"P",{"data-svelte-h":!0}),o(Ls)!=="svelte-1jo93me"&&(Ls.textContent=bl),Ct=a(s),r(Ys.$$.fragment,s),kt=a(s),r(Fs.$$.fragment,s),It=a(s),r(qs.$$.fragment,s),_t=a(s),r(Ps.$$.fragment,s),At=a(s),r(Ks.$$.fragment,s),Qt=a(s),r(Os.$$.fragment,s),Rt=a(s),r(se.$$.fragment,s),Zt=a(s),r(ee.$$.fragment,s),vt=a(s),r(te.$$.fragment,s),Bt=a(s),r(le.$$.fragment,s),Nt=a(s),r(v.$$.fragment,s),Vt=a(s),r(ne.$$.fragment,s),Gt=a(s),ae=J(s,"P",{}),Zl(ae).forEach(t),this.h()},h(){vl(p,"name","hf:doc:metadata"),vl(p,"content",sn)},m(s,e){Xl(document.head,p),l(s,$,e),l(s,M,e),l(s,U,e),c(w,s,e),l(s,f,e),c(h,s,e),l(s,Me,e),l(s,B,e),l(s,ie,e),c(N,s,e),l(s,re,e),l(s,V,e),l(s,ce,e),l(s,G,e),l(s,me,e),c(H,s,e),l(s,ye,e),c(X,s,e),l(s,je,e),l(s,W,e),l(s,ue,e),l(s,z,e),l(s,Je,e),c(C,s,e),l(s,oe,e),c(E,s,e),l(s,Ue,e),c(D,s,e),l(s,$e,e),l(s,S,e),l(s,fe,e),c(k,s,e),l(s,Te,e),l(s,L,e),l(s,de,e),c(Y,s,e),l(s,he,e),c(F,s,e),l(s,we,e),l(s,q,e),l(s,be,e),c(P,s,e),l(s,xe,e),c(K,s,e),l(s,ge,e),l(s,O,e),l(s,Ce,e),c(I,s,e),l(s,ke,e),c(ss,s,e),l(s,Ie,e),c(es,s,e),l(s,_e,e),l(s,ts,e),l(s,Ae,e),c(ls,s,e),l(s,Qe,e),c(_,s,e),l(s,Re,e),l(s,ns,e),l(s,Ze,e),c(as,s,e),l(s,ve,e),c(ps,s,e),l(s,Be,e),l(s,Ms,e),l(s,Ne,e),c(A,s,e),l(s,Ve,e),l(s,is,e),l(s,Ge,e),c(rs,s,e),l(s,He,e),l(s,cs,e),l(s,Xe,e),c(ms,s,e),l(s,We,e),l(s,ys,e),l(s,ze,e),c(js,s,e),l(s,Ee,e),l(s,us,e),l(s,De,e),l(s,Js,e),l(s,Se,e),l(s,os,e),l(s,Le,e),l(s,Us,e),l(s,Ye,e),l(s,$s,e),l(s,Fe,e),l(s,fs,e),l(s,qe,e),c(Ts,s,e),l(s,Pe,e),l(s,ds,e),l(s,Ke,e),l(s,hs,e),l(s,Oe,e),c(ws,s,e),l(s,st,e),l(s,bs,e),l(s,et,e),l(s,xs,e),l(s,tt,e),c(Q,s,e),l(s,lt,e),l(s,gs,e),l(s,nt,e),c(Cs,s,e),l(s,at,e),l(s,ks,e),l(s,pt,e),c(Is,s,e),l(s,Mt,e),l(s,_s,e),l(s,it,e),l(s,As,e),l(s,rt,e),l(s,Qs,e),l(s,ct,e),c(Rs,s,e),l(s,mt,e),c(Zs,s,e),l(s,yt,e),l(s,vs,e),l(s,jt,e),c(R,s,e),l(s,ut,e),l(s,Bs,e),l(s,Jt,e),c(Ns,s,e),l(s,ot,e),l(s,Vs,e),l(s,Ut,e),c(Gs,s,e),l(s,$t,e),c(Hs,s,e),l(s,ft,e),l(s,Xs,e),l(s,Tt,e),c(Ws,s,e),l(s,dt,e),c(zs,s,e),l(s,ht,e),l(s,Es,e),l(s,wt,e),c(Z,s,e),l(s,bt,e),l(s,Ds,e),l(s,xt,e),c(Ss,s,e),l(s,gt,e),l(s,Ls,e),l(s,Ct,e),c(Ys,s,e),l(s,kt,e),c(Fs,s,e),l(s,It,e),c(qs,s,e),l(s,_t,e),c(Ps,s,e),l(s,At,e),c(Ks,s,e),l(s,Qt,e),c(Os,s,e),l(s,Rt,e),c(se,s,e),l(s,Zt,e),c(ee,s,e),l(s,vt,e),c(te,s,e),l(s,Bt,e),c(le,s,e),l(s,Nt,e),c(v,s,e),l(s,Vt,e),c(ne,s,e),l(s,Gt,e),l(s,ae,e),Ht=!0},p(s,[e]){const xl={};e&2&&(xl.$$scope={dirty:e,ctx:s}),C.$set(xl);const gl={};e&2&&(gl.$$scope={dirty:e,ctx:s}),k.$set(gl);const Cl={};e&2&&(Cl.$$scope={dirty:e,ctx:s}),I.$set(Cl);const kl={};e&2&&(kl.$$scope={dirty:e,ctx:s}),_.$set(kl);const Il={};e&2&&(Il.$$scope={dirty:e,ctx:s}),A.$set(Il);const _l={};e&2&&(_l.$$scope={dirty:e,ctx:s}),Q.$set(_l);const Al={};e&2&&(Al.$$scope={dirty:e,ctx:s}),R.$set(Al);const Ql={};e&2&&(Ql.$$scope={dirty:e,ctx:s}),Z.$set(Ql);const Rl={};e&2&&(Rl.$$scope={dirty:e,ctx:s}),v.$set(Rl)},i(s){Ht||(m(w.$$.fragment,s),m(h.$$.fragment,s),m(N.$$.fragment,s),m(H.$$.fragment,s),m(X.$$.fragment,s),m(C.$$.fragment,s),m(E.$$.fragment,s),m(D.$$.fragment,s),m(k.$$.fragment,s),m(Y.$$.fragment,s),m(F.$$.fragment,s),m(P.$$.fragment,s),m(K.$$.fragment,s),m(I.$$.fragment,s),m(ss.$$.fragment,s),m(es.$$.fragment,s),m(ls.$$.fragment,s),m(_.$$.fragment,s),m(as.$$.fragment,s),m(ps.$$.fragment,s),m(A.$$.fragment,s),m(rs.$$.fragment,s),m(ms.$$.fragment,s),m(js.$$.fragment,s),m(Ts.$$.fragment,s),m(ws.$$.fragment,s),m(Q.$$.fragment,s),m(Cs.$$.fragment,s),m(Is.$$.fragment,s),m(Rs.$$.fragment,s),m(Zs.$$.fragment,s),m(R.$$.fragment,s),m(Ns.$$.fragment,s),m(Gs.$$.fragment,s),m(Hs.$$.fragment,s),m(Ws.$$.fragment,s),m(zs.$$.fragment,s),m(Z.$$.fragment,s),m(Ss.$$.fragment,s),m(Ys.$$.fragment,s),m(Fs.$$.fragment,s),m(qs.$$.fragment,s),m(Ps.$$.fragment,s),m(Ks.$$.fragment,s),m(Os.$$.fragment,s),m(se.$$.fragment,s),m(ee.$$.fragment,s),m(te.$$.fragment,s),m(le.$$.fragment,s),m(v.$$.fragment,s),m(ne.$$.fragment,s),Ht=!0)},o(s){y(w.$$.fragment,s),y(h.$$.fragment,s),y(N.$$.fragment,s),y(H.$$.fragment,s),y(X.$$.fragment,s),y(C.$$.fragment,s),y(E.$$.fragment,s),y(D.$$.fragment,s),y(k.$$.fragment,s),y(Y.$$.fragment,s),y(F.$$.fragment,s),y(P.$$.fragment,s),y(K.$$.fragment,s),y(I.$$.fragment,s),y(ss.$$.fragment,s),y(es.$$.fragment,s),y(ls.$$.fragment,s),y(_.$$.fragment,s),y(as.$$.fragment,s),y(ps.$$.fragment,s),y(A.$$.fragment,s),y(rs.$$.fragment,s),y(ms.$$.fragment,s),y(js.$$.fragment,s),y(Ts.$$.fragment,s),y(ws.$$.fragment,s),y(Q.$$.fragment,s),y(Cs.$$.fragment,s),y(Is.$$.fragment,s),y(Rs.$$.fragment,s),y(Zs.$$.fragment,s),y(R.$$.fragment,s),y(Ns.$$.fragment,s),y(Gs.$$.fragment,s),y(Hs.$$.fragment,s),y(Ws.$$.fragment,s),y(zs.$$.fragment,s),y(Z.$$.fragment,s),y(Ss.$$.fragment,s),y(Ys.$$.fragment,s),y(Fs.$$.fragment,s),y(qs.$$.fragment,s),y(Ps.$$.fragment,s),y(Ks.$$.fragment,s),y(Os.$$.fragment,s),y(se.$$.fragment,s),y(ee.$$.fragment,s),y(te.$$.fragment,s),y(le.$$.fragment,s),y(v.$$.fragment,s),y(ne.$$.fragment,s),Ht=!1},d(s){s&&(t($),t(M),t(U),t(f),t(Me),t(B),t(ie),t(re),t(V),t(ce),t(G),t(me),t(ye),t(je),t(W),t(ue),t(z),t(Je),t(oe),t(Ue),t($e),t(S),t(fe),t(Te),t(L),t(de),t(he),t(we),t(q),t(be),t(xe),t(ge),t(O),t(Ce),t(ke),t(Ie),t(_e),t(ts),t(Ae),t(Qe),t(Re),t(ns),t(Ze),t(ve),t(Be),t(Ms),t(Ne),t(Ve),t(is),t(Ge),t(He),t(cs),t(Xe),t(We),t(ys),t(ze),t(Ee),t(us),t(De),t(Js),t(Se),t(os),t(Le),t(Us),t(Ye),t($s),t(Fe),t(fs),t(qe),t(Pe),t(ds),t(Ke),t(hs),t(Oe),t(st),t(bs),t(et),t(xs),t(tt),t(lt),t(gs),t(nt),t(at),t(ks),t(pt),t(Mt),t(_s),t(it),t(As),t(rt),t(Qs),t(ct),t(mt),t(yt),t(vs),t(jt),t(ut),t(Bs),t(Jt),t(ot),t(Vs),t(Ut),t($t),t(ft),t(Xs),t(Tt),t(dt),t(ht),t(Es),t(wt),t(bt),t(Ds),t(xt),t(gt),t(Ls),t(Ct),t(kt),t(It),t(_t),t(At),t(Qt),t(Rt),t(Zt),t(vt),t(Bt),t(Nt),t(Vt),t(Gt),t(ae)),t(p),j(w,s),j(h,s),j(N,s),j(H,s),j(X,s),j(C,s),j(E,s),j(D,s),j(k,s),j(Y,s),j(F,s),j(P,s),j(K,s),j(I,s),j(ss,s),j(es,s),j(ls,s),j(_,s),j(as,s),j(ps,s),j(A,s),j(rs,s),j(ms,s),j(js,s),j(Ts,s),j(ws,s),j(Q,s),j(Cs,s),j(Is,s),j(Rs,s),j(Zs,s),j(R,s),j(Ns,s),j(Gs,s),j(Hs,s),j(Ws,s),j(zs,s),j(Z,s),j(Ss,s),j(Ys,s),j(Fs,s),j(qs,s),j(Ps,s),j(Ks,s),j(Os,s),j(se,s),j(ee,s),j(te,s),j(le,s),j(v,s),j(ne,s)}}}const sn=`{"title":"λ°μ΄ν° μ²λ¦¬","local":"processing-the-data","sections":[{"title":"Hubμμ λ°μ΄ν° μΈνΈ κ°μ Έμ€κΈ°","local":"loading-a-dataset-from-the-hub","sections":[],"depth":3},{"title":"λ°μ΄ν° μΈνΈ μ μ²λ¦¬","local":"preprocessing-a-dataset","sections":[{"title":"λμ ν¨λ©","local":"dynamic-padding","sections":[],"depth":5}],"depth":3},{"title":"μΉμ ν΄μ¦","local":"section-quiz","sections":[{"title":"1. batched=True μ ν¨κ» Dataset.map() μ μ¬μ©νλ μ£Όμ μ₯μ μ 무μμΈκ°μ?","local":"1-batchedtrue-μ-ν¨κ»-datasetmap-μ-μ¬μ©νλ-μ£Όμ-μ₯μ μ-무μμΈκ°μ","sections":[],"depth":3},{"title":"2. λ°μ΄ν° μΈνΈμ μ΅λ κΈΈμ΄λ‘ λͺ¨λ μνμ€λ₯Ό ν¨λ©νλ λμ λμ ν¨λ©μ μ¬μ©νλ μ΄μ λ 무μμΈκ°μ?","local":"2-λ°μ΄ν°-μΈνΈμ-μ΅λ-κΈΈμ΄λ‘-λͺ¨λ -μνμ€λ₯Ό-ν¨λ©νλ-λμ -λμ -ν¨λ©μ-μ¬μ©νλ-μ΄μ λ-무μμΈκ°μ","sections":[],"depth":3},{"title":"3. BERT ν ν°νμμ token_type_ids νλλ 무μμ λνλ΄λμ?","local":"3-bert-ν ν°νμμ-tokentypeids-νλλ-무μμ-λνλ΄λμ","sections":[],"depth":3},{"title":"4. load_dataset('glue', 'mrpc') λ‘ λ°μ΄ν° μΈνΈλ₯Ό λ‘λ©ν λ λ λ²μ§Έ μΈμλ 무μμ μ§μ νλμ?","local":"4-loaddatasetglue-mrpc-λ‘-λ°μ΄ν°-μΈνΈλ₯Ό-λ‘λ©ν -λ-λ-λ²μ§Έ-μΈμλ-무μμ-μ§μ νλμ","sections":[],"depth":3},{"title":"5. νλ ¨ μ μ βsentence1βκ³Ό βsentence2β κ°μ μ΄μ μ κ±°νλ λͺ©μ μ 무μμΈκ°μ?","local":"5-νλ ¨-μ μ-sentence1κ³Ό-sentence2-κ°μ-μ΄μ-μ κ±°νλ-λͺ©μ μ-무μμΈκ°μ","sections":[],"depth":3}],"depth":2}],"depth":1}`;function en(d){return Nl(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class mn extends Vl{constructor(p){super(),Gl(this,p,en,Ol,Bl,{})}}export{mn as component}; | |
Xet Storage Details
- Size:
- 64.2 kB
- Xet hash:
- bda6e0d8406d4624b92b5bf170b4025522db15e924296b179fc9f1ad16d11de8
Β·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.