Buckets:

rtrm's picture
download
raw
64.2 kB
import{s as Bl,o as Nl,n as g}from"../chunks/scheduler.37c15a92.js";import{S as Vl,i as Gl,g as u,s as n,r as i,A as Hl,h as J,f as t,c as a,j as Zl,u as r,x as o,k as vl,y as Xl,a as l,v as c,d as m,t as y,w as j}from"../chunks/index.2bf4358c.js";import{T as x}from"../chunks/Tip.363c041f.js";import{Y as Xt}from"../chunks/Youtube.1e50a667.js";import{C as T}from"../chunks/CodeBlock.4e987730.js";import{C as Wl}from"../chunks/CourseFloatingBanner.9ff4c771.js";import{Q as pe}from"../chunks/Question.668688bc.js";import{H as b,E as zl}from"../chunks/getInferenceSnippets.24b50994.js";function El(d){let p,$='πŸ’‘ <strong>μΆ”κ°€ 자료</strong>: 더 λ§Žμ€ 데이터 μ„ΈνŠΈ λ‘œλ”© 기법과 예제λ₯Ό 보렀면 <a href="https://huggingface.co/docs/datasets/" rel="nofollow">πŸ€— Datasets λ¬Έμ„œ</a>λ₯Ό ν™•μΈν•˜μ„Έμš”.';return{c(){p=u("p"),p.innerHTML=$},l(M){p=J(M,"P",{"data-svelte-h":!0}),o(p)!=="svelte-qls13x"&&(p.innerHTML=$)},m(M,U){l(M,p,U)},p:g,d(M){M&&t(p)}}}function Dl(d){let p,$="이 λͺ…령은 기본적으둜 <em>~/.cache/huggingface/datasets</em>에 데이터 μ„ΈνŠΈλ₯Ό λ‹€μš΄λ‘œλ“œν•˜κ³  μΊμ‹œν•©λ‹ˆλ‹€. 2μž₯μ—μ„œ μ–ΈκΈ‰ν–ˆλ“―μ΄ <code>HF_HOME</code> ν™˜κ²½ λ³€μˆ˜λ₯Ό μ„€μ •ν•˜μ—¬ μΊμ‹œ 폴더λ₯Ό 맞좀 μ„€μ •ν•  수 μžˆμŠ΅λ‹ˆλ‹€.";return{c(){p=u("p"),p.innerHTML=$},l(M){p=J(M,"P",{"data-svelte-h":!0}),o(p)!=="svelte-1t8miz5"&&(p.innerHTML=$)},m(M,U){l(M,p,U)},p:g,d(M){M&&t(p)}}}function Sl(d){let p,$="✏️ <strong>직접 해보기!</strong> ν›ˆλ ¨ μ„ΈνŠΈμ˜ 15번째 μš”μ†Œμ™€ 검증 μ„ΈνŠΈμ˜ 87번째 μš”μ†Œλ₯Ό μ‚΄νŽ΄λ³΄μ„Έμš”. κ·Έλ“€μ˜ λ ˆμ΄λΈ”μ€ λ¬΄μ—‡μΈκ°€μš”?";return{c(){p=u("p"),p.innerHTML=$},l(M){p=J(M,"P",{"data-svelte-h":!0}),o(p)!=="svelte-hik0zm"&&(p.innerHTML=$)},m(M,U){l(M,p,U)},p:g,d(M){M&&t(p)}}}function Ll(d){let p,$='πŸ’‘ <strong>심화 ν•™μŠ΅</strong>: 더 κ³ κΈ‰ 토큰화 기법과 λ‹€μ–‘ν•œ ν† ν¬λ‚˜μ΄μ €κ°€ μž‘λ™ν•˜λŠ” 방식을 μ΄ν•΄ν•˜λ €λ©΄ <a href="https://huggingface.co/docs/transformers/main/en/tokenizer_summary" rel="nofollow">πŸ€— Tokenizers λ¬Έμ„œ</a>와 <a href="https://huggingface.co/learn/cookbook/en/advanced_rag#tokenization-strategies" rel="nofollow">쿑뢁의 토큰화 κ°€μ΄λ“œ</a>λ₯Ό μ‚΄νŽ΄λ³΄μ„Έμš”.';return{c(){p=u("p"),p.innerHTML=$},l(M){p=J(M,"P",{"data-svelte-h":!0}),o(p)!=="svelte-wel7bo"&&(p.innerHTML=$)},m(M,U){l(M,p,U)},p:g,d(M){M&&t(p)}}}function Yl(d){let p,$="✏️ <strong>직접 해보기!</strong> ν›ˆλ ¨ μ„ΈνŠΈμ˜ 15번째 μš”μ†Œλ₯Ό κ°€μ Έμ™€μ„œ 두 λ¬Έμž₯을 λ”°λ‘œλ”°λ‘œ ν† ν°ν™”ν•˜κ³  μŒμœΌλ‘œλ„ ν† ν°ν™”ν•΄λ³΄μ„Έμš”. 두 결과의 차이점은 λ¬΄μ—‡μΈκ°€μš”?";return{c(){p=u("p"),p.innerHTML=$},l(M){p=J(M,"P",{"data-svelte-h":!0}),o(p)!=="svelte-xfqr76"&&(p.innerHTML=$)},m(M,U){l(M,p,U)},p:g,d(M){M&&t(p)}}}function Fl(d){let p,$='πŸ“š <strong>μ„±λŠ₯ 팁</strong>: 효율적인 데이터 처리 기법에 λŒ€ν•œ μžμ„Έν•œ λ‚΄μš©μ€ <a href="https://huggingface.co/docs/datasets/about_arrow" rel="nofollow">πŸ€— Datasets μ„±λŠ₯ κ°€μ΄λ“œ</a>μ—μ„œ 배울 수 μžˆμŠ΅λ‹ˆλ‹€.';return{c(){p=u("p"),p.innerHTML=$},l(M){p=J(M,"P",{"data-svelte-h":!0}),o(p)!=="svelte-4fzn75"&&(p.innerHTML=$)},m(M,U){l(M,p,U)},p:g,d(M){M&&t(p)}}}function ql(d){let p,$='πŸš€ <strong>μ΅œμ ν™” κ°€μ΄λ“œ</strong>: νŒ¨λ”© μ „λž΅κ³Ό TPU 고렀사항을 ν¬ν•¨ν•œ ν›ˆλ ¨ μ„±λŠ₯ μ΅œμ ν™”μ— λŒ€ν•œ μžμ„Έν•œ λ‚΄μš©μ€ <a href="https://huggingface.co/docs/transformers/main/en/performance" rel="nofollow">πŸ€— Transformers μ„±λŠ₯ λ¬Έμ„œ</a>λ₯Ό μ°Έμ‘°ν•˜μ„Έμš”.';return{c(){p=u("p"),p.innerHTML=$},l(M){p=J(M,"P",{"data-svelte-h":!0}),o(p)!=="svelte-15yxb2i"&&(p.innerHTML=$)},m(M,U){l(M,p,U)},p:g,d(M){M&&t(p)}}}function Pl(d){let p,$="✏️ <strong>직접 해보기!</strong> GLUE SST-2 데이터 μ„ΈνŠΈμ—μ„œ μ „μ²˜λ¦¬λ₯Ό λ³΅μ œν•΄λ³΄μ„Έμš”. 쌍이 μ•„λ‹Œ 단일 λ¬Έμž₯으둜 κ΅¬μ„±λ˜μ–΄ μžˆμ–΄ μ•½κ°„ λ‹€λ₯΄μ§€λ§Œ, λ‚˜λ¨Έμ§€λŠ” λ™μΌν•˜κ²Œ 보일 κ²ƒμž…λ‹ˆλ‹€. 더 μ–΄λ €μš΄ 도전을 μœ„ν•΄μ„œλŠ” GLUE μž‘μ—… 쀑 μ–΄λ–€ κ²ƒμ—μ„œλ„ μž‘λ™ν•˜λŠ” μ „μ²˜λ¦¬ ν•¨μˆ˜λ₯Ό μž‘μ„±ν•΄λ³΄μ„Έμš”.",M,U,w='πŸ“– <strong>μΆ”κ°€ μ—°μŠ΅</strong>: <a href="https://huggingface.co/docs/transformers/main/en/notebooks" rel="nofollow">πŸ€— Transformers 예제</a>μ—μ„œ μ΄λŸ¬ν•œ μ‹€μŠ΅ μ˜ˆμ œλ“€μ„ ν™•μΈν•΄λ³΄μ„Έμš”.';return{c(){p=u("p"),p.innerHTML=$,M=n(),U=u("p"),U.innerHTML=w},l(f){p=J(f,"P",{"data-svelte-h":!0}),o(p)!=="svelte-1qahcvz"&&(p.innerHTML=$),M=a(f),U=J(f,"P",{"data-svelte-h":!0}),o(U)!=="svelte-p3aocu"&&(U.innerHTML=w)},m(f,h){l(f,p,h),l(f,M,h),l(f,U,h)},p:g,d(f){f&&(t(p),t(M),t(U))}}}function Kl(d){let p,$="πŸ’‘ <strong>핡심 μš”μ </strong>",M,U,w="<li>μ „μ²˜λ¦¬λ₯Ό 훨씬 λΉ λ₯΄κ²Œ ν•˜λ €λ©΄ <code>Dataset.map()</code>μ—μ„œ <code>batched=True</code>λ₯Ό μ‚¬μš©ν•˜μ„Έμš”</li> <li><code>DataCollatorWithPadding</code>을 μ‚¬μš©ν•œ 동적 νŒ¨λ”©μ΄ κ³ μ • 길이 νŒ¨λ”©λ³΄λ‹€ νš¨μœ¨μ μž…λ‹ˆλ‹€</li> <li>λͺ¨λΈμ˜ μΆ”λ‘  κ²°κ³Όλ¬Ό(수치적 ν…μ„œ, μ˜¬λ°”λ₯Έ μ—΄ 이름)에 맞게 항상 데이터λ₯Ό μ „μ²˜λ¦¬ν•˜μ„Έμš”</li> <li>πŸ€— Datasets λΌμ΄λΈŒλŸ¬λ¦¬λŠ” λŒ€κ·œλͺ¨ 효율적인 데이터 처리λ₯Ό μœ„ν•œ κ°•λ ₯ν•œ 도ꡬλ₯Ό μ œκ³΅ν•©λ‹ˆλ‹€</li>";return{c(){p=u("p"),p.innerHTML=$,M=n(),U=u("ul"),U.innerHTML=w},l(f){p=J(f,"P",{"data-svelte-h":!0}),o(p)!=="svelte-17xh5q0"&&(p.innerHTML=$),M=a(f),U=J(f,"UL",{"data-svelte-h":!0}),o(U)!=="svelte-mib1et"&&(U.innerHTML=w)},m(f,h){l(f,p,h),l(f,M,h),l(f,U,h)},p:g,d(f){f&&(t(p),t(M),t(U))}}}function Ol(d){let p,$,M,U,w,f,h,Me,B,Wt='<a href="/course/chapter2">이전 챕터</a>의 μ˜ˆμ œμ— μ΄μ–΄μ„œ, ν•œ λ°°μΉ˜μ—μ„œ μ‹œν€€μŠ€ λΆ„λ₯˜κΈ°λ₯Ό ν›ˆλ ¨ν•˜λŠ” 방법은 λ‹€μŒκ³Ό κ°™μŠ΅λ‹ˆλ‹€.',ie,N,re,V,zt="λ¬Όλ‘  두 λ¬Έμž₯만으둜 λͺ¨λΈμ„ ν›ˆλ ¨ν•˜λŠ” κ²ƒμœΌλ‘œλŠ” 맀우 쒋은 κ²°κ³Όλ₯Ό 얻을 수 μ—†μŠ΅λ‹ˆλ‹€. 더 λ‚˜μ€ κ²°κ³Όλ₯Ό μ–»μœΌλ €λ©΄ 더 큰 데이터 μ„ΈνŠΈλ₯Ό μ€€λΉ„ν•΄μ•Ό ν•©λ‹ˆλ‹€.",ce,G,Et='이 μ„Ήμ…˜μ—μ„œλŠ” William B. Dolanκ³Ό Chris Brockett의 <a href="https://www.aclweb.org/anthology/I05-5002.pdf" rel="nofollow">λ…Όλ¬Έ</a>μ—μ„œ μ†Œκ°œλœ MRPC(Microsoft Research Paraphrase Corpus) 데이터 μ„ΈνŠΈλ₯Ό 예제둜 μ‚¬μš©ν•˜κ² μŠ΅λ‹ˆλ‹€. 이 데이터 μ„ΈνŠΈλŠ” 5,801개의 λ¬Έμž₯ 쌍으둜 κ΅¬μ„±λ˜μ–΄ 있으며, 각 쌍이 νŒ¨λŸ¬ν”„λ ˆμ΄μ¦ˆμΈμ§€ μ•„λ‹Œμ§€λ₯Ό λ‚˜νƒ€λ‚΄λŠ” λ ˆμ΄λΈ”μ΄ μžˆμŠ΅λ‹ˆλ‹€(즉, 두 λ¬Έμž₯이 같은 μ˜λ―ΈμΈμ§€). 이 μ±•ν„°μ—μ„œ 이 데이터 μ„ΈνŠΈλ₯Ό μ„ νƒν•œ μ΄μœ λŠ” μž‘μ€ 데이터 μ„ΈνŠΈμ΄λ―€λ‘œ ν›ˆλ ¨ μ‹€ν—˜μ„ ν•˜κΈ°μ— 쉽기 λ•Œλ¬Έμž…λ‹ˆλ‹€.',me,H,ye,X,je,W,Dt='Hubμ—λŠ” λͺ¨λΈλΏλ§Œ μ•„λ‹ˆλΌ λ‹€μ–‘ν•œ μ–Έμ–΄λ‘œ 된 μ—¬λŸ¬ 데이터 μ„ΈνŠΈλ„ μžˆμŠ΅λ‹ˆλ‹€. <a href="https://huggingface.co/datasets" rel="nofollow">μ—¬κΈ°</a>μ—μ„œ 데이터 μ„ΈνŠΈλ₯Ό μ°Ύμ•„λ³Ό 수 있으며, 이 μ„Ήμ…˜μ„ μ™„λ£Œν•œ ν›„μ—λŠ” μƒˆλ‘œμš΄ 데이터 μ„ΈνŠΈλ₯Ό λ‘œλ“œν•˜κ³  μ²˜λ¦¬ν•΄λ³΄λŠ” 것을 ꢌμž₯ν•©λ‹ˆλ‹€(<a href="https://huggingface.co/docs/datasets/loading" rel="nofollow">μ—¬κΈ°</a>μ—μ„œ 일반적인 λ¬Έμ„œλ₯Ό μ°Έμ‘°ν•˜μ„Έμš”). ν•˜μ§€λ§Œ μ§€κΈˆμ€ MRPC 데이터 μ„ΈνŠΈμ— μ§‘μ€‘ν•΄λ³΄κ² μŠ΅λ‹ˆλ‹€! 이것은 <a href="https://gluebenchmark.com/" rel="nofollow">GLUE 벀치마크</a>λ₯Ό κ΅¬μ„±ν•˜λŠ” 10개 데이터 μ„ΈνŠΈ 쀑 ν•˜λ‚˜λ‘œ, 10개의 μ„œλ‘œ λ‹€λ₯Έ ν…μŠ€νŠΈ λΆ„λ₯˜ μž‘μ—…μ— 걸쳐 ML λͺ¨λΈμ˜ μ„±λŠ₯을 μΈ‘μ •ν•˜λŠ” 데 μ‚¬μš©λ˜λŠ” ν•™μˆ μ  λ²€μΉ˜λ§ˆν¬μž…λ‹ˆλ‹€.',ue,z,St="πŸ€— Datasets λΌμ΄λΈŒλŸ¬λ¦¬λŠ” Hubμ—μ„œ 데이터 μ„ΈνŠΈλ₯Ό λ‹€μš΄λ‘œλ“œν•˜κ³  μΊμ‹œν•˜λŠ” 맀우 κ°„λ‹¨ν•œ λͺ…령을 μ œκ³΅ν•©λ‹ˆλ‹€. MRPC 데이터 μ„ΈνŠΈλ₯Ό λ‹€μŒκ³Ό 같이 λ‹€μš΄λ‘œλ“œν•  수 μžˆμŠ΅λ‹ˆλ‹€.",Je,C,oe,E,Ue,D,$e,S,Lt="λ³΄μ‹œλ‹€μ‹œν”Ό, ν›ˆλ ¨ μ„ΈνŠΈ, 검증 μ„ΈνŠΈ, ν…ŒμŠ€νŠΈ μ„ΈνŠΈκ°€ ν¬ν•¨λœ <code>DatasetDict</code> 객체λ₯Ό μ–»μŠ΅λ‹ˆλ‹€. 각각은 μ—¬λŸ¬ μ—΄(<code>sentence1</code>, <code>sentence2</code>, <code>label</code>, <code>idx</code>)κ³Ό 가변적인 ν–‰ 수λ₯Ό ν¬ν•¨ν•˜λ©°, μ΄λŠ” 각 μ„ΈνŠΈμ˜ μš”μ†Œ μˆ˜μž…λ‹ˆλ‹€(λ”°λΌμ„œ ν›ˆλ ¨ μ„ΈνŠΈμ—λŠ” 3,668개의 λ¬Έμž₯ 쌍이, 검증 μ„ΈνŠΈμ—λŠ” 408κ°œκ°€, ν…ŒμŠ€νŠΈ μ„ΈνŠΈμ—λŠ” 1,725κ°œκ°€ μžˆμŠ΅λ‹ˆλ‹€).",fe,k,Te,L,Yt="λ”•μ…”λ„ˆλ¦¬μ²˜λŸΌ μΈλ±μ‹±ν•˜μ—¬ <code>raw_datasets</code> 객체의 각 λ¬Έμž₯ μŒμ— μ ‘κ·Όν•  수 μžˆμŠ΅λ‹ˆλ‹€.",de,Y,he,F,we,q,Ft="λ ˆμ΄λΈ”μ΄ 이미 μ •μˆ˜λ‘œ λ˜μ–΄ μžˆμœΌλ―€λ‘œ μ—¬κΈ°μ„œ μ „μ²˜λ¦¬λ₯Ό ν•  ν•„μš”κ°€ μ—†μŠ΅λ‹ˆλ‹€. μ–΄λ–€ μ •μˆ˜κ°€ μ–΄λ–€ λ ˆμ΄λΈ”μ— ν•΄λ‹Ήν•˜λŠ”μ§€ μ•Œμ•„λ³΄λ €λ©΄ <code>raw_train_dataset</code>의 <code>features</code>λ₯Ό κ²€μ‚¬ν•˜λ©΄ λ©λ‹ˆλ‹€. 이것은 각 μ—΄μ˜ νƒ€μž…μ„ μ•Œλ €μ€λ‹ˆλ‹€.",be,P,xe,K,ge,O,qt="λ‚΄λΆ€μ μœΌλ‘œ <code>label</code>은 <code>ClassLabel</code> νƒ€μž…μ΄λ©°, μ •μˆ˜μ™€ λ ˆμ΄λΈ” μ΄λ¦„μ˜ 맀핑이 <em>names</em> 폴더에 μ €μž₯λ˜μ–΄ μžˆμŠ΅λ‹ˆλ‹€. <code>0</code>은 <code>not_equivalent</code>에, <code>1</code>은 <code>equivalent</code>에 ν•΄λ‹Ήν•©λ‹ˆλ‹€.",Ce,I,ke,ss,Ie,es,_e,ts,Pt='데이터 μ„ΈνŠΈλ₯Ό μ „μ²˜λ¦¬ν•˜λ €λ©΄ ν…μŠ€νŠΈλ₯Ό λͺ¨λΈμ΄ 이해할 수 μžˆλŠ” 숫자둜 λ³€ν™˜ν•΄μ•Ό ν•©λ‹ˆλ‹€. <a href="/course/chapter2">이전 챕터</a>μ—μ„œ λ³΄μ•˜λ“―μ΄, μ΄λŠ” ν† ν¬λ‚˜μ΄μ €λ‘œ μˆ˜ν–‰λ©λ‹ˆλ‹€. ν† ν¬λ‚˜μ΄μ €μ— ν•œ λ¬Έμž₯μ΄λ‚˜ λ¬Έμž₯ λͺ©λ‘μ„ μž…λ ₯ν•  수 μžˆμœΌλ―€λ‘œ, λ‹€μŒκ³Ό 같이 각 쌍의 λͺ¨λ“  첫 번째 λ¬Έμž₯κ³Ό λͺ¨λ“  두 번째 λ¬Έμž₯을 직접 토큰화할 수 μžˆμŠ΅λ‹ˆλ‹€.',Ae,ls,Qe,_,Re,ns,Kt="ν•˜μ§€λ§Œ 두 μ‹œν€€μŠ€λ₯Ό λͺ¨λΈμ— μ „λ‹¬ν•˜κΈ°λ§Œ ν•΄μ„œλŠ” 두 λ¬Έμž₯이 νŒ¨λŸ¬ν”„λ ˆμ΄μ¦ˆμΈμ§€ μ•„λ‹Œμ§€ μ˜ˆμΈ‘ν•  수 μ—†μŠ΅λ‹ˆλ‹€. 두 μ‹œν€€μŠ€λ₯Ό 쌍으둜 μ²˜λ¦¬ν•˜κ³  μ μ ˆν•œ μ „μ²˜λ¦¬λ₯Ό μ μš©ν•΄μ•Ό ν•©λ‹ˆλ‹€. λ‹€ν–‰νžˆ ν† ν¬λ‚˜μ΄μ €λŠ” ν•œ 쌍의 μ‹œν€€μŠ€λ₯Ό λ°›μ•„μ„œ BERT λͺ¨λΈμ΄ κΈ°λŒ€ν•˜λŠ” λ°©μ‹μœΌλ‘œ μ€€λΉ„ν•  μˆ˜λ„ μžˆμŠ΅λ‹ˆλ‹€.",Ze,as,ve,ps,Be,Ms,Ot='<a href="/course/chapter2">2μž₯</a>μ—μ„œ <code>input_ids</code>와 <code>attention_mask</code> 킀에 λŒ€ν•΄ λ…Όμ˜ν–ˆμ§€λ§Œ, <code>token_type_ids</code>에 λŒ€ν•œ μ΄μ•ΌκΈ°λŠ” λ―Έλ€„λ‘μ—ˆμŠ΅λ‹ˆλ‹€. 이 μ˜ˆμ œμ—μ„œ 이것은 μž…λ ₯의 μ–΄λŠ 뢀뢄이 첫 번째 λ¬Έμž₯이고 μ–΄λŠ 뢀뢄이 두 번째 λ¬Έμž₯인지 λͺ¨λΈμ— μ•Œλ €μ£ΌλŠ” 역할을 ν•©λ‹ˆλ‹€.',Ne,A,Ve,is,sl="<code>input_ids</code> μ•ˆμ˜ IDλ₯Ό λ‹€μ‹œ λ‹¨μ–΄λ‘œ λ””μ½”λ”©ν•˜λ©΄",Ge,rs,He,cs,el="λ‹€μŒμ„ μ–»μŠ΅λ‹ˆλ‹€.",Xe,ms,We,ys,tl="λ”°λΌμ„œ λͺ¨λΈμ€ 두 λ¬Έμž₯이 μžˆμ„ λ•Œ μž…λ ₯이 <code>[CLS] sentence1 [SEP] sentence2 [SEP]</code> ν˜•νƒœμ΄κΈ°λ₯Ό κΈ°λŒ€ν•œλ‹€λŠ” 것을 μ•Œ 수 μžˆμŠ΅λ‹ˆλ‹€. 이λ₯Ό <code>token_type_ids</code>와 맞좰보면",ze,js,Ee,us,ll="λ³΄μ‹œλ‹€μ‹œν”Ό, <code>[CLS] sentence1 [SEP]</code>에 ν•΄λ‹Ήν•˜λŠ” μž…λ ₯ 뢀뢄은 λͺ¨λ‘ 토큰 νƒ€μž… IDκ°€ <code>0</code>이고, <code>sentence2 [SEP]</code>에 ν•΄λ‹Ήν•˜λŠ” λ‹€λ₯Έ 뢀뢄듀은 λͺ¨λ‘ 토큰 νƒ€μž… IDκ°€ <code>1</code>μž…λ‹ˆλ‹€.",De,Js,nl="λ‹€λ₯Έ 체크포인트(checkpoint)λ₯Ό μ„ νƒν•˜λ©΄ ν† ν°ν™”λœ μž…λ ₯에 <code>token_type_ids</code>κ°€ λ°˜λ“œμ‹œ μžˆμ§€λŠ” μ•Šλ‹€λŠ” 점에 μ£Όμ˜ν•˜μ„Έμš”(예λ₯Ό λ“€μ–΄, DistilBERT λͺ¨λΈμ„ μ‚¬μš©ν•˜λ©΄ λ°˜ν™˜λ˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€). λͺ¨λΈμ΄ 사전 ν›ˆλ ¨ 쀑에 이λ₯Ό λ³Έ 적이 μžˆμ–΄μ„œ 무엇을 ν•΄μ•Ό ν• μ§€ μ•Œ λ•Œλ§Œ λ°˜ν™˜λ©λ‹ˆλ‹€.",Se,os,al='μ—¬κΈ°μ„œ BERTλŠ” 토큰 νƒ€μž… ID둜 사전 ν›ˆλ ¨λ˜μ—ˆμœΌλ©°, <a href="/course/chapter1">1μž₯</a>μ—μ„œ μ΄μ•ΌκΈ°ν•œ λ§ˆμŠ€ν¬λ“œ μ–Έμ–΄ λͺ¨λΈλ§ λͺ©ν‘œ 외에도 <em>λ‹€μŒ λ¬Έμž₯ 예츑</em>μ΄λΌλŠ” μΆ”κ°€ λͺ©ν‘œλ₯Ό κ°€μ§€κ³  μžˆμŠ΅λ‹ˆλ‹€. 이 μž‘μ—…μ˜ λͺ©ν‘œλŠ” λ¬Έμž₯ 쌍 κ°„μ˜ 관계λ₯Ό λͺ¨λΈλ§ν•˜λŠ” κ²ƒμž…λ‹ˆλ‹€.',Le,Us,pl="λ‹€μŒ λ¬Έμž₯ μ˜ˆμΈ‘μ—μ„œλŠ” λͺ¨λΈμ— λ¬Έμž₯ 쌍(λ¬΄μž‘μœ„λ‘œ λ§ˆμŠ€ν‚Ήλœ 토큰과 ν•¨κ»˜)이 제곡되고 두 번째 λ¬Έμž₯이 첫 번째 λ¬Έμž₯을 λ”°λ₯΄λŠ”μ§€ μ˜ˆμΈ‘ν•˜λ„λ‘ μš”μ²­λ°›μŠ΅λ‹ˆλ‹€. μž‘μ—…μ„ 쉽지 μ•Šκ²Œ λ§Œλ“€κΈ° μœ„ν•΄, 절반의 κ²½μš°μ—λŠ” λ¬Έμž₯듀이 μΆ”μΆœλœ 원본 λ¬Έμ„œμ—μ„œ μ„œλ‘œλ₯Ό λ”°λ₯΄κ³ , λ‚˜λ¨Έμ§€ 절반의 κ²½μš°μ—λŠ” 두 λ¬Έμž₯이 μ„œλ‘œ λ‹€λ₯Έ λ¬Έμ„œμ—μ„œ λ‚˜μ˜΅λ‹ˆλ‹€.",Ye,$s,Ml="일반적으둜 ν† ν°ν™”λœ μž…λ ₯에 <code>token_type_ids</code>κ°€ μžˆλŠ”μ§€ 여뢀에 λŒ€ν•΄ κ±±μ •ν•  ν•„μš”λŠ” μ—†μŠ΅λ‹ˆλ‹€. ν† ν¬λ‚˜μ΄μ €μ™€ λͺ¨λΈμ— λ™μΌν•œ 체크포인트(checkpoint)λ₯Ό μ‚¬μš©ν•˜λŠ” ν•œ, ν† ν¬λ‚˜μ΄μ €κ°€ λͺ¨λΈμ— μ œκ³΅ν•΄μ•Ό ν•  것을 μ•Œκ³  μžˆμœΌλ―€λ‘œ λͺ¨λ“  것이 잘 될 κ²ƒμž…λ‹ˆλ‹€.",Fe,fs,il='이제 ν† ν¬λ‚˜μ΄μ €κ°€ ν•œ 쌍의 λ¬Έμž₯을 μ–΄λ–»κ²Œ μ²˜λ¦¬ν•  수 μžˆλŠ”μ§€ λ³΄μ•˜μœΌλ―€λ‘œ, 이λ₯Ό μ‚¬μš©ν•˜μ—¬ 전체 데이터 μ„ΈνŠΈλ₯Ό 토큰화할 수 μžˆμŠ΅λ‹ˆλ‹€: <a href="/course/chapter2">이전 챕터</a>μ—μ„œμ²˜λŸΌ, 첫 번째 λ¬Έμž₯ λͺ©λ‘κ³Ό 두 번째 λ¬Έμž₯ λͺ©λ‘μ„ μ œκ³΅ν•˜μ—¬ ν† ν¬λ‚˜μ΄μ €μ— λ¬Έμž₯ 쌍 λͺ©λ‘μ„ μž…λ ₯ν•  수 μžˆμŠ΅λ‹ˆλ‹€. μ΄λŠ” <a href="/course/chapter2">2μž₯</a>μ—μ„œ λ³Έ νŒ¨λ”©κ³Ό μƒλž΅ μ˜΅μ…˜κ³Όλ„ ν˜Έν™˜λ©λ‹ˆλ‹€. λ”°λΌμ„œ ν›ˆλ ¨ 데이터 μ„ΈνŠΈλ₯Ό μ „μ²˜λ¦¬ν•˜λŠ” ν•œ κ°€μ§€ 방법은',qe,Ts,Pe,ds,rl='이것은 잘 μž‘λ™ν•˜μ§€λ§Œ, λ”•μ…”λ„ˆλ¦¬(ν‚€λŠ” <code>input_ids</code>, <code>attention_mask</code>, <code>token_type_ids</code>이고 값은 λͺ©λ‘μ˜ λͺ©λ‘)λ₯Ό λ°˜ν™˜ν•œλ‹€λŠ” 단점이 μžˆμŠ΅λ‹ˆλ‹€. λ˜ν•œ 토큰화 쀑에 전체 데이터 μ„ΈνŠΈλ₯Ό λ©”λͺ¨λ¦¬μ— μ €μž₯ν•  수 μžˆλŠ” μΆ©λΆ„ν•œ RAM이 μžˆλŠ” κ²½μš°μ—λ§Œ μž‘λ™ν•©λ‹ˆλ‹€(πŸ€— Datasets 라이브러리의 데이터 μ„ΈνŠΈλŠ” λ””μŠ€ν¬μ— μ €μž₯된 <a href="https://arrow.apache.org/" rel="nofollow">Apache Arrow</a> νŒŒμΌμ΄λ―€λ‘œ, μš”μ²­ν•œ μƒ˜ν”Œλ§Œ λ©”λͺ¨λ¦¬μ— λ‘œλ“œλ©λ‹ˆλ‹€).',Ke,hs,cl='데이터λ₯Ό 데이터 μ„ΈνŠΈλ‘œ μœ μ§€ν•˜λ €λ©΄ <a href="https://huggingface.co/docs/datasets/package_reference/main_classes#datasets.Dataset.map" rel="nofollow"><code>Dataset.map()</code></a> λ©”μ†Œλ“œλ₯Ό μ‚¬μš©ν•˜κ² μŠ΅λ‹ˆλ‹€. μ΄λŠ” 토큰화 μ΄μƒμ˜ μ „μ²˜λ¦¬κ°€ ν•„μš”ν•œ 경우 좔가적인 μœ μ—°μ„±λ„ μ œκ³΅ν•©λ‹ˆλ‹€. <code>map()</code> λ©”μ†Œλ“œλŠ” 데이터 μ„ΈνŠΈμ˜ 각 μš”μ†Œμ— ν•¨μˆ˜λ₯Ό μ μš©ν•˜μ—¬ μž‘λ™ν•˜λ―€λ‘œ, μž…λ ₯을 ν† ν°ν™”ν•˜λŠ” ν•¨μˆ˜λ₯Ό μ •μ˜ν•΄λ³΄κ² μŠ΅λ‹ˆλ‹€.',Oe,ws,st,bs,ml='이 ν•¨μˆ˜λŠ” λ”•μ…”λ„ˆλ¦¬(데이터 μ„ΈνŠΈμ˜ ν•­λͺ©κ³Ό 같은)λ₯Ό λ°›μ•„μ„œ <code>input_ids</code>, <code>attention_mask</code>, <code>token_type_ids</code> ν‚€κ°€ μžˆλŠ” μƒˆ λ”•μ…”λ„ˆλ¦¬λ₯Ό λ°˜ν™˜ν•©λ‹ˆλ‹€. <code>example</code> λ”•μ…”λ„ˆλ¦¬μ— μ—¬λŸ¬ μƒ˜ν”Œμ΄ ν¬ν•¨λ˜μ–΄ μžˆμ–΄λ„(각 ν‚€κ°€ λ¬Έμž₯ λͺ©λ‘μœΌλ‘œ) μž‘λ™ν•œλ‹€λŠ” 점에 μ£Όλͺ©ν•˜μ„Έμš”. μ•žμ„œ λ³Έ κ²ƒμ²˜λŸΌ <code>tokenizer</code>λŠ” λ¬Έμž₯ 쌍의 λͺ©λ‘μ—μ„œ μž‘λ™ν•˜κΈ° λ•Œλ¬Έμž…λ‹ˆλ‹€. 이λ₯Ό 톡해 <code>map()</code> ν˜ΈμΆœμ—μ„œ <code>batched=True</code> μ˜΅μ…˜μ„ μ‚¬μš©ν•  수 있으며, μ΄λŠ” 토큰화λ₯Ό 크게 가속화할 κ²ƒμž…λ‹ˆλ‹€. <code>tokenizer</code>λŠ” <a href="https://github.com/huggingface/tokenizers" rel="nofollow">πŸ€— Tokenizers</a> 라이브러리의 Rust둜 μž‘μ„±λœ ν† ν¬λ‚˜μ΄μ €λ‘œ λ’·λ°›μΉ¨λ©λ‹ˆλ‹€. 이 ν† ν¬λ‚˜μ΄μ €λŠ” 맀우 λΉ λ₯Ό 수 μžˆμ§€λ§Œ, ν•œ λ²ˆμ— λ§Žμ€ μž…λ ₯을 μ œκ³΅ν•΄μ•Όλ§Œ κ·Έλ ‡μŠ΅λ‹ˆλ‹€.',et,xs,yl="μ§€κΈˆμ€ 토큰화 ν•¨μˆ˜μ—μ„œ <code>padding</code> 인수λ₯Ό λΉΌλ‘” 것에 μ£Όλͺ©ν•˜μ„Έμš”. λͺ¨λ“  μƒ˜ν”Œμ„ μ΅œλŒ€ 길이둜 νŒ¨λ”©ν•˜λŠ” 것은 νš¨μœ¨μ μ΄μ§€ μ•ŠκΈ° λ•Œλ¬Έμž…λ‹ˆλ‹€. 배치λ₯Ό λ§Œλ“€ λ•Œ μƒ˜ν”Œμ„ νŒ¨λ”©ν•˜λŠ” 것이 더 μ’‹μŠ΅λ‹ˆλ‹€. 그러면 ν•΄λ‹Ή 배치의 μ΅œλŒ€ κΈΈμ΄κΉŒμ§€λ§Œ νŒ¨λ”©ν•˜λ©΄ 되고, 전체 데이터 μ„ΈνŠΈμ˜ μ΅œλŒ€ κΈΈμ΄κΉŒμ§€ νŒ¨λ”©ν•  ν•„μš”κ°€ μ—†κΈ° λ•Œλ¬Έμž…λ‹ˆλ‹€. μž…λ ₯의 길이가 맀우 λ‹€μ–‘ν•  λ•Œ λ§Žμ€ μ‹œκ°„κ³Ό 처리 λŠ₯λ ₯을 μ ˆμ•½ν•  수 μžˆμŠ΅λ‹ˆλ‹€!",tt,Q,lt,gs,jl="λ‹€μŒμ€ λͺ¨λ“  데이터 μ„ΈνŠΈμ— 토큰화 ν•¨μˆ˜λ₯Ό ν•œ λ²ˆμ— μ μš©ν•˜λŠ” λ°©λ²•μž…λ‹ˆλ‹€. <code>map</code> ν˜ΈμΆœμ—μ„œ <code>batched=True</code>λ₯Ό μ‚¬μš©ν•˜λ―€λ‘œ ν•¨μˆ˜κ°€ 데이터 μ„ΈνŠΈμ˜ 각 μš”μ†Œμ— κ°œλ³„μ μœΌλ‘œκ°€ μ•„λ‹ˆλΌ μ—¬λŸ¬ μš”μ†Œμ— ν•œ λ²ˆμ— μ μš©λ©λ‹ˆλ‹€. 이λ₯Ό 톡해 더 λΉ λ₯Έ μ „μ²˜λ¦¬κ°€ κ°€λŠ₯ν•©λ‹ˆλ‹€.",nt,Cs,at,ks,ul="πŸ€— Datasets λΌμ΄λΈŒλŸ¬λ¦¬κ°€ 이 처리λ₯Ό μ μš©ν•˜λŠ” 방식은 μ „μ²˜λ¦¬ ν•¨μˆ˜κ°€ λ°˜ν™˜ν•˜λŠ” λ”•μ…”λ„ˆλ¦¬μ˜ 각 킀에 λŒ€ν•΄ 데이터 μ„ΈνŠΈμ— μƒˆ ν•„λ“œλ₯Ό μΆ”κ°€ν•˜λŠ” κ²ƒμž…λ‹ˆλ‹€.",pt,Is,Mt,_s,Jl="<code>num_proc</code> 인수λ₯Ό μ „λ‹¬ν•˜μ—¬ <code>map()</code>으둜 μ „μ²˜λ¦¬ ν•¨μˆ˜λ₯Ό μ μš©ν•  λ•Œ λ©€ν‹°ν”„λ‘œμ„Έμ‹±μ„ μ‚¬μš©ν•  μˆ˜λ„ μžˆμŠ΅λ‹ˆλ‹€. πŸ€— Tokenizers λΌμ΄λΈŒλŸ¬λ¦¬κ°€ 이미 μ—¬λŸ¬ μŠ€λ ˆλ“œλ₯Ό μ‚¬μš©ν•˜μ—¬ μƒ˜ν”Œμ„ 더 λΉ λ₯΄κ²Œ ν† ν°ν™”ν•˜λ―€λ‘œ μ—¬κΈ°μ„œλŠ” 이λ₯Ό μ‚¬μš©ν•˜μ§€ μ•Šμ•˜μ§€λ§Œ, 이 λΌμ΄λΈŒλŸ¬λ¦¬κ°€ λ’·λ°›μΉ¨ν•˜λŠ” λΉ λ₯Έ ν† ν¬λ‚˜μ΄μ €λ₯Ό μ‚¬μš©ν•˜μ§€ μ•ŠλŠ”λ‹€λ©΄ μ „μ²˜λ¦¬ 속도λ₯Ό 높일 수 μžˆμŠ΅λ‹ˆλ‹€.",it,As,ol="우리의 <code>tokenize_function</code>은 <code>input_ids</code>, <code>attention_mask</code>, <code>token_type_ids</code> ν‚€κ°€ μžˆλŠ” λ”•μ…”λ„ˆλ¦¬λ₯Ό λ°˜ν™˜ν•˜λ―€λ‘œ, 이 μ„Έ ν•„λ“œκ°€ 데이터 μ„ΈνŠΈμ˜ λͺ¨λ“  뢄할에 μΆ”κ°€λ©λ‹ˆλ‹€. μ „μ²˜λ¦¬ ν•¨μˆ˜κ°€ <code>map()</code>을 μ μš©ν•œ 데이터 μ„ΈνŠΈμ˜ κΈ°μ‘΄ 킀에 λŒ€ν•œ μƒˆ 값을 λ°˜ν™˜ν•œλ‹€λ©΄ κΈ°μ‘΄ ν•„λ“œλ₯Ό λ³€κ²½ν•  μˆ˜λ„ μžˆμ—ˆμ„ κ²ƒμž…λ‹ˆλ‹€.",rt,Qs,Ul="λ§ˆμ§€λ§‰μœΌλ‘œ ν•΄μ•Ό ν•  일은 μš”μ†Œλ“€μ„ 배치둜 묢을 λ•Œ λͺ¨λ“  예제λ₯Ό κ°€μž₯ κΈ΄ μš”μ†Œμ˜ 길이둜 νŒ¨λ”©ν•˜λŠ” κ²ƒμž…λ‹ˆλ‹€ β€” 이 기법을 <em>동적 νŒ¨λ”©</em>이라고 ν•©λ‹ˆλ‹€.",ct,Rs,mt,Zs,yt,vs,$l="배치 λ‚΄μ—μ„œ μƒ˜ν”Œλ“€μ„ ν•¨κ»˜ λ°°μΉ˜ν•˜λŠ” 역할을 ν•˜λŠ” ν•¨μˆ˜λ₯Ό <em>collate function</em>이라고 ν•©λ‹ˆλ‹€. μ΄λŠ” <code>DataLoader</code>λ₯Ό ꡬ좕할 λ•Œ 전달할 수 μžˆλŠ” 인수둜, 기본값은 μƒ˜ν”Œμ„ PyTorch ν…μ„œλ‘œ λ³€ν™˜ν•˜κ³  μ—°κ²°ν•˜λŠ” ν•¨μˆ˜μž…λ‹ˆλ‹€(μš”μ†Œκ°€ λͺ©λ‘, νŠœν”Œ λ˜λŠ” λ”•μ…”λ„ˆλ¦¬μΈ 경우 μž¬κ·€μ μœΌλ‘œ). 우리의 경우 μž…λ ₯이 λͺ¨λ‘ 같은 크기가 μ•„λ‹ˆλ―€λ‘œ 이것은 λΆˆκ°€λŠ₯ν•  κ²ƒμž…λ‹ˆλ‹€. μš°λ¦¬λŠ” μ˜λ„μ μœΌλ‘œ νŒ¨λ”©μ„ μ—°κΈ°ν•˜μ—¬ 각 λ°°μΉ˜μ—μ„œλ§Œ ν•„μš”μ— 따라 μ μš©ν•˜κ³  λ§Žμ€ νŒ¨λ”©μ΄ μžˆλŠ” μ§€λ‚˜μΉ˜κ²Œ κΈ΄ μž…λ ₯을 ν”Όν–ˆμŠ΅λ‹ˆλ‹€. 이것은 ν›ˆλ ¨μ„ μƒλ‹Ήνžˆ 가속화할 κ²ƒμ΄μ§€λ§Œ, TPUμ—μ„œ ν›ˆλ ¨ν•˜λŠ” 경우 문제λ₯Ό μΌμœΌν‚¬ 수 μžˆλ‹€λŠ” 점에 μ£Όμ˜ν•˜μ„Έμš” β€” TPUλŠ” μΆ”κ°€ νŒ¨λ”©μ΄ ν•„μš”ν•˜λ”λΌλ„ κ³ μ •λœ λͺ¨μ–‘을 μ„ ν˜Έν•©λ‹ˆλ‹€.",jt,R,ut,Bs,fl="μ‹€μ œλ‘œ 이λ₯Ό μˆ˜ν–‰ν•˜λ €λ©΄ ν•¨κ»˜ λ°°μΉ˜ν•˜λ €λŠ” 데이터 μ„ΈνŠΈ ν•­λͺ©μ— μ μ ˆν•œ μ–‘μ˜ νŒ¨λ”©μ„ μ μš©ν•  collate function을 μ •μ˜ν•΄μ•Ό ν•©λ‹ˆλ‹€. λ‹€ν–‰νžˆ πŸ€— Transformers λΌμ΄λΈŒλŸ¬λ¦¬λŠ” <code>DataCollatorWithPadding</code>을 톡해 μ΄λŸ¬ν•œ ν•¨μˆ˜λ₯Ό μ œκ³΅ν•©λ‹ˆλ‹€. μΈμŠ€ν„΄μŠ€ν™”ν•  λ•Œ ν† ν¬λ‚˜μ΄μ €λ₯Ό λ°›μ•„μ„œ(μ–΄λ–€ νŒ¨λ”© 토큰을 μ‚¬μš©ν• μ§€, λͺ¨λΈμ΄ μž…λ ₯의 μ™Όμͺ½ λ˜λŠ” 였λ₯Έμͺ½μ— νŒ¨λ”©μ„ κΈ°λŒ€ν•˜λŠ”μ§€ μ•ŒκΈ° μœ„ν•΄) ν•„μš”ν•œ λͺ¨λ“  것을 μˆ˜ν–‰ν•©λ‹ˆλ‹€.",Jt,Ns,ot,Vs,Tl="이 μƒˆλ‘œμš΄ 도ꡬλ₯Ό ν…ŒμŠ€νŠΈν•˜κΈ° μœ„ν•΄, ν•¨κ»˜ λ°°μΉ˜ν•˜κ³  싢은 ν›ˆλ ¨ μ„ΈνŠΈμ—μ„œ λͺ‡ 개의 μƒ˜ν”Œμ„ κ°€μ Έμ™€λ³΄κ² μŠ΅λ‹ˆλ‹€. μ—¬κΈ°μ„œλŠ” <code>idx</code>, <code>sentence1</code>, <code>sentence2</code> 열을 μ œκ±°ν•©λ‹ˆλ‹€. 이듀은 ν•„μš”ν•˜μ§€ μ•Šκ³  λ¬Έμžμ—΄μ„ ν¬ν•¨ν•˜κ³  있으며(λ¬Έμžμ—΄λ‘œλŠ” ν…μ„œλ₯Ό λ§Œλ“€ 수 μ—†μŒ), 배치의 각 ν•­λͺ© 길이λ₯Ό μ‚΄νŽ΄λ³΄κ² μŠ΅λ‹ˆλ‹€.",Ut,Gs,$t,Hs,ft,Xs,dl="λ‹Ήμ—°νžˆ 32λΆ€ν„° 67κΉŒμ§€ λ‹€μ–‘ν•œ 길이의 μƒ˜ν”Œμ„ μ–»μŠ΅λ‹ˆλ‹€. 동적 νŒ¨λ”©μ€ 이 배치의 μƒ˜ν”Œλ“€μ΄ λͺ¨λ‘ 배치 λ‚΄ μ΅œλŒ€ 길이인 67둜 νŒ¨λ”©λ˜μ–΄μ•Ό 함을 μ˜λ―Έν•©λ‹ˆλ‹€. 동적 νŒ¨λ”©μ΄ μ—†λ‹€λ©΄, λͺ¨λ“  μƒ˜ν”Œμ΄ 전체 데이터 μ„ΈνŠΈμ˜ μ΅œλŒ€ κΈΈμ΄λ‚˜ λͺ¨λΈμ΄ 받을 수 μžˆλŠ” μ΅œλŒ€ 길이둜 νŒ¨λ”©λ˜μ–΄μ•Ό ν•  κ²ƒμž…λ‹ˆλ‹€. <code>data_collator</code>κ°€ 배치λ₯Ό λ™μ μœΌλ‘œ μ˜¬λ°”λ₯΄κ²Œ νŒ¨λ”©ν•˜λŠ”μ§€ λ‹€μ‹œ ν™•μΈν•΄λ³΄κ² μŠ΅λ‹ˆλ‹€.",Tt,Ws,dt,zs,ht,Es,hl="μ’‹μ•„ λ³΄μž…λ‹ˆλ‹€! 이제 μ›μ‹œ ν…μŠ€νŠΈμ—μ„œ λͺ¨λΈμ΄ μ²˜λ¦¬ν•  수 μžˆλŠ” λ°°μΉ˜κΉŒμ§€ λ§Œλ“€μ—ˆμœΌλ―€λ‘œ, λ―Έμ„Έ μ‘°μ •ν•  μ€€λΉ„κ°€ λ˜μ—ˆμŠ΅λ‹ˆλ‹€!",wt,Z,bt,Ds,wl="μ™„λ²½ν•©λ‹ˆλ‹€! 이제 πŸ€— Datasets 라이브러리의 μ΅œμ‹  λͺ¨λ²” μ‚¬λ‘€λ‘œ 데이터λ₯Ό μ „μ²˜λ¦¬ν–ˆμœΌλ―€λ‘œ, μ΅œμ‹  Trainer APIλ₯Ό μ‚¬μš©ν•˜μ—¬ λͺ¨λΈμ„ ν›ˆλ ¨ν•  μ€€λΉ„κ°€ λ˜μ—ˆμŠ΅λ‹ˆλ‹€. λ‹€μŒ μ„Ήμ…˜μ—μ„œλŠ” Hugging Face μƒνƒœκ³„μ—μ„œ μ‚¬μš©ν•  수 μžˆλŠ” μ΅œμ‹  κΈ°λŠ₯κ³Ό μ΅œμ ν™”λ₯Ό μ‚¬μš©ν•˜μ—¬ λͺ¨λΈμ„ 효과적으둜 λ―Έμ„Έ μ‘°μ •ν•˜λŠ” 방법을 λ³΄μ—¬λ“œλ¦¬κ² μŠ΅λ‹ˆλ‹€.",xt,Ss,gt,Ls,bl="데이터 처리 κ°œλ…μ— λŒ€ν•œ 이해도λ₯Ό ν…ŒμŠ€νŠΈν•΄λ³΄μ„Έμš”.",Ct,Ys,kt,Fs,It,qs,_t,Ps,At,Ks,Qt,Os,Rt,se,Zt,ee,vt,te,Bt,le,Nt,v,Vt,ne,Gt,ae,Ht;return w=new b({props:{title:"데이터 처리",local:"processing-the-data",headingTag:"h1"}}),h=new Wl({props:{chapter:3,classNames:"absolute z-10 right-0 top-0",notebooks:[{label:"Google Colab",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/en/chapter3/section2.ipynb"},{label:"Aws Studio",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/en/chapter3/section2.ipynb"}]}}),N=new T({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdG9yY2gub3B0aW0lMjBpbXBvcnQlMjBBZGFtVyUwQWZyb20lMjB0cmFuc2Zvcm1lcnMlMjBpbXBvcnQlMjBBdXRvVG9rZW5pemVyJTJDJTIwQXV0b01vZGVsRm9yU2VxdWVuY2VDbGFzc2lmaWNhdGlvbiUwQSUwQSUyMyUyMCVFQyU5RCVCNCVFQyVBMCU4NCVFQSVCMyVCQyUyMCVFQiU4RiU5OSVFQyU5RCVCQyUwQWNoZWNrcG9pbnQlMjAlM0QlMjAlMjJiZXJ0LWJhc2UtdW5jYXNlZCUyMiUwQXRva2VuaXplciUyMCUzRCUyMEF1dG9Ub2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKGNoZWNrcG9pbnQpJTBBbW9kZWwlMjAlM0QlMjBBdXRvTW9kZWxGb3JTZXF1ZW5jZUNsYXNzaWZpY2F0aW9uLmZyb21fcHJldHJhaW5lZChjaGVja3BvaW50KSUwQXNlcXVlbmNlcyUyMCUzRCUyMCU1QiUwQSUyMCUyMCUyMCUyMCUyMkkndmUlMjBiZWVuJTIwd2FpdGluZyUyMGZvciUyMGElMjBIdWdnaW5nRmFjZSUyMGNvdXJzZSUyMG15JTIwd2hvbGUlMjBsaWZlLiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMlRoaXMlMjBjb3Vyc2UlMjBpcyUyMGFtYXppbmchJTIyJTJDJTBBJTVEJTBBYmF0Y2glMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzJTJDJTIwcGFkZGluZyUzRFRydWUlMkMlMjB0cnVuY2F0aW9uJTNEVHJ1ZSUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIpJTBBJTBBJTIzJTIwJUVDJTk3JUFDJUVBJUI4JUIwJUVBJUIwJTgwJTIwJUVDJTgzJTg4JUVCJUExJTlDJUVDJTlBJUI0JTIwJUVCJUI2JTgwJUVCJUI2JTg0JTBBYmF0Y2glNUIlMjJsYWJlbHMlMjIlNUQlMjAlM0QlMjB0b3JjaC50ZW5zb3IoJTVCMSUyQyUyMDElNUQpJTBBJTBBb3B0aW1pemVyJTIwJTNEJTIwQWRhbVcobW9kZWwucGFyYW1ldGVycygpKSUwQWxvc3MlMjAlM0QlMjBtb2RlbCgqKmJhdGNoKS5sb3NzJTBBbG9zcy5iYWNrd2FyZCgpJTBBb3B0aW1pemVyLnN0ZXAoKQ==",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> torch.optim <span class="hljs-keyword">import</span> AdamW
<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, AutoModelForSequenceClassification
<span class="hljs-comment"># 이전과 동일</span>
checkpoint = <span class="hljs-string">&quot;bert-base-uncased&quot;</span>
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
sequences = [
<span class="hljs-string">&quot;I&#x27;ve been waiting for a HuggingFace course my whole life.&quot;</span>,
<span class="hljs-string">&quot;This course is amazing!&quot;</span>,
]
batch = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)
<span class="hljs-comment"># μ—¬κΈ°κ°€ μƒˆλ‘œμš΄ λΆ€λΆ„</span>
batch[<span class="hljs-string">&quot;labels&quot;</span>] = torch.tensor([<span class="hljs-number">1</span>, <span class="hljs-number">1</span>])
optimizer = AdamW(model.parameters())
loss = model(**batch).loss
loss.backward()
optimizer.step()`,wrap:!1}}),H=new b({props:{title:"Hubμ—μ„œ 데이터 μ„ΈνŠΈ κ°€μ Έμ˜€κΈ°",local:"loading-a-dataset-from-the-hub",headingTag:"h3"}}),X=new Xt({props:{id:"_BZearw7f0w"}}),C=new x({props:{$$slots:{default:[El]},$$scope:{ctx:d}}}),E=new T({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBcmF3X2RhdGFzZXRzJTIwJTNEJTIwbG9hZF9kYXRhc2V0KCUyMmdsdWUlMjIlMkMlMjAlMjJtcnBjJTIyKSUwQXJhd19kYXRhc2V0cw==",highlighted:`<span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
raw_datasets = load_dataset(<span class="hljs-string">&quot;glue&quot;</span>, <span class="hljs-string">&quot;mrpc&quot;</span>)
raw_datasets`,wrap:!1}}),D=new T({props:{code:"RGF0YXNldERpY3QoJTdCJTBBJTIwJTIwJTIwJTIwdHJhaW4lM0ElMjBEYXRhc2V0KCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGZlYXR1cmVzJTNBJTIwJTVCJ3NlbnRlbmNlMSclMkMlMjAnc2VudGVuY2UyJyUyQyUyMCdsYWJlbCclMkMlMjAnaWR4JyU1RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG51bV9yb3dzJTNBJTIwMzY2OCUwQSUyMCUyMCUyMCUyMCU3RCklMEElMjAlMjAlMjAlMjB2YWxpZGF0aW9uJTNBJTIwRGF0YXNldCglN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBmZWF0dXJlcyUzQSUyMCU1QidzZW50ZW5jZTEnJTJDJTIwJ3NlbnRlbmNlMiclMkMlMjAnbGFiZWwnJTJDJTIwJ2lkeCclNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBudW1fcm93cyUzQSUyMDQwOCUwQSUyMCUyMCUyMCUyMCU3RCklMEElMjAlMjAlMjAlMjB0ZXN0JTNBJTIwRGF0YXNldCglN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBmZWF0dXJlcyUzQSUyMCU1QidzZW50ZW5jZTEnJTJDJTIwJ3NlbnRlbmNlMiclMkMlMjAnbGFiZWwnJTJDJTIwJ2lkeCclNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBudW1fcm93cyUzQSUyMDE3MjUlMEElMjAlMjAlMjAlMjAlN0QpJTBBJTdEKQ==",highlighted:`DatasetDict({
train: Dataset({
features: [<span class="hljs-string">&#x27;sentence1&#x27;</span>, <span class="hljs-string">&#x27;sentence2&#x27;</span>, <span class="hljs-string">&#x27;label&#x27;</span>, <span class="hljs-string">&#x27;idx&#x27;</span>],
num_rows: <span class="hljs-number">3668</span>
})
validation: Dataset({
features: [<span class="hljs-string">&#x27;sentence1&#x27;</span>, <span class="hljs-string">&#x27;sentence2&#x27;</span>, <span class="hljs-string">&#x27;label&#x27;</span>, <span class="hljs-string">&#x27;idx&#x27;</span>],
num_rows: <span class="hljs-number">408</span>
})
test: Dataset({
features: [<span class="hljs-string">&#x27;sentence1&#x27;</span>, <span class="hljs-string">&#x27;sentence2&#x27;</span>, <span class="hljs-string">&#x27;label&#x27;</span>, <span class="hljs-string">&#x27;idx&#x27;</span>],
num_rows: <span class="hljs-number">1725</span>
})
})`,wrap:!1}}),k=new x({props:{$$slots:{default:[Dl]},$$scope:{ctx:d}}}),Y=new T({props:{code:"cmF3X3RyYWluX2RhdGFzZXQlMjAlM0QlMjByYXdfZGF0YXNldHMlNUIlMjJ0cmFpbiUyMiU1RCUwQXJhd190cmFpbl9kYXRhc2V0JTVCMCU1RA==",highlighted:`raw_train_dataset = raw_datasets[<span class="hljs-string">&quot;train&quot;</span>]
raw_train_dataset[<span class="hljs-number">0</span>]`,wrap:!1}}),F=new T({props:{code:"JTdCJ2lkeCclM0ElMjAwJTJDJTBBJTIwJ2xhYmVsJyUzQSUyMDElMkMlMEElMjAnc2VudGVuY2UxJyUzQSUyMCdBbXJvemklMjBhY2N1c2VkJTIwaGlzJTIwYnJvdGhlciUyMCUyQyUyMHdob20lMjBoZSUyMGNhbGxlZCUyMCUyMiUyMHRoZSUyMHdpdG5lc3MlMjAlMjIlMjAlMkMlMjBvZiUyMGRlbGliZXJhdGVseSUyMGRpc3RvcnRpbmclMjBoaXMlMjBldmlkZW5jZSUyMC4nJTJDJTBBJTIwJ3NlbnRlbmNlMiclM0ElMjAnUmVmZXJyaW5nJTIwdG8lMjBoaW0lMjBhcyUyMG9ubHklMjAlMjIlMjB0aGUlMjB3aXRuZXNzJTIwJTIyJTIwJTJDJTIwQW1yb3ppJTIwYWNjdXNlZCUyMGhpcyUyMGJyb3RoZXIlMjBvZiUyMGRlbGliZXJhdGVseSUyMGRpc3RvcnRpbmclMjBoaXMlMjBldmlkZW5jZSUyMC4nJTdE",highlighted:`{<span class="hljs-string">&#x27;idx&#x27;</span>: <span class="hljs-number">0</span>,
<span class="hljs-string">&#x27;label&#x27;</span>: <span class="hljs-number">1</span>,
<span class="hljs-string">&#x27;sentence1&#x27;</span>: <span class="hljs-string">&#x27;Amrozi accused his brother , whom he called &quot; the witness &quot; , of deliberately distorting his evidence .&#x27;</span>,
<span class="hljs-string">&#x27;sentence2&#x27;</span>: <span class="hljs-string">&#x27;Referring to him as only &quot; the witness &quot; , Amrozi accused his brother of deliberately distorting his evidence .&#x27;</span>}`,wrap:!1}}),P=new T({props:{code:"cmF3X3RyYWluX2RhdGFzZXQuZmVhdHVyZXM=",highlighted:"raw_train_dataset.features",wrap:!1}}),K=new T({props:{code:"JTdCJ3NlbnRlbmNlMSclM0ElMjBWYWx1ZShkdHlwZSUzRCdzdHJpbmcnJTJDJTIwaWQlM0ROb25lKSUyQyUwQSUyMCdzZW50ZW5jZTInJTNBJTIwVmFsdWUoZHR5cGUlM0Qnc3RyaW5nJyUyQyUyMGlkJTNETm9uZSklMkMlMEElMjAnbGFiZWwnJTNBJTIwQ2xhc3NMYWJlbChudW1fY2xhc3NlcyUzRDIlMkMlMjBuYW1lcyUzRCU1Qidub3RfZXF1aXZhbGVudCclMkMlMjAnZXF1aXZhbGVudCclNUQlMkMlMjBuYW1lc19maWxlJTNETm9uZSUyQyUyMGlkJTNETm9uZSklMkMlMEElMjAnaWR4JyUzQSUyMFZhbHVlKGR0eXBlJTNEJ2ludDMyJyUyQyUyMGlkJTNETm9uZSklN0Q=",highlighted:`{<span class="hljs-string">&#x27;sentence1&#x27;</span>: Value(dtype=<span class="hljs-string">&#x27;string&#x27;</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>),
<span class="hljs-string">&#x27;sentence2&#x27;</span>: Value(dtype=<span class="hljs-string">&#x27;string&#x27;</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>),
<span class="hljs-string">&#x27;label&#x27;</span>: ClassLabel(num_classes=<span class="hljs-number">2</span>, names=[<span class="hljs-string">&#x27;not_equivalent&#x27;</span>, <span class="hljs-string">&#x27;equivalent&#x27;</span>], names_file=<span class="hljs-literal">None</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>),
<span class="hljs-string">&#x27;idx&#x27;</span>: Value(dtype=<span class="hljs-string">&#x27;int32&#x27;</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>)}`,wrap:!1}}),I=new x({props:{$$slots:{default:[Sl]},$$scope:{ctx:d}}}),ss=new b({props:{title:"데이터 μ„ΈνŠΈ μ „μ²˜λ¦¬",local:"preprocessing-a-dataset",headingTag:"h3"}}),es=new Xt({props:{id:"0u3ioSwev3s"}}),ls=new T({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMEElMEFjaGVja3BvaW50JTIwJTNEJTIwJTIyYmVydC1iYXNlLXVuY2FzZWQlMjIlMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZChjaGVja3BvaW50KSUwQXRva2VuaXplZF9zZW50ZW5jZXNfMSUyMCUzRCUyMHRva2VuaXplcihyYXdfZGF0YXNldHMlNUIlMjJ0cmFpbiUyMiU1RCU1QiUyMnNlbnRlbmNlMSUyMiU1RCklMEF0b2tlbml6ZWRfc2VudGVuY2VzXzIlMjAlM0QlMjB0b2tlbml6ZXIocmF3X2RhdGFzZXRzJTVCJTIydHJhaW4lMjIlNUQlNUIlMjJzZW50ZW5jZTIlMjIlNUQp",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer
checkpoint = <span class="hljs-string">&quot;bert-base-uncased&quot;</span>
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
tokenized_sentences_1 = tokenizer(raw_datasets[<span class="hljs-string">&quot;train&quot;</span>][<span class="hljs-string">&quot;sentence1&quot;</span>])
tokenized_sentences_2 = tokenizer(raw_datasets[<span class="hljs-string">&quot;train&quot;</span>][<span class="hljs-string">&quot;sentence2&quot;</span>])`,wrap:!1}}),_=new x({props:{$$slots:{default:[Ll]},$$scope:{ctx:d}}}),as=new T({props:{code:"aW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyKCUyMlRoaXMlMjBpcyUyMHRoZSUyMGZpcnN0JTIwc2VudGVuY2UuJTIyJTJDJTIwJTIyVGhpcyUyMGlzJTIwdGhlJTIwc2Vjb25kJTIwb25lLiUyMiklMEFpbnB1dHM=",highlighted:`inputs = tokenizer(<span class="hljs-string">&quot;This is the first sentence.&quot;</span>, <span class="hljs-string">&quot;This is the second one.&quot;</span>)
inputs`,wrap:!1}}),ps=new T({props:{code:"JTdCJTIwJTBBJTIwJTIwJ2lucHV0X2lkcyclM0ElMjAlNUIxMDElMkMlMjAyMDIzJTJDJTIwMjAwMyUyQyUyMDE5OTYlMkMlMjAyMDM0JTJDJTIwNjI1MSUyQyUyMDEwMTIlMkMlMjAxMDIlMkMlMjAyMDIzJTJDJTIwMjAwMyUyQyUyMDE5OTYlMkMlMjAyMTE3JTJDJTIwMjAyOCUyQyUyMDEwMTIlMkMlMjAxMDIlNUQlMkMlMEElMjAlMjAndG9rZW5fdHlwZV9pZHMnJTNBJTIwJTVCMCUyQyUyMDAlMkMlMjAwJTJDJTIwMCUyQyUyMDAlMkMlMjAwJTJDJTIwMCUyQyUyMDAlMkMlMjAxJTJDJTIwMSUyQyUyMDElMkMlMjAxJTJDJTIwMSUyQyUyMDElMkMlMjAxJTVEJTJDJTBBJTIwJTIwJ2F0dGVudGlvbl9tYXNrJyUzQSUyMCU1QjElMkMlMjAxJTJDJTIwMSUyQyUyMDElMkMlMjAxJTJDJTIwMSUyQyUyMDElMkMlMjAxJTJDJTIwMSUyQyUyMDElMkMlMjAxJTJDJTIwMSUyQyUyMDElMkMlMjAxJTJDJTIwMSU1RCUwQSU3RA==",highlighted:`{
<span class="hljs-string">&#x27;input_ids&#x27;</span>: [<span class="hljs-number">101</span>, <span class="hljs-number">2023</span>, <span class="hljs-number">2003</span>, <span class="hljs-number">1996</span>, <span class="hljs-number">2034</span>, <span class="hljs-number">6251</span>, <span class="hljs-number">1012</span>, <span class="hljs-number">102</span>, <span class="hljs-number">2023</span>, <span class="hljs-number">2003</span>, <span class="hljs-number">1996</span>, <span class="hljs-number">2117</span>, <span class="hljs-number">2028</span>, <span class="hljs-number">1012</span>, <span class="hljs-number">102</span>],
<span class="hljs-string">&#x27;token_type_ids&#x27;</span>: [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>],
<span class="hljs-string">&#x27;attention_mask&#x27;</span>: [<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>]
}`,wrap:!1}}),A=new x({props:{$$slots:{default:[Yl]},$$scope:{ctx:d}}}),rs=new T({props:{code:"dG9rZW5pemVyLmNvbnZlcnRfaWRzX3RvX3Rva2VucyhpbnB1dHMlNUIlMjJpbnB1dF9pZHMlMjIlNUQp",highlighted:'tokenizer.convert_ids_to_tokens(inputs[<span class="hljs-string">&quot;input_ids&quot;</span>])',wrap:!1}}),ms=new T({props:{code:"JTVCJyU1QkNMUyU1RCclMkMlMjAndGhpcyclMkMlMjAnaXMnJTJDJTIwJ3RoZSclMkMlMjAnZmlyc3QnJTJDJTIwJ3NlbnRlbmNlJyUyQyUyMCcuJyUyQyUyMCclNUJTRVAlNUQnJTJDJTIwJ3RoaXMnJTJDJTIwJ2lzJyUyQyUyMCd0aGUnJTJDJTIwJ3NlY29uZCclMkMlMjAnb25lJyUyQyUyMCcuJyUyQyUyMCclNUJTRVAlNUQnJTVE",highlighted:'[<span class="hljs-string">&#x27;[CLS]&#x27;</span>, <span class="hljs-string">&#x27;this&#x27;</span>, <span class="hljs-string">&#x27;is&#x27;</span>, <span class="hljs-string">&#x27;the&#x27;</span>, <span class="hljs-string">&#x27;first&#x27;</span>, <span class="hljs-string">&#x27;sentence&#x27;</span>, <span class="hljs-string">&#x27;.&#x27;</span>, <span class="hljs-string">&#x27;[SEP]&#x27;</span>, <span class="hljs-string">&#x27;this&#x27;</span>, <span class="hljs-string">&#x27;is&#x27;</span>, <span class="hljs-string">&#x27;the&#x27;</span>, <span class="hljs-string">&#x27;second&#x27;</span>, <span class="hljs-string">&#x27;one&#x27;</span>, <span class="hljs-string">&#x27;.&#x27;</span>, <span class="hljs-string">&#x27;[SEP]&#x27;</span>]',wrap:!1}}),js=new T({props:{code:"JTVCJyU1QkNMUyU1RCclMkMlMjAndGhpcyclMkMlMjAnaXMnJTJDJTIwJ3RoZSclMkMlMjAnZmlyc3QnJTJDJTIwJ3NlbnRlbmNlJyUyQyUyMCcuJyUyQyUyMCclNUJTRVAlNUQnJTJDJTIwJ3RoaXMnJTJDJTIwJ2lzJyUyQyUyMCd0aGUnJTJDJTIwJ3NlY29uZCclMkMlMjAnb25lJyUyQyUyMCcuJyUyQyUyMCclNUJTRVAlNUQnJTVEJTBBJTVCJTIwJTIwJTIwJTIwJTIwJTIwMCUyQyUyMCUyMCUyMCUyMCUyMCUyMDAlMkMlMjAlMjAlMjAlMjAwJTJDJTIwJTIwJTIwJTIwJTIwMCUyQyUyMCUyMCUyMCUyMCUyMCUyMCUyMDAlMkMlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAwJTJDJTIwJTIwJTIwMCUyQyUyMCUyMCUyMCUyMCUyMCUyMCUyMDAlMkMlMjAlMjAlMjAlMjAlMjAlMjAxJTJDJTIwJTIwJTIwJTIwMSUyQyUyMCUyMCUyMCUyMCUyMDElMkMlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAxJTJDJTIwJTIwJTIwJTIwJTIwMSUyQyUyMCUyMCUyMDElMkMlMjAlMjAlMjAlMjAlMjAlMjAlMjAxJTVE",highlighted:`[<span class="hljs-string">&#x27;[CLS]&#x27;</span>, <span class="hljs-string">&#x27;this&#x27;</span>, <span class="hljs-string">&#x27;is&#x27;</span>, <span class="hljs-string">&#x27;the&#x27;</span>, <span class="hljs-string">&#x27;first&#x27;</span>, <span class="hljs-string">&#x27;sentence&#x27;</span>, <span class="hljs-string">&#x27;.&#x27;</span>, <span class="hljs-string">&#x27;[SEP]&#x27;</span>, <span class="hljs-string">&#x27;this&#x27;</span>, <span class="hljs-string">&#x27;is&#x27;</span>, <span class="hljs-string">&#x27;the&#x27;</span>, <span class="hljs-string">&#x27;second&#x27;</span>, <span class="hljs-string">&#x27;one&#x27;</span>, <span class="hljs-string">&#x27;.&#x27;</span>, <span class="hljs-string">&#x27;[SEP]&#x27;</span>]
[ <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>]`,wrap:!1}}),Ts=new T({props:{code:"dG9rZW5pemVkX2RhdGFzZXQlMjAlM0QlMjB0b2tlbml6ZXIoJTBBJTIwJTIwJTIwJTIwcmF3X2RhdGFzZXRzJTVCJTIydHJhaW4lMjIlNUQlNUIlMjJzZW50ZW5jZTElMjIlNUQlMkMlMEElMjAlMjAlMjAlMjByYXdfZGF0YXNldHMlNUIlMjJ0cmFpbiUyMiU1RCU1QiUyMnNlbnRlbmNlMiUyMiU1RCUyQyUwQSUyMCUyMCUyMCUyMHBhZGRpbmclM0RUcnVlJTJDJTBBJTIwJTIwJTIwJTIwdHJ1bmNhdGlvbiUzRFRydWUlMkMlMEEp",highlighted:`tokenized_dataset = tokenizer(
raw_datasets[<span class="hljs-string">&quot;train&quot;</span>][<span class="hljs-string">&quot;sentence1&quot;</span>],
raw_datasets[<span class="hljs-string">&quot;train&quot;</span>][<span class="hljs-string">&quot;sentence2&quot;</span>],
padding=<span class="hljs-literal">True</span>,
truncation=<span class="hljs-literal">True</span>,
)`,wrap:!1}}),ws=new T({props:{code:"ZGVmJTIwdG9rZW5pemVfZnVuY3Rpb24oZXhhbXBsZSklM0ElMEElMjAlMjAlMjAlMjByZXR1cm4lMjB0b2tlbml6ZXIoZXhhbXBsZSU1QiUyMnNlbnRlbmNlMSUyMiU1RCUyQyUyMGV4YW1wbGUlNUIlMjJzZW50ZW5jZTIlMjIlNUQlMkMlMjB0cnVuY2F0aW9uJTNEVHJ1ZSk=",highlighted:`<span class="hljs-keyword">def</span> <span class="hljs-title function_">tokenize_function</span>(<span class="hljs-params">example</span>):
<span class="hljs-keyword">return</span> tokenizer(example[<span class="hljs-string">&quot;sentence1&quot;</span>], example[<span class="hljs-string">&quot;sentence2&quot;</span>], truncation=<span class="hljs-literal">True</span>)`,wrap:!1}}),Q=new x({props:{$$slots:{default:[Fl]},$$scope:{ctx:d}}}),Cs=new T({props:{code:"dG9rZW5pemVkX2RhdGFzZXRzJTIwJTNEJTIwcmF3X2RhdGFzZXRzLm1hcCh0b2tlbml6ZV9mdW5jdGlvbiUyQyUyMGJhdGNoZWQlM0RUcnVlKSUwQXRva2VuaXplZF9kYXRhc2V0cw==",highlighted:`tokenized_datasets = raw_datasets.<span class="hljs-built_in">map</span>(tokenize_function, batched=<span class="hljs-literal">True</span>)
tokenized_datasets`,wrap:!1}}),Is=new T({props:{code:"RGF0YXNldERpY3QoJTdCJTBBJTIwJTIwJTIwJTIwdHJhaW4lM0ElMjBEYXRhc2V0KCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGZlYXR1cmVzJTNBJTIwJTVCJ2F0dGVudGlvbl9tYXNrJyUyQyUyMCdpZHgnJTJDJTIwJ2lucHV0X2lkcyclMkMlMjAnbGFiZWwnJTJDJTIwJ3NlbnRlbmNlMSclMkMlMjAnc2VudGVuY2UyJyUyQyUyMCd0b2tlbl90eXBlX2lkcyclNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBudW1fcm93cyUzQSUyMDM2NjglMEElMjAlMjAlMjAlMjAlN0QpJTBBJTIwJTIwJTIwJTIwdmFsaWRhdGlvbiUzQSUyMERhdGFzZXQoJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZmVhdHVyZXMlM0ElMjAlNUInYXR0ZW50aW9uX21hc2snJTJDJTIwJ2lkeCclMkMlMjAnaW5wdXRfaWRzJyUyQyUyMCdsYWJlbCclMkMlMjAnc2VudGVuY2UxJyUyQyUyMCdzZW50ZW5jZTInJTJDJTIwJ3Rva2VuX3R5cGVfaWRzJyU1RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG51bV9yb3dzJTNBJTIwNDA4JTBBJTIwJTIwJTIwJTIwJTdEKSUwQSUyMCUyMCUyMCUyMHRlc3QlM0ElMjBEYXRhc2V0KCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGZlYXR1cmVzJTNBJTIwJTVCJ2F0dGVudGlvbl9tYXNrJyUyQyUyMCdpZHgnJTJDJTIwJ2lucHV0X2lkcyclMkMlMjAnbGFiZWwnJTJDJTIwJ3NlbnRlbmNlMSclMkMlMjAnc2VudGVuY2UyJyUyQyUyMCd0b2tlbl90eXBlX2lkcyclNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBudW1fcm93cyUzQSUyMDE3MjUlMEElMjAlMjAlMjAlMjAlN0QpJTBBJTdEKQ==",highlighted:`DatasetDict({
train: Dataset({
features: [<span class="hljs-string">&#x27;attention_mask&#x27;</span>, <span class="hljs-string">&#x27;idx&#x27;</span>, <span class="hljs-string">&#x27;input_ids&#x27;</span>, <span class="hljs-string">&#x27;label&#x27;</span>, <span class="hljs-string">&#x27;sentence1&#x27;</span>, <span class="hljs-string">&#x27;sentence2&#x27;</span>, <span class="hljs-string">&#x27;token_type_ids&#x27;</span>],
num_rows: <span class="hljs-number">3668</span>
})
validation: Dataset({
features: [<span class="hljs-string">&#x27;attention_mask&#x27;</span>, <span class="hljs-string">&#x27;idx&#x27;</span>, <span class="hljs-string">&#x27;input_ids&#x27;</span>, <span class="hljs-string">&#x27;label&#x27;</span>, <span class="hljs-string">&#x27;sentence1&#x27;</span>, <span class="hljs-string">&#x27;sentence2&#x27;</span>, <span class="hljs-string">&#x27;token_type_ids&#x27;</span>],
num_rows: <span class="hljs-number">408</span>
})
test: Dataset({
features: [<span class="hljs-string">&#x27;attention_mask&#x27;</span>, <span class="hljs-string">&#x27;idx&#x27;</span>, <span class="hljs-string">&#x27;input_ids&#x27;</span>, <span class="hljs-string">&#x27;label&#x27;</span>, <span class="hljs-string">&#x27;sentence1&#x27;</span>, <span class="hljs-string">&#x27;sentence2&#x27;</span>, <span class="hljs-string">&#x27;token_type_ids&#x27;</span>],
num_rows: <span class="hljs-number">1725</span>
})
})`,wrap:!1}}),Rs=new b({props:{title:"동적 νŒ¨λ”©",local:"dynamic-padding",headingTag:"h5"}}),Zs=new Xt({props:{id:"7q5NyFT8REg"}}),R=new x({props:{$$slots:{default:[ql]},$$scope:{ctx:d}}}),Ns=new T({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMERhdGFDb2xsYXRvcldpdGhQYWRkaW5nJTBBJTBBZGF0YV9jb2xsYXRvciUyMCUzRCUyMERhdGFDb2xsYXRvcldpdGhQYWRkaW5nKHRva2VuaXplciUzRHRva2VuaXplcik=",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> DataCollatorWithPadding
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)`,wrap:!1}}),Gs=new T({props:{code:"c2FtcGxlcyUyMCUzRCUyMHRva2VuaXplZF9kYXRhc2V0cyU1QiUyMnRyYWluJTIyJTVEJTVCJTNBOCU1RCUwQXNhbXBsZXMlMjAlM0QlMjAlN0JrJTNBJTIwdiUyMGZvciUyMGslMkMlMjB2JTIwaW4lMjBzYW1wbGVzLml0ZW1zKCklMjBpZiUyMGslMjBub3QlMjBpbiUyMCU1QiUyMmlkeCUyMiUyQyUyMCUyMnNlbnRlbmNlMSUyMiUyQyUyMCUyMnNlbnRlbmNlMiUyMiU1RCU3RCUwQSU1Qmxlbih4KSUyMGZvciUyMHglMjBpbiUyMHNhbXBsZXMlNUIlMjJpbnB1dF9pZHMlMjIlNUQlNUQ=",highlighted:`samples = tokenized_datasets[<span class="hljs-string">&quot;train&quot;</span>][:<span class="hljs-number">8</span>]
samples = {k: v <span class="hljs-keyword">for</span> k, v <span class="hljs-keyword">in</span> samples.items() <span class="hljs-keyword">if</span> k <span class="hljs-keyword">not</span> <span class="hljs-keyword">in</span> [<span class="hljs-string">&quot;idx&quot;</span>, <span class="hljs-string">&quot;sentence1&quot;</span>, <span class="hljs-string">&quot;sentence2&quot;</span>]}
[<span class="hljs-built_in">len</span>(x) <span class="hljs-keyword">for</span> x <span class="hljs-keyword">in</span> samples[<span class="hljs-string">&quot;input_ids&quot;</span>]]`,wrap:!1}}),Hs=new T({props:{code:"JTVCNTAlMkMlMjA1OSUyQyUyMDQ3JTJDJTIwNjclMkMlMjA1OSUyQyUyMDUwJTJDJTIwNjIlMkMlMjAzMiU1RA==",highlighted:'[<span class="hljs-number">50</span>, <span class="hljs-number">59</span>, <span class="hljs-number">47</span>, <span class="hljs-number">67</span>, <span class="hljs-number">59</span>, <span class="hljs-number">50</span>, <span class="hljs-number">62</span>, <span class="hljs-number">32</span>]',wrap:!1}}),Ws=new T({props:{code:"YmF0Y2glMjAlM0QlMjBkYXRhX2NvbGxhdG9yKHNhbXBsZXMpJTBBJTdCayUzQSUyMHYuc2hhcGUlMjBmb3IlMjBrJTJDJTIwdiUyMGluJTIwYmF0Y2guaXRlbXMoKSU3RA==",highlighted:`batch = data_collator(samples)
{k: v.shape <span class="hljs-keyword">for</span> k, v <span class="hljs-keyword">in</span> batch.items()}`,wrap:!1}}),zs=new T({props:{code:"JTdCJ2F0dGVudGlvbl9tYXNrJyUzQSUyMHRvcmNoLlNpemUoJTVCOCUyQyUyMDY3JTVEKSUyQyUwQSUyMCdpbnB1dF9pZHMnJTNBJTIwdG9yY2guU2l6ZSglNUI4JTJDJTIwNjclNUQpJTJDJTBBJTIwJ3Rva2VuX3R5cGVfaWRzJyUzQSUyMHRvcmNoLlNpemUoJTVCOCUyQyUyMDY3JTVEKSUyQyUwQSUyMCdsYWJlbHMnJTNBJTIwdG9yY2guU2l6ZSglNUI4JTVEKSU3RA==",highlighted:`{<span class="hljs-string">&#x27;attention_mask&#x27;</span>: torch.Size([<span class="hljs-number">8</span>, <span class="hljs-number">67</span>]),
<span class="hljs-string">&#x27;input_ids&#x27;</span>: torch.Size([<span class="hljs-number">8</span>, <span class="hljs-number">67</span>]),
<span class="hljs-string">&#x27;token_type_ids&#x27;</span>: torch.Size([<span class="hljs-number">8</span>, <span class="hljs-number">67</span>]),
<span class="hljs-string">&#x27;labels&#x27;</span>: torch.Size([<span class="hljs-number">8</span>])}`,wrap:!1}}),Z=new x({props:{$$slots:{default:[Pl]},$$scope:{ctx:d}}}),Ss=new b({props:{title:"μ„Ήμ…˜ ν€΄μ¦ˆ",local:"section-quiz",headingTag:"h2"}}),Ys=new b({props:{title:"1. batched=True 와 ν•¨κ»˜ Dataset.map() 을 μ‚¬μš©ν•˜λŠ” μ£Όμš” μž₯점은 λ¬΄μ—‡μΈκ°€μš”?",local:"1-batchedtrue-와-ν•¨κ»˜-datasetmap-을-μ‚¬μš©ν•˜λŠ”-μ£Όμš”-μž₯점은-λ¬΄μ—‡μΈκ°€μš”",headingTag:"h3"}}),Fs=new pe({props:{choices:[{text:"λ©”λͺ¨λ¦¬λ₯Ό 덜 μ‚¬μš©ν•©λ‹ˆλ‹€.",explain:"더 λ©”λͺ¨λ¦¬ 효율적일 수 μžˆμ§€λ§Œ, 이것이 μ£Όμš” μž₯점은 μ•„λ‹™λ‹ˆλ‹€."},{text:"μ—¬λŸ¬ 예제λ₯Ό ν•œ λ²ˆμ— μ²˜λ¦¬ν•˜μ—¬ 토큰화λ₯Ό 훨씬 λΉ λ₯΄κ²Œ λ§Œλ“­λ‹ˆλ‹€.",explain:"μ •λ‹΅μž…λ‹ˆλ‹€! 배치둜 μ²˜λ¦¬ν•˜λ©΄ λΉ λ₯Έ ν† ν¬λ‚˜μ΄μ €κ°€ μ—¬λŸ¬ 예제λ₯Ό λ™μ‹œμ— μž‘μ—…ν•  수 μžˆμ–΄ 속도가 크게 ν–₯μƒλ©λ‹ˆλ‹€.",correct:!0},{text:"μžλ™μœΌλ‘œ νŒ¨λ”©μ„ μ²˜λ¦¬ν•΄μ€λ‹ˆλ‹€.",explain:"배치 μ²˜λ¦¬κ°€ μžλ™μœΌλ‘œ νŒ¨λ”©μ„ μ²˜λ¦¬ν•˜μ§€λŠ” μ•ŠμŠ΅λ‹ˆλ‹€ - 그것은 데이터 μ½œλ ˆμ΄ν„°κ°€ μˆ˜ν–‰ν•©λ‹ˆλ‹€."},{text:"데이터λ₯Ό PyTorch ν…μ„œλ‘œ λ³€ν™˜ν•©λ‹ˆλ‹€.",explain:"ν…μ„œ λ³€ν™˜μ€ ν˜•μ‹μ„ μ„€μ •ν•  λ•Œ λ°œμƒν•˜λ©°, 배치 λ§€ν•‘ μ€‘μ—λŠ” λ°œμƒν•˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€."}]}}),qs=new b({props:{title:"2. 데이터 μ„ΈνŠΈμ˜ μ΅œλŒ€ 길이둜 λͺ¨λ“  μ‹œν€€μŠ€λ₯Ό νŒ¨λ”©ν•˜λŠ” λŒ€μ‹  동적 νŒ¨λ”©μ„ μ‚¬μš©ν•˜λŠ” μ΄μœ λŠ” λ¬΄μ—‡μΈκ°€μš”?",local:"2-데이터-μ„ΈνŠΈμ˜-μ΅œλŒ€-길이둜-λͺ¨λ“ -μ‹œν€€μŠ€λ₯Ό-νŒ¨λ”©ν•˜λŠ”-λŒ€μ‹ -동적-νŒ¨λ”©μ„-μ‚¬μš©ν•˜λŠ”-μ΄μœ λŠ”-λ¬΄μ—‡μΈκ°€μš”",headingTag:"h3"}}),Ps=new pe({props:{choices:[{text:"동적 νŒ¨λ”©μ΄ λͺ¨λΈ μ•„ν‚€ν…μ²˜μ— μ˜ν•΄ μš”κ΅¬λ©λ‹ˆλ‹€.",explain:"μ•„λ‹ˆμš”, λͺ¨λΈμ€ κ³ μ • νŒ¨λ”©κ³Ό 동적 νŒ¨λ”©μ„ λͺ¨λ‘ μ²˜λ¦¬ν•  수 μžˆμŠ΅λ‹ˆλ‹€."},{text:"각 배치의 μ΅œλŒ€ κΈΈμ΄κΉŒμ§€λ§Œ νŒ¨λ”©ν•˜μ—¬ 계산 μ˜€λ²„ν—€λ“œλ₯Ό μ€„μž…λ‹ˆλ‹€.",explain:"μ •λ‹΅μž…λ‹ˆλ‹€! 동적 νŒ¨λ”©μ€ 데이터 μ„ΈνŠΈ μ΅œλŒ€κ°’μ΄ μ•„λ‹Œ 배치 μ΅œλŒ€κ°’κΉŒμ§€λ§Œ νŒ¨λ”©ν•˜μ—¬ νŒ¨λ”© 토큰에 λŒ€ν•œ λΆˆν•„μš”ν•œ 계산을 ν”Όν•©λ‹ˆλ‹€.",correct:!0},{text:"λͺ¨λΈ 정확도λ₯Ό ν–₯μƒμ‹œν‚΅λ‹ˆλ‹€.",explain:"νŒ¨λ”© μ „λž΅μ€ λͺ¨λΈ 정확도에 직접적인 영ν–₯을 μ£Όμ§€ μ•ŠμŠ΅λ‹ˆλ‹€."},{text:"DataCollatorWithPadding을 μ‚¬μš©ν•  λ•Œ ν•„μˆ˜μž…λ‹ˆλ‹€.",explain:"DataCollatorWithPadding이 동적 νŒ¨λ”©μ„ κ°€λŠ₯ν•˜κ²Œ ν•˜μ§€λ§Œ, μ›ν•œλ‹€λ©΄ μ—¬μ „νžˆ κ³ μ • νŒ¨λ”©μ„ μ‚¬μš©ν•  수 μžˆμŠ΅λ‹ˆλ‹€."}]}}),Ks=new b({props:{title:"3. BERT ν† ν°ν™”μ—μ„œ token_type_ids ν•„λ“œλŠ” 무엇을 λ‚˜νƒ€λ‚΄λ‚˜μš”?",local:"3-bert-ν† ν°ν™”μ—μ„œ-tokentypeids-ν•„λ“œλŠ”-무엇을-λ‚˜νƒ€λ‚΄λ‚˜μš”",headingTag:"h3"}}),Os=new pe({props:{choices:[{text:"μ‹œν€€μŠ€μ—μ„œ 각 ν† ν°μ˜ μœ„μΉ˜μž…λ‹ˆλ‹€.",explain:"그것은 μœ„μΉ˜ μž„λ² λ”©μ΄μ§€, token_type_idsκ°€ μ•„λ‹™λ‹ˆλ‹€."},{text:"λ¬Έμž₯ μŒμ„ μ²˜λ¦¬ν•  λ•Œ 각 토큰이 μ–΄λŠ λ¬Έμž₯에 μ†ν•˜λŠ”μ§€λ₯Ό λ‚˜νƒ€λƒ…λ‹ˆλ‹€.",explain:"μ •λ‹΅μž…λ‹ˆλ‹€! token_type_idsλŠ” λ¬Έμž₯ 쌍 μž‘μ—…μ—μ„œ 첫 번째 λ¬Έμž₯(0)κ³Ό 두 번째 λ¬Έμž₯(1)을 κ΅¬λΆ„ν•©λ‹ˆλ‹€.",correct:!0},{text:"각 ν† ν°μ˜ μ–΄ν…μ…˜ λ§ˆμŠ€ν¬μž…λ‹ˆλ‹€.",explain:"μ–΄ν…μ…˜ λ§ˆμŠ€ν¬λŠ” μ–΄λ–€ 토큰에 주의λ₯Ό κΈ°μšΈμΌμ§€ λ‚˜νƒ€λ‚΄λŠ” λ³„λ„μ˜ ν•„λ“œμž…λ‹ˆλ‹€."},{text:"각 ν† ν°μ˜ μ–΄νœ˜ IDμž…λ‹ˆλ‹€.",explain:"그것은 token_type_idsκ°€ μ•„λ‹Œ input_ids ν•„λ“œμž…λ‹ˆλ‹€."}]}}),se=new b({props:{title:"4. load_dataset('glue', 'mrpc') 둜 데이터 μ„ΈνŠΈλ₯Ό λ‘œλ”©ν•  λ•Œ 두 번째 μΈμˆ˜λŠ” 무엇을 μ§€μ •ν•˜λ‚˜μš”?",local:"4-loaddatasetglue-mrpc-둜-데이터-μ„ΈνŠΈλ₯Ό-λ‘œλ”©ν• -λ•Œ-두-번째-μΈμˆ˜λŠ”-무엇을-μ§€μ •ν•˜λ‚˜μš”",headingTag:"h3"}}),ee=new pe({props:{choices:[{text:"λ‘œλ”©ν•  데이터 μ„ΈνŠΈμ˜ λ²„μ „μž…λ‹ˆλ‹€.",explain:"버전 지정은 λ‹€λ₯Έ λ§€κ°œλ³€μˆ˜λ₯Ό μ‚¬μš©ν•©λ‹ˆλ‹€."},{text:"GLUE 벀치마크 λ‚΄μ˜ νŠΉμ • μž‘μ—… λ˜λŠ” ν•˜μœ„ μ§‘ν•©μž…λ‹ˆλ‹€.",explain:"μ •λ‹΅μž…λ‹ˆλ‹€! MRPCλŠ” 더 큰 GLUE 벀치마크 μ»¬λ ‰μ…˜ λ‚΄μ˜ νŠΉμ • μž‘μ—… 쀑 ν•˜λ‚˜μž…λ‹ˆλ‹€.",correct:!0},{text:"데이터 μ„ΈνŠΈμ˜ λΆ„ν• (train/validation/test)μž…λ‹ˆλ‹€.",explain:"뢄할은 λ‘œλ”© 후에 μ ‘κ·Όν•˜λ©°, load_dataset ν˜ΈμΆœμ—μ„œ μ§€μ •ν•˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€."},{text:"데이터λ₯Ό λ°˜ν™˜ν•  ν˜•μ‹μž…λ‹ˆλ‹€.",explain:"ν˜•μ‹μ€ λ‘œλ”© ν›„ set_format() λ©”μ†Œλ“œλ₯Ό μ‚¬μš©ν•˜μ—¬ μ„€μ •ν•©λ‹ˆλ‹€."}]}}),te=new b({props:{title:"5. ν›ˆλ ¨ 전에 β€˜sentence1’과 β€˜sentence2’ 같은 열을 μ œκ±°ν•˜λŠ” λͺ©μ μ€ λ¬΄μ—‡μΈκ°€μš”?",local:"5-ν›ˆλ ¨-전에-sentence1κ³Ό-sentence2-같은-열을-μ œκ±°ν•˜λŠ”-λͺ©μ μ€-λ¬΄μ—‡μΈκ°€μš”",headingTag:"h3"}}),le=new pe({props:{choices:[{text:"ν›ˆλ ¨ 쀑 λ©”λͺ¨λ¦¬λ₯Ό μ ˆμ•½ν•˜κΈ° μœ„ν•΄μ„œμž…λ‹ˆλ‹€.",explain:"μ•½κ°„μ˜ λ©”λͺ¨λ¦¬λŠ” μ ˆμ•½λ˜μ§€λ§Œ, 이것이 주된 μ΄μœ λŠ” μ•„λ‹™λ‹ˆλ‹€."},{text:"λͺ¨λΈμ΄ μ΄λŸ¬ν•œ μ›μ‹œ ν…μŠ€νŠΈ 열을 μ˜ˆμƒν•˜μ§€ μ•Šκ³  였λ₯˜κ°€ λ°œμƒν•  수 있기 λ•Œλ¬Έμž…λ‹ˆλ‹€.",explain:"μ •λ‹΅μž…λ‹ˆλ‹€! λͺ¨λΈμ€ μ›μ‹œ ν…μŠ€νŠΈ λ¬Έμžμ—΄μ΄ μ•„λ‹Œ 수치적 ν…μ„œλ₯Ό μ˜ˆμƒν•©λ‹ˆλ‹€. ν…μŠ€νŠΈ 열을 μœ μ§€ν•˜λ©΄ 였λ₯˜κ°€ λ°œμƒν•  κ²ƒμž…λ‹ˆλ‹€.",correct:!0},{text:"μ΄λŸ¬ν•œ 열듀이 평가에 ν•„μš”ν•˜μ§€ μ•ŠκΈ° λ•Œλ¬Έμž…λ‹ˆλ‹€.",explain:"사싀이긴 ν•˜μ§€λ§Œ, 주된 μ΄μœ λŠ” λͺ¨λΈμ΄ μ›μ‹œ ν…μŠ€νŠΈλ₯Ό μ²˜λ¦¬ν•  수 μ—†λ‹€λŠ” κ²ƒμž…λ‹ˆλ‹€."},{text:"ν›ˆλ ¨ 속도λ₯Ό 크게 ν–₯μƒμ‹œν‚€κΈ° λ•Œλ¬Έμž…λ‹ˆλ‹€.",explain:"ν˜Έν™˜λ˜μ§€ μ•ŠλŠ” 데이터 νƒ€μž…μœΌλ‘œ μΈν•œ 였λ₯˜ 방지에 λΉ„ν•΄ 속도 ν–₯상은 λ―Έλ―Έν•©λ‹ˆλ‹€."}]}}),v=new x({props:{$$slots:{default:[Kl]},$$scope:{ctx:d}}}),ne=new zl({props:{source:"https://github.com/huggingface/course/blob/main/chapters/ko/chapter3/2.mdx"}}),{c(){p=u("meta"),$=n(),M=u("p"),U=n(),i(w.$$.fragment),f=n(),i(h.$$.fragment),Me=n(),B=u("p"),B.innerHTML=Wt,ie=n(),i(N.$$.fragment),re=n(),V=u("p"),V.textContent=zt,ce=n(),G=u("p"),G.innerHTML=Et,me=n(),i(H.$$.fragment),ye=n(),i(X.$$.fragment),je=n(),W=u("p"),W.innerHTML=Dt,ue=n(),z=u("p"),z.textContent=St,Je=n(),i(C.$$.fragment),oe=n(),i(E.$$.fragment),Ue=n(),i(D.$$.fragment),$e=n(),S=u("p"),S.innerHTML=Lt,fe=n(),i(k.$$.fragment),Te=n(),L=u("p"),L.innerHTML=Yt,de=n(),i(Y.$$.fragment),he=n(),i(F.$$.fragment),we=n(),q=u("p"),q.innerHTML=Ft,be=n(),i(P.$$.fragment),xe=n(),i(K.$$.fragment),ge=n(),O=u("p"),O.innerHTML=qt,Ce=n(),i(I.$$.fragment),ke=n(),i(ss.$$.fragment),Ie=n(),i(es.$$.fragment),_e=n(),ts=u("p"),ts.innerHTML=Pt,Ae=n(),i(ls.$$.fragment),Qe=n(),i(_.$$.fragment),Re=n(),ns=u("p"),ns.textContent=Kt,Ze=n(),i(as.$$.fragment),ve=n(),i(ps.$$.fragment),Be=n(),Ms=u("p"),Ms.innerHTML=Ot,Ne=n(),i(A.$$.fragment),Ve=n(),is=u("p"),is.innerHTML=sl,Ge=n(),i(rs.$$.fragment),He=n(),cs=u("p"),cs.textContent=el,Xe=n(),i(ms.$$.fragment),We=n(),ys=u("p"),ys.innerHTML=tl,ze=n(),i(js.$$.fragment),Ee=n(),us=u("p"),us.innerHTML=ll,De=n(),Js=u("p"),Js.innerHTML=nl,Se=n(),os=u("p"),os.innerHTML=al,Le=n(),Us=u("p"),Us.textContent=pl,Ye=n(),$s=u("p"),$s.innerHTML=Ml,Fe=n(),fs=u("p"),fs.innerHTML=il,qe=n(),i(Ts.$$.fragment),Pe=n(),ds=u("p"),ds.innerHTML=rl,Ke=n(),hs=u("p"),hs.innerHTML=cl,Oe=n(),i(ws.$$.fragment),st=n(),bs=u("p"),bs.innerHTML=ml,et=n(),xs=u("p"),xs.innerHTML=yl,tt=n(),i(Q.$$.fragment),lt=n(),gs=u("p"),gs.innerHTML=jl,nt=n(),i(Cs.$$.fragment),at=n(),ks=u("p"),ks.textContent=ul,pt=n(),i(Is.$$.fragment),Mt=n(),_s=u("p"),_s.innerHTML=Jl,it=n(),As=u("p"),As.innerHTML=ol,rt=n(),Qs=u("p"),Qs.innerHTML=Ul,ct=n(),i(Rs.$$.fragment),mt=n(),i(Zs.$$.fragment),yt=n(),vs=u("p"),vs.innerHTML=$l,jt=n(),i(R.$$.fragment),ut=n(),Bs=u("p"),Bs.innerHTML=fl,Jt=n(),i(Ns.$$.fragment),ot=n(),Vs=u("p"),Vs.innerHTML=Tl,Ut=n(),i(Gs.$$.fragment),$t=n(),i(Hs.$$.fragment),ft=n(),Xs=u("p"),Xs.innerHTML=dl,Tt=n(),i(Ws.$$.fragment),dt=n(),i(zs.$$.fragment),ht=n(),Es=u("p"),Es.textContent=hl,wt=n(),i(Z.$$.fragment),bt=n(),Ds=u("p"),Ds.textContent=wl,xt=n(),i(Ss.$$.fragment),gt=n(),Ls=u("p"),Ls.textContent=bl,Ct=n(),i(Ys.$$.fragment),kt=n(),i(Fs.$$.fragment),It=n(),i(qs.$$.fragment),_t=n(),i(Ps.$$.fragment),At=n(),i(Ks.$$.fragment),Qt=n(),i(Os.$$.fragment),Rt=n(),i(se.$$.fragment),Zt=n(),i(ee.$$.fragment),vt=n(),i(te.$$.fragment),Bt=n(),i(le.$$.fragment),Nt=n(),i(v.$$.fragment),Vt=n(),i(ne.$$.fragment),Gt=n(),ae=u("p"),this.h()},l(s){const e=Hl("svelte-u9bgzb",document.head);p=J(e,"META",{name:!0,content:!0}),e.forEach(t),$=a(s),M=J(s,"P",{}),Zl(M).forEach(t),U=a(s),r(w.$$.fragment,s),f=a(s),r(h.$$.fragment,s),Me=a(s),B=J(s,"P",{"data-svelte-h":!0}),o(B)!=="svelte-18n4e1w"&&(B.innerHTML=Wt),ie=a(s),r(N.$$.fragment,s),re=a(s),V=J(s,"P",{"data-svelte-h":!0}),o(V)!=="svelte-1yvacnx"&&(V.textContent=zt),ce=a(s),G=J(s,"P",{"data-svelte-h":!0}),o(G)!=="svelte-6tmng3"&&(G.innerHTML=Et),me=a(s),r(H.$$.fragment,s),ye=a(s),r(X.$$.fragment,s),je=a(s),W=J(s,"P",{"data-svelte-h":!0}),o(W)!=="svelte-1quhi9e"&&(W.innerHTML=Dt),ue=a(s),z=J(s,"P",{"data-svelte-h":!0}),o(z)!=="svelte-1b27c1z"&&(z.textContent=St),Je=a(s),r(C.$$.fragment,s),oe=a(s),r(E.$$.fragment,s),Ue=a(s),r(D.$$.fragment,s),$e=a(s),S=J(s,"P",{"data-svelte-h":!0}),o(S)!=="svelte-1f0x3aj"&&(S.innerHTML=Lt),fe=a(s),r(k.$$.fragment,s),Te=a(s),L=J(s,"P",{"data-svelte-h":!0}),o(L)!=="svelte-nbm9uo"&&(L.innerHTML=Yt),de=a(s),r(Y.$$.fragment,s),he=a(s),r(F.$$.fragment,s),we=a(s),q=J(s,"P",{"data-svelte-h":!0}),o(q)!=="svelte-11hwp3u"&&(q.innerHTML=Ft),be=a(s),r(P.$$.fragment,s),xe=a(s),r(K.$$.fragment,s),ge=a(s),O=J(s,"P",{"data-svelte-h":!0}),o(O)!=="svelte-1i632h4"&&(O.innerHTML=qt),Ce=a(s),r(I.$$.fragment,s),ke=a(s),r(ss.$$.fragment,s),Ie=a(s),r(es.$$.fragment,s),_e=a(s),ts=J(s,"P",{"data-svelte-h":!0}),o(ts)!=="svelte-72w04v"&&(ts.innerHTML=Pt),Ae=a(s),r(ls.$$.fragment,s),Qe=a(s),r(_.$$.fragment,s),Re=a(s),ns=J(s,"P",{"data-svelte-h":!0}),o(ns)!=="svelte-1gliuzv"&&(ns.textContent=Kt),Ze=a(s),r(as.$$.fragment,s),ve=a(s),r(ps.$$.fragment,s),Be=a(s),Ms=J(s,"P",{"data-svelte-h":!0}),o(Ms)!=="svelte-1dayjrj"&&(Ms.innerHTML=Ot),Ne=a(s),r(A.$$.fragment,s),Ve=a(s),is=J(s,"P",{"data-svelte-h":!0}),o(is)!=="svelte-fp8ycy"&&(is.innerHTML=sl),Ge=a(s),r(rs.$$.fragment,s),He=a(s),cs=J(s,"P",{"data-svelte-h":!0}),o(cs)!=="svelte-1a5aa9u"&&(cs.textContent=el),Xe=a(s),r(ms.$$.fragment,s),We=a(s),ys=J(s,"P",{"data-svelte-h":!0}),o(ys)!=="svelte-tqglnc"&&(ys.innerHTML=tl),ze=a(s),r(js.$$.fragment,s),Ee=a(s),us=J(s,"P",{"data-svelte-h":!0}),o(us)!=="svelte-13qt3k3"&&(us.innerHTML=ll),De=a(s),Js=J(s,"P",{"data-svelte-h":!0}),o(Js)!=="svelte-1x3j9xz"&&(Js.innerHTML=nl),Se=a(s),os=J(s,"P",{"data-svelte-h":!0}),o(os)!=="svelte-16t7d1w"&&(os.innerHTML=al),Le=a(s),Us=J(s,"P",{"data-svelte-h":!0}),o(Us)!=="svelte-mmugyu"&&(Us.textContent=pl),Ye=a(s),$s=J(s,"P",{"data-svelte-h":!0}),o($s)!=="svelte-9ytk6a"&&($s.innerHTML=Ml),Fe=a(s),fs=J(s,"P",{"data-svelte-h":!0}),o(fs)!=="svelte-1lt9uol"&&(fs.innerHTML=il),qe=a(s),r(Ts.$$.fragment,s),Pe=a(s),ds=J(s,"P",{"data-svelte-h":!0}),o(ds)!=="svelte-1qzyd20"&&(ds.innerHTML=rl),Ke=a(s),hs=J(s,"P",{"data-svelte-h":!0}),o(hs)!=="svelte-rdvnbx"&&(hs.innerHTML=cl),Oe=a(s),r(ws.$$.fragment,s),st=a(s),bs=J(s,"P",{"data-svelte-h":!0}),o(bs)!=="svelte-f19j5a"&&(bs.innerHTML=ml),et=a(s),xs=J(s,"P",{"data-svelte-h":!0}),o(xs)!=="svelte-1if9us4"&&(xs.innerHTML=yl),tt=a(s),r(Q.$$.fragment,s),lt=a(s),gs=J(s,"P",{"data-svelte-h":!0}),o(gs)!=="svelte-1qq8yfu"&&(gs.innerHTML=jl),nt=a(s),r(Cs.$$.fragment,s),at=a(s),ks=J(s,"P",{"data-svelte-h":!0}),o(ks)!=="svelte-1uiluni"&&(ks.textContent=ul),pt=a(s),r(Is.$$.fragment,s),Mt=a(s),_s=J(s,"P",{"data-svelte-h":!0}),o(_s)!=="svelte-1irxq8x"&&(_s.innerHTML=Jl),it=a(s),As=J(s,"P",{"data-svelte-h":!0}),o(As)!=="svelte-15wpq8c"&&(As.innerHTML=ol),rt=a(s),Qs=J(s,"P",{"data-svelte-h":!0}),o(Qs)!=="svelte-1qi6gn3"&&(Qs.innerHTML=Ul),ct=a(s),r(Rs.$$.fragment,s),mt=a(s),r(Zs.$$.fragment,s),yt=a(s),vs=J(s,"P",{"data-svelte-h":!0}),o(vs)!=="svelte-1yf9txj"&&(vs.innerHTML=$l),jt=a(s),r(R.$$.fragment,s),ut=a(s),Bs=J(s,"P",{"data-svelte-h":!0}),o(Bs)!=="svelte-ae4z87"&&(Bs.innerHTML=fl),Jt=a(s),r(Ns.$$.fragment,s),ot=a(s),Vs=J(s,"P",{"data-svelte-h":!0}),o(Vs)!=="svelte-1r7taxf"&&(Vs.innerHTML=Tl),Ut=a(s),r(Gs.$$.fragment,s),$t=a(s),r(Hs.$$.fragment,s),ft=a(s),Xs=J(s,"P",{"data-svelte-h":!0}),o(Xs)!=="svelte-rpp9y6"&&(Xs.innerHTML=dl),Tt=a(s),r(Ws.$$.fragment,s),dt=a(s),r(zs.$$.fragment,s),ht=a(s),Es=J(s,"P",{"data-svelte-h":!0}),o(Es)!=="svelte-1ccey19"&&(Es.textContent=hl),wt=a(s),r(Z.$$.fragment,s),bt=a(s),Ds=J(s,"P",{"data-svelte-h":!0}),o(Ds)!=="svelte-furr9b"&&(Ds.textContent=wl),xt=a(s),r(Ss.$$.fragment,s),gt=a(s),Ls=J(s,"P",{"data-svelte-h":!0}),o(Ls)!=="svelte-1jo93me"&&(Ls.textContent=bl),Ct=a(s),r(Ys.$$.fragment,s),kt=a(s),r(Fs.$$.fragment,s),It=a(s),r(qs.$$.fragment,s),_t=a(s),r(Ps.$$.fragment,s),At=a(s),r(Ks.$$.fragment,s),Qt=a(s),r(Os.$$.fragment,s),Rt=a(s),r(se.$$.fragment,s),Zt=a(s),r(ee.$$.fragment,s),vt=a(s),r(te.$$.fragment,s),Bt=a(s),r(le.$$.fragment,s),Nt=a(s),r(v.$$.fragment,s),Vt=a(s),r(ne.$$.fragment,s),Gt=a(s),ae=J(s,"P",{}),Zl(ae).forEach(t),this.h()},h(){vl(p,"name","hf:doc:metadata"),vl(p,"content",sn)},m(s,e){Xl(document.head,p),l(s,$,e),l(s,M,e),l(s,U,e),c(w,s,e),l(s,f,e),c(h,s,e),l(s,Me,e),l(s,B,e),l(s,ie,e),c(N,s,e),l(s,re,e),l(s,V,e),l(s,ce,e),l(s,G,e),l(s,me,e),c(H,s,e),l(s,ye,e),c(X,s,e),l(s,je,e),l(s,W,e),l(s,ue,e),l(s,z,e),l(s,Je,e),c(C,s,e),l(s,oe,e),c(E,s,e),l(s,Ue,e),c(D,s,e),l(s,$e,e),l(s,S,e),l(s,fe,e),c(k,s,e),l(s,Te,e),l(s,L,e),l(s,de,e),c(Y,s,e),l(s,he,e),c(F,s,e),l(s,we,e),l(s,q,e),l(s,be,e),c(P,s,e),l(s,xe,e),c(K,s,e),l(s,ge,e),l(s,O,e),l(s,Ce,e),c(I,s,e),l(s,ke,e),c(ss,s,e),l(s,Ie,e),c(es,s,e),l(s,_e,e),l(s,ts,e),l(s,Ae,e),c(ls,s,e),l(s,Qe,e),c(_,s,e),l(s,Re,e),l(s,ns,e),l(s,Ze,e),c(as,s,e),l(s,ve,e),c(ps,s,e),l(s,Be,e),l(s,Ms,e),l(s,Ne,e),c(A,s,e),l(s,Ve,e),l(s,is,e),l(s,Ge,e),c(rs,s,e),l(s,He,e),l(s,cs,e),l(s,Xe,e),c(ms,s,e),l(s,We,e),l(s,ys,e),l(s,ze,e),c(js,s,e),l(s,Ee,e),l(s,us,e),l(s,De,e),l(s,Js,e),l(s,Se,e),l(s,os,e),l(s,Le,e),l(s,Us,e),l(s,Ye,e),l(s,$s,e),l(s,Fe,e),l(s,fs,e),l(s,qe,e),c(Ts,s,e),l(s,Pe,e),l(s,ds,e),l(s,Ke,e),l(s,hs,e),l(s,Oe,e),c(ws,s,e),l(s,st,e),l(s,bs,e),l(s,et,e),l(s,xs,e),l(s,tt,e),c(Q,s,e),l(s,lt,e),l(s,gs,e),l(s,nt,e),c(Cs,s,e),l(s,at,e),l(s,ks,e),l(s,pt,e),c(Is,s,e),l(s,Mt,e),l(s,_s,e),l(s,it,e),l(s,As,e),l(s,rt,e),l(s,Qs,e),l(s,ct,e),c(Rs,s,e),l(s,mt,e),c(Zs,s,e),l(s,yt,e),l(s,vs,e),l(s,jt,e),c(R,s,e),l(s,ut,e),l(s,Bs,e),l(s,Jt,e),c(Ns,s,e),l(s,ot,e),l(s,Vs,e),l(s,Ut,e),c(Gs,s,e),l(s,$t,e),c(Hs,s,e),l(s,ft,e),l(s,Xs,e),l(s,Tt,e),c(Ws,s,e),l(s,dt,e),c(zs,s,e),l(s,ht,e),l(s,Es,e),l(s,wt,e),c(Z,s,e),l(s,bt,e),l(s,Ds,e),l(s,xt,e),c(Ss,s,e),l(s,gt,e),l(s,Ls,e),l(s,Ct,e),c(Ys,s,e),l(s,kt,e),c(Fs,s,e),l(s,It,e),c(qs,s,e),l(s,_t,e),c(Ps,s,e),l(s,At,e),c(Ks,s,e),l(s,Qt,e),c(Os,s,e),l(s,Rt,e),c(se,s,e),l(s,Zt,e),c(ee,s,e),l(s,vt,e),c(te,s,e),l(s,Bt,e),c(le,s,e),l(s,Nt,e),c(v,s,e),l(s,Vt,e),c(ne,s,e),l(s,Gt,e),l(s,ae,e),Ht=!0},p(s,[e]){const xl={};e&2&&(xl.$$scope={dirty:e,ctx:s}),C.$set(xl);const gl={};e&2&&(gl.$$scope={dirty:e,ctx:s}),k.$set(gl);const Cl={};e&2&&(Cl.$$scope={dirty:e,ctx:s}),I.$set(Cl);const kl={};e&2&&(kl.$$scope={dirty:e,ctx:s}),_.$set(kl);const Il={};e&2&&(Il.$$scope={dirty:e,ctx:s}),A.$set(Il);const _l={};e&2&&(_l.$$scope={dirty:e,ctx:s}),Q.$set(_l);const Al={};e&2&&(Al.$$scope={dirty:e,ctx:s}),R.$set(Al);const Ql={};e&2&&(Ql.$$scope={dirty:e,ctx:s}),Z.$set(Ql);const Rl={};e&2&&(Rl.$$scope={dirty:e,ctx:s}),v.$set(Rl)},i(s){Ht||(m(w.$$.fragment,s),m(h.$$.fragment,s),m(N.$$.fragment,s),m(H.$$.fragment,s),m(X.$$.fragment,s),m(C.$$.fragment,s),m(E.$$.fragment,s),m(D.$$.fragment,s),m(k.$$.fragment,s),m(Y.$$.fragment,s),m(F.$$.fragment,s),m(P.$$.fragment,s),m(K.$$.fragment,s),m(I.$$.fragment,s),m(ss.$$.fragment,s),m(es.$$.fragment,s),m(ls.$$.fragment,s),m(_.$$.fragment,s),m(as.$$.fragment,s),m(ps.$$.fragment,s),m(A.$$.fragment,s),m(rs.$$.fragment,s),m(ms.$$.fragment,s),m(js.$$.fragment,s),m(Ts.$$.fragment,s),m(ws.$$.fragment,s),m(Q.$$.fragment,s),m(Cs.$$.fragment,s),m(Is.$$.fragment,s),m(Rs.$$.fragment,s),m(Zs.$$.fragment,s),m(R.$$.fragment,s),m(Ns.$$.fragment,s),m(Gs.$$.fragment,s),m(Hs.$$.fragment,s),m(Ws.$$.fragment,s),m(zs.$$.fragment,s),m(Z.$$.fragment,s),m(Ss.$$.fragment,s),m(Ys.$$.fragment,s),m(Fs.$$.fragment,s),m(qs.$$.fragment,s),m(Ps.$$.fragment,s),m(Ks.$$.fragment,s),m(Os.$$.fragment,s),m(se.$$.fragment,s),m(ee.$$.fragment,s),m(te.$$.fragment,s),m(le.$$.fragment,s),m(v.$$.fragment,s),m(ne.$$.fragment,s),Ht=!0)},o(s){y(w.$$.fragment,s),y(h.$$.fragment,s),y(N.$$.fragment,s),y(H.$$.fragment,s),y(X.$$.fragment,s),y(C.$$.fragment,s),y(E.$$.fragment,s),y(D.$$.fragment,s),y(k.$$.fragment,s),y(Y.$$.fragment,s),y(F.$$.fragment,s),y(P.$$.fragment,s),y(K.$$.fragment,s),y(I.$$.fragment,s),y(ss.$$.fragment,s),y(es.$$.fragment,s),y(ls.$$.fragment,s),y(_.$$.fragment,s),y(as.$$.fragment,s),y(ps.$$.fragment,s),y(A.$$.fragment,s),y(rs.$$.fragment,s),y(ms.$$.fragment,s),y(js.$$.fragment,s),y(Ts.$$.fragment,s),y(ws.$$.fragment,s),y(Q.$$.fragment,s),y(Cs.$$.fragment,s),y(Is.$$.fragment,s),y(Rs.$$.fragment,s),y(Zs.$$.fragment,s),y(R.$$.fragment,s),y(Ns.$$.fragment,s),y(Gs.$$.fragment,s),y(Hs.$$.fragment,s),y(Ws.$$.fragment,s),y(zs.$$.fragment,s),y(Z.$$.fragment,s),y(Ss.$$.fragment,s),y(Ys.$$.fragment,s),y(Fs.$$.fragment,s),y(qs.$$.fragment,s),y(Ps.$$.fragment,s),y(Ks.$$.fragment,s),y(Os.$$.fragment,s),y(se.$$.fragment,s),y(ee.$$.fragment,s),y(te.$$.fragment,s),y(le.$$.fragment,s),y(v.$$.fragment,s),y(ne.$$.fragment,s),Ht=!1},d(s){s&&(t($),t(M),t(U),t(f),t(Me),t(B),t(ie),t(re),t(V),t(ce),t(G),t(me),t(ye),t(je),t(W),t(ue),t(z),t(Je),t(oe),t(Ue),t($e),t(S),t(fe),t(Te),t(L),t(de),t(he),t(we),t(q),t(be),t(xe),t(ge),t(O),t(Ce),t(ke),t(Ie),t(_e),t(ts),t(Ae),t(Qe),t(Re),t(ns),t(Ze),t(ve),t(Be),t(Ms),t(Ne),t(Ve),t(is),t(Ge),t(He),t(cs),t(Xe),t(We),t(ys),t(ze),t(Ee),t(us),t(De),t(Js),t(Se),t(os),t(Le),t(Us),t(Ye),t($s),t(Fe),t(fs),t(qe),t(Pe),t(ds),t(Ke),t(hs),t(Oe),t(st),t(bs),t(et),t(xs),t(tt),t(lt),t(gs),t(nt),t(at),t(ks),t(pt),t(Mt),t(_s),t(it),t(As),t(rt),t(Qs),t(ct),t(mt),t(yt),t(vs),t(jt),t(ut),t(Bs),t(Jt),t(ot),t(Vs),t(Ut),t($t),t(ft),t(Xs),t(Tt),t(dt),t(ht),t(Es),t(wt),t(bt),t(Ds),t(xt),t(gt),t(Ls),t(Ct),t(kt),t(It),t(_t),t(At),t(Qt),t(Rt),t(Zt),t(vt),t(Bt),t(Nt),t(Vt),t(Gt),t(ae)),t(p),j(w,s),j(h,s),j(N,s),j(H,s),j(X,s),j(C,s),j(E,s),j(D,s),j(k,s),j(Y,s),j(F,s),j(P,s),j(K,s),j(I,s),j(ss,s),j(es,s),j(ls,s),j(_,s),j(as,s),j(ps,s),j(A,s),j(rs,s),j(ms,s),j(js,s),j(Ts,s),j(ws,s),j(Q,s),j(Cs,s),j(Is,s),j(Rs,s),j(Zs,s),j(R,s),j(Ns,s),j(Gs,s),j(Hs,s),j(Ws,s),j(zs,s),j(Z,s),j(Ss,s),j(Ys,s),j(Fs,s),j(qs,s),j(Ps,s),j(Ks,s),j(Os,s),j(se,s),j(ee,s),j(te,s),j(le,s),j(v,s),j(ne,s)}}}const sn=`{"title":"데이터 처리","local":"processing-the-data","sections":[{"title":"Hubμ—μ„œ 데이터 μ„ΈνŠΈ κ°€μ Έμ˜€κΈ°","local":"loading-a-dataset-from-the-hub","sections":[],"depth":3},{"title":"데이터 μ„ΈνŠΈ μ „μ²˜λ¦¬","local":"preprocessing-a-dataset","sections":[{"title":"동적 νŒ¨λ”©","local":"dynamic-padding","sections":[],"depth":5}],"depth":3},{"title":"μ„Ήμ…˜ ν€΄μ¦ˆ","local":"section-quiz","sections":[{"title":"1. batched=True 와 ν•¨κ»˜ Dataset.map() 을 μ‚¬μš©ν•˜λŠ” μ£Όμš” μž₯점은 λ¬΄μ—‡μΈκ°€μš”?","local":"1-batchedtrue-와-ν•¨κ»˜-datasetmap-을-μ‚¬μš©ν•˜λŠ”-μ£Όμš”-μž₯점은-λ¬΄μ—‡μΈκ°€μš”","sections":[],"depth":3},{"title":"2. 데이터 μ„ΈνŠΈμ˜ μ΅œλŒ€ 길이둜 λͺ¨λ“  μ‹œν€€μŠ€λ₯Ό νŒ¨λ”©ν•˜λŠ” λŒ€μ‹  동적 νŒ¨λ”©μ„ μ‚¬μš©ν•˜λŠ” μ΄μœ λŠ” λ¬΄μ—‡μΈκ°€μš”?","local":"2-데이터-μ„ΈνŠΈμ˜-μ΅œλŒ€-길이둜-λͺ¨λ“ -μ‹œν€€μŠ€λ₯Ό-νŒ¨λ”©ν•˜λŠ”-λŒ€μ‹ -동적-νŒ¨λ”©μ„-μ‚¬μš©ν•˜λŠ”-μ΄μœ λŠ”-λ¬΄μ—‡μΈκ°€μš”","sections":[],"depth":3},{"title":"3. BERT ν† ν°ν™”μ—μ„œ token_type_ids ν•„λ“œλŠ” 무엇을 λ‚˜νƒ€λ‚΄λ‚˜μš”?","local":"3-bert-ν† ν°ν™”μ—μ„œ-tokentypeids-ν•„λ“œλŠ”-무엇을-λ‚˜νƒ€λ‚΄λ‚˜μš”","sections":[],"depth":3},{"title":"4. load_dataset('glue', 'mrpc') 둜 데이터 μ„ΈνŠΈλ₯Ό λ‘œλ”©ν•  λ•Œ 두 번째 μΈμˆ˜λŠ” 무엇을 μ§€μ •ν•˜λ‚˜μš”?","local":"4-loaddatasetglue-mrpc-둜-데이터-μ„ΈνŠΈλ₯Ό-λ‘œλ”©ν• -λ•Œ-두-번째-μΈμˆ˜λŠ”-무엇을-μ§€μ •ν•˜λ‚˜μš”","sections":[],"depth":3},{"title":"5. ν›ˆλ ¨ 전에 β€˜sentence1’과 β€˜sentence2’ 같은 열을 μ œκ±°ν•˜λŠ” λͺ©μ μ€ λ¬΄μ—‡μΈκ°€μš”?","local":"5-ν›ˆλ ¨-전에-sentence1κ³Ό-sentence2-같은-열을-μ œκ±°ν•˜λŠ”-λͺ©μ μ€-λ¬΄μ—‡μΈκ°€μš”","sections":[],"depth":3}],"depth":2}],"depth":1}`;function en(d){return Nl(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class mn extends Vl{constructor(p){super(),Gl(this,p,en,Ol,Bl,{})}}export{mn as component};

Xet Storage Details

Size:
64.2 kB
Β·
Xet hash:
bda6e0d8406d4624b92b5bf170b4025522db15e924296b179fc9f1ad16d11de8

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.