Buckets:
| import{s as ne,o as ae}from"../chunks/scheduler.ddeee2a5.js";import{S as pe,i as Je,e as c,s as p,c as U,h as Me,a as m,d as s,b as J,f as Kl,g as r,j as I,k as le,l as oe,m as n,n as u,o,q as ee,t as i,p as T,r as se}from"../chunks/index.5e54ab65.js";import{C as ie,H as Ol,E as Ue}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.c77c9083.js";import{C as b}from"../chunks/CodeBlock.bada9946.js";import{C as te}from"../chunks/CourseFloatingBanner.6f2d819e.js";import{F as re}from"../chunks/FrameworkSwitchCourse.9bce2294.js";function ue(y){let a,M;return a=new te({props:{chapter:2,classNames:"absolute z-10 right-0 top-0",notebooks:[{label:"Google Colab",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/en/chapter2/section6_tf.ipynb"},{label:"Aws Studio",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/en/chapter2/section6_tf.ipynb"}]}}),{c(){U(a.$$.fragment)},l(t){r(a.$$.fragment,t)},m(t,j){u(a,t,j),M=!0},i(t){M||(i(a.$$.fragment,t),M=!0)},o(t){o(a.$$.fragment,t),M=!1},d(t){T(a,t)}}}function Te(y){let a,M;return a=new te({props:{chapter:2,classNames:"absolute z-10 right-0 top-0",notebooks:[{label:"Google Colab",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/en/chapter2/section6_pt.ipynb"},{label:"Aws Studio",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/en/chapter2/section6_pt.ipynb"}]}}),{c(){U(a.$$.fragment)},l(t){r(a.$$.fragment,t)},m(t,j){u(a,t,j),M=!0},i(t){M||(i(a.$$.fragment,t),M=!0)},o(t){o(a.$$.fragment,t),M=!1},d(t){T(a,t)}}}function ce(y){let a,M;return a=new b({props:{code:"aW1wb3J0JTIwdGVuc29yZmxvdyUyMGFzJTIwdGYlMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b1Rva2VuaXplciUyQyUyMFRGQXV0b01vZGVsRm9yU2VxdWVuY2VDbGFzc2lmaWNhdGlvbiUwQSUwQWNoZWNrcG9pbnQlMjAlM0QlMjAlMjJkaXN0aWxiZXJ0LWJhc2UtdW5jYXNlZC1maW5ldHVuZWQtc3N0LTItZW5nbGlzaCUyMiUwQXRva2VuaXplciUyMCUzRCUyMEF1dG9Ub2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKGNoZWNrcG9pbnQpJTBBbW9kZWwlMjAlM0QlMjBURkF1dG9Nb2RlbEZvclNlcXVlbmNlQ2xhc3NpZmljYXRpb24uZnJvbV9wcmV0cmFpbmVkKGNoZWNrcG9pbnQpJTBBc2VxdWVuY2VzJTIwJTNEJTIwJTVCJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTJDJTIwJTIyU28lMjBoYXZlJTIwSSElMjIlNUQlMEElMEF0b2tlbnMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzJTJDJTIwcGFkZGluZyUzRFRydWUlMkMlMjB0cnVuY2F0aW9uJTNEVHJ1ZSUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIydGYlMjIpJTBBb3V0cHV0JTIwJTNEJTIwbW9kZWwoKip0b2tlbnMp",highlighted:`<span class="hljs-keyword">import</span> tensorflow <span class="hljs-keyword">as</span> tf | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, TFAutoModelForSequenceClassification | |
| checkpoint = <span class="hljs-string">"distilbert-base-uncased-finetuned-sst-2-english"</span> | |
| tokenizer = AutoTokenizer.from_pretrained(checkpoint) | |
| model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint) | |
| sequences = [<span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span>, <span class="hljs-string">"So have I!"</span>] | |
| tokens = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"tf"</span>) | |
| output = model(**tokens)`,wrap:!1}}),{c(){U(a.$$.fragment)},l(t){r(a.$$.fragment,t)},m(t,j){u(a,t,j),M=!0},i(t){M||(i(a.$$.fragment,t),M=!0)},o(t){o(a.$$.fragment,t),M=!1},d(t){T(a,t)}}}function me(y){let a,M;return a=new b({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b1Rva2VuaXplciUyQyUyMEF1dG9Nb2RlbEZvclNlcXVlbmNlQ2xhc3NpZmljYXRpb24lMEElMEFjaGVja3BvaW50JTIwJTNEJTIwJTIyZGlzdGlsYmVydC1iYXNlLXVuY2FzZWQtZmluZXR1bmVkLXNzdC0yLWVuZ2xpc2glMjIlMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZChjaGVja3BvaW50KSUwQW1vZGVsJTIwJTNEJTIwQXV0b01vZGVsRm9yU2VxdWVuY2VDbGFzc2lmaWNhdGlvbi5mcm9tX3ByZXRyYWluZWQoY2hlY2twb2ludCklMEFzZXF1ZW5jZXMlMjAlM0QlMjAlNUIlMjJJJ3ZlJTIwYmVlbiUyMHdhaXRpbmclMjBmb3IlMjBhJTIwSHVnZ2luZ0ZhY2UlMjBjb3Vyc2UlMjBteSUyMHdob2xlJTIwbGlmZS4lMjIlMkMlMjAlMjJTbyUyMGhhdmUlMjBJISUyMiU1RCUwQSUwQXRva2VucyUyMCUzRCUyMHRva2VuaXplcihzZXF1ZW5jZXMlMkMlMjBwYWRkaW5nJTNEVHJ1ZSUyQyUyMHRydW5jYXRpb24lM0RUcnVlJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJwdCUyMiklMEFvdXRwdXQlMjAlM0QlMjBtb2RlbCgqKnRva2Vucyk=",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, AutoModelForSequenceClassification | |
| checkpoint = <span class="hljs-string">"distilbert-base-uncased-finetuned-sst-2-english"</span> | |
| tokenizer = AutoTokenizer.from_pretrained(checkpoint) | |
| model = AutoModelForSequenceClassification.from_pretrained(checkpoint) | |
| sequences = [<span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span>, <span class="hljs-string">"So have I!"</span>] | |
| tokens = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"pt"</span>) | |
| output = model(**tokens)`,wrap:!1}}),{c(){U(a.$$.fragment)},l(t){r(a.$$.fragment,t)},m(t,j){u(a,t,j),M=!0},i(t){M||(i(a.$$.fragment,t),M=!0)},o(t){o(a.$$.fragment,t),M=!1},d(t){T(a,t)}}}function je(y){let a,M,t,j,g,nl,Q,al,V,pl,w,f,el,E,Bl="지난 섹션에서는 대부분의 과정을 하나씩 수행해왔습니다. 토크나이저의 작동 방식을 살펴보고 토큰화, 입력 ID로의 변환, 패딩, 잘라내기 그리고 어텐션 마스크에 대해 알아봤습니다.",Jl,k,Al="하지만 2장에서 보았듯이 우리는 🤗 Transformers API의 고수준 함수로 이 모든 것을 처리할 수 있습니다. 문장을 이용해 <code>tokenizer</code>를 호출하면 모델로 넘겨줄 수 있는 입력을 얻게 됩니다.",Ml,R,ol,Z,Nl="이제 <code>model_inputs</code> 변수는 모델이 잘 동작하기 위해 필요한 모든 것을 가지고 있습니다. DistilBERT는 어텐션 마스크뿐만 아니라 입력 ID도 포함합니다. 추가적인 입력을 받는 다른 모델들도 <code>tokenizer</code> 객체에 의해 생기는 결과물을 가지고 있습니다.",il,$,_l="아래의 예시를 보면 tokenizer 메서드는 매우 강력합니다. 먼저, 이 메서드는 단일 시퀀스를 토큰화할 수 있습니다.",Ul,W,rl,C,zl="또한 API의 변경 없이 여러 개의 시퀀스를 한 번에 처리할 수 있습니다.",ul,O,Tl,B,Xl="원하는대로 패딩을 추가할 수 있습니다.",cl,A,ml,N,Dl="시퀀스 길이를 잘라낼 수도 있습니다.",jl,_,Il,z,Sl="<code>tokenizer</code> 객체를 이용해 결과를 특정 프레임워크의 텐서로 변환할 수 있으며, 이는 모델에 바로 보내질 수 있습니다. 예를 들어 아래 코드 예시에서 토크나이저가 프레임워크에 따라 다른 텐서를 반환하게 했습니다 - <code>"pt"</code>는 PyTorch 텐서를 반환하고 <code>"tf"</code>는 TensorFlow 텐서를 반환하며, <code>"np"</code>는 NumPy 배열을 반환합니다.",bl,X,yl,D,wl,S,xl="토크나이저가 반환한 입력 ID를 자세히 살펴보면 이전에 봤던 결과와 조금 다르다는 것을 알 수 있습니다.",fl,x,dl,v,hl,q,vl="시작과 끝에 추가된 토큰 ID가 있습니다. 두 시퀀스의 ID가 무엇을 의미하는지 확인하기 위해 디코딩해보겠습니다.",gl,G,Ql,Y,Vl,F,ql="토크나이저는 문장이 시작할 떄 <code>[CLS]</code>라는 특별한 토큰을 붙이고, 끝날 때는 <code>[SEP]</code> 토큰을 붙입니다. 이런 특별한 토큰을 사용하는 이유는 모델이 사전학습될 때 이 토큰들을 사용했기 때문에 추론 시 동일한 결과를 얻기 위함입니다. 참고로 몇몇 모델은 특수 토큰을 추가하지 않아도 되고, 어떤 모델은 다른 토큰을 추가하기도 합니다. 또한, 이러한 특수 토큰을 시작 부분이나 끝 부분에만 추가하는 모델도 있습니다. 어떤 경우든 토크나이저는 토크나이저로 어떤 내용이 들어올지 알고 있고 이 내용을 처리해줄 것입니다.",El,H,kl,L,Gl="지금까지 <code>tokenizer</code> 객체가 텍스트에 적용될 때 거치는 개별적인 단계를 모두 살펴보았습니다. 이제 마지막으로 이 객체가 패딩을 이용해 여러 시퀀스를 어떻게 처리하는지, 잘라내기를 통해 매우 긴 문장을 어떻게 처리하는지, 주요 API에 따라 다양한 텐서를 다루는 법을 알아봅시다.",Rl,d,h,sl,P,Zl,tl,$l;g=new re({props:{fw:y[0]}}),Q=new ie({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),V=new Ol({props:{title:"한 번에 실행하기",local:"putting-it-all-together",headingTag:"h1"}});const Yl=[Te,ue],K=[];function Fl(l,e){return l[0]==="pt"?0:1}w=Fl(y),f=K[w]=Yl[w](y),R=new b({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMEElMEFjaGVja3BvaW50JTIwJTNEJTIwJTIyZGlzdGlsYmVydC1iYXNlLXVuY2FzZWQtZmluZXR1bmVkLXNzdC0yLWVuZ2xpc2glMjIlMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZChjaGVja3BvaW50KSUwQSUwQXNlcXVlbmNlJTIwJTNEJTIwJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTBBJTBBbW9kZWxfaW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyKHNlcXVlbmNlKQ==",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer | |
| checkpoint = <span class="hljs-string">"distilbert-base-uncased-finetuned-sst-2-english"</span> | |
| tokenizer = AutoTokenizer.from_pretrained(checkpoint) | |
| sequence = <span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span> | |
| model_inputs = tokenizer(sequence)`,wrap:!1}}),W=new b({props:{code:"c2VxdWVuY2UlMjAlM0QlMjAlMjJJJ3ZlJTIwYmVlbiUyMHdhaXRpbmclMjBmb3IlMjBhJTIwSHVnZ2luZ0ZhY2UlMjBjb3Vyc2UlMjBteSUyMHdob2xlJTIwbGlmZS4lMjIlMEElMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2Up",highlighted:`sequence = <span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span> | |
| model_inputs = tokenizer(sequence)`,wrap:!1}}),O=new b({props:{code:"c2VxdWVuY2VzJTIwJTNEJTIwJTVCJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTJDJTIwJTIyU28lMjBoYXZlJTIwSSElMjIlNUQlMEElMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzKQ==",highlighted:`sequences = [<span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span>, <span class="hljs-string">"So have I!"</span>] | |
| model_inputs = tokenizer(sequences)`,wrap:!1}}),A=new b({props:{code:"JTIzJTIwJUVBJUIwJTgwJUVDJTlFJUE1JTIwJUVBJUI4JUI0JTIwJUVDJThCJTlDJUVEJTgwJTgwJUVDJThBJUE0JUVDJTlEJTk4JTIwJUVBJUI4JUI4JUVDJTlEJUI0JUVDJTk3JTkwJTIwJUVCJUE3JTlFJUVBJUIyJThDJTIwJUVEJThDJUE4JUVCJTk0JUE5JUVDJTlEJTg0JTIwJUVDJUI2JTk0JUVBJUIwJTgwJUVEJTk1JUE5JUVCJThCJTg4JUVCJThCJUE0LiUwQW1vZGVsX2lucHV0cyUyMCUzRCUyMHRva2VuaXplcihzZXF1ZW5jZXMlMkMlMjBwYWRkaW5nJTNEJTIybG9uZ2VzdCUyMiklMEElMEElMjMlMjAlRUIlQUElQTglRUIlOEQlQjglRUMlOUQlQjQlMjAlRUMlQTclODAlRUMlOUIlOTAlRUQlOTUlOTglRUIlOEElOTQlMjAlRUMlQjUlOUMlRUIlOEMlODAlMjAlRUMlOEIlOUMlRUQlODAlODAlRUMlOEElQTQlMjAlRUElQjglQjglRUMlOUQlQjQlRUMlOTclOTAlMjAlRUIlQTclOUUlRUElQjIlOEMlMjAlRUQlOEMlQTglRUIlOTQlQTklRUMlOUQlODQlMjAlRUMlQjYlOTQlRUElQjAlODAlRUQlOTUlQTklRUIlOEIlODglRUIlOEIlQTQuJTBBJTIzJTIwKEJFUlQlRUIlODIlOTglMjBEaXN0aWxCRVJUJUVDJTlEJTk4JTIwJUVDJUI1JTlDJUVCJThDJTgwJTIwJUVBJUI4JUI4JUVDJTlEJUI0JUVCJThBJTk0JTIwNTEyKSUwQW1vZGVsX2lucHV0cyUyMCUzRCUyMHRva2VuaXplcihzZXF1ZW5jZXMlMkMlMjBwYWRkaW5nJTNEJTIybWF4X2xlbmd0aCUyMiklMEElMEElMjMlMjAlRUMlQTclODAlRUMlQTAlOTUlRUQlOTUlOUMlMjAlRUElQjglQjglRUMlOUQlQjQlRUMlOTclOTAlMjAlRUIlQTclOUUlRUElQjIlOEMlMjAlRUQlOEMlQTglRUIlOTQlQTklRUMlOUQlODQlMjAlRUMlQjYlOTQlRUElQjAlODAlRUQlOTUlQTklRUIlOEIlODglRUIlOEIlQTQuJTBBbW9kZWxfaW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyKHNlcXVlbmNlcyUyQyUyMHBhZGRpbmclM0QlMjJtYXhfbGVuZ3RoJTIyJTJDJTIwbWF4X2xlbmd0aCUzRDgp",highlighted:`<span class="hljs-comment"># 가장 긴 시퀀스의 길이에 맞게 패딩을 추가합니다.</span> | |
| model_inputs = tokenizer(sequences, padding=<span class="hljs-string">"longest"</span>) | |
| <span class="hljs-comment"># 모델이 지원하는 최대 시퀀스 길이에 맞게 패딩을 추가합니다.</span> | |
| <span class="hljs-comment"># (BERT나 DistilBERT의 최대 길이는 512)</span> | |
| model_inputs = tokenizer(sequences, padding=<span class="hljs-string">"max_length"</span>) | |
| <span class="hljs-comment"># 지정한 길이에 맞게 패딩을 추가합니다.</span> | |
| model_inputs = tokenizer(sequences, padding=<span class="hljs-string">"max_length"</span>, max_length=<span class="hljs-number">8</span>)`,wrap:!1}}),_=new b({props:{code:"c2VxdWVuY2VzJTIwJTNEJTIwJTVCJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTJDJTIwJTIyU28lMjBoYXZlJTIwSSElMjIlNUQlMEElMEElMjMlMjAlRUIlQUElQTglRUIlOEQlQjglRUMlOUQlQjQlMjAlRUMlQTclODAlRUMlOUIlOTAlRUQlOTUlOTglRUIlOEElOTQlMjAlRUMlQjUlOUMlRUIlOEMlODAlMjAlRUMlOEIlOUMlRUQlODAlODAlRUMlOEElQTQlMjAlRUElQjglQjglRUMlOUQlQjQlRUMlOTclOTAlMjAlRUIlQTclOUUlRUElQjIlOEMlMjAlRUMlOEIlOUMlRUQlODAlODAlRUMlOEElQTQlMjAlRUElQjglQjglRUMlOUQlQjQlRUIlQTUlQkMlMjAlRUMlOUUlOTglRUIlOUQlQkMlRUIlODMlODUlRUIlOEIlODglRUIlOEIlQTQuJTBBJTIzJTIwKEJFUlQlRUIlODIlOTglMjBEaXN0aWxCRVJUJUVDJTlEJTk4JTIwJUVDJUI1JTlDJUVCJThDJTgwJTIwJUVBJUI4JUI4JUVDJTlEJUI0JUVCJThBJTk0JTIwNTEyKSUwQW1vZGVsX2lucHV0cyUyMCUzRCUyMHRva2VuaXplcihzZXF1ZW5jZXMlMkMlMjB0cnVuY2F0aW9uJTNEVHJ1ZSklMEElMEElMjMlMjAlRUMlQTclODAlRUMlQTAlOTUlRUQlOTUlOUMlMjAlRUMlQjUlOUMlRUIlOEMlODAlMjAlRUElQjglQjglRUMlOUQlQjQlRUMlOTclOTAlMjAlRUIlQTclOUUlRUElQjIlOEMlMjAlRUMlOEIlOUMlRUQlODAlODAlRUMlOEElQTQlMjAlRUElQjglQjglRUMlOUQlQjQlRUIlQTUlQkMlMjAlRUMlOUUlOTglRUIlOUQlQkMlRUIlODMlODUlRUIlOEIlODglRUIlOEIlQTQuJTBBbW9kZWxfaW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyKHNlcXVlbmNlcyUyQyUyMG1heF9sZW5ndGglM0Q4JTJDJTIwdHJ1bmNhdGlvbiUzRFRydWUp",highlighted:`sequences = [<span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span>, <span class="hljs-string">"So have I!"</span>] | |
| <span class="hljs-comment"># 모델이 지원하는 최대 시퀀스 길이에 맞게 시퀀스 길이를 잘라냅니다.</span> | |
| <span class="hljs-comment"># (BERT나 DistilBERT의 최대 길이는 512)</span> | |
| model_inputs = tokenizer(sequences, truncation=<span class="hljs-literal">True</span>) | |
| <span class="hljs-comment"># 지정한 최대 길이에 맞게 시퀀스 길이를 잘라냅니다.</span> | |
| model_inputs = tokenizer(sequences, max_length=<span class="hljs-number">8</span>, truncation=<span class="hljs-literal">True</span>)`,wrap:!1}}),X=new b({props:{code:"c2VxdWVuY2VzJTIwJTNEJTIwJTVCJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTJDJTIwJTIyU28lMjBoYXZlJTIwSSElMjIlNUQlMEElMEElMjMlMjBQeVRvcmNoJTIwJUVEJTg1JTkwJUVDJTg0JTlDJUVCJUE1JUJDJTIwJUVCJUIwJTk4JUVEJTk5JTk4JUVEJTk1JUE5JUVCJThCJTg4JUVCJThCJUE0LiUwQW1vZGVsX2lucHV0cyUyMCUzRCUyMHRva2VuaXplcihzZXF1ZW5jZXMlMkMlMjBwYWRkaW5nJTNEVHJ1ZSUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIpJTBBJTBBJTIzJTIwVGVuc29yRmxvdyUyMCVFRCU4NSU5MCVFQyU4NCU5QyVFQiVBNSVCQyUyMCVFQiVCMCU5OCVFRCU5OSU5OCVFRCU5NSVBOSVFQiU4QiU4OCVFQiU4QiVBNC4lMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzJTJDJTIwcGFkZGluZyUzRFRydWUlMkMlMjByZXR1cm5fdGVuc29ycyUzRCUyMnRmJTIyKSUwQSUwQSUyMyUyME51bVB5JTIwJUVCJUIwJUIwJUVDJTk3JUI0JUVDJTlEJTg0JTIwJUVCJUIwJTk4JUVEJTk5JTk4JUVEJTk1JUE5JUVCJThCJTg4JUVCJThCJUE0LiUwQW1vZGVsX2lucHV0cyUyMCUzRCUyMHRva2VuaXplcihzZXF1ZW5jZXMlMkMlMjBwYWRkaW5nJTNEVHJ1ZSUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIybnAlMjIp",highlighted:`sequences = [<span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span>, <span class="hljs-string">"So have I!"</span>] | |
| <span class="hljs-comment"># PyTorch 텐서를 반환합니다.</span> | |
| model_inputs = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"pt"</span>) | |
| <span class="hljs-comment"># TensorFlow 텐서를 반환합니다.</span> | |
| model_inputs = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"tf"</span>) | |
| <span class="hljs-comment"># NumPy 배열을 반환합니다.</span> | |
| model_inputs = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"np"</span>)`,wrap:!1}}),D=new Ol({props:{title:"특수 토큰",local:"special-tokens",headingTag:"h2"}}),x=new b({props:{code:"c2VxdWVuY2UlMjAlM0QlMjAlMjJJJ3ZlJTIwYmVlbiUyMHdhaXRpbmclMjBmb3IlMjBhJTIwSHVnZ2luZ0ZhY2UlMjBjb3Vyc2UlMjBteSUyMHdob2xlJTIwbGlmZS4lMjIlMEElMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2UpJTBBcHJpbnQobW9kZWxfaW5wdXRzJTVCJTIyaW5wdXRfaWRzJTIyJTVEKSUwQSUwQXRva2VucyUyMCUzRCUyMHRva2VuaXplci50b2tlbml6ZShzZXF1ZW5jZSklMEFpZHMlMjAlM0QlMjB0b2tlbml6ZXIuY29udmVydF90b2tlbnNfdG9faWRzKHRva2VucyklMEFwcmludChpZHMp",highlighted:`sequence = <span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span> | |
| model_inputs = tokenizer(sequence) | |
| <span class="hljs-built_in">print</span>(model_inputs[<span class="hljs-string">"input_ids"</span>]) | |
| tokens = tokenizer.tokenize(sequence) | |
| ids = tokenizer.convert_tokens_to_ids(tokens) | |
| <span class="hljs-built_in">print</span>(ids)`,wrap:!1}}),v=new b({props:{code:"JTVCMTAxJTJDJTIwMTA0NSUyQyUyMDEwMDUlMkMlMjAyMzEwJTJDJTIwMjA0MiUyQyUyMDM0MDMlMkMlMjAyMDA1JTJDJTIwMTAzNyUyQyUyMDE3NjYyJTJDJTIwMTIxNzIlMkMlMjAyNjA3JTJDJTIwMjAyNiUyQyUyMDI4NzglMkMlMjAyMTY2JTJDJTIwMTAxMiUyQyUyMDEwMiU1RCUwQSU1QjEwNDUlMkMlMjAxMDA1JTJDJTIwMjMxMCUyQyUyMDIwNDIlMkMlMjAzNDAzJTJDJTIwMjAwNSUyQyUyMDEwMzclMkMlMjAxNzY2MiUyQyUyMDEyMTcyJTJDJTIwMjYwNyUyQyUyMDIwMjYlMkMlMjAyODc4JTJDJTIwMjE2NiUyQyUyMDEwMTIlNUQ=",highlighted:`[<span class="hljs-number">101</span>, <span class="hljs-number">1045</span>, <span class="hljs-number">1005</span>, <span class="hljs-number">2310</span>, <span class="hljs-number">2042</span>, <span class="hljs-number">3403</span>, <span class="hljs-number">2005</span>, <span class="hljs-number">1037</span>, <span class="hljs-number">17662</span>, <span class="hljs-number">12172</span>, <span class="hljs-number">2607</span>, <span class="hljs-number">2026</span>, <span class="hljs-number">2878</span>, <span class="hljs-number">2166</span>, <span class="hljs-number">1012</span>, <span class="hljs-number">102</span>] | |
| [<span class="hljs-number">1045</span>, <span class="hljs-number">1005</span>, <span class="hljs-number">2310</span>, <span class="hljs-number">2042</span>, <span class="hljs-number">3403</span>, <span class="hljs-number">2005</span>, <span class="hljs-number">1037</span>, <span class="hljs-number">17662</span>, <span class="hljs-number">12172</span>, <span class="hljs-number">2607</span>, <span class="hljs-number">2026</span>, <span class="hljs-number">2878</span>, <span class="hljs-number">2166</span>, <span class="hljs-number">1012</span>]`,wrap:!1}}),G=new b({props:{code:"cHJpbnQodG9rZW5pemVyLmRlY29kZShtb2RlbF9pbnB1dHMlNUIlMjJpbnB1dF9pZHMlMjIlNUQpKSUwQXByaW50KHRva2VuaXplci5kZWNvZGUoaWRzKSk=",highlighted:`<span class="hljs-built_in">print</span>(tokenizer.decode(model_inputs[<span class="hljs-string">"input_ids"</span>])) | |
| <span class="hljs-built_in">print</span>(tokenizer.decode(ids))`,wrap:!1}}),Y=new b({props:{code:"JTIyJTVCQ0xTJTVEJTIwaSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMGh1Z2dpbmdmYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIwJTVCU0VQJTVEJTIyJTBBJTIyaSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMGh1Z2dpbmdmYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIy",highlighted:`<span class="hljs-string">"[CLS] i've been waiting for a huggingface course my whole life. [SEP]"</span> | |
| <span class="hljs-string">"i've been waiting for a huggingface course my whole life."</span>`,wrap:!1}}),H=new Ol({props:{title:"마무리: 토크나이저에서 모델까지",local:"wrapping-up-from-tokenizer-to-model",headingTag:"h2"}});const Hl=[me,ce],ll=[];function Ll(l,e){return l[0]==="pt"?0:1}return d=Ll(y),h=ll[d]=Hl[d](y),P=new Ue({props:{source:"https://github.com/huggingface/course/blob/main/chapters/ko/chapter2/6.mdx"}}),{c(){a=c("meta"),M=p(),t=c("p"),j=p(),U(g.$$.fragment),nl=p(),U(Q.$$.fragment),al=p(),U(V.$$.fragment),pl=p(),f.c(),el=p(),E=c("p"),E.textContent=Bl,Jl=p(),k=c("p"),k.innerHTML=Al,Ml=p(),U(R.$$.fragment),ol=p(),Z=c("p"),Z.innerHTML=Nl,il=p(),$=c("p"),$.textContent=_l,Ul=p(),U(W.$$.fragment),rl=p(),C=c("p"),C.textContent=zl,ul=p(),U(O.$$.fragment),Tl=p(),B=c("p"),B.textContent=Xl,cl=p(),U(A.$$.fragment),ml=p(),N=c("p"),N.textContent=Dl,jl=p(),U(_.$$.fragment),Il=p(),z=c("p"),z.innerHTML=Sl,bl=p(),U(X.$$.fragment),yl=p(),U(D.$$.fragment),wl=p(),S=c("p"),S.textContent=xl,fl=p(),U(x.$$.fragment),dl=p(),U(v.$$.fragment),hl=p(),q=c("p"),q.textContent=vl,gl=p(),U(G.$$.fragment),Ql=p(),U(Y.$$.fragment),Vl=p(),F=c("p"),F.innerHTML=ql,El=p(),U(H.$$.fragment),kl=p(),L=c("p"),L.innerHTML=Gl,Rl=p(),h.c(),sl=p(),U(P.$$.fragment),Zl=p(),tl=c("p"),this.h()},l(l){const e=Me("svelte-u9bgzb",document.head);a=m(e,"META",{name:!0,content:!0}),e.forEach(s),M=J(l),t=m(l,"P",{}),Kl(t).forEach(s),j=J(l),r(g.$$.fragment,l),nl=J(l),r(Q.$$.fragment,l),al=J(l),r(V.$$.fragment,l),pl=J(l),f.l(l),el=J(l),E=m(l,"P",{"data-svelte-h":!0}),I(E)!=="svelte-2hwo3u"&&(E.textContent=Bl),Jl=J(l),k=m(l,"P",{"data-svelte-h":!0}),I(k)!=="svelte-tb9qid"&&(k.innerHTML=Al),Ml=J(l),r(R.$$.fragment,l),ol=J(l),Z=m(l,"P",{"data-svelte-h":!0}),I(Z)!=="svelte-11so93b"&&(Z.innerHTML=Nl),il=J(l),$=m(l,"P",{"data-svelte-h":!0}),I($)!=="svelte-bvmnox"&&($.textContent=_l),Ul=J(l),r(W.$$.fragment,l),rl=J(l),C=m(l,"P",{"data-svelte-h":!0}),I(C)!=="svelte-1y7spzk"&&(C.textContent=zl),ul=J(l),r(O.$$.fragment,l),Tl=J(l),B=m(l,"P",{"data-svelte-h":!0}),I(B)!=="svelte-1j2pysy"&&(B.textContent=Xl),cl=J(l),r(A.$$.fragment,l),ml=J(l),N=m(l,"P",{"data-svelte-h":!0}),I(N)!=="svelte-168onl5"&&(N.textContent=Dl),jl=J(l),r(_.$$.fragment,l),Il=J(l),z=m(l,"P",{"data-svelte-h":!0}),I(z)!=="svelte-xpsxoa"&&(z.innerHTML=Sl),bl=J(l),r(X.$$.fragment,l),yl=J(l),r(D.$$.fragment,l),wl=J(l),S=m(l,"P",{"data-svelte-h":!0}),I(S)!=="svelte-5tlyj5"&&(S.textContent=xl),fl=J(l),r(x.$$.fragment,l),dl=J(l),r(v.$$.fragment,l),hl=J(l),q=m(l,"P",{"data-svelte-h":!0}),I(q)!=="svelte-pt8kc1"&&(q.textContent=vl),gl=J(l),r(G.$$.fragment,l),Ql=J(l),r(Y.$$.fragment,l),Vl=J(l),F=m(l,"P",{"data-svelte-h":!0}),I(F)!=="svelte-1798v56"&&(F.innerHTML=ql),El=J(l),r(H.$$.fragment,l),kl=J(l),L=m(l,"P",{"data-svelte-h":!0}),I(L)!=="svelte-1cd4adl"&&(L.innerHTML=Gl),Rl=J(l),h.l(l),sl=J(l),r(P.$$.fragment,l),Zl=J(l),tl=m(l,"P",{}),Kl(tl).forEach(s),this.h()},h(){le(a,"name","hf:doc:metadata"),le(a,"content",Ie)},m(l,e){oe(document.head,a),n(l,M,e),n(l,t,e),n(l,j,e),u(g,l,e),n(l,nl,e),u(Q,l,e),n(l,al,e),u(V,l,e),n(l,pl,e),K[w].m(l,e),n(l,el,e),n(l,E,e),n(l,Jl,e),n(l,k,e),n(l,Ml,e),u(R,l,e),n(l,ol,e),n(l,Z,e),n(l,il,e),n(l,$,e),n(l,Ul,e),u(W,l,e),n(l,rl,e),n(l,C,e),n(l,ul,e),u(O,l,e),n(l,Tl,e),n(l,B,e),n(l,cl,e),u(A,l,e),n(l,ml,e),n(l,N,e),n(l,jl,e),u(_,l,e),n(l,Il,e),n(l,z,e),n(l,bl,e),u(X,l,e),n(l,yl,e),u(D,l,e),n(l,wl,e),n(l,S,e),n(l,fl,e),u(x,l,e),n(l,dl,e),u(v,l,e),n(l,hl,e),n(l,q,e),n(l,gl,e),u(G,l,e),n(l,Ql,e),u(Y,l,e),n(l,Vl,e),n(l,F,e),n(l,El,e),u(H,l,e),n(l,kl,e),n(l,L,e),n(l,Rl,e),ll[d].m(l,e),n(l,sl,e),u(P,l,e),n(l,Zl,e),n(l,tl,e),$l=!0},p(l,[e]){const Pl={};e&1&&(Pl.fw=l[0]),g.$set(Pl);let Wl=w;w=Fl(l),w!==Wl&&(se(),o(K[Wl],1,1,()=>{K[Wl]=null}),ee(),f=K[w],f||(f=K[w]=Yl[w](l),f.c()),i(f,1),f.m(el.parentNode,el));let Cl=d;d=Ll(l),d!==Cl&&(se(),o(ll[Cl],1,1,()=>{ll[Cl]=null}),ee(),h=ll[d],h||(h=ll[d]=Hl[d](l),h.c()),i(h,1),h.m(sl.parentNode,sl))},i(l){$l||(i(g.$$.fragment,l),i(Q.$$.fragment,l),i(V.$$.fragment,l),i(f),i(R.$$.fragment,l),i(W.$$.fragment,l),i(O.$$.fragment,l),i(A.$$.fragment,l),i(_.$$.fragment,l),i(X.$$.fragment,l),i(D.$$.fragment,l),i(x.$$.fragment,l),i(v.$$.fragment,l),i(G.$$.fragment,l),i(Y.$$.fragment,l),i(H.$$.fragment,l),i(h),i(P.$$.fragment,l),$l=!0)},o(l){o(g.$$.fragment,l),o(Q.$$.fragment,l),o(V.$$.fragment,l),o(f),o(R.$$.fragment,l),o(W.$$.fragment,l),o(O.$$.fragment,l),o(A.$$.fragment,l),o(_.$$.fragment,l),o(X.$$.fragment,l),o(D.$$.fragment,l),o(x.$$.fragment,l),o(v.$$.fragment,l),o(G.$$.fragment,l),o(Y.$$.fragment,l),o(H.$$.fragment,l),o(h),o(P.$$.fragment,l),$l=!1},d(l){l&&(s(M),s(t),s(j),s(nl),s(al),s(pl),s(el),s(E),s(Jl),s(k),s(Ml),s(ol),s(Z),s(il),s($),s(Ul),s(rl),s(C),s(ul),s(Tl),s(B),s(cl),s(ml),s(N),s(jl),s(Il),s(z),s(bl),s(yl),s(wl),s(S),s(fl),s(dl),s(hl),s(q),s(gl),s(Ql),s(Vl),s(F),s(El),s(kl),s(L),s(Rl),s(sl),s(Zl),s(tl)),s(a),T(g,l),T(Q,l),T(V,l),K[w].d(l),T(R,l),T(W,l),T(O,l),T(A,l),T(_,l),T(X,l),T(D,l),T(x,l),T(v,l),T(G,l),T(Y,l),T(H,l),ll[d].d(l),T(P,l)}}}const Ie='{"title":"한 번에 실행하기","local":"putting-it-all-together","sections":[{"title":"특수 토큰","local":"special-tokens","sections":[],"depth":2},{"title":"마무리: 토크나이저에서 모델까지","local":"wrapping-up-from-tokenizer-to-model","sections":[],"depth":2}],"depth":1}';function be(y,a,M){let t="pt";return ae(()=>{const j=new URLSearchParams(window.location.search);M(0,t=j.get("fw")||"pt")}),[t]}class Qe extends pe{constructor(a){super(),Je(this,a,be,je,ne,{})}}export{Qe as component}; | |
Xet Storage Details
- Size:
- 25.7 kB
- Xet hash:
- 95d2dc2dcd2d267c829ddd0cb411a73c895ea6f43b9857a9133653387b3c0451
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.