Buckets:

rtrm's picture
download
raw
25.4 kB
/**
 * Minified, compiled Svelte page chunk for the Korean course page whose source
 * is chapters/ko/chapter2/6.mdx (see the `source` prop passed to `oe` below).
 *
 * NOTE(review): this looks like build output. Content changes (copy, code
 * samples) presumably belong in the .mdx source, not here - confirm before
 * hand-editing. The `svelte-…` hashes compared during claiming are tied to the
 * paragraph text, so do not change the text strings in isolation.
 *
 * Layout of the module:
 *   - Ue / ie : banner fragments (TensorFlow / PyTorch notebook links)
 *   - re / ce : final code-sample fragments (TensorFlow / PyTorch)
 *   - ue      : fragment for the whole page
 *   - Te      : JSON string written to the `hf:doc:metadata` <meta> tag
 *   - me      : instance function holding the framework flag ("pt" by default)
 *   - he      : the exported component class
 *
 * Every fragment factory returns an object with c/l/m/i/o/d methods that
 * delegate to helpers imported from ../chunks/index (presumably Svelte's
 * create / claim / mount / intro / outro / destroy lifecycle - confirm against
 * that chunk).
 */
import{s as se,o as te}from"../chunks/scheduler.37c15a92.js";
import{S as ne,i as ae,g as T,s as p,r as i,A as pe,h as m,f as s,c as J,j as Ll,u as r,x as I,k as Pl,y as Je,a as n,v as c,t as o,b as Kl,d as U,w as u,p as le}from"../chunks/index.2bf4358c.js";
import{C as b}from"../chunks/CodeBlock.4e987730.js";
import{C as ee}from"../chunks/CourseFloatingBanner.9ff4c771.js";
import{F as Me}from"../chunks/FrameworkSwitchCourse.8d4d4ab6.js";
import{H as Wl,E as oe}from"../chunks/getInferenceSnippets.1837c472.js";
// Ue: fragment wrapping a CourseFloatingBanner (`ee`) for chapter 2 with the
// TensorFlow notebook links (section6_tf.ipynb). Selected by `ue` when the
// framework flag is anything other than "pt".
function Ue(w){let a,M;return a=new ee({props:{chapter:2,classNames:"absolute z-10 right-0 top-0",notebooks:[{label:"Google Colab",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/en/chapter2/section6_tf.ipynb"},{label:"Aws Studio",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/en/chapter2/section6_tf.ipynb"}]}}),{c(){i(a.$$.fragment)},l(t){r(a.$$.fragment,t)},m(t,j){c(a,t,j),M=!0},i(t){M||(U(a.$$.fragment,t),M=!0)},o(t){o(a.$$.fragment,t),M=!1},d(t){u(a,t)}}}
// ie: same banner as Ue but with the PyTorch notebook links (section6_pt.ipynb).
// Selected by `ue` when the framework flag is "pt".
function ie(w){let a,M;return a=new ee({props:{chapter:2,classNames:"absolute z-10 right-0 top-0",notebooks:[{label:"Google Colab",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/en/chapter2/section6_pt.ipynb"},{label:"Aws Studio",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/en/chapter2/section6_pt.ipynb"}]}}),{c(){i(a.$$.fragment)},l(t){r(a.$$.fragment,t)},m(t,j){c(a,t,j),M=!0},i(t){M||(U(a.$$.fragment,t),M=!0)},o(t){o(a.$$.fragment,t),M=!1},d(t){u(a,t)}}}
// re: fragment wrapping a CodeBlock (`b`) with the TensorFlow version of the
// closing code sample. `code` is an encoded copy of the snippet, `highlighted`
// is its pre-rendered highlight.js HTML; line breaks inside the template
// literal are part of the value.
function re(w){let a,M;return a=new b({props:{code:"aW1wb3J0JTIwdGVuc29yZmxvdyUyMGFzJTIwdGYlMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b1Rva2VuaXplciUyQyUyMFRGQXV0b01vZGVsRm9yU2VxdWVuY2VDbGFzc2lmaWNhdGlvbiUwQSUwQWNoZWNrcG9pbnQlMjAlM0QlMjAlMjJkaXN0aWxiZXJ0LWJhc2UtdW5jYXNlZC1maW5ldHVuZWQtc3N0LTItZW5nbGlzaCUyMiUwQXRva2VuaXplciUyMCUzRCUyMEF1dG9Ub2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKGNoZWNrcG9pbnQpJTBBbW9kZWwlMjAlM0QlMjBURkF1dG9Nb2RlbEZvclNlcXVlbmNlQ2xhc3NpZmljYXRpb24uZnJvbV9wcmV0cmFpbmVkKGNoZWNrcG9pbnQpJTBBc2VxdWVuY2VzJTIwJTNEJTIwJTVCJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTJDJTIwJTIyU28lMjBoYXZlJTIwSSElMjIlNUQlMEElMEF0b2tlbnMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzJTJDJTIwcGFkZGluZyUzRFRydWUlMkMlMjB0cnVuY2F0aW9uJTNEVHJ1ZSUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIydGYlMjIpJTBBb3V0cHV0JTIwJTNEJTIwbW9kZWwoKip0b2tlbnMp",highlighted:`<span class="hljs-keyword">import</span> tensorflow <span class="hljs-keyword">as</span> tf
<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, TFAutoModelForSequenceClassification
checkpoint = <span class="hljs-string">&quot;distilbert-base-uncased-finetuned-sst-2-english&quot;</span>
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
sequences = [<span class="hljs-string">&quot;I&#x27;ve been waiting for a HuggingFace course my whole life.&quot;</span>, <span class="hljs-string">&quot;So have I!&quot;</span>]
tokens = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">&quot;tf&quot;</span>)
output = model(**tokens)`,wrap:!1}}),{c(){i(a.$$.fragment)},l(t){r(a.$$.fragment,t)},m(t,j){c(a,t,j),M=!0},i(t){M||(U(a.$$.fragment,t),M=!0)},o(t){o(a.$$.fragment,t),M=!1},d(t){u(a,t)}}}
// ce: PyTorch counterpart of `re` (same structure, "pt" tensors).
function ce(w){let a,M;return a=new b({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b1Rva2VuaXplciUyQyUyMEF1dG9Nb2RlbEZvclNlcXVlbmNlQ2xhc3NpZmljYXRpb24lMEElMEFjaGVja3BvaW50JTIwJTNEJTIwJTIyZGlzdGlsYmVydC1iYXNlLXVuY2FzZWQtZmluZXR1bmVkLXNzdC0yLWVuZ2xpc2glMjIlMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZChjaGVja3BvaW50KSUwQW1vZGVsJTIwJTNEJTIwQXV0b01vZGVsRm9yU2VxdWVuY2VDbGFzc2lmaWNhdGlvbi5mcm9tX3ByZXRyYWluZWQoY2hlY2twb2ludCklMEFzZXF1ZW5jZXMlMjAlM0QlMjAlNUIlMjJJJ3ZlJTIwYmVlbiUyMHdhaXRpbmclMjBmb3IlMjBhJTIwSHVnZ2luZ0ZhY2UlMjBjb3Vyc2UlMjBteSUyMHdob2xlJTIwbGlmZS4lMjIlMkMlMjAlMjJTbyUyMGhhdmUlMjBJISUyMiU1RCUwQSUwQXRva2VucyUyMCUzRCUyMHRva2VuaXplcihzZXF1ZW5jZXMlMkMlMjBwYWRkaW5nJTNEVHJ1ZSUyQyUyMHRydW5jYXRpb24lM0RUcnVlJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJwdCUyMiklMEFvdXRwdXQlMjAlM0QlMjBtb2RlbCgqKnRva2Vucyk=",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, AutoModelForSequenceClassification
checkpoint = <span class="hljs-string">&quot;distilbert-base-uncased-finetuned-sst-2-english&quot;</span>
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
sequences = [<span class="hljs-string">&quot;I&#x27;ve been waiting for a HuggingFace course my whole life.&quot;</span>, <span class="hljs-string">&quot;So have I!&quot;</span>]
tokens = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)
output = model(**tokens)`,wrap:!1}}),{c(){i(a.$$.fragment)},l(t){r(a.$$.fragment,t)},m(t,j){c(a,t,j),M=!0},i(t){M||(U(a.$$.fragment,t),M=!0)},o(t){o(a.$$.fragment,t),M=!1},d(t){u(a,t)}}}
// ue: fragment for the whole page.
//   - w[0] is the framework flag; xl()/Fl() map it to index 0 ("pt") or 1
//     (anything else) to pick the banner ([ie,Ue]) and the closing code sample
//     ([ce,re]).
//   - The two-letter string locals (Ol, Cl, Bl, …) hold paragraph text that is
//     assigned via textContent / innerHTML in c() and re-checked in l().
//   - h() writes the `hf:doc:metadata` <meta> tag using `Te`.
//   - p() forwards a changed flag to the FrameworkSwitchCourse instance (`g`)
//     and swaps the two framework-dependent blocks.
// The line break inside the `Gl` paragraph is written as a "\n" escape: a raw
// line terminator inside a double-quoted string literal is a SyntaxError.
function ue(w){let a,M,t,j,g,tl,Q,nl,y,d,ll,V,Ol="지난 섹션에서는 대부분의 과정을 하나씩 수행해왔습니다. 토크나이저의 작동 방식을 살펴보고 토큰화, 입력 ID로의 변환, 패딩, 잘라내기 그리고 어텐션 마스크에 대해 알아봤습니다.",al,E,Cl="하지만 2장에서 보았듯이 우리는 🤗 Transformers API의 고수준 함수로 이 모든 것을 처리할 수 있습니다. 문장을 이용해 <code>tokenizer</code>를 호출하면 모델로 넘겨줄 수 있는 입력을 얻게 됩니다.",pl,k,Jl,R,Bl="이제 <code>model_inputs</code> 변수는 모델이 잘 동작하기 위해 필요한 모든 것을 가지고 있습니다. DistilBERT는 어텐션 마스크뿐만 아니라 입력 ID도 포함합니다. 추가적인 입력을 받는 다른 모델들도 <code>tokenizer</code> 객체에 의해 생기는 결과물을 가지고 있습니다.",Ml,Z,Al="아래의 예시를 보면 tokenizer 메서드는 매우 강력합니다. 먼저, 이 메서드는 단일 시퀀스를 토큰화할 수 있습니다.",ol,$,Ul,W,Nl="또한 API의 변경 없이 여러 개의 시퀀스를 한 번에 처리할 수 있습니다.",il,O,rl,C,_l="원하는대로 패딩을 추가할 수 있습니다.",cl,B,ul,A,zl="시퀀스 길이를 잘라낼 수도 있습니다.",Tl,N,ml,_,Xl="<code>tokenizer</code> 객체를 이용해 결과를 특정 프레임워크의 텐서로 변환할 수 있으며, 이는 모델에 바로 보내질 수 있습니다. 예를 들어 아래 코드 예시에서 토크나이저가 프레임워크에 따라 다른 텐서를 반환하게 했습니다 - <code>&quot;pt&quot;</code>는 PyTorch 텐서를 반환하고 <code>&quot;tf&quot;</code>는 TensorFlow 텐서를 반환하며, <code>&quot;np&quot;</code>는 NumPy 배열을 반환합니다.",jl,z,Il,X,bl,D,Dl="토크나이저가 반환한 입력 ID를 자세히 살펴보면 이전에 봤던 결과와 조금 다르다는 것을 알 수 있습니다.",wl,S,yl,v,dl,G,Sl="시작과 끝에 추가된 토큰 ID가 있습니다. 두 시퀀스의 ID가 무엇을 의미하는지 확인하기 위해 디코딩해보겠습니다.",hl,q,fl,x,gl,Y,vl="토크나이저는 문장이 시작할 떄 <code>[CLS]</code>라는 특별한 토큰을 붙이고, 끝날 때는 <code>[SEP]</code> 토큰을 붙입니다. 이런 특별한 토큰을 사용하는 이유는 모델이 사전학습될 때 이 토큰들을 사용했기 때문에 추론 시 동일한 결과를 얻기 위함입니다. 참고로 몇몇 모델은 특수 토큰을 추가하지 않아도 되고, 어떤 모델은 다른 토큰을 추가하기도 합니다. 또한, 이러한 특수 토큰을 시작 부분이나 끝 부분에만 추가하는 모델도 있습니다. 어떤 경우든 토크나이저는 토크나이저로 어떤 내용이 들어올지 알고 있고 이 내용을 처리해줄 것입니다.",Ql,F,Vl,H,Gl="지금까지 <code>tokenizer</code> 객체가 텍스트에 적용될 때 거치는 개별적인 단계를 모두 살펴보았습니다. \n이제 마지막으로 이 객체가 패딩을 이용해 여러 시퀀스를 어떻게 처리하는지, 잘라내기를 통해 매우 긴 문장을 어떻게 처리하는지, 주요 API에 따라 다양한 텐서를 다루는 법을 알아봅시다.",El,h,f,el,L,kl,sl,Rl;g=new Me({props:{fw:w[0]}}),Q=new Wl({props:{title:"한 번에 실행하기",local:"putting-it-all-together",headingTag:"h1"}});const ql=[ie,Ue],P=[];function xl(l,e){return l[0]==="pt"?0:1}y=xl(w),d=P[y]=ql[y](w),k=new b({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMEElMEFjaGVja3BvaW50JTIwJTNEJTIwJTIyZGlzdGlsYmVydC1iYXNlLXVuY2FzZWQtZmluZXR1bmVkLXNzdC0yLWVuZ2xpc2glMjIlMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZChjaGVja3BvaW50KSUwQSUwQXNlcXVlbmNlJTIwJTNEJTIwJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTBBJTBBbW9kZWxfaW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyKHNlcXVlbmNlKQ==",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer
checkpoint = <span class="hljs-string">&quot;distilbert-base-uncased-finetuned-sst-2-english&quot;</span>
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
sequence = <span class="hljs-string">&quot;I&#x27;ve been waiting for a HuggingFace course my whole life.&quot;</span>
model_inputs = tokenizer(sequence)`,wrap:!1}}),$=new b({props:{code:"c2VxdWVuY2UlMjAlM0QlMjAlMjJJJ3ZlJTIwYmVlbiUyMHdhaXRpbmclMjBmb3IlMjBhJTIwSHVnZ2luZ0ZhY2UlMjBjb3Vyc2UlMjBteSUyMHdob2xlJTIwbGlmZS4lMjIlMEElMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2Up",highlighted:`sequence = <span class="hljs-string">&quot;I&#x27;ve been waiting for a HuggingFace course my whole life.&quot;</span>
model_inputs = tokenizer(sequence)`,wrap:!1}}),O=new b({props:{code:"c2VxdWVuY2VzJTIwJTNEJTIwJTVCJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTJDJTIwJTIyU28lMjBoYXZlJTIwSSElMjIlNUQlMEElMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzKQ==",highlighted:`sequences = [<span class="hljs-string">&quot;I&#x27;ve been waiting for a HuggingFace course my whole life.&quot;</span>, <span class="hljs-string">&quot;So have I!&quot;</span>]
model_inputs = tokenizer(sequences)`,wrap:!1}}),B=new b({props:{code:"JTIzJTIwJUVBJUIwJTgwJUVDJTlFJUE1JTIwJUVBJUI4JUI0JTIwJUVDJThCJTlDJUVEJTgwJTgwJUVDJThBJUE0JUVDJTlEJTk4JTIwJUVBJUI4JUI4JUVDJTlEJUI0JUVDJTk3JTkwJTIwJUVCJUE3JTlFJUVBJUIyJThDJTIwJUVEJThDJUE4JUVCJTk0JUE5JUVDJTlEJTg0JTIwJUVDJUI2JTk0JUVBJUIwJTgwJUVEJTk1JUE5JUVCJThCJTg4JUVCJThCJUE0LiUwQW1vZGVsX2lucHV0cyUyMCUzRCUyMHRva2VuaXplcihzZXF1ZW5jZXMlMkMlMjBwYWRkaW5nJTNEJTIybG9uZ2VzdCUyMiklMEElMEElMjMlMjAlRUIlQUElQTglRUIlOEQlQjglRUMlOUQlQjQlMjAlRUMlQTclODAlRUMlOUIlOTAlRUQlOTUlOTglRUIlOEElOTQlMjAlRUMlQjUlOUMlRUIlOEMlODAlMjAlRUMlOEIlOUMlRUQlODAlODAlRUMlOEElQTQlMjAlRUElQjglQjglRUMlOUQlQjQlRUMlOTclOTAlMjAlRUIlQTclOUUlRUElQjIlOEMlMjAlRUQlOEMlQTglRUIlOTQlQTklRUMlOUQlODQlMjAlRUMlQjYlOTQlRUElQjAlODAlRUQlOTUlQTklRUIlOEIlODglRUIlOEIlQTQuJTBBJTIzJTIwKEJFUlQlRUIlODIlOTglMjBEaXN0aWxCRVJUJUVDJTlEJTk4JTIwJUVDJUI1JTlDJUVCJThDJTgwJTIwJUVBJUI4JUI4JUVDJTlEJUI0JUVCJThBJTk0JTIwNTEyKSUwQW1vZGVsX2lucHV0cyUyMCUzRCUyMHRva2VuaXplcihzZXF1ZW5jZXMlMkMlMjBwYWRkaW5nJTNEJTIybWF4X2xlbmd0aCUyMiklMEElMEElMjMlMjAlRUMlQTclODAlRUMlQTAlOTUlRUQlOTUlOUMlMjAlRUElQjglQjglRUMlOUQlQjQlRUMlOTclOTAlMjAlRUIlQTclOUUlRUElQjIlOEMlMjAlRUQlOEMlQTglRUIlOTQlQTklRUMlOUQlODQlMjAlRUMlQjYlOTQlRUElQjAlODAlRUQlOTUlQTklRUIlOEIlODglRUIlOEIlQTQuJTBBbW9kZWxfaW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyKHNlcXVlbmNlcyUyQyUyMHBhZGRpbmclM0QlMjJtYXhfbGVuZ3RoJTIyJTJDJTIwbWF4X2xlbmd0aCUzRDgp",highlighted:`<span class="hljs-comment"># 가장 긴 시퀀스의 길이에 맞게 패딩을 추가합니다.</span>
model_inputs = tokenizer(sequences, padding=<span class="hljs-string">&quot;longest&quot;</span>)
<span class="hljs-comment"># 모델이 지원하는 최대 시퀀스 길이에 맞게 패딩을 추가합니다.</span>
<span class="hljs-comment"># (BERT나 DistilBERT의 최대 길이는 512)</span>
model_inputs = tokenizer(sequences, padding=<span class="hljs-string">&quot;max_length&quot;</span>)
<span class="hljs-comment"># 지정한 길이에 맞게 패딩을 추가합니다.</span>
model_inputs = tokenizer(sequences, padding=<span class="hljs-string">&quot;max_length&quot;</span>, max_length=<span class="hljs-number">8</span>)`,wrap:!1}}),N=new b({props:{code:"c2VxdWVuY2VzJTIwJTNEJTIwJTVCJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTJDJTIwJTIyU28lMjBoYXZlJTIwSSElMjIlNUQlMEElMEElMjMlMjAlRUIlQUElQTglRUIlOEQlQjglRUMlOUQlQjQlMjAlRUMlQTclODAlRUMlOUIlOTAlRUQlOTUlOTglRUIlOEElOTQlMjAlRUMlQjUlOUMlRUIlOEMlODAlMjAlRUMlOEIlOUMlRUQlODAlODAlRUMlOEElQTQlMjAlRUElQjglQjglRUMlOUQlQjQlRUMlOTclOTAlMjAlRUIlQTclOUUlRUElQjIlOEMlMjAlRUMlOEIlOUMlRUQlODAlODAlRUMlOEElQTQlMjAlRUElQjglQjglRUMlOUQlQjQlRUIlQTUlQkMlMjAlRUMlOUUlOTglRUIlOUQlQkMlRUIlODMlODUlRUIlOEIlODglRUIlOEIlQTQuJTBBJTIzJTIwKEJFUlQlRUIlODIlOTglMjBEaXN0aWxCRVJUJUVDJTlEJTk4JTIwJUVDJUI1JTlDJUVCJThDJTgwJTIwJUVBJUI4JUI4JUVDJTlEJUI0JUVCJThBJTk0JTIwNTEyKSUwQW1vZGVsX2lucHV0cyUyMCUzRCUyMHRva2VuaXplcihzZXF1ZW5jZXMlMkMlMjB0cnVuY2F0aW9uJTNEVHJ1ZSklMEElMEElMjMlMjAlRUMlQTclODAlRUMlQTAlOTUlRUQlOTUlOUMlMjAlRUMlQjUlOUMlRUIlOEMlODAlMjAlRUElQjglQjglRUMlOUQlQjQlRUMlOTclOTAlMjAlRUIlQTclOUUlRUElQjIlOEMlMjAlRUMlOEIlOUMlRUQlODAlODAlRUMlOEElQTQlMjAlRUElQjglQjglRUMlOUQlQjQlRUIlQTUlQkMlMjAlRUMlOUUlOTglRUIlOUQlQkMlRUIlODMlODUlRUIlOEIlODglRUIlOEIlQTQuJTBBbW9kZWxfaW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyKHNlcXVlbmNlcyUyQyUyMG1heF9sZW5ndGglM0Q4JTJDJTIwdHJ1bmNhdGlvbiUzRFRydWUp",highlighted:`sequences = [<span class="hljs-string">&quot;I&#x27;ve been waiting for a HuggingFace course my whole life.&quot;</span>, <span class="hljs-string">&quot;So have I!&quot;</span>]
<span class="hljs-comment"># 모델이 지원하는 최대 시퀀스 길이에 맞게 시퀀스 길이를 잘라냅니다.</span>
<span class="hljs-comment"># (BERT나 DistilBERT의 최대 길이는 512)</span>
model_inputs = tokenizer(sequences, truncation=<span class="hljs-literal">True</span>)
<span class="hljs-comment"># 지정한 최대 길이에 맞게 시퀀스 길이를 잘라냅니다.</span>
model_inputs = tokenizer(sequences, max_length=<span class="hljs-number">8</span>, truncation=<span class="hljs-literal">True</span>)`,wrap:!1}}),z=new b({props:{code:"c2VxdWVuY2VzJTIwJTNEJTIwJTVCJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTJDJTIwJTIyU28lMjBoYXZlJTIwSSElMjIlNUQlMEElMEElMjMlMjBQeVRvcmNoJTIwJUVEJTg1JTkwJUVDJTg0JTlDJUVCJUE1JUJDJTIwJUVCJUIwJTk4JUVEJTk5JTk4JUVEJTk1JUE5JUVCJThCJTg4JUVCJThCJUE0LiUwQW1vZGVsX2lucHV0cyUyMCUzRCUyMHRva2VuaXplcihzZXF1ZW5jZXMlMkMlMjBwYWRkaW5nJTNEVHJ1ZSUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIpJTBBJTBBJTIzJTIwVGVuc29yRmxvdyUyMCVFRCU4NSU5MCVFQyU4NCU5QyVFQiVBNSVCQyUyMCVFQiVCMCU5OCVFRCU5OSU5OCVFRCU5NSVBOSVFQiU4QiU4OCVFQiU4QiVBNC4lMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzJTJDJTIwcGFkZGluZyUzRFRydWUlMkMlMjByZXR1cm5fdGVuc29ycyUzRCUyMnRmJTIyKSUwQSUwQSUyMyUyME51bVB5JTIwJUVCJUIwJUIwJUVDJTk3JUI0JUVDJTlEJTg0JTIwJUVCJUIwJTk4JUVEJTk5JTk4JUVEJTk1JUE5JUVCJThCJTg4JUVCJThCJUE0LiUwQW1vZGVsX2lucHV0cyUyMCUzRCUyMHRva2VuaXplcihzZXF1ZW5jZXMlMkMlMjBwYWRkaW5nJTNEVHJ1ZSUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIybnAlMjIp",highlighted:`sequences = [<span class="hljs-string">&quot;I&#x27;ve been waiting for a HuggingFace course my whole life.&quot;</span>, <span class="hljs-string">&quot;So have I!&quot;</span>]
<span class="hljs-comment"># PyTorch 텐서를 반환합니다.</span>
model_inputs = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)
<span class="hljs-comment"># TensorFlow 텐서를 반환합니다.</span>
model_inputs = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">&quot;tf&quot;</span>)
<span class="hljs-comment"># NumPy 배열을 반환합니다.</span>
model_inputs = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">&quot;np&quot;</span>)`,wrap:!1}}),X=new Wl({props:{title:"특수 토큰",local:"special-tokens",headingTag:"h2"}}),S=new b({props:{code:"c2VxdWVuY2UlMjAlM0QlMjAlMjJJJ3ZlJTIwYmVlbiUyMHdhaXRpbmclMjBmb3IlMjBhJTIwSHVnZ2luZ0ZhY2UlMjBjb3Vyc2UlMjBteSUyMHdob2xlJTIwbGlmZS4lMjIlMEElMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2UpJTBBcHJpbnQobW9kZWxfaW5wdXRzJTVCJTIyaW5wdXRfaWRzJTIyJTVEKSUwQSUwQXRva2VucyUyMCUzRCUyMHRva2VuaXplci50b2tlbml6ZShzZXF1ZW5jZSklMEFpZHMlMjAlM0QlMjB0b2tlbml6ZXIuY29udmVydF90b2tlbnNfdG9faWRzKHRva2VucyklMEFwcmludChpZHMp",highlighted:`sequence = <span class="hljs-string">&quot;I&#x27;ve been waiting for a HuggingFace course my whole life.&quot;</span>
model_inputs = tokenizer(sequence)
<span class="hljs-built_in">print</span>(model_inputs[<span class="hljs-string">&quot;input_ids&quot;</span>])
tokens = tokenizer.tokenize(sequence)
ids = tokenizer.convert_tokens_to_ids(tokens)
<span class="hljs-built_in">print</span>(ids)`,wrap:!1}}),v=new b({props:{code:"JTVCMTAxJTJDJTIwMTA0NSUyQyUyMDEwMDUlMkMlMjAyMzEwJTJDJTIwMjA0MiUyQyUyMDM0MDMlMkMlMjAyMDA1JTJDJTIwMTAzNyUyQyUyMDE3NjYyJTJDJTIwMTIxNzIlMkMlMjAyNjA3JTJDJTIwMjAyNiUyQyUyMDI4NzglMkMlMjAyMTY2JTJDJTIwMTAxMiUyQyUyMDEwMiU1RCUwQSU1QjEwNDUlMkMlMjAxMDA1JTJDJTIwMjMxMCUyQyUyMDIwNDIlMkMlMjAzNDAzJTJDJTIwMjAwNSUyQyUyMDEwMzclMkMlMjAxNzY2MiUyQyUyMDEyMTcyJTJDJTIwMjYwNyUyQyUyMDIwMjYlMkMlMjAyODc4JTJDJTIwMjE2NiUyQyUyMDEwMTIlNUQ=",highlighted:`[<span class="hljs-number">101</span>, <span class="hljs-number">1045</span>, <span class="hljs-number">1005</span>, <span class="hljs-number">2310</span>, <span class="hljs-number">2042</span>, <span class="hljs-number">3403</span>, <span class="hljs-number">2005</span>, <span class="hljs-number">1037</span>, <span class="hljs-number">17662</span>, <span class="hljs-number">12172</span>, <span class="hljs-number">2607</span>, <span class="hljs-number">2026</span>, <span class="hljs-number">2878</span>, <span class="hljs-number">2166</span>, <span class="hljs-number">1012</span>, <span class="hljs-number">102</span>]
[<span class="hljs-number">1045</span>, <span class="hljs-number">1005</span>, <span class="hljs-number">2310</span>, <span class="hljs-number">2042</span>, <span class="hljs-number">3403</span>, <span class="hljs-number">2005</span>, <span class="hljs-number">1037</span>, <span class="hljs-number">17662</span>, <span class="hljs-number">12172</span>, <span class="hljs-number">2607</span>, <span class="hljs-number">2026</span>, <span class="hljs-number">2878</span>, <span class="hljs-number">2166</span>, <span class="hljs-number">1012</span>]`,wrap:!1}}),q=new b({props:{code:"cHJpbnQodG9rZW5pemVyLmRlY29kZShtb2RlbF9pbnB1dHMlNUIlMjJpbnB1dF9pZHMlMjIlNUQpKSUwQXByaW50KHRva2VuaXplci5kZWNvZGUoaWRzKSk=",highlighted:`<span class="hljs-built_in">print</span>(tokenizer.decode(model_inputs[<span class="hljs-string">&quot;input_ids&quot;</span>]))
<span class="hljs-built_in">print</span>(tokenizer.decode(ids))`,wrap:!1}}),x=new b({props:{code:"JTIyJTVCQ0xTJTVEJTIwaSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMGh1Z2dpbmdmYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIwJTVCU0VQJTVEJTIyJTBBJTIyaSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMGh1Z2dpbmdmYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIy",highlighted:`<span class="hljs-string">&quot;[CLS] i&#x27;ve been waiting for a huggingface course my whole life. [SEP]&quot;</span>
<span class="hljs-string">&quot;i&#x27;ve been waiting for a huggingface course my whole life.&quot;</span>`,wrap:!1}}),F=new Wl({props:{title:"마무리: 토크나이저에서 모델까지",local:"wrapping-up-from-tokenizer-to-model",headingTag:"h2"}});const Yl=[ce,re],K=[];function Fl(l,e){return l[0]==="pt"?0:1}return h=Fl(w),f=K[h]=Yl[h](w),L=new oe({props:{source:"https://github.com/huggingface/course/blob/main/chapters/ko/chapter2/6.mdx"}}),{c(){a=T("meta"),M=p(),t=T("p"),j=p(),i(g.$$.fragment),tl=p(),i(Q.$$.fragment),nl=p(),d.c(),ll=p(),V=T("p"),V.textContent=Ol,al=p(),E=T("p"),E.innerHTML=Cl,pl=p(),i(k.$$.fragment),Jl=p(),R=T("p"),R.innerHTML=Bl,Ml=p(),Z=T("p"),Z.textContent=Al,ol=p(),i($.$$.fragment),Ul=p(),W=T("p"),W.textContent=Nl,il=p(),i(O.$$.fragment),rl=p(),C=T("p"),C.textContent=_l,cl=p(),i(B.$$.fragment),ul=p(),A=T("p"),A.textContent=zl,Tl=p(),i(N.$$.fragment),ml=p(),_=T("p"),_.innerHTML=Xl,jl=p(),i(z.$$.fragment),Il=p(),i(X.$$.fragment),bl=p(),D=T("p"),D.textContent=Dl,wl=p(),i(S.$$.fragment),yl=p(),i(v.$$.fragment),dl=p(),G=T("p"),G.textContent=Sl,hl=p(),i(q.$$.fragment),fl=p(),i(x.$$.fragment),gl=p(),Y=T("p"),Y.innerHTML=vl,Ql=p(),i(F.$$.fragment),Vl=p(),H=T("p"),H.innerHTML=Gl,El=p(),f.c(),el=p(),i(L.$$.fragment),kl=p(),sl=T("p"),this.h()},l(l){const e=pe("svelte-u9bgzb",document.head);a=m(e,"META",{name:!0,content:!0}),e.forEach(s),M=J(l),t=m(l,"P",{}),Ll(t).forEach(s),j=J(l),r(g.$$.fragment,l),tl=J(l),r(Q.$$.fragment,l),nl=J(l),d.l(l),ll=J(l),V=m(l,"P",{"data-svelte-h":!0}),I(V)!=="svelte-2hwo3u"&&(V.textContent=Ol),al=J(l),E=m(l,"P",{"data-svelte-h":!0}),I(E)!=="svelte-tb9qid"&&(E.innerHTML=Cl),pl=J(l),r(k.$$.fragment,l),Jl=J(l),R=m(l,"P",{"data-svelte-h":!0}),I(R)!=="svelte-11so93b"&&(R.innerHTML=Bl),Ml=J(l),Z=m(l,"P",{"data-svelte-h":!0}),I(Z)!=="svelte-bvmnox"&&(Z.textContent=Al),ol=J(l),r($.$$.fragment,l),Ul=J(l),W=m(l,"P",{"data-svelte-h":!0}),I(W)!=="svelte-1y7spzk"&&(W.textContent=Nl),il=J(l),r(O.$$.fragment,l),rl=J(l),C=m(l,"P",{"data-svelte-h":!0}),I(C)!=="svelte-1j2pysy"&&(C.textContent=_l),cl=J(l),r(B.$$.fragment,l),ul=J(l),A=m(l,"P",{"data-svelte-h":!0}),I(A)!=="svelte-168onl5"&&(A.textContent=zl),Tl=J(l),r(N.$$.fragment,l),ml=J(l),_=m(l,"P",{"data-svelte-h":!0}),I(_)!=="svelte-xpsxoa"&&(_.innerHTML=Xl),jl=J(l),r(z.$$.fragment,l),Il=J(l),r(X.$$.fragment,l),bl=J(l),D=m(l,"P",{"data-svelte-h":!0}),I(D)!=="svelte-5tlyj5"&&(D.textContent=Dl),wl=J(l),r(S.$$.fragment,l),yl=J(l),r(v.$$.fragment,l),dl=J(l),G=m(l,"P",{"data-svelte-h":!0}),I(G)!=="svelte-pt8kc1"&&(G.textContent=Sl),hl=J(l),r(q.$$.fragment,l),fl=J(l),r(x.$$.fragment,l),gl=J(l),Y=m(l,"P",{"data-svelte-h":!0}),I(Y)!=="svelte-1798v56"&&(Y.innerHTML=vl),Ql=J(l),r(F.$$.fragment,l),Vl=J(l),H=m(l,"P",{"data-svelte-h":!0}),I(H)!=="svelte-1cd4adl"&&(H.innerHTML=Gl),El=J(l),f.l(l),el=J(l),r(L.$$.fragment,l),kl=J(l),sl=m(l,"P",{}),Ll(sl).forEach(s),this.h()},h(){Pl(a,"name","hf:doc:metadata"),Pl(a,"content",Te)},m(l,e){Je(document.head,a),n(l,M,e),n(l,t,e),n(l,j,e),c(g,l,e),n(l,tl,e),c(Q,l,e),n(l,nl,e),P[y].m(l,e),n(l,ll,e),n(l,V,e),n(l,al,e),n(l,E,e),n(l,pl,e),c(k,l,e),n(l,Jl,e),n(l,R,e),n(l,Ml,e),n(l,Z,e),n(l,ol,e),c($,l,e),n(l,Ul,e),n(l,W,e),n(l,il,e),c(O,l,e),n(l,rl,e),n(l,C,e),n(l,cl,e),c(B,l,e),n(l,ul,e),n(l,A,e),n(l,Tl,e),c(N,l,e),n(l,ml,e),n(l,_,e),n(l,jl,e),c(z,l,e),n(l,Il,e),c(X,l,e),n(l,bl,e),n(l,D,e),n(l,wl,e),c(S,l,e),n(l,yl,e),c(v,l,e),n(l,dl,e),n(l,G,e),n(l,hl,e),c(q,l,e),n(l,fl,e),c(x,l,e),n(l,gl,e),n(l,Y,e),n(l,Ql,e),c(F,l,e),n(l,Vl,e),n(l,H,e),n(l,El,e),K[h].m(l,e),n(l,el,e),c(L,l,e),n(l,kl,e),n(l,sl,e),Rl=!0},p(l,[e]){const Hl={};e&1&&(Hl.fw=l[0]),g.$set(Hl);let Zl=y;y=xl(l),y!==Zl&&(le(),o(P[Zl],1,1,()=>{P[Zl]=null}),Kl(),d=P[y],d||(d=P[y]=ql[y](l),d.c()),U(d,1),d.m(ll.parentNode,ll));let $l=h;h=Fl(l),h!==$l&&(le(),o(K[$l],1,1,()=>{K[$l]=null}),Kl(),f=K[h],f||(f=K[h]=Yl[h](l),f.c()),U(f,1),f.m(el.parentNode,el))},i(l){Rl||(U(g.$$.fragment,l),U(Q.$$.fragment,l),U(d),U(k.$$.fragment,l),U($.$$.fragment,l),U(O.$$.fragment,l),U(B.$$.fragment,l),U(N.$$.fragment,l),U(z.$$.fragment,l),U(X.$$.fragment,l),U(S.$$.fragment,l),U(v.$$.fragment,l),U(q.$$.fragment,l),U(x.$$.fragment,l),U(F.$$.fragment,l),U(f),U(L.$$.fragment,l),Rl=!0)},o(l){o(g.$$.fragment,l),o(Q.$$.fragment,l),o(d),o(k.$$.fragment,l),o($.$$.fragment,l),o(O.$$.fragment,l),o(B.$$.fragment,l),o(N.$$.fragment,l),o(z.$$.fragment,l),o(X.$$.fragment,l),o(S.$$.fragment,l),o(v.$$.fragment,l),o(q.$$.fragment,l),o(x.$$.fragment,l),o(F.$$.fragment,l),o(f),o(L.$$.fragment,l),Rl=!1},d(l){l&&(s(M),s(t),s(j),s(tl),s(nl),s(ll),s(V),s(al),s(E),s(pl),s(Jl),s(R),s(Ml),s(Z),s(ol),s(Ul),s(W),s(il),s(rl),s(C),s(cl),s(ul),s(A),s(Tl),s(ml),s(_),s(jl),s(Il),s(bl),s(D),s(wl),s(yl),s(dl),s(G),s(hl),s(fl),s(gl),s(Y),s(Ql),s(Vl),s(H),s(El),s(el),s(kl),s(sl)),s(a),u(g,l),u(Q,l),P[y].d(l),u(k,l),u($,l),u(O,l),u(B,l),u(N,l),u(z,l),u(X,l),u(S,l),u(v,l),u(q,l),u(x,l),u(F,l),K[h].d(l),u(L,l)}}}
// Te: JSON-encoded table-of-contents metadata (title, anchor, nested sections);
// ue.h() stores it as the content of the `hf:doc:metadata` <meta> element.
const Te='{"title":"한 번에 실행하기","local":"putting-it-all-together","sections":[{"title":"특수 토큰","local":"special-tokens","sections":[],"depth":2},{"title":"마무리: 토크나이저에서 모델까지","local":"wrapping-up-from-tokenizer-to-model","sections":[],"depth":2}],"depth":1}';
// me: instance function. Starts with the framework flag t="pt"; the callback
// registered through `te` reads the `fw` query parameter from
// window.location.search and stores it via M(0, …), falling back to "pt" when
// the parameter is missing or empty. Returns [t], which `ue` reads as w[0].
// NOTE(review): `te` is presumably Svelte's onMount - confirm in the scheduler chunk.
function me(w,a,M){let t="pt";return te(()=>{const j=new URLSearchParams(window.location.search);M(0,t=j.get("fw")||"pt")}),[t]}
// he: component class; its constructor hands the instance function (me), the
// fragment factory (ue) and `se` to the imported initialiser `ae`.
class he extends ne{constructor(a){super(),ae(this,a,me,ue,se,{})}}
export{he as component};

Xet Storage Details

Size:
25.4 kB
·
Xet hash:
e3ced6b71f7a27903c5faa6c04ebd91563dd6469c30aa0c70260691c91e18521

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.