Buckets:
| import{s as sl,o as tl}from"../chunks/scheduler.37c15a92.js";import{S as nl,i as al,g as J,s as p,r,A as pl,h as y,f as s,c as o,j as Ke,u as m,x as w,k as Le,y as ol,a as n,v as M,t as u,b as Oe,d as c,w as b,p as el}from"../chunks/index.2bf4358c.js";import{C as j}from"../chunks/CodeBlock.4e987730.js";import{C as ll}from"../chunks/CourseFloatingBanner.9ff4c771.js";import{F as il}from"../chunks/FrameworkSwitchCourse.8d4d4ab6.js";import{H as Xe,E as ul}from"../chunks/getInferenceSnippets.24b50994.js";function cl(d){let a,i;return a=new ll({props:{chapter:2,classNames:"absolute z-10 right-0 top-0",notebooks:[{label:"Google Colab",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/zh-CN/chapter2/section6_tf.ipynb"},{label:"Aws Studio",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/zh-CN/chapter2/section6_tf.ipynb"}]}}),{c(){r(a.$$.fragment)},l(t){m(a.$$.fragment,t)},m(t,T){M(a,t,T),i=!0},i(t){i||(c(a.$$.fragment,t),i=!0)},o(t){u(a.$$.fragment,t),i=!1},d(t){b(a,t)}}}function rl(d){let a,i;return a=new ll({props:{chapter:2,classNames:"absolute z-10 right-0 top-0",notebooks:[{label:"Google Colab",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/zh-CN/chapter2/section6_pt.ipynb"},{label:"Aws Studio",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/zh-CN/chapter2/section6_pt.ipynb"}]}}),{c(){r(a.$$.fragment)},l(t){m(a.$$.fragment,t)},m(t,T){M(a,t,T),i=!0},i(t){i||(c(a.$$.fragment,t),i=!0)},o(t){u(a.$$.fragment,t),i=!1},d(t){b(a,t)}}}function ml(d){let a,i;return a=new j({props:{code:"aW1wb3J0JTIwdGVuc29yZmxvdyUyMGFzJTIwdGYlMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b1Rva2VuaXplciUyQyUyMFRGQXV0b01vZGVsRm9yU2VxdWVuY2VDbGFzc2lmaWNhdGlvbiUwQSUwQWNoZWNrcG9pbnQlMjAlM0QlMjAlMjJkaXN0aWxiZXJ0LWJhc2UtdW5jYXNlZC1maW5ldHVuZWQtc3N0LTItZW5nbGlzaCUyMiUwQXRva2VuaXplciUyMCUzRCUyMEF1dG9Ub2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKGNoZWNrcG9pbnQpJTBBbW9kZWwlMjAlM0QlMjBURkF1dG9Nb2RlbEZvclNlcXVlbmNlQ2xhc3NpZmljYXRpb24uZnJvbV9wcmV0cmFpbmVkKGNoZWNrcG9pbnQpJTBBc2VxdWVuY2VzJTIwJTNEJTIwJTVCJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTJDJTIwJTIyU28lMjBoYXZlJTIwSSElMjIlNUQlMEElMEF0b2tlbnMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzJTJDJTIwcGFkZGluZyUzRFRydWUlMkMlMjB0cnVuY2F0aW9uJTNEVHJ1ZSUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIydGYlMjIpJTBBb3V0cHV0JTIwJTNEJTIwbW9kZWwoKip0b2tlbnMp",highlighted:`<span class="hljs-keyword">import</span> tensorflow <span class="hljs-keyword">as</span> tf | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, TFAutoModelForSequenceClassification | |
| checkpoint = <span class="hljs-string">"distilbert-base-uncased-finetuned-sst-2-english"</span> | |
| tokenizer = AutoTokenizer.from_pretrained(checkpoint) | |
| model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint) | |
| sequences = [<span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span>, <span class="hljs-string">"So have I!"</span>] | |
| tokens = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"tf"</span>) | |
| output = model(**tokens)`,wrap:!1}}),{c(){r(a.$$.fragment)},l(t){m(a.$$.fragment,t)},m(t,T){M(a,t,T),i=!0},i(t){i||(c(a.$$.fragment,t),i=!0)},o(t){u(a.$$.fragment,t),i=!1},d(t){b(a,t)}}}function Ml(d){let a,i;return a=new j({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b1Rva2VuaXplciUyQyUyMEF1dG9Nb2RlbEZvclNlcXVlbmNlQ2xhc3NpZmljYXRpb24lMEElMEFjaGVja3BvaW50JTIwJTNEJTIwJTIyZGlzdGlsYmVydC1iYXNlLXVuY2FzZWQtZmluZXR1bmVkLXNzdC0yLWVuZ2xpc2glMjIlMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZChjaGVja3BvaW50KSUwQW1vZGVsJTIwJTNEJTIwQXV0b01vZGVsRm9yU2VxdWVuY2VDbGFzc2lmaWNhdGlvbi5mcm9tX3ByZXRyYWluZWQoY2hlY2twb2ludCklMEFzZXF1ZW5jZXMlMjAlM0QlMjAlNUIlMjJJJ3ZlJTIwYmVlbiUyMHdhaXRpbmclMjBmb3IlMjBhJTIwSHVnZ2luZ0ZhY2UlMjBjb3Vyc2UlMjBteSUyMHdob2xlJTIwbGlmZS4lMjIlMkMlMjAlMjJTbyUyMGhhdmUlMjBJISUyMiU1RCUwQSUwQXRva2VucyUyMCUzRCUyMHRva2VuaXplcihzZXF1ZW5jZXMlMkMlMjBwYWRkaW5nJTNEVHJ1ZSUyQyUyMHRydW5jYXRpb24lM0RUcnVlJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJwdCUyMiklMEFvdXRwdXQlMjAlM0QlMjBtb2RlbCgqKnRva2Vucyk=",highlighted:`<span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, AutoModelForSequenceClassification | |
| checkpoint = <span class="hljs-string">"distilbert-base-uncased-finetuned-sst-2-english"</span> | |
| tokenizer = AutoTokenizer.from_pretrained(checkpoint) | |
| model = AutoModelForSequenceClassification.from_pretrained(checkpoint) | |
| sequences = [<span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span>, <span class="hljs-string">"So have I!"</span>] | |
| tokens = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"pt"</span>) | |
| output = model(**tokens)`,wrap:!1}}),{c(){r(a.$$.fragment)},l(t){m(a.$$.fragment,t)},m(t,T){M(a,t,T),i=!0},i(t){i||(c(a.$$.fragment,t),i=!0)},o(t){u(a.$$.fragment,t),i=!1},d(t){b(a,t)}}}function bl(d){let a,i,t,T,Z,te,g,ne,h,f,ee,V,Ne="在最後幾節中,我們一直在盡最大努力手工完成大部分工作。我們探討了標記化器的工作原理,並研究了標記化、到輸入ID的轉換、填充、截斷和注意掩碼。",ae,$,_e="然而,正如我們在第2節中所看到的,🤗 Transformers API可以通過一個高級函數為我們處理所有這些,我們將在這裡深入討論。當你直接在句子上調用標記器時,你會得到準備通過模型傳遞的輸入",pe,k,oe,W,xe=`這裡,<code>model_inputs</code> | |
| 變量包含模型良好運行所需的一切。對於DistilBERT,它包括輸入 ID和注意力掩碼(attention mask)。其他接受額外輸入的模型也會有標記器對象的輸出。`,ie,B,ze="正如我們將在下面的一些示例中看到的,這種方法非常強大。首先,它可以標記單個序列:",ue,G,ce,X,Ee="它還一次處理多個序列,並且API沒有任何變化:",re,N,me,_,Se="它可以根據幾個目標進行填充:",Me,x,be,z,Ce="它還可以截斷序列:",Je,E,ye,S,Re="標記器對象可以處理到特定框架張量的轉換,然後可以直接發送到模型。例如,在下面的代碼示例中,我們提示標記器從不同的框架返回張量——<code>"pt"</code>返回Py Torch張量,<code>"tf"</code>返回TensorFlow張量,<code>"np"</code>返回NumPy數組:",Te,C,we,R,je,q,qe="如果我們看一下標記器返回的輸入 ID,我們會發現它們與之前的略有不同:",de,v,he,Y,fe,Q,ve="一個在開始時添加了一個標記(token) ID,一個在結束時添加了一個標記(token) ID。讓我們解碼上面的兩個ID序列,看看這是怎麼回事:",Ie,H,Ue,F,Ze,A,Ye="標記器在開頭添加了特殊單詞<code>[CLS]</code>,在結尾添加了特殊單詞<code>[SEP]</code>。這是因為模型是用這些數據預訓練的,所以為了得到相同的推理結果,我們還需要添加它們。請注意,有些模型不添加特殊單詞,或者添加不同的單詞;模型也可能只在開頭或結尾添加這些特殊單詞。在任何情況下,標記器都知道需要哪些詞符,並將為您處理這些詞符。",ge,D,Ve,P,Qe="現在我們已經看到了標記器對象在應用於文本時使用的所有單獨步驟,讓我們最後一次看看它如何處理多個序列(填充!),非常長的序列(截斷!),以及多種類型的張量及其主要API:",$e,I,U,le,K,ke,se,We;Z=new il({props:{fw:d[0]}}),g=new Xe({props:{title:"把它們放在一起",local:"把它們放在一起",headingTag:"h1"}});const He=[rl,cl],L=[];function Fe(e,l){return e[0]==="pt"?0:1}h=Fe(d),f=L[h]=He[h](d),k=new j({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMEElMEFjaGVja3BvaW50JTIwJTNEJTIwJTIyZGlzdGlsYmVydC1iYXNlLXVuY2FzZWQtZmluZXR1bmVkLXNzdC0yLWVuZ2xpc2glMjIlMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZChjaGVja3BvaW50KSUwQSUwQXNlcXVlbmNlJTIwJTNEJTIwJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTBBJTBBbW9kZWxfaW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyKHNlcXVlbmNlKQ==",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer | |
| checkpoint = <span class="hljs-string">"distilbert-base-uncased-finetuned-sst-2-english"</span> | |
| tokenizer = AutoTokenizer.from_pretrained(checkpoint) | |
| sequence = <span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span> | |
| model_inputs = tokenizer(sequence)`,wrap:!1}}),G=new j({props:{code:"c2VxdWVuY2UlMjAlM0QlMjAlMjJJJ3ZlJTIwYmVlbiUyMHdhaXRpbmclMjBmb3IlMjBhJTIwSHVnZ2luZ0ZhY2UlMjBjb3Vyc2UlMjBteSUyMHdob2xlJTIwbGlmZS4lMjIlMEElMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2Up",highlighted:`sequence = <span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span> | |
| model_inputs = tokenizer(sequence)`,wrap:!1}}),N=new j({props:{code:"c2VxdWVuY2VzJTIwJTNEJTIwJTVCJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTJDJTIwJTIyU28lMjBoYXZlJTIwSSElMjIlNUQlMEElMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzKQ==",highlighted:`sequences = [<span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span>, <span class="hljs-string">"So have I!"</span>] | |
| model_inputs = tokenizer(sequences)`,wrap:!1}}),x=new j({props:{code:"JTIzJTIwV2lsbCUyMHBhZCUyMHRoZSUyMHNlcXVlbmNlcyUyMHVwJTIwdG8lMjB0aGUlMjBtYXhpbXVtJTIwc2VxdWVuY2UlMjBsZW5ndGglMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzJTJDJTIwcGFkZGluZyUzRCUyMmxvbmdlc3QlMjIpJTBBJTBBJTIzJTIwV2lsbCUyMHBhZCUyMHRoZSUyMHNlcXVlbmNlcyUyMHVwJTIwdG8lMjB0aGUlMjBtb2RlbCUyMG1heCUyMGxlbmd0aCUwQSUyMyUyMCg1MTIlMjBmb3IlMjBCRVJUJTIwb3IlMjBEaXN0aWxCRVJUKSUwQW1vZGVsX2lucHV0cyUyMCUzRCUyMHRva2VuaXplcihzZXF1ZW5jZXMlMkMlMjBwYWRkaW5nJTNEJTIybWF4X2xlbmd0aCUyMiklMEElMEElMjMlMjBXaWxsJTIwcGFkJTIwdGhlJTIwc2VxdWVuY2VzJTIwdXAlMjB0byUyMHRoZSUyMHNwZWNpZmllZCUyMG1heCUyMGxlbmd0aCUwQW1vZGVsX2lucHV0cyUyMCUzRCUyMHRva2VuaXplcihzZXF1ZW5jZXMlMkMlMjBwYWRkaW5nJTNEJTIybWF4X2xlbmd0aCUyMiUyQyUyMG1heF9sZW5ndGglM0Q4KQ==",highlighted:`<span class="hljs-comment"># Will pad the sequences up to the maximum sequence length</span> | |
| model_inputs = tokenizer(sequences, padding=<span class="hljs-string">"longest"</span>) | |
| <span class="hljs-comment"># Will pad the sequences up to the model max length</span> | |
| <span class="hljs-comment"># (512 for BERT or DistilBERT)</span> | |
| model_inputs = tokenizer(sequences, padding=<span class="hljs-string">"max_length"</span>) | |
| <span class="hljs-comment"># Will pad the sequences up to the specified max length</span> | |
| model_inputs = tokenizer(sequences, padding=<span class="hljs-string">"max_length"</span>, max_length=<span class="hljs-number">8</span>)`,wrap:!1}}),E=new j({props:{code:"c2VxdWVuY2VzJTIwJTNEJTIwJTVCJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTJDJTIwJTIyU28lMjBoYXZlJTIwSSElMjIlNUQlMEElMEElMjMlMjBXaWxsJTIwdHJ1bmNhdGUlMjB0aGUlMjBzZXF1ZW5jZXMlMjB0aGF0JTIwYXJlJTIwbG9uZ2VyJTIwdGhhbiUyMHRoZSUyMG1vZGVsJTIwbWF4JTIwbGVuZ3RoJTBBJTIzJTIwKDUxMiUyMGZvciUyMEJFUlQlMjBvciUyMERpc3RpbEJFUlQpJTBBbW9kZWxfaW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyKHNlcXVlbmNlcyUyQyUyMHRydW5jYXRpb24lM0RUcnVlKSUwQSUwQSUyMyUyMFdpbGwlMjB0cnVuY2F0ZSUyMHRoZSUyMHNlcXVlbmNlcyUyMHRoYXQlMjBhcmUlMjBsb25nZXIlMjB0aGFuJTIwdGhlJTIwc3BlY2lmaWVkJTIwbWF4JTIwbGVuZ3RoJTBBbW9kZWxfaW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyKHNlcXVlbmNlcyUyQyUyMG1heF9sZW5ndGglM0Q4JTJDJTIwdHJ1bmNhdGlvbiUzRFRydWUp",highlighted:`sequences = [<span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span>, <span class="hljs-string">"So have I!"</span>] | |
| <span class="hljs-comment"># Will truncate the sequences that are longer than the model max length</span> | |
| <span class="hljs-comment"># (512 for BERT or DistilBERT)</span> | |
| model_inputs = tokenizer(sequences, truncation=<span class="hljs-literal">True</span>) | |
| <span class="hljs-comment"># Will truncate the sequences that are longer than the specified max length</span> | |
| model_inputs = tokenizer(sequences, max_length=<span class="hljs-number">8</span>, truncation=<span class="hljs-literal">True</span>)`,wrap:!1}}),C=new j({props:{code:"c2VxdWVuY2VzJTIwJTNEJTIwJTVCJTIySSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMEh1Z2dpbmdGYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIyJTJDJTIwJTIyU28lMjBoYXZlJTIwSSElMjIlNUQlMEElMEElMjMlMjBSZXR1cm5zJTIwUHlUb3JjaCUyMHRlbnNvcnMlMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2VzJTJDJTIwcGFkZGluZyUzRFRydWUlMkMlMjByZXR1cm5fdGVuc29ycyUzRCUyMnB0JTIyKSUwQSUwQSUyMyUyMFJldHVybnMlMjBUZW5zb3JGbG93JTIwdGVuc29ycyUwQW1vZGVsX2lucHV0cyUyMCUzRCUyMHRva2VuaXplcihzZXF1ZW5jZXMlMkMlMjBwYWRkaW5nJTNEVHJ1ZSUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIydGYlMjIpJTBBJTBBJTIzJTIwUmV0dXJucyUyME51bVB5JTIwYXJyYXlzJTBBbW9kZWxfaW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyKHNlcXVlbmNlcyUyQyUyMHBhZGRpbmclM0RUcnVlJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJucCUyMik=",highlighted:`sequences = [<span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span>, <span class="hljs-string">"So have I!"</span>] | |
| <span class="hljs-comment"># Returns PyTorch tensors</span> | |
| model_inputs = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"pt"</span>) | |
| <span class="hljs-comment"># Returns TensorFlow tensors</span> | |
| model_inputs = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"tf"</span>) | |
| <span class="hljs-comment"># Returns NumPy arrays</span> | |
| model_inputs = tokenizer(sequences, padding=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"np"</span>)`,wrap:!1}}),R=new Xe({props:{title:"特殊詞符(token)",local:"特殊詞符token",headingTag:"h2"}}),v=new j({props:{code:"c2VxdWVuY2UlMjAlM0QlMjAlMjJJJ3ZlJTIwYmVlbiUyMHdhaXRpbmclMjBmb3IlMjBhJTIwSHVnZ2luZ0ZhY2UlMjBjb3Vyc2UlMjBteSUyMHdob2xlJTIwbGlmZS4lMjIlMEElMEFtb2RlbF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoc2VxdWVuY2UpJTBBcHJpbnQobW9kZWxfaW5wdXRzJTVCJTIyaW5wdXRfaWRzJTIyJTVEKSUwQSUwQXRva2VucyUyMCUzRCUyMHRva2VuaXplci50b2tlbml6ZShzZXF1ZW5jZSklMEFpZHMlMjAlM0QlMjB0b2tlbml6ZXIuY29udmVydF90b2tlbnNfdG9faWRzKHRva2VucyklMEFwcmludChpZHMp",highlighted:`sequence = <span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span> | |
| model_inputs = tokenizer(sequence) | |
| <span class="hljs-built_in">print</span>(model_inputs[<span class="hljs-string">"input_ids"</span>]) | |
| tokens = tokenizer.tokenize(sequence) | |
| ids = tokenizer.convert_tokens_to_ids(tokens) | |
| <span class="hljs-built_in">print</span>(ids)`,wrap:!1}}),Y=new j({props:{code:"JTVCMTAxJTJDJTIwMTA0NSUyQyUyMDEwMDUlMkMlMjAyMzEwJTJDJTIwMjA0MiUyQyUyMDM0MDMlMkMlMjAyMDA1JTJDJTIwMTAzNyUyQyUyMDE3NjYyJTJDJTIwMTIxNzIlMkMlMjAyNjA3JTJDJTIwMjAyNiUyQyUyMDI4NzglMkMlMjAyMTY2JTJDJTIwMTAxMiUyQyUyMDEwMiU1RCUwQSU1QjEwNDUlMkMlMjAxMDA1JTJDJTIwMjMxMCUyQyUyMDIwNDIlMkMlMjAzNDAzJTJDJTIwMjAwNSUyQyUyMDEwMzclMkMlMjAxNzY2MiUyQyUyMDEyMTcyJTJDJTIwMjYwNyUyQyUyMDIwMjYlMkMlMjAyODc4JTJDJTIwMjE2NiUyQyUyMDEwMTIlNUQ=",highlighted:`[<span class="hljs-number">101</span>, <span class="hljs-number">1045</span>, <span class="hljs-number">1005</span>, <span class="hljs-number">2310</span>, <span class="hljs-number">2042</span>, <span class="hljs-number">3403</span>, <span class="hljs-number">2005</span>, <span class="hljs-number">1037</span>, <span class="hljs-number">17662</span>, <span class="hljs-number">12172</span>, <span class="hljs-number">2607</span>, <span class="hljs-number">2026</span>, <span class="hljs-number">2878</span>, <span class="hljs-number">2166</span>, <span class="hljs-number">1012</span>, <span class="hljs-number">102</span>] | |
| [<span class="hljs-number">1045</span>, <span class="hljs-number">1005</span>, <span class="hljs-number">2310</span>, <span class="hljs-number">2042</span>, <span class="hljs-number">3403</span>, <span class="hljs-number">2005</span>, <span class="hljs-number">1037</span>, <span class="hljs-number">17662</span>, <span class="hljs-number">12172</span>, <span class="hljs-number">2607</span>, <span class="hljs-number">2026</span>, <span class="hljs-number">2878</span>, <span class="hljs-number">2166</span>, <span class="hljs-number">1012</span>]`,wrap:!1}}),H=new j({props:{code:"cHJpbnQodG9rZW5pemVyLmRlY29kZShtb2RlbF9pbnB1dHMlNUIlMjJpbnB1dF9pZHMlMjIlNUQpKSUwQXByaW50KHRva2VuaXplci5kZWNvZGUoaWRzKSk=",highlighted:`<span class="hljs-built_in">print</span>(tokenizer.decode(model_inputs[<span class="hljs-string">"input_ids"</span>])) | |
| <span class="hljs-built_in">print</span>(tokenizer.decode(ids))`,wrap:!1}}),F=new j({props:{code:"JTIyJTVCQ0xTJTVEJTIwaSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMGh1Z2dpbmdmYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIwJTVCU0VQJTVEJTIyJTBBJTIyaSd2ZSUyMGJlZW4lMjB3YWl0aW5nJTIwZm9yJTIwYSUyMGh1Z2dpbmdmYWNlJTIwY291cnNlJTIwbXklMjB3aG9sZSUyMGxpZmUuJTIy",highlighted:`<span class="hljs-string">"[CLS] i've been waiting for a huggingface course my whole life. [SEP]"</span> | |
| <span class="hljs-string">"i've been waiting for a huggingface course my whole life."</span>`,wrap:!1}}),D=new Xe({props:{title:"結束:從標記器到模型",local:"結束從標記器到模型",headingTag:"h2"}});const Ae=[Ml,ml],O=[];function De(e,l){return e[0]==="pt"?0:1}return I=De(d),U=O[I]=Ae[I](d),K=new ul({props:{source:"https://github.com/huggingface/course/blob/main/chapters/zh-TW/chapter2/6.mdx"}}),{c(){a=J("meta"),i=p(),t=J("p"),T=p(),r(Z.$$.fragment),te=p(),r(g.$$.fragment),ne=p(),f.c(),ee=p(),V=J("p"),V.textContent=Ne,ae=p(),$=J("p"),$.textContent=_e,pe=p(),r(k.$$.fragment),oe=p(),W=J("p"),W.innerHTML=xe,ie=p(),B=J("p"),B.textContent=ze,ue=p(),r(G.$$.fragment),ce=p(),X=J("p"),X.textContent=Ee,re=p(),r(N.$$.fragment),me=p(),_=J("p"),_.textContent=Se,Me=p(),r(x.$$.fragment),be=p(),z=J("p"),z.textContent=Ce,Je=p(),r(E.$$.fragment),ye=p(),S=J("p"),S.innerHTML=Re,Te=p(),r(C.$$.fragment),we=p(),r(R.$$.fragment),je=p(),q=J("p"),q.textContent=qe,de=p(),r(v.$$.fragment),he=p(),r(Y.$$.fragment),fe=p(),Q=J("p"),Q.textContent=ve,Ie=p(),r(H.$$.fragment),Ue=p(),r(F.$$.fragment),Ze=p(),A=J("p"),A.innerHTML=Ye,ge=p(),r(D.$$.fragment),Ve=p(),P=J("p"),P.textContent=Qe,$e=p(),U.c(),le=p(),r(K.$$.fragment),ke=p(),se=J("p"),this.h()},l(e){const l=pl("svelte-u9bgzb",document.head);a=y(l,"META",{name:!0,content:!0}),l.forEach(s),i=o(e),t=y(e,"P",{}),Ke(t).forEach(s),T=o(e),m(Z.$$.fragment,e),te=o(e),m(g.$$.fragment,e),ne=o(e),f.l(e),ee=o(e),V=y(e,"P",{"data-svelte-h":!0}),w(V)!=="svelte-ri56tg"&&(V.textContent=Ne),ae=o(e),$=y(e,"P",{"data-svelte-h":!0}),w($)!=="svelte-u0jeao"&&($.textContent=_e),pe=o(e),m(k.$$.fragment,e),oe=o(e),W=y(e,"P",{"data-svelte-h":!0}),w(W)!=="svelte-5h1ym8"&&(W.innerHTML=xe),ie=o(e),B=y(e,"P",{"data-svelte-h":!0}),w(B)!=="svelte-1bzsmax"&&(B.textContent=ze),ue=o(e),m(G.$$.fragment,e),ce=o(e),X=y(e,"P",{"data-svelte-h":!0}),w(X)!=="svelte-p5w8o5"&&(X.textContent=Ee),re=o(e),m(N.$$.fragment,e),me=o(e),_=y(e,"P",{"data-svelte-h":!0}),w(_)!=="svelte-1fjb09c"&&(_.textContent=Se),Me=o(e),m(x.$$.fragment,e),be=o(e),z=y(e,"P",{"data-svelte-h":!0}),w(z)!=="svelte-slyf0"&&(z.textContent=Ce),Je=o(e),m(E.$$.fragment,e),ye=o(e),S=y(e,"P",{"data-svelte-h":!0}),w(S)!=="svelte-15ecj1e"&&(S.innerHTML=Re),Te=o(e),m(C.$$.fragment,e),we=o(e),m(R.$$.fragment,e),je=o(e),q=y(e,"P",{"data-svelte-h":!0}),w(q)!=="svelte-ziny2a"&&(q.textContent=qe),de=o(e),m(v.$$.fragment,e),he=o(e),m(Y.$$.fragment,e),fe=o(e),Q=y(e,"P",{"data-svelte-h":!0}),w(Q)!=="svelte-1c0bvep"&&(Q.textContent=ve),Ie=o(e),m(H.$$.fragment,e),Ue=o(e),m(F.$$.fragment,e),Ze=o(e),A=y(e,"P",{"data-svelte-h":!0}),w(A)!=="svelte-njyngw"&&(A.innerHTML=Ye),ge=o(e),m(D.$$.fragment,e),Ve=o(e),P=y(e,"P",{"data-svelte-h":!0}),w(P)!=="svelte-evt9m7"&&(P.textContent=Qe),$e=o(e),U.l(e),le=o(e),m(K.$$.fragment,e),ke=o(e),se=y(e,"P",{}),Ke(se).forEach(s),this.h()},h(){Le(a,"name","hf:doc:metadata"),Le(a,"content",Jl)},m(e,l){ol(document.head,a),n(e,i,l),n(e,t,l),n(e,T,l),M(Z,e,l),n(e,te,l),M(g,e,l),n(e,ne,l),L[h].m(e,l),n(e,ee,l),n(e,V,l),n(e,ae,l),n(e,$,l),n(e,pe,l),M(k,e,l),n(e,oe,l),n(e,W,l),n(e,ie,l),n(e,B,l),n(e,ue,l),M(G,e,l),n(e,ce,l),n(e,X,l),n(e,re,l),M(N,e,l),n(e,me,l),n(e,_,l),n(e,Me,l),M(x,e,l),n(e,be,l),n(e,z,l),n(e,Je,l),M(E,e,l),n(e,ye,l),n(e,S,l),n(e,Te,l),M(C,e,l),n(e,we,l),M(R,e,l),n(e,je,l),n(e,q,l),n(e,de,l),M(v,e,l),n(e,he,l),M(Y,e,l),n(e,fe,l),n(e,Q,l),n(e,Ie,l),M(H,e,l),n(e,Ue,l),M(F,e,l),n(e,Ze,l),n(e,A,l),n(e,ge,l),M(D,e,l),n(e,Ve,l),n(e,P,l),n(e,$e,l),O[I].m(e,l),n(e,le,l),M(K,e,l),n(e,ke,l),n(e,se,l),We=!0},p(e,[l]){const Pe={};l&1&&(Pe.fw=e[0]),Z.$set(Pe);let Be=h;h=Fe(e),h!==Be&&(el(),u(L[Be],1,1,()=>{L[Be]=null}),Oe(),f=L[h],f||(f=L[h]=He[h](e),f.c()),c(f,1),f.m(ee.parentNode,ee));let Ge=I;I=De(e),I!==Ge&&(el(),u(O[Ge],1,1,()=>{O[Ge]=null}),Oe(),U=O[I],U||(U=O[I]=Ae[I](e),U.c()),c(U,1),U.m(le.parentNode,le))},i(e){We||(c(Z.$$.fragment,e),c(g.$$.fragment,e),c(f),c(k.$$.fragment,e),c(G.$$.fragment,e),c(N.$$.fragment,e),c(x.$$.fragment,e),c(E.$$.fragment,e),c(C.$$.fragment,e),c(R.$$.fragment,e),c(v.$$.fragment,e),c(Y.$$.fragment,e),c(H.$$.fragment,e),c(F.$$.fragment,e),c(D.$$.fragment,e),c(U),c(K.$$.fragment,e),We=!0)},o(e){u(Z.$$.fragment,e),u(g.$$.fragment,e),u(f),u(k.$$.fragment,e),u(G.$$.fragment,e),u(N.$$.fragment,e),u(x.$$.fragment,e),u(E.$$.fragment,e),u(C.$$.fragment,e),u(R.$$.fragment,e),u(v.$$.fragment,e),u(Y.$$.fragment,e),u(H.$$.fragment,e),u(F.$$.fragment,e),u(D.$$.fragment,e),u(U),u(K.$$.fragment,e),We=!1},d(e){e&&(s(i),s(t),s(T),s(te),s(ne),s(ee),s(V),s(ae),s($),s(pe),s(oe),s(W),s(ie),s(B),s(ue),s(ce),s(X),s(re),s(me),s(_),s(Me),s(be),s(z),s(Je),s(ye),s(S),s(Te),s(we),s(je),s(q),s(de),s(he),s(fe),s(Q),s(Ie),s(Ue),s(Ze),s(A),s(ge),s(Ve),s(P),s($e),s(le),s(ke),s(se)),s(a),b(Z,e),b(g,e),L[h].d(e),b(k,e),b(G,e),b(N,e),b(x,e),b(E,e),b(C,e),b(R,e),b(v,e),b(Y,e),b(H,e),b(F,e),b(D,e),O[I].d(e),b(K,e)}}}const Jl='{"title":"把它們放在一起","local":"把它們放在一起","sections":[{"title":"特殊詞符(token)","local":"特殊詞符token","sections":[],"depth":2},{"title":"結束:從標記器到模型","local":"結束從標記器到模型","sections":[],"depth":2}],"depth":1}';function yl(d,a,i){let t="pt";return tl(()=>{const T=new URLSearchParams(window.location.search);i(0,t=T.get("fw")||"pt")}),[t]}class Il extends nl{constructor(a){super(),al(this,a,yl,bl,sl,{})}}export{Il as component}; | |
Xet Storage Details
- Size:
- 23.1 kB
- Xet hash:
- 77427d910a4a53f65548a0c9d552b861e356212831a5c0666b64690a0d54b4c7
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.