Buckets:

rtrm's picture
download
raw
35.4 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;章末小測驗&quot;,&quot;local&quot;:&quot;章末小測驗&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;1.你應該什麼時候訓練一個新的標記器?&quot;,&quot;local&quot;:&quot;1你應該什麼時候訓練一個新的標記器&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;2.當使用“ train_new_from_iterator()”時,使用文本列表生成器與文本列表相比有什麼優點?&quot;,&quot;local&quot;:&quot;2當使用-trainnewfromiterator時使用文本列表生成器與文本列表相比有什麼優點&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;3.使用“快速”標記器的優點是什麼?&quot;,&quot;local&quot;:&quot;3使用快速標記器的優點是什麼&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;4.“token-classification”管道如何處理跨多個標記的實體?&quot;,&quot;local&quot;:&quot;4token-classification管道如何處理跨多個標記的實體&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;5.“question-answering”流水線如何處理長上下文?&quot;,&quot;local&quot;:&quot;5question-answering流水線如何處理長上下文&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;6.什麼是標準化?&quot;,&quot;local&quot;:&quot;6什麼是標準化&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;7.什麼是子詞標記化的前標記化?&quot;,&quot;local&quot;:&quot;7什麼是子詞標記化的前標記化&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;8.選擇描述標記化 BPE 模式最準確的句子。&quot;,&quot;local&quot;:&quot;8選擇描述標記化-bpe-模式最準確的句子&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;9.選擇適用於 WordPiece 標記模型的句子。&quot;,&quot;local&quot;:&quot;9選擇適用於-wordpiece-標記模型的句子&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;10.選擇適用於 Unigram 標記模式的句子。&quot;,&quot;local&quot;:&quot;10選擇適用於-unigram-標記模式的句子&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:1}">
<link href="/docs/course/pr_1069/zh-TW/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/entry/start.01945326.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/chunks/scheduler.37c15a92.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/chunks/singletons.40763523.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/chunks/index.18351ede.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/chunks/paths.677dc4ce.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/entry/app.e8597ff2.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/chunks/index.2bf4358c.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/nodes/0.decbb3b3.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/nodes/44.6979dc17.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/chunks/CourseFloatingBanner.9ff4c771.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/chunks/DocNotebookDropdown.efc1fb7c.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/chunks/Question.668688bc.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/chunks/getInferenceSnippets.24b50994.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;章末小測驗&quot;,&quot;local&quot;:&quot;章末小測驗&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;1.你應該什麼時候訓練一個新的標記器?&quot;,&quot;local&quot;:&quot;1你應該什麼時候訓練一個新的標記器&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;2.當使用“ train_new_from_iterator()”時,使用文本列表生成器與文本列表相比有什麼優點?&quot;,&quot;local&quot;:&quot;2當使用-trainnewfromiterator時使用文本列表生成器與文本列表相比有什麼優點&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;3.使用“快速”標記器的優點是什麼?&quot;,&quot;local&quot;:&quot;3使用快速標記器的優點是什麼&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;4.“token-classification”管道如何處理跨多個標記的實體?&quot;,&quot;local&quot;:&quot;4token-classification管道如何處理跨多個標記的實體&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;5.“question-answering”流水線如何處理長上下文?&quot;,&quot;local&quot;:&quot;5question-answering流水線如何處理長上下文&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;6.什麼是標準化?&quot;,&quot;local&quot;:&quot;6什麼是標準化&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;7.什麼是子詞標記化的前標記化?&quot;,&quot;local&quot;:&quot;7什麼是子詞標記化的前標記化&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;8.選擇描述標記化 BPE 模式最準確的句子。&quot;,&quot;local&quot;:&quot;8選擇描述標記化-bpe-模式最準確的句子&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;9.選擇適用於 WordPiece 標記模型的句子。&quot;,&quot;local&quot;:&quot;9選擇適用於-wordpiece-標記模型的句子&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;10.選擇適用於 Unigram 標記模式的句子。&quot;,&quot;local&quot;:&quot;10選擇適用於-unigram-標記模式的句子&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="章末小測驗" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#章末小測驗"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>章末小測驗</span></h1> <div class="flex space-x-1 absolute z-10 right-0 top-0"><a href="https://discuss.huggingface.co/t/chapter-6-questions" target="_blank"><img alt="Ask a Question" class="!m-0" src="https://img.shields.io/badge/Ask%20a%20question-ffcb4c.svg?logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgLTEgMTA0IDEwNiI+PGRlZnM+PHN0eWxlPi5jbHMtMXtmaWxsOiMyMzFmMjA7fS5jbHMtMntmaWxsOiNmZmY5YWU7fS5jbHMtM3tmaWxsOiMwMGFlZWY7fS5jbHMtNHtmaWxsOiMwMGE5NGY7fS5jbHMtNXtmaWxsOiNmMTVkMjI7fS5jbHMtNntmaWxsOiNlMzFiMjM7fTwvc3R5bGU+PC9kZWZzPjx0aXRsZT5EaXNjb3Vyc2VfbG9nbzwvdGl0bGU+PGcgaWQ9IkxheWVyXzIiPjxnIGlkPSJMYXllcl8zIj48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik01MS44NywwQzIzLjcxLDAsMCwyMi44MywwLDUxYzAsLjkxLDAsNTIuODEsMCw1Mi44MWw1MS44Ni0uMDVjMjguMTYsMCw1MS0yMy43MSw1MS01MS44N1M4MCwwLDUxLjg3LDBaIi8+PHBhdGggY2xhc3M9ImNscy0yIiBkPSJNNTIuMzcsMTkuNzRBMzEuNjIsMzEuNjIsMCwwLDAsMjQuNTgsNjYuNDFsLTUuNzIsMTguNEwzOS40LDgwLjE3YTMxLjYxLDMxLjYxLDAsMSwwLDEzLTYwLjQzWiIvPjxwYXRoIGNsYXNzPSJjbHMtMyIgZD0iTTc3LjQ1LDMyLjEyYTMxLjYsMzEuNiwwLDAsMS0zOC4wNSw0OEwxOC44Niw4NC44MmwyMC45MS0yLjQ3QTMxLjYsMzEuNiwwLDAsMCw3Ny40NSwzMi4xMloiLz48cGF0aCBjbGFzcz0iY2xzLTQiIGQ9Ik03MS42MywyNi4yOUEzMS42LDMxLjYsMCwwLDEsMzguOCw3OEwxOC44Niw4NC44MiwzOS40LDgwLjE3QTMxLjYsMzEuNiwwLDAsMCw3MS42MywyNi4yOVoiLz48cGF0aCBjbGFzcz0iY2xzLTUiIGQ9Ik0yNi40Nyw2Ny4xMWEzMS42MSwzMS42MSwwLDAsMSw1MS0zNUEzMS42MSwzMS42MSwwLDAsMCwyNC41OCw2Ni40MWwtNS43MiwxOC40WiIvPjxwYXRoIGNsYXNzPSJjbHMtNiIgZD0iTTI0LjU4LDY2LjQxQTMxLjYxLDMxLjYxLDAsMCwxLDcxLjYzLDI2LjI5YTMxLjYxLDMxLjYxLDAsMCwwLTQ5LDM5LjYzbC0zLjc2LDE4LjlaIi8+PC9nPjwvZz48L3N2Zz4="></a> </div> <p data-svelte-h="svelte-7nh7yy">讓我們測試一下您在本章中學到了什麼!</p> <h3 class="relative group"><a id="1你應該什麼時候訓練一個新的標記器" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#1你應該什麼時候訓練一個新的標記器"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>1.你應該什麼時候訓練一個新的標記器?</span></h3> <div><form><label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="0"> <!-- HTML_TAG_START -->當您的數據集與現有的預訓練模型所使用的數據集相似時,並且您希望預訓練一個新模型<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="1"> <!-- HTML_TAG_START -->當您的數據集與現有的預訓練模型所使用的數據集相似時,並且您希望使用此預訓練模型對新模型進行微調<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="2"> <!-- HTML_TAG_START -->當您的數據集與現有預訓練模型所使用的數據集不同時,您希望預訓練一個新模型<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="3"> <!-- HTML_TAG_START -->當您的數據集與現有預訓練模型所使用的數據集不同時,但是您希望使用此預訓練模型對新模型進行微調<!-- HTML_TAG_END --></label> <div class="flex flex-row items-center mt-3"><button class="btn px-4 mr-4" type="submit" disabled>Submit</button> </div></form></div> <h3 class="relative group"><a id="2當使用-trainnewfromiterator時使用文本列表生成器與文本列表相比有什麼優點" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#2當使用-trainnewfromiterator時使用文本列表生成器與文本列表相比有什麼優點"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>2.當使用“ train_new_from_iterator()”時,使用文本列表生成器與文本列表相比有什麼優點?</span></h3> <div><form><label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="0"> <!-- HTML_TAG_START -->這是方法 < code > train_new_from_iterator() </code > 接受的唯一類型。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="1"> <!-- HTML_TAG_START -->您將避免立即將整個數據集載入內存中。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="2"> <!-- HTML_TAG_START -->這將允許 Tokenizers 庫使用並行處理。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="3"> <!-- HTML_TAG_START -->你訓練的標記器將產生更好的文本。<!-- HTML_TAG_END --></label> <div class="flex flex-row items-center mt-3"><button class="btn px-4 mr-4" type="submit" disabled>Submit</button> </div></form></div> <h3 class="relative group"><a id="3使用快速標記器的優點是什麼" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#3使用快速標記器的優點是什麼"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>3.使用“快速”標記器的優點是什麼?</span></h3> <div><form><label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="0"> <!-- HTML_TAG_START -->當你批處理大量的輸入時,它可以比一個滿速的標記器更快地處理輸入。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="1"> <!-- HTML_TAG_START -->快速的標記法總是比慢速的標記法快。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="2"> <!-- HTML_TAG_START -->它可以填充和截斷文本。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="3"> <!-- HTML_TAG_START -->它有一些額外的功能,允許你將標記映射到創建它們的文本範圍。<!-- HTML_TAG_END --></label> <div class="flex flex-row items-center mt-3"><button class="btn px-4 mr-4" type="submit" disabled>Submit</button> </div></form></div> <h3 class="relative group"><a id="4token-classification管道如何處理跨多個標記的實體" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#4token-classification管道如何處理跨多個標記的實體"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>4.“token-classification”管道如何處理跨多個標記的實體?</span></h3> <div><form><label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="0"> <!-- HTML_TAG_START -->具有相同標籤的實體合併為一個實體。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="1"> <!-- HTML_TAG_START -->實體的開始有一個標籤,實體的持續有一個標籤。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="2"> <!-- HTML_TAG_START -->在給定的單詞中,只要第一個標記具有實體的標籤,則認為整個單詞都帶有該實體的標籤。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="3"> <!-- HTML_TAG_START -->當一個標記具有給定實體的標記時,除非標記為新實體的開始,否則具有相同標記的任何其他後續標記都被視為同一實體的一部分。<!-- HTML_TAG_END --></label> <div class="flex flex-row items-center mt-3"><button class="btn px-4 mr-4" type="submit" disabled>Submit</button> </div></form></div> <h3 class="relative group"><a id="5question-answering流水線如何處理長上下文" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#5question-answering流水線如何處理長上下文"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>5.“question-answering”流水線如何處理長上下文?</span></h3> <div><form><label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="0"> <!-- HTML_TAG_START -->實際上並不是這樣,因為它在模型接受的最大長度上截斷了長上下文。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="1"> <!-- HTML_TAG_START -->它將上下文分成若干部分,並對所得結果進行平均。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="2"> <!-- HTML_TAG_START -->它將上下文拆分為若干部分(有重疊部分) ,並在每個部分中查找一個答案的最大得分。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="3"> <!-- HTML_TAG_START -->它將上下文分成若干部分(不重疊,以提高效率) ,並在每個部分中找到一個答案的最大得分。<!-- HTML_TAG_END --></label> <div class="flex flex-row items-center mt-3"><button class="btn px-4 mr-4" type="submit" disabled>Submit</button> </div></form></div> <h3 class="relative group"><a id="6什麼是標準化" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#6什麼是標準化"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>6.什麼是標準化?</span></h3> <div><form><label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="0"> <!-- HTML_TAG_START -->這是 tokenizer 在初始階段對文本執行的任何清理。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="1"> <!-- HTML_TAG_START -->這是一種數據增強技術,包括通過刪除稀有單詞使文本更加標準。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="2"> <!-- HTML_TAG_START -->在最後的後處理步驟中,tokenizer 添加特殊標記。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="3"> <!-- HTML_TAG_START -->這是當嵌入的平均值為0和標準差為1時,通過減去平均值和除以標準差。<!-- HTML_TAG_END --></label> <div class="flex flex-row items-center mt-3"><button class="btn px-4 mr-4" type="submit" disabled>Submit</button> </div></form></div> <h3 class="relative group"><a id="7什麼是子詞標記化的前標記化" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#7什麼是子詞標記化的前標記化"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>7.什麼是子詞標記化的前標記化?</span></h3> <div><form><label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="0"> <!-- HTML_TAG_START -->這是標記化之前的步驟,應用數據增強(如隨機遮罩)。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="1"> <!-- HTML_TAG_START -->這是標記化之前的步驟,在這個步驟中,所需的清理操作應用於文本。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="2"> <!-- HTML_TAG_START -->這是應用 tokenizer 模型之前的步驟,將輸入拆分為單詞。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="3"> <!-- HTML_TAG_START -->這是應用 tokenizer 模型之前的步驟,將輸入拆分為標記。<!-- HTML_TAG_END --></label> <div class="flex flex-row items-center mt-3"><button class="btn px-4 mr-4" type="submit" disabled>Submit</button> </div></form></div> <h3 class="relative group"><a id="8選擇描述標記化-bpe-模式最準確的句子" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#8選擇描述標記化-bpe-模式最準確的句子"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>8.選擇描述標記化 BPE 模式最準確的句子。</span></h3> <div><form><label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="0"> <!-- HTML_TAG_START -->BPE 是一個子詞標記算法,從小詞彙表開始,學習合併規則。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="1"> <!-- HTML_TAG_START -->BPE 是一種子詞標記算法,從大詞彙表開始,逐步從中刪除標記。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="2"> <!-- HTML_TAG_START -->BPE 標記器通過合併最常見的一對標記來學習合併規則。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="3"> <!-- HTML_TAG_START -->BPE 記號賦予器通過合併一對記號來學習合併規則,該記號最大化了特權頻繁對和較少個別部分的分數。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="4"> <!-- HTML_TAG_START -->BPE 通過將單詞分割成字符,然後應用合併規則將單詞分解成子單詞。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="5"> <!-- HTML_TAG_START -->BPE 通過從詞彙表的開頭找到最長的子詞,然後在文本的其餘部分重複這個過程,將單詞轉化為子詞。<!-- HTML_TAG_END --></label> <div class="flex flex-row items-center mt-3"><button class="btn px-4 mr-4" type="submit" disabled>Submit</button> </div></form></div> <h3 class="relative group"><a id="9選擇適用於-wordpiece-標記模型的句子" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#9選擇適用於-wordpiece-標記模型的句子"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>9.選擇適用於 WordPiece 標記模型的句子。</span></h3> <div><form><label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="0"> <!-- HTML_TAG_START -->WordPiece 是一個子詞標記算法,它從一個小詞彙表開始,學習合併規則。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="1"> <!-- HTML_TAG_START -->WordPiece 是一種子詞標記算法,從大詞彙表開始,逐步從中刪除標記。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="2"> <!-- HTML_TAG_START -->WordPiece 標記器通過合併最常見的兩個標記來學習合併規則。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="3"> <!-- HTML_TAG_START -->WordPiece 標記器通過合併兩個標記來學習合併規則,這兩個標記最大限度地提高了頻繁出現的標記的分數,而這兩個標記的獨立部分出現頻率較低。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="4"> <!-- HTML_TAG_START -->根據模型,WordPiece 通過找到最有可能的切分符號,將單詞標記為子單詞。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="5"> <!-- HTML_TAG_START -->WordPiece 通過從詞彙表的開頭找到最長的子詞,然後在文本的其餘部分重複這個過程,將單詞轉化為子詞。<!-- HTML_TAG_END --></label> <div class="flex flex-row items-center mt-3"><button class="btn px-4 mr-4" type="submit" disabled>Submit</button> </div></form></div> <h3 class="relative group"><a id="10選擇適用於-unigram-標記模式的句子" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#10選擇適用於-unigram-標記模式的句子"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>10.選擇適用於 Unigram 標記模式的句子。</span></h3> <div><form><label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="0"> <!-- HTML_TAG_START -->Unigram 是一個子詞標記算法,它從一個很小的詞彙表開始學習合併規則。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="1"> <!-- HTML_TAG_START -->Unigram 是一種子詞標記算法,它從大詞彙表開始,逐步從中刪除標記。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="2"> <!-- HTML_TAG_START -->Unigram 通過最小化在整個語料庫中的損失來調整詞彙量。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="3"> <!-- HTML_TAG_START -->Unigram 通過保留最頻繁的子詞來調整它的詞彙量。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="4"> <!-- HTML_TAG_START -->根據模型,Unigram 通過找到最可能的分割符號來將詞轉化為子詞。<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="5"> <!-- HTML_TAG_START -->Unigram 通過將單詞分割成字符,然後應用合併規則將其分解成子單詞。<!-- HTML_TAG_END --></label> <div class="flex flex-row items-center mt-3"><button class="btn px-4 mr-4" type="submit" disabled>Submit</button> </div></form></div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/course/blob/main/chapters/zh-TW/chapter6/10.mdx" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
__sveltekit_14z83n9 = {
assets: "/docs/course/pr_1069/zh-TW",
base: "/docs/course/pr_1069/zh-TW",
env: {}
};
const element = document.currentScript.parentElement;
const data = [null,null];
Promise.all([
import("/docs/course/pr_1069/zh-TW/_app/immutable/entry/start.01945326.js"),
import("/docs/course/pr_1069/zh-TW/_app/immutable/entry/app.e8597ff2.js")
]).then(([kit, app]) => {
kit.start(app, element, {
node_ids: [0, 44],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
35.4 kB
·
Xet hash:
2efacac9163a8086a28a852d3cf70f2ac46accc8dc4ba3740d64eec633ebf254

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.