Buckets:
| <meta charset="utf-8" /><meta name="hf:doc:metadata" content="{"title":"Kuis akhir bab","local":"end-of-chapter-quiz","sections":[{"title":"1. Kapan kamu sebaiknya melatih tokenizer baru?","local":"1-kapan-kamu-sebaiknya-melatih-tokenizer-baru","sections":[],"depth":3},{"title":"2. Apa keuntungan menggunakan generator daftar teks dibandingkan dengan daftar daftar teks saat memakai train_new_from_iterator() ?","local":"2-apa-keuntungan-menggunakan-generator-daftar-teks-dibandingkan-dengan-daftar-daftar-teks-saat-memakai-trainnewfromiterator-","sections":[],"depth":3},{"title":"3. Apa keuntungan menggunakan tokenizer “fast”?","local":"3-apa-keuntungan-menggunakan-tokenizer-fast","sections":[],"depth":3},{"title":"4. Bagaimana pipeline token-classification menangani entitas yang terdiri dari beberapa token?","local":"4-bagaimana-pipeline-token-classification-menangani-entitas-yang-terdiri-dari-beberapa-token","sections":[],"depth":3},{"title":"5. Bagaimana pipeline question-answering menangani konteks yang panjang?","local":"5-bagaimana-pipeline-question-answering-menangani-konteks-yang-panjang","sections":[],"depth":3},{"title":"6. Apa itu normalisasi?","local":"6-apa-itu-normalisasi","sections":[],"depth":3},{"title":"7. Apa itu pre-tokenization dalam subword tokenizer?","local":"7-apa-itu-pre-tokenization-dalam-subword-tokenizer","sections":[],"depth":3},{"title":"8. Pilih pernyataan yang sesuai dengan model tokenisasi BPE.","local":"8-pilih-pernyataan-yang-sesuai-dengan-model-tokenisasi-bpe","sections":[],"depth":3},{"title":"9. Pilih pernyataan yang sesuai dengan model tokenisasi WordPiece.","local":"9-pilih-pernyataan-yang-sesuai-dengan-model-tokenisasi-wordpiece","sections":[],"depth":3},{"title":"10. Pilih pernyataan yang sesuai dengan model tokenisasi Unigram.","local":"10-pilih-pernyataan-yang-sesuai-dengan-model-tokenisasi-unigram","sections":[],"depth":3}],"depth":1}"> | |
| <link href="/docs/course/pr_1054/id/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload"> | |
| <link rel="modulepreload" href="/docs/course/pr_1054/id/_app/immutable/entry/start.4f92af03.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1054/id/_app/immutable/chunks/scheduler.36a0863c.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1054/id/_app/immutable/chunks/singletons.7dc7b9a4.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1054/id/_app/immutable/chunks/index.733708bb.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1054/id/_app/immutable/chunks/paths.cf097d06.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1054/id/_app/immutable/entry/app.19cef1b6.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1054/id/_app/immutable/chunks/index.156fee99.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1054/id/_app/immutable/nodes/0.1203e4a0.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1054/id/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1054/id/_app/immutable/nodes/45.d31ea6b2.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1054/id/_app/immutable/chunks/CourseFloatingBanner.16bb8bff.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1054/id/_app/immutable/chunks/Question.aeeaa180.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1054/id/_app/immutable/chunks/stores.8f4efe8a.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1054/id/_app/immutable/chunks/getInferenceSnippets.472bc46d.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Kuis akhir bab","local":"end-of-chapter-quiz","sections":[{"title":"1. Kapan kamu sebaiknya melatih tokenizer baru?","local":"1-kapan-kamu-sebaiknya-melatih-tokenizer-baru","sections":[],"depth":3},{"title":"2. Apa keuntungan menggunakan generator daftar teks dibandingkan dengan daftar daftar teks saat memakai train_new_from_iterator() ?","local":"2-apa-keuntungan-menggunakan-generator-daftar-teks-dibandingkan-dengan-daftar-daftar-teks-saat-memakai-trainnewfromiterator-","sections":[],"depth":3},{"title":"3. Apa keuntungan menggunakan tokenizer “fast”?","local":"3-apa-keuntungan-menggunakan-tokenizer-fast","sections":[],"depth":3},{"title":"4. Bagaimana pipeline token-classification menangani entitas yang terdiri dari beberapa token?","local":"4-bagaimana-pipeline-token-classification-menangani-entitas-yang-terdiri-dari-beberapa-token","sections":[],"depth":3},{"title":"5. Bagaimana pipeline question-answering menangani konteks yang panjang?","local":"5-bagaimana-pipeline-question-answering-menangani-konteks-yang-panjang","sections":[],"depth":3},{"title":"6. Apa itu normalisasi?","local":"6-apa-itu-normalisasi","sections":[],"depth":3},{"title":"7. Apa itu pre-tokenization dalam subword tokenizer?","local":"7-apa-itu-pre-tokenization-dalam-subword-tokenizer","sections":[],"depth":3},{"title":"8. Pilih pernyataan yang sesuai dengan model tokenisasi BPE.","local":"8-pilih-pernyataan-yang-sesuai-dengan-model-tokenisasi-bpe","sections":[],"depth":3},{"title":"9. Pilih pernyataan yang sesuai dengan model tokenisasi WordPiece.","local":"9-pilih-pernyataan-yang-sesuai-dengan-model-tokenisasi-wordpiece","sections":[],"depth":3},{"title":"10. Pilih pernyataan yang sesuai dengan model tokenisasi Unigram.","local":"10-pilih-pernyataan-yang-sesuai-dengan-model-tokenisasi-unigram","sections":[],"depth":3}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="end-of-chapter-quiz" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#end-of-chapter-quiz"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Kuis akhir bab</span></h1> <div class="flex space-x-1 absolute z-10 right-0 top-0"><a href="https://discuss.huggingface.co/t/chapter-6-questions" target="_blank"><img alt="Ask a Question" class="!m-0" src="https://img.shields.io/badge/Ask%20a%20question-ffcb4c.svg?logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgLTEgMTA0IDEwNiI+PGRlZnM+PHN0eWxlPi5jbHMtMXtmaWxsOiMyMzFmMjA7fS5jbHMtMntmaWxsOiNmZmY5YWU7fS5jbHMtM3tmaWxsOiMwMGFlZWY7fS5jbHMtNHtmaWxsOiMwMGE5NGY7fS5jbHMtNXtmaWxsOiNmMTVkMjI7fS5jbHMtNntmaWxsOiNlMzFiMjM7fTwvc3R5bGU+PC9kZWZzPjx0aXRsZT5EaXNjb3Vyc2VfbG9nbzwvdGl0bGU+PGcgaWQ9IkxheWVyXzIiPjxnIGlkPSJMYXllcl8zIj48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik01MS44NywwQzIzLjcxLDAsMCwyMi44MywwLDUxYzAsLjkxLDAsNTIuODEsMCw1Mi44MWw1MS44Ni0uMDVjMjguMTYsMCw1MS0yMy43MSw1MS01MS44N1M4MCwwLDUxLjg3LDBaIi8+PHBhdGggY2xhc3M9ImNscy0yIiBkPSJNNTIuMzcsMTkuNzRBMzEuNjIsMzEuNjIsMCwwLDAsMjQuNTgsNjYuNDFsLTUuNzIsMTguNEwzOS40LDgwLjE3YTMxLjYxLDMxLjYxLDAsMSwwLDEzLTYwLjQzWiIvPjxwYXRoIGNsYXNzPSJjbHMtMyIgZD0iTTc3LjQ1LDMyLjEyYTMxLjYsMzEuNiwwLDAsMS0zOC4wNSw0OEwxOC44Niw4NC44MmwyMC45MS0yLjQ3QTMxLjYsMzEuNiwwLDAsMCw3Ny40NSwzMi4xMloiLz48cGF0aCBjbGFzcz0iY2xzLTQiIGQ9Ik03MS42MywyNi4yOUEzMS42LDMxLjYsMCwwLDEsMzguOCw3OEwxOC44Niw4NC44MiwzOS40LDgwLjE3QTMxLjYsMzEuNiwwLDAsMCw3MS42MywyNi4yOVoiLz48cGF0aCBjbGFzcz0iY2xzLTUiIGQ9Ik0yNi40Nyw2Ny4xMWEzMS42MSwzMS42MSwwLDAsMSw1MS0zNUEzMS42MSwzMS42MSwwLDAsMCwyNC41OCw2Ni40MWwtNS43MiwxOC40WiIvPjxwYXRoIGNsYXNzPSJjbHMtNiIgZD0iTTI0LjU4LDY2LjQxQTMxLjYxLDMxLjYxLDAsMCwxLDcxLjYzLDI2LjI5YTMxLjYxLDMxLjYxLDAsMCwwLTQ5LDM5LjYzbC0zLjc2LDE4LjlaIi8+PC9nPjwvZz48L3N2Zz4="></a> </div> <p data-svelte-h="svelte-b5r4c5">Ayo uji apa yang telah kamu pelajari di bab ini!</p> <h3 class="relative group"><a id="1-kapan-kamu-sebaiknya-melatih-tokenizer-baru" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#1-kapan-kamu-sebaiknya-melatih-tokenizer-baru"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>1. Kapan kamu sebaiknya melatih tokenizer baru?</span></h3> <div><form><label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="0"> <!-- HTML_TAG_START -->Ketika dataset-mu mirip dengan dataset yang digunakan oleh model pralatih yang ada, dan kamu ingin melakukan pretraining model baru<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="1"> <!-- HTML_TAG_START -->Ketika dataset-mu mirip dengan dataset yang digunakan oleh model pralatih yang ada, dan kamu ingin melakukan fine-tune model baru menggunakan model pralatih ini<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="2"> <!-- HTML_TAG_START -->Ketika dataset-mu berbeda dari dataset yang digunakan oleh model pralatih yang ada, dan kamu ingin melakukan pretraining model baru<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="3"> <!-- HTML_TAG_START -->Ketika dataset-mu berbeda dari dataset yang digunakan oleh model pralatih yang ada, tetapi kamu ingin melakukan fine-tune model baru menggunakan model pralatih ini<!-- HTML_TAG_END --></label> <div class="flex flex-row items-center mt-3"><button class="btn px-4 mr-4" type="submit" disabled>Submit</button> </div></form></div> <h3 class="relative group"><a id="2-apa-keuntungan-menggunakan-generator-daftar-teks-dibandingkan-dengan-daftar-daftar-teks-saat-memakai-trainnewfromiterator-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#2-apa-keuntungan-menggunakan-generator-daftar-teks-dibandingkan-dengan-daftar-daftar-teks-saat-memakai-trainnewfromiterator-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>2. Apa keuntungan menggunakan generator daftar teks dibandingkan dengan daftar daftar teks saat memakai train_new_from_iterator() ?</span></h3> <div><form><label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="0"> <!-- HTML_TAG_START -->Itulah satu-satunya tipe yang diterima oleh metode <code>train_new_from_iterator()</code>.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="1"> <!-- HTML_TAG_START -->Kamu akan menghindari memuat seluruh dataset ke dalam memori sekaligus.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="2"> <!-- HTML_TAG_START -->Ini akan memungkinkan pustaka 🤗 Tokenizers untuk menggunakan multiprocessing.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="3"> <!-- HTML_TAG_START -->Tokenizer yang kamu latih akan menghasilkan teks yang lebih baik.<!-- HTML_TAG_END --></label> <div class="flex flex-row items-center mt-3"><button class="btn px-4 mr-4" type="submit" disabled>Submit</button> </div></form></div> <h3 class="relative group"><a id="3-apa-keuntungan-menggunakan-tokenizer-fast" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#3-apa-keuntungan-menggunakan-tokenizer-fast"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>3. Apa keuntungan menggunakan tokenizer “fast”?</span></h3> <div><form><label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="0"> <!-- HTML_TAG_START -->Tokenizer ini dapat memproses input lebih cepat dibandingkan tokenizer lambat saat kamu memproses banyak input sekaligus.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="1"> <!-- HTML_TAG_START -->Tokenizer cepat selalu lebih cepat daripada versi lambatnya.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="2"> <!-- HTML_TAG_START -->Tokenizer ini dapat melakukan padding dan truncation.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="3"> <!-- HTML_TAG_START -->Tokenizer ini memiliki fitur tambahan yang memungkinkan kamu memetakan token ke bagian teks asalnya.<!-- HTML_TAG_END --></label> <div class="flex flex-row items-center mt-3"><button class="btn px-4 mr-4" type="submit" disabled>Submit</button> </div></form></div> <h3 class="relative group"><a id="4-bagaimana-pipeline-token-classification-menangani-entitas-yang-terdiri-dari-beberapa-token" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#4-bagaimana-pipeline-token-classification-menangani-entitas-yang-terdiri-dari-beberapa-token"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>4. Bagaimana pipeline token-classification menangani entitas yang terdiri dari beberapa token?</span></h3> <div><form><label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="0"> <!-- HTML_TAG_START -->Entitas dengan label yang sama digabung menjadi satu entitas.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="1"> <!-- HTML_TAG_START -->Ada label untuk awal entitas dan label untuk kelanjutan entitas.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="2"> <!-- HTML_TAG_START -->Dalam satu kata, selama token pertama memiliki label entitas, seluruh kata dianggap memiliki entitas tersebut.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="3"> <!-- HTML_TAG_START -->Ketika satu token memiliki label entitas, token-token setelahnya yang memiliki label sama dianggap bagian dari entitas tersebut, kecuali jika diberi label sebagai entitas baru.<!-- HTML_TAG_END --></label> <div class="flex flex-row items-center mt-3"><button class="btn px-4 mr-4" type="submit" disabled>Submit</button> </div></form></div> <h3 class="relative group"><a id="5-bagaimana-pipeline-question-answering-menangani-konteks-yang-panjang" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#5-bagaimana-pipeline-question-answering-menangani-konteks-yang-panjang"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>5. Bagaimana pipeline question-answering menangani konteks yang panjang?</span></h3> <div><form><label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="0"> <!-- HTML_TAG_START -->Pipeline ini tidak benar-benar menanganinya, hanya memotong konteks panjang hingga batas maksimum model.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="1"> <!-- HTML_TAG_START -->Pipeline ini membagi konteks menjadi beberapa bagian dan mengambil rata-rata dari hasilnya.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="2"> <!-- HTML_TAG_START -->Pipeline ini membagi konteks menjadi beberapa bagian (dengan overlap) dan mengambil skor maksimum dari setiap bagian.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="3"> <!-- HTML_TAG_START -->Pipeline ini membagi konteks menjadi beberapa bagian (tanpa overlap, demi efisiensi) dan mengambil skor maksimum dari setiap bagian.<!-- HTML_TAG_END --></label> <div class="flex flex-row items-center mt-3"><button class="btn px-4 mr-4" type="submit" disabled>Submit</button> </div></form></div> <h3 class="relative group"><a id="6-apa-itu-normalisasi" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#6-apa-itu-normalisasi"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>6. Apa itu normalisasi?</span></h3> <div><form><label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="0"> <!-- HTML_TAG_START -->Ini adalah proses pembersihan awal yang dilakukan tokenizer pada teks.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="1"> <!-- HTML_TAG_START -->Ini adalah teknik augmentasi data yang membuat teks lebih ‘normal’ dengan menghapus kata-kata langka.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="2"> <!-- HTML_TAG_START -->Ini adalah langkah terakhir pasca-pemrosesan di mana tokenizer menambahkan token khusus.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="3"> <!-- HTML_TAG_START -->Ini adalah saat embedding dibuat dengan rata-rata 0 dan deviasi standar 1.<!-- HTML_TAG_END --></label> <div class="flex flex-row items-center mt-3"><button class="btn px-4 mr-4" type="submit" disabled>Submit</button> </div></form></div> <h3 class="relative group"><a id="7-apa-itu-pre-tokenization-dalam-subword-tokenizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#7-apa-itu-pre-tokenization-dalam-subword-tokenizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>7. Apa itu pre-tokenization dalam subword tokenizer?</span></h3> <div><form><label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="0"> <!-- HTML_TAG_START -->Ini adalah tahap sebelum tokenisasi, di mana augmentasi data diterapkan.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="1"> <!-- HTML_TAG_START -->Ini adalah tahap sebelum tokenisasi, di mana pembersihan dilakukan pada teks.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="2"> <!-- HTML_TAG_START -->Ini adalah tahap sebelum model tokenizer digunakan, untuk memecah input menjadi kata-kata.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="3"> <!-- HTML_TAG_START -->Ini adalah tahap sebelum model tokenizer digunakan, untuk memecah input menjadi token.<!-- HTML_TAG_END --></label> <div class="flex flex-row items-center mt-3"><button class="btn px-4 mr-4" type="submit" disabled>Submit</button> </div></form></div> <h3 class="relative group"><a id="8-pilih-pernyataan-yang-sesuai-dengan-model-tokenisasi-bpe" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#8-pilih-pernyataan-yang-sesuai-dengan-model-tokenisasi-bpe"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>8. Pilih pernyataan yang sesuai dengan model tokenisasi BPE.</span></h3> <div><form><label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="0"> <!-- HTML_TAG_START -->BPE adalah algoritma tokenisasi subword yang dimulai dengan kosakata kecil dan mempelajari aturan penggabungan.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="1"> <!-- HTML_TAG_START -->BPE adalah algoritma tokenisasi subword yang dimulai dengan kosakata besar dan secara bertahap menghapus token.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="2"> <!-- HTML_TAG_START -->Tokenizer BPE mempelajari aturan penggabungan dengan menggabungkan pasangan token yang paling sering muncul.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="3"> <!-- HTML_TAG_START -->Tokenizer BPE mempelajari aturan penggabungan berdasarkan skor yang mengutamakan pasangan sering dengan bagian individual yang jarang.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="4"> <!-- HTML_TAG_START -->BPE memecah kata menjadi karakter lalu menerapkan aturan penggabungan.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="5"> <!-- HTML_TAG_START -->BPE memecah kata menjadi subword dengan mencari subword terpanjang dari awal yang ada di kosakata.<!-- HTML_TAG_END --></label> <div class="flex flex-row items-center mt-3"><button class="btn px-4 mr-4" type="submit" disabled>Submit</button> </div></form></div> <h3 class="relative group"><a id="9-pilih-pernyataan-yang-sesuai-dengan-model-tokenisasi-wordpiece" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#9-pilih-pernyataan-yang-sesuai-dengan-model-tokenisasi-wordpiece"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>9. Pilih pernyataan yang sesuai dengan model tokenisasi WordPiece.</span></h3> <div><form><label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="0"> <!-- HTML_TAG_START -->WordPiece adalah algoritma tokenisasi subword yang dimulai dengan kosakata kecil dan mempelajari aturan penggabungan.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="1"> <!-- HTML_TAG_START -->WordPiece adalah algoritma tokenisasi subword yang dimulai dengan kosakata besar dan menghapus token.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="2"> <!-- HTML_TAG_START -->Tokenizer WordPiece menggabungkan pasangan token yang paling sering.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="3"> <!-- HTML_TAG_START -->Tokenizer WordPiece memilih pasangan token yang memaksimalkan skor berdasarkan frekuensi dan informasi tambahan.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="4"> <!-- HTML_TAG_START -->WordPiece mencari segmentasi token yang paling mungkin menurut model.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="5"> <!-- HTML_TAG_START -->WordPiece memecah kata jadi subword dengan mencari subword terpanjang dari awal yang ada di kosakata, lalu mengulanginya untuk sisanya.<!-- HTML_TAG_END --></label> <div class="flex flex-row items-center mt-3"><button class="btn px-4 mr-4" type="submit" disabled>Submit</button> </div></form></div> <h3 class="relative group"><a id="10-pilih-pernyataan-yang-sesuai-dengan-model-tokenisasi-unigram" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#10-pilih-pernyataan-yang-sesuai-dengan-model-tokenisasi-unigram"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>10. Pilih pernyataan yang sesuai dengan model tokenisasi Unigram.</span></h3> <div><form><label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="0"> <!-- HTML_TAG_START -->Unigram adalah algoritma tokenisasi subword yang dimulai dengan kosakata kecil dan mempelajari aturan penggabungan.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="1"> <!-- HTML_TAG_START -->Unigram adalah algoritma tokenisasi subword yang dimulai dengan kosakata besar dan secara bertahap menghapus token.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="2"> <!-- HTML_TAG_START -->Unigram menyesuaikan kosakatanya dengan meminimalkan loss yang dihitung dari seluruh korpus.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="3"> <!-- HTML_TAG_START -->Unigram menyesuaikan kosakatanya dengan menyimpan subword yang paling sering.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="4"> <!-- HTML_TAG_START -->Unigram memecah kata menjadi subword dengan mencari segmentasi paling mungkin menurut model.<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="5"> <!-- HTML_TAG_START -->Unigram memecah kata menjadi karakter lalu menerapkan aturan penggabungan.<!-- HTML_TAG_END --></label> <div class="flex flex-row items-center mt-3"><button class="btn px-4 mr-4" type="submit" disabled>Submit</button> </div></form></div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/course/blob/main/chapters/id/chapter6/10.mdx" target="_blank"><span data-svelte-h="svelte-1kd6by1"><</span> <span data-svelte-h="svelte-x0xyl0">></span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
| { | |
| __sveltekit_ojy514 = { | |
| assets: "/docs/course/pr_1054/id", | |
| base: "/docs/course/pr_1054/id", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/course/pr_1054/id/_app/immutable/entry/start.4f92af03.js"), | |
| import("/docs/course/pr_1054/id/_app/immutable/entry/app.19cef1b6.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 45], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 36.4 kB
- Xet hash:
- 8680439fbca623c166c5d9540e30c8247ca37c20ba7e1dc6d53b731a057fe0a4
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.