Buckets:

hf-doc-build
/

doc-dev

Files

xet

hf-doc-build/doc-dev / course /pr_1107 /my /chapter2 /2.html

rtrm

about 1 month ago

download

raw

85.4 kB

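	<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Pipeline နောက်ကွယ်မှ အကြောင်းအရာများ&quot;,&quot;local&quot;:&quot;behind-the-pipeline&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Tokenizer ဖြင့် Preprocessing ပြုလုပ်ခြင်း&quot;,&quot;local&quot;:&quot;preprocessing-with-a-tokenizer&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Model ကို ဖြတ်သန်းခြင်း&quot;,&quot;local&quot;:&quot;going-through-the-model&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;High-dimensional vector တစ်ခုလား။&quot;,&quot;local&quot;:&quot;a-high-dimensional-vector&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Model heads: ဂဏန်းတွေကနေ အဓိပ္ပာယ်ထုတ်ယူခြင်း&quot;,&quot;local&quot;:&quot;model-heads-making-sense-out-of-numbers&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Output ကို Postprocessing ပြုလုပ်ခြင်း&quot;,&quot;local&quot;:&quot;postprocessing-the-output&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;ဝေါဟာရ ရှင်းလင်းချက် (Glossary)&quot;,&quot;local&quot;:&quot;ဝဟရ-ရငလငခက-glossary&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">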
	<link href="/docs/course/pr_1107/my/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet">
	<link rel="modulepreload" href="/docs/course/pr_1107/my/_app/immutable/entry/start.5c6233a8.js">
	<link rel="modulepreload" href="/docs/course/pr_1107/my/_app/immutable/chunks/scheduler.0835143d.js">
	<link rel="modulepreload" href="/docs/course/pr_1107/my/_app/immutable/chunks/singletons.c8b11329.js">
	<link rel="modulepreload" href="/docs/course/pr_1107/my/_app/immutable/chunks/index.1bab75e2.js">
	<link rel="modulepreload" href="/docs/course/pr_1107/my/_app/immutable/chunks/paths.e4a366ea.js">
	<link rel="modulepreload" href="/docs/course/pr_1107/my/_app/immutable/entry/app.55586789.js">
	<link rel="modulepreload" href="/docs/course/pr_1107/my/_app/immutable/chunks/preload-helper.5f7c8393.js">
	<link rel="modulepreload" href="/docs/course/pr_1107/my/_app/immutable/chunks/index.3d7efe79.js">
	<link rel="modulepreload" href="/docs/course/pr_1107/my/_app/immutable/nodes/0.0cec3d6c.js">
	<link rel="modulepreload" href="/docs/course/pr_1107/my/_app/immutable/chunks/each.e59479a4.js">
	<link rel="modulepreload" href="/docs/course/pr_1107/my/_app/immutable/nodes/15.b1d23cfc.js">
	<link rel="modulepreload" href="/docs/course/pr_1107/my/_app/immutable/chunks/Youtube.96e00463.js">
	<link rel="modulepreload" href="/docs/course/pr_1107/my/_app/immutable/chunks/CodeBlock.116ed840.js">
	<link rel="modulepreload" href="/docs/course/pr_1107/my/_app/immutable/chunks/CourseFloatingBanner.860ea6e4.js">
	<link rel="modulepreload" href="/docs/course/pr_1107/my/_app/immutable/chunks/FrameworkSwitchCourse.ff2bd9ab.js">
	<link rel="modulepreload" href="/docs/course/pr_1107/my/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.0b02b772.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Pipeline နောက်ကွယ်မှ အကြောင်းအရာများ&quot;,&quot;local&quot;:&quot;behind-the-pipeline&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Tokenizer ဖြင့် Preprocessing ပြုလုပ်ခြင်း&quot;,&quot;local&quot;:&quot;preprocessing-with-a-tokenizer&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Model ကို ဖြတ်သန်းခြင်း&quot;,&quot;local&quot;:&quot;going-through-the-model&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;High-dimensional vector တစ်ခုလား။&quot;,&quot;local&quot;:&quot;a-high-dimensional-vector&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Model heads: ဂဏန်းတွေကနေ အဓိပ္ပာယ်ထုတ်ယူခြင်း&quot;,&quot;local&quot;:&quot;model-heads-making-sense-out-of-numbers&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Output ကို Postprocessing ပြုလုပ်ခြင်း&quot;,&quot;local&quot;:&quot;postprocessing-the-output&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;ဝေါဟာရ ရှင်းလင်းချက် (Glossary)&quot;,&quot;local&quot;:&quot;ဝဟရ-ရငလငခက-glossary&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="bg-white leading-none border border-gray-100 rounded-lg flex p-0.5 w-56 text-sm mb-4"><a class="flex justify-center flex-1 py-1.5 px-2.5 focus:outline-none !no-underline rounded-l bg-red-50 dark:bg-transparent text-red-600" href="?fw=pt"><svg class="mr-1.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><defs><clipPath id="a"><rect x="3.05" y="0.5" width="25.73" height="31" fill="none"></rect></clipPath></defs><g clip-path="url(#a)"><path d="M24.94,9.51a12.81,12.81,0,0,1,0,18.16,12.68,12.68,0,0,1-18,0,12.81,12.81,0,0,1,0-18.16l9-9V5l-.84.83-6,6a9.58,9.58,0,1,0,13.55,0ZM20.44,9a1.68,1.68,0,1,1,1.67-1.67A1.68,1.68,0,0,1,20.44,9Z" fill="#ee4c2c"></path></g></svg> Pytorch </a><a class="flex justify-center flex-1 py-1.5 px-2.5 focus:outline-none !no-underline rounded-r text-gray-500 filter grayscale" 
href="?fw=tf"><svg class="mr-1.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="0.94em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 274"><path d="M145.726 42.065v42.07l72.861 42.07v-42.07l-72.86-42.07zM0 84.135v42.07l36.43 21.03V105.17L0 84.135zm109.291 21.035l-36.43 21.034v126.2l36.43 21.035v-84.135l36.435 21.035v-42.07l-36.435-21.034V105.17z" fill="#E55B2D"></path><path d="M145.726 42.065L36.43 105.17v42.065l72.861-42.065v42.065l36.435-21.03v-84.14zM255.022 63.1l-36.435 21.035v42.07l36.435-21.035V63.1zm-72.865 84.135l-36.43 21.035v42.07l36.43-21.036v-42.07zm-36.43 63.104l-36.436-21.035v84.135l36.435-21.035V210.34z" fill="#ED8E24"></path><path d="M145.726 0L0 84.135l36.43 21.035l109.296-63.105l72.861 42.07L255.022 63.1L145.726 0zm0 126.204l-36.435 21.03l36.435 21.036l36.43-21.035l-36.43-21.03z" fill="#F8BF3C"></path></svg> TensorFlow </a></div> <h1 class="relative group"><a id="behind-the-pipeline" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#behind-the-pipeline"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> 
<span>Pipeline နောက်ကွယ်မှ အကြောင်းအရာများ</span></h1> <div class="flex space-x-1 absolute z-10 right-0 top-0"><a href="https://discuss.huggingface.co/t/chapter-2-questions" target="_blank"><img alt="Ask a Question" class="!m-0" src="https://img.shields.io/badge/Ask%20a%20question-ffcb4c.svg?logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgLTEgMTA0IDEwNiI+PGRlZnM+PHN0eWxlPi5jbHMtMXtmaWxsOiMyMzFmMjA7fS5jbHMtMntmaWxsOiNmZmY5YWU7fS5jbHMtM3tmaWxsOiMwMGFlZWY7fS5jbHMtNHtmaWxsOiMwMGE5NGY7fS5jbHMtNXtmaWxsOiNmMTVkMjI7fS5jbHMtNntmaWxsOiNlMzFiMjM7fTwvc3R5bGU+PC9kZWZzPjx0aXRsZT5EaXNjb3Vyc2VfbG9nbzwvdGl0bGU+PGcgaWQ9IkxheWVyXzIiPjxnIGlkPSJMYXllcl8zIj48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik01MS44NywwQzIzLjcxLDAsMCwyMi44MywwLDUxYzAsLjkxLDAsNTIuODEsMCw1Mi44MWw1MS44Ni0uMDVjMjguMTYsMCw1MS0yMy43MSw1MS01MS44N1M4MCwwLDUxLjg3LDBaIi8+PHBhdGggY2xhc3M9ImNscy0yIiBkPSJNNTIuMzcsMTkuNzRBMzEuNjIsMzEuNjIsMCwwLDAsMjQuNTgsNjYuNDFsLTUuNzIsMTguNEwzOS40LDgwLjE3YTMxLjYxLDMxLjYxLDAsMSwwLDEzLTYwLjQzWiIvPjxwYXRoIGNsYXNzPSJjbHMtMyIgZD0iTTc3LjQ1LDMyLjEyYTMxLjYsMzEuNiwwLDAsMS0zOC4wNSw0OEwxOC44Niw4NC44MmwyMC45MS0yLjQ3QTMxLjYsMzEuNiwwLDAsMCw3Ny40NSwzMi4xMloiLz48cGF0aCBjbGFzcz0iY2xzLTQiIGQ9Ik03MS42MywyNi4yOUEzMS42LDMxLjYsMCwwLDEsMzguOCw3OEwxOC44Niw4NC44MiwzOS40LDgwLjE3QTMxLjYsMzEuNiwwLDAsMCw3MS42MywyNi4yOVoiLz48cGF0aCBjbGFzcz0iY2xzLTUiIGQ9Ik0yNi40Nyw2Ny4xMWEzMS42MSwzMS42MSwwLDAsMSw1MS0zNUEzMS42MSwzMS42MSwwLDAsMCwyNC41OCw2Ni40MWwtNS43MiwxOC40WiIvPjxwYXRoIGNsYXNzPSJjbHMtNiIgZD0iTTI0LjU4LDY2LjQxQTMxLjYxLDMxLjYxLDAsMCwxLDcxLjYzLDI2LjI5YTMxLjYxLDMxLjYxLDAsMCwwLTQ5LDM5LjYzbC0zLjc2LDE4LjlaIi8+PC9nPjwvZz48L3N2Zz4="></a> <a href="https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/en/chapter2/section2_pt.ipynb" target="_blank"><img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"></a> <a 
href="https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/en/chapter2/section2_pt.ipynb" target="_blank"><img alt="Open In Studio Lab" class="!m-0" src="https://studiolab.sagemaker.aws/studiolab.svg"></a></div> <iframe class="w-full xl:w-4/6 h-80" src="https://www.youtube-nocookie.com/embed/1pedAIvTWXk" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe> <p data-svelte-h="svelte-1odik3q"><a href="/course/chapter1">Chapter 1</a> မှာ အောက်ပါ code ကို run တဲ့အခါ ဘာတွေဖြစ်ပျက်သွားလဲဆိုတာကို ကြည့်ခြင်းဖြင့် ဥပမာတစ်ခုနဲ့ စလိုက်ရအောင်…။</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline

classifier = pipeline(<span class="hljs-string">"sentiment-analysis"</span>)
classifier(
    [
        <span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span>,
        <span class="hljs-string">"I hate this so much!"</span>,
    ]
	)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-k17eab">အောက်ပါရလဒ်ကို ရရှိခဲ့ပါတယ်။</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->[{<span class="hljs-string">'label'</span>: <span class="hljs-string">'POSITIVE'</span>, <span class="hljs-string">'score'</span>: <span class="hljs-number">0.9598047137260437</span>},
	{<span class="hljs-string">'label'</span>: <span class="hljs-string">'NEGATIVE'</span>, <span class="hljs-string">'score'</span>: <span class="hljs-number">0.9994558095932007</span>}]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1i708yy"><a href="/course/chapter1">Chapter 1</a> မှာ ကျွန်တော်တို့ တွေ့ခဲ့ရသလို၊ ဒီ pipeline ဟာ အဆင့်သုံးဆင့်ကို ပေါင်းစပ်ထားပါတယ်၊ preprocessing လုပ်ခြင်း၊ model ကနေတဆင့် inputs တွေကို ပေးပို့ခြင်း၊ နဲ့ postprocessing လုပ်ခြင်းတို့ ဖြစ်ပါတယ်။</p> <div class="flex justify-center" data-svelte-h="svelte-fbe70j"><img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/full_nlp_pipeline.svg" alt="The full NLP pipeline: tokenization of text, conversion to IDs, and inference through the Transformer model and the model head."> <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/full_nlp_pipeline-dark.svg" alt="The full NLP pipeline: tokenization of text, conversion to IDs, and inference through the Transformer model and the model head."></div> <p data-svelte-h="svelte-1stp149">ဒါတွေကို အမြန်ဆုံး တစ်ခုချင်းစီ လေ့လာကြည့်ရအောင်။</p> <h2 class="relative group"><a id="preprocessing-with-a-tokenizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#preprocessing-with-a-tokenizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 
79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Tokenizer ဖြင့် Preprocessing ပြုလုပ်ခြင်း</span></h2> <p data-svelte-h="svelte-f5jr8w">အခြား neural network များကဲ့သို့ Transformer မော်ဒယ်များသည် raw text များကို တိုက်ရိုက်လုပ်ဆောင်၍ မရပါ။ ထို့ကြောင့် ကျွန်တော်တို့ pipeline ၏ ပထမအဆင့်မှာ text inputs များကို မော်ဒယ်နားလည်နိုင်သော ဂဏန်းများအဖြစ် ပြောင်းလဲခြင်းဖြစ်သည်။ ၎င်းကို ပြုလုပ်ရန် ကျွန်တော်တို့သည် <em>tokenizer</em> ကို အသုံးပြုပါသည်။ ၎င်းသည် အောက်ပါတို့ကို လုပ်ဆောင်ရန် တာဝန်ရှိသည်-</p> <ul data-svelte-h="svelte-18twsho"><li>input ကို <em>tokens</em> ဟုခေါ်သော စကားလုံးများ၊ subwords များ သို့မဟုတ် သင်္ကေတများ (ဥပမာ- ပုဒ်ဖြတ်သံ) အဖြစ် ပိုင်းခြားခြင်း</li> <li>token တစ်ခုစီကို integer တစ်ခုသို့ တွဲချိတ်ခြင်း</li> <li>မော်ဒယ်အတွက် အသုံးဝင်နိုင်သော အပို inputs များကို ထည့်သွင်းခြင်း</li></ul> <p data-svelte-h="svelte-1ymw3bp">ဒီ preprocessing အားလုံးကို မော်ဒယ်ကို pre-trained လုပ်ခဲ့စဉ်က အတိအကျလုပ်ခဲ့တဲ့ နည်းလမ်းအတိုင်း ပြုလုပ်ဖို့ လိုအပ်ပါတယ်။ ဒါကြောင့် ကျွန်တော်တို့ အရင်ဆုံး <a href="https://huggingface.co/models" rel="nofollow">Model Hub</a> ကနေ အဲဒီအချက်အလက်တွေကို download လုပ်ဖို့ လိုပါတယ်။ ဒါကို လုပ်ဖို့အတွက် <code>AutoTokenizer</code> class နဲ့ သူ့ရဲ့ <code>from_pretrained()</code> method ကို ကျွန်တော်တို့ အသုံးပြုပါတယ်။ ကျွန်တော်တို့ model ရဲ့ checkpoint name ကို အသုံးပြုပြီး၊ ၎င်းသည် model ရဲ့ tokenizer နဲ့ ဆက်စပ်နေတဲ့ ဒေတာတွေကို အလိုအလျောက် ရယူပြီး cache လုပ်ပါလိမ့်မယ် (ဒါကြောင့် အောက်က code ကို ပထမဆုံးအကြိမ် run မှသာ download လုပ်ပါလိမ့်မယ်)။</p> <p data-svelte-h="svelte-1rtuiyg"><code>sentiment-analysis</code> pipeline ရဲ့ default checkpoint က <code>distilbert-base-uncased-finetuned-sst-2-english</code> ဖြစ်တာကြောင့် (၎င်းရဲ့ model card ကို <a 
href="https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english" rel="nofollow">ဒီနေရာမှာ</a> ကြည့်နိုင်ပါတယ်)၊ အောက်ပါ code ကို ကျွန်တော်တို့ run ပြုလုပ်ပေးပါတယ်။</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer

	checkpoint = <span class="hljs-string">"distilbert-base-uncased-finetuned-sst-2-english"</span>
	tokenizer = AutoTokenizer.from_pretrained(checkpoint)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-4w3tf2">tokenizer ကို ရရှိပြီဆိုတာနဲ့၊ ကျွန်တော်တို့ရဲ့ စာကြောင်းတွေကို တိုက်ရိုက် ပေးပို့နိုင်ပြီး model ကို ထည့်သွင်းဖို့ အဆင်သင့်ဖြစ်နေတဲ့ dictionary တစ်ခု ပြန်ရပါလိမ့်မယ်။ လုပ်ဆောင်ဖို့ ကျန်ရှိတာကတော့ input IDs တွေရဲ့ list ကို tensors တွေအဖြစ် ပြောင်းလဲဖို့ပါပဲ။</p> <p data-svelte-h="svelte-wxw4gb">သင်ဟာ backend မှာ ဘယ် ML framework ကို အသုံးပြုလဲဆိုတာ စိုးရိမ်စရာမလိုဘဲ 🤗 Transformers ကို အသုံးပြုနိုင်ပါတယ်။ အချို့မော်ဒယ်တွေအတွက် PyTorch ဒါမှမဟုတ် Flax ဖြစ်နိုင်ပါတယ်။ သို့သော် Transformer မော်ဒယ်တွေက <em>tensors</em> တွေကိုပဲ input အဖြစ် လက်ခံပါတယ်။ tensors တွေအကြောင်းကို အခုမှ စကြားဖူးတာဆိုရင်၊ ၎င်းတို့ကို NumPy arrays တွေအဖြစ် တွေးကြည့်နိုင်ပါတယ်။ NumPy array တစ်ခုက scalar (0D)၊ vector (1D)၊ matrix (2D) သို့မဟုတ် dimension များစွာရှိနိုင်ပါတယ်။ ဒါက တကယ်တော့ tensor တစ်ခုပါပဲ။ အခြား ML frameworks တွေရဲ့ tensors တွေလည်း အလားတူပဲ အလုပ်လုပ်ပြီး၊ NumPy arrays တွေလိုပဲ လွယ်ကူစွာ instantiate လုပ်နိုင်ပါတယ်။</p> <p data-svelte-h="svelte-1e3gqau">ကျွန်တော်တို့ ပြန်လိုချင်တဲ့ tensors (PyTorch သို့မဟုတ် plain NumPy) အမျိုးအစားကို သတ်မှတ်ဖို့အတွက် <code>return_tensors</code> argument ကို အသုံးပြုနိုင်ပါတယ်။</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div 
class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->raw_inputs = [
	<span class="hljs-string">"I've been waiting for a HuggingFace course my whole life."</span>,
	<span class="hljs-string">"I hate this so much!"</span>,
	]
	inputs = tokenizer(raw_inputs, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"pt"</span>)
	<span class="hljs-built_in">print</span>(inputs)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-3nnsiw">padding နဲ့ truncation အကြောင်းကို အခုထိ စိတ်ပူမနေပါနဲ့၊ ဒါတွေကို နောက်မှ ရှင်းပြပါမယ်။ ဒီနေရာမှာ မှတ်ထားရမယ့် အဓိကအချက်တွေကတော့ သင်ဟာ စာကြောင်းတစ်ကြောင်း ဒါမှမဟုတ် စာကြောင်းများစွာပါတဲ့ list ကို ပေးပို့နိုင်သလို၊ သင်ပြန်လိုချင်တဲ့ tensors အမျိုးအစားကိုလည်း သတ်မှတ်နိုင်ပါတယ် (မည်သည့် type ကိုမျှ မပေးပို့ရင် list of lists အဖြစ် ရလဒ်ရပါလိမ့်မယ်)။</p> <p data-svelte-h="svelte-palrm1">PyTorch tensors အဖြစ် ရလဒ်တွေက အောက်ပါအတိုင်း ဖြစ်ပါတယ်။</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->{
	<span class="hljs-string">'input_ids'</span>: tensor([
	[ <span class="hljs-number">101</span>, <span class="hljs-number">1045</span>, <span class="hljs-number">1005</span>, <span class="hljs-number">2310</span>, <span class="hljs-number">2042</span>, <span class="hljs-number">3403</span>, <span class="hljs-number">2005</span>, <span class="hljs-number">1037</span>, <span class="hljs-number">17662</span>, <span class="hljs-number">12172</span>, <span class="hljs-number">2607</span>, <span class="hljs-number">2026</span>, <span class="hljs-number">2878</span>, <span class="hljs-number">2166</span>, <span class="hljs-number">1012</span>, <span class="hljs-number">102</span>],
	[ <span class="hljs-number">101</span>, <span class="hljs-number">1045</span>, <span class="hljs-number">5223</span>, <span class="hljs-number">2023</span>, <span class="hljs-number">2061</span>, <span class="hljs-number">2172</span>, <span class="hljs-number">999</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]
	]),
	<span class="hljs-string">'attention_mask'</span>: tensor([
	[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>],
	[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]
	])
	}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ez50u8">output ကိုယ်တိုင်က <code>input_ids</code> နဲ့ <code>attention_mask</code> ဆိုတဲ့ key နှစ်ခုပါဝင်တဲ့ dictionary တစ်ခု ဖြစ်ပါတယ်။ <code>input_ids</code> မှာ integer row နှစ်ခု (စာကြောင်းတစ်ကြောင်းစီအတွက် တစ်ခု) ပါဝင်ပြီး ၎င်းတို့ဟာ စာကြောင်းတစ်ကြောင်းစီရှိ tokens တွေရဲ့ ထူးခြားတဲ့ identifiers တွေ ဖြစ်ပါတယ်။ <code>attention_mask</code> ဆိုတာ ဘာလဲဆိုတာကို ဒီအခန်းရဲ့ နောက်ပိုင်းမှာ ကျွန်တော်တို့ ရှင်းပြပါမယ်။</p> <h2 class="relative group"><a id="going-through-the-model" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#going-through-the-model"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Model ကို ဖြတ်သန်းခြင်း</span></h2> <p data-svelte-h="svelte-1t0rbjp">ကျွန်တော်တို့ tokenizer ကို လုပ်ခဲ့သလိုပဲ pre-trained model ကို download လုပ်နိုင်ပါတယ်။ 🤗 Transformers က <code>from_pretrained()</code> method ပါဝင်တဲ့ <code>AutoModel</code> class ကို ပံ့ပိုးပေးပါတယ်။</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition 
duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModel

	checkpoint = <span class="hljs-string">"distilbert-base-uncased-finetuned-sst-2-english"</span>
	model = AutoModel.from_pretrained(checkpoint)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1rn83x7">ဒီ code snippet မှာ ကျွန်တော်တို့ဟာ ယခင်က pipeline မှာ အသုံးပြုခဲ့တဲ့ checkpoint အတူတူကို download လုပ်ပြီး model တစ်ခုကို instantiate လုပ်ခဲ့ပါတယ်။ (ဒါကို အမှန်တကယ်တော့ cache လုပ်ထားပြီးသား ဖြစ်သင့်ပါတယ်)။</p> <p data-svelte-h="svelte-1kgrxsr">ဒီ architecture မှာ base Transformer module သာ ပါဝင်ပါတယ်- inputs အချို့ကို ပေးလိုက်တဲ့အခါ ၎င်းသည် <em>hidden states</em> ဟုခေါ်သော အရာများကို ထုတ်ပေးပါတယ်။ ၎င်းတို့ကို <em>features</em> ဟုလည်း ခေါ်ပါတယ်။ model input တစ်ခုစီအတွက် <strong>Transformer model က အဲဒီ input ကို အကြောင်းအရာအရ နားလည်ထားမှုကို ကိုယ်စားပြုတဲ့ high-dimensional vector တစ်ခုကို</strong> ကျွန်တော်တို့ ပြန်ရပါလိမ့်မယ်။</p> <p data-svelte-h="svelte-1wc7otq">ဒါကို နားမလည်ရင် စိတ်မပူပါနဲ့။ ဒါတွေကို နောက်မှ အားလုံးရှင်းပြပါမယ်။</p> <p data-svelte-h="svelte-14vsyol">ဒီ hidden states တွေက သူ့ဘာသာသူ အသုံးဝင်နိုင်ပေမယ့်၊ ၎င်းတို့ဟာ များသောအားဖြင့် <em>head</em> လို့ခေါ်တဲ့ model ရဲ့ နောက်ထပ်အစိတ်အပိုင်းတစ်ခုရဲ့ inputs တွေ ဖြစ်ပါတယ်။ <a href="/course/chapter1">Chapter 1</a> မှာ မတူညီတဲ့ လုပ်ငန်းတာဝန်တွေကို architecture တူတူနဲ့ လုပ်ဆောင်နိုင်ခဲ့ပေမယ့်၊ ဒီလုပ်ငန်းတာဝန်တစ်ခုစီမှာ ၎င်းနဲ့ ဆက်စပ်နေတဲ့ head တစ်ခုစီ ရှိပါတယ်။</p> <h3 class="relative group"><a id="a-high-dimensional-vector" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#a-high-dimensional-vector"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 
79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>High-dimensional vector တစ်ခုလား။</span></h3> <p data-svelte-h="svelte-t09vtr">Transformer module ကနေ ထုတ်ပေးတဲ့ vector ဟာ များသောအားဖြင့် ကြီးမားပါတယ်။ ဒါက အများအားဖြင့် dimensions သုံးခု ရှိပါတယ်-</p> <ul data-svelte-h="svelte-cofhig"><li><strong>Batch size</strong>: တစ်ကြိမ်တည်း လုပ်ဆောင်တဲ့ sequence အရေအတွက် (ကျွန်တော်တို့ ဥပမာမှာ ၂ ခု)။</li> <li><strong>Sequence length</strong>: sequence ရဲ့ ဂဏန်းဆိုင်ရာ ကိုယ်စားပြုမှုရဲ့ အရှည် (ကျွန်တော်တို့ ဥပမာမှာ ၁၆ ခု)။</li> <li><strong>Hidden size</strong>: model input တစ်ခုစီရဲ့ vector dimension။</li></ul> <p data-svelte-h="svelte-1xsg0jn">နောက်ဆုံးတန်ဖိုးကြောင့် “high dimensional” လို့ ခေါ်တာ ဖြစ်ပါတယ်။ hidden size က အလွန်ကြီးမားနိုင်ပါတယ် (768 က ပိုသေးငယ်တဲ့ မော်ဒယ်တွေအတွက် အများအားဖြင့်ဖြစ်ပြီး၊ ပိုကြီးတဲ့ မော်ဒယ်တွေမှာ ဒါက 3072 ဒါမှမဟုတ် ပိုများနိုင်ပါတယ်)။</p> <p data-svelte-h="svelte-1vf39lh">ကျွန်တော်တို့ preprocessing လုပ်ထားတဲ့ inputs တွေကို model ကို ပေးပို့ကြည့်ရင် ဒါကို တွေ့နိုင်ပါတယ်။</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" 
height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->outputs = model(**inputs)
	<span class="hljs-built_in">print</span>(outputs.last_hidden_state.shape)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->torch.Size([<span class="hljs-number">2</span>, <span class="hljs-number">16</span>, <span class="hljs-number">768</span>])<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ev8ub4">🤗 Transformers မော်ဒယ်တွေရဲ့ outputs တွေဟာ <code>namedtuple</code> တွေ ဒါမှမဟုတ် dictionaries တွေလို အလုပ်လုပ်တယ်ဆိုတာကို သတိပြုပါ။ attribute တွေ (ကျွန်တော်တို့ လုပ်ခဲ့သလို) ဒါမှမဟုတ် key ( <code>outputs["last_hidden_state"]</code> ) နဲ့ ဒါမှမဟုတ် သင်ရှာနေတဲ့အရာ ဘယ်နေရာမှာရှိတယ်ဆိုတာ အတိအကျသိရင် index ( <code>outputs[0]</code> ) နဲ့ပါ ဝင်ရောက်ကြည့်ရှုနိုင်ပါတယ်။</p> <h3 class="relative group"><a id="model-heads-making-sense-out-of-numbers" class="header-link block pr-1.5 text-lg 
no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#model-heads-making-sense-out-of-numbers"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Model heads: ဂဏန်းတွေကနေ အဓိပ္ပာယ်ထုတ်ယူခြင်း</span></h3> <p data-svelte-h="svelte-jma70z">Model heads တွေက hidden states တွေရဲ့ high-dimensional vector ကို input အဖြစ် ယူပြီး ၎င်းတို့ကို မတူညီတဲ့ dimension တစ်ခုပေါ်သို့ project လုပ်ပါတယ်။ ၎င်းတို့ဟာ များသောအားဖြင့် linear layers တစ်ခု ဒါမှမဟုတ် အနည်းငယ်နဲ့ ဖွဲ့စည်းထားပါတယ်-</p> <div class="flex justify-center" data-svelte-h="svelte-8mo6lt"><img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/transformer_and_head.svg" alt="A Transformer network alongside its head."> <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/transformer_and_head-dark.svg" alt="A Transformer network alongside its head."></div> <p data-svelte-h="svelte-3z54xm">Transformer model ရဲ့ output ကို model head ကို တိုက်ရိုက်ပို့ပြီး လုပ်ဆောင်ပါတယ်။</p> <p data-svelte-h="svelte-2w7zcd">ဒီပုံမှာ မော်ဒယ်ကို embeddings layer နဲ့ နောက်ဆက်တွဲ layers တွေနဲ့ 
ကိုယ်စားပြုထားပါတယ်။ embeddings layer က tokenized input ထဲက input ID တစ်ခုစီကို ၎င်းနဲ့ ဆက်စပ်နေတဲ့ token ကို ကိုယ်စားပြုတဲ့ vector တစ်ခုအဖြစ် ပြောင်းလဲပေးပါတယ်။ နောက်ဆက်တွဲ layers တွေက attention mechanism ကို အသုံးပြုပြီး အဲဒီ vectors တွေကို စီမံခန့်ခွဲကာ စာကြောင်းတွေရဲ့ နောက်ဆုံးကိုယ်စားပြုမှုကို ထုတ်ပေးပါတယ်။</p> <p data-svelte-h="svelte-1r4i9la">🤗 Transformers မှာ မတူညီတဲ့ architecture များစွာ ရရှိနိုင်ပြီး၊ တစ်ခုချင်းစီကို သီးခြားလုပ်ငန်းတစ်ခုကို ဖြေရှင်းဖို့ ဒီဇိုင်းထုတ်ထားပါတယ်။ အောက်ပါတို့ကတော့ မပြည့်စုံသေးသော စာရင်းတစ်ခု ဖြစ်ပါတယ်-</p> <ul data-svelte-h="svelte-a8nn2x"><li><code>*Model</code> (hidden states များကို ပြန်ရယူခြင်း)</li> <li><code>*ForCausalLM</code></li> <li><code>*ForMaskedLM</code></li> <li><code>*ForMultipleChoice</code></li> <li><code>*ForQuestionAnswering</code></li> <li><code>*ForSequenceClassification</code></li> <li><code>*ForTokenClassification</code></li> <li>နဲ့ အခြားအရာများ 🤗</li></ul> <p data-svelte-h="svelte-7uvrcg">ကျွန်တော်တို့ရဲ့ ဥပမာအတွက်၊ sequence classification head ပါဝင်တဲ့ မော်ဒယ်တစ်ခု လိုအပ်ပါလိမ့်မယ် (စာကြောင်းတွေကို positive သို့မဟုတ် negative အဖြစ် ခွဲခြားသတ်မှတ်နိုင်ဖို့)။ ဒါကြောင့် ကျွန်တော်တို့ဟာ <code>AutoModel</code> class ကို အမှန်တကယ် အသုံးပြုမှာ မဟုတ်ဘဲ <code>AutoModelForSequenceClassification</code> ကို အသုံးပြုပါမယ်။</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect 
fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForSequenceClassification

	checkpoint = <span class="hljs-string">"distilbert-base-uncased-finetuned-sst-2-english"</span>
	model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
	outputs = model(**inputs)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-7wnjzb">အခု outputs တွေရဲ့ shape ကို ကြည့်လိုက်ရင်၊ dimensionality က အများကြီး နိမ့်သွားပါလိမ့်မယ်၊ model head က ယခင်က ကျွန်တော်တို့ တွေ့ခဲ့တဲ့ high-dimensional vectors တွေကို input အဖြစ် ယူပြီး၊ တန်ဖိုးနှစ်ခု (label တစ်ခုစီအတွက် တစ်ခု) ပါဝင်တဲ့ vectors တွေကို ထုတ်ပေးပါတယ်။</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">print</span>(outputs.logits.shape)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" 
xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->torch.Size([<span class="hljs-number">2</span>, <span class="hljs-number">2</span>])<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-7tl8qp">ကျွန်တော်တို့မှာ စာကြောင်းနှစ်ကြောင်းနဲ့ label နှစ်ခုပဲ ရှိတာကြောင့်၊ ကျွန်တော်တို့ model ကနေ ရရှိတဲ့ ရလဒ်ဟာ 2 x 2 shape ဖြစ်ပါတယ်။</p> <h2 class="relative group"><a id="postprocessing-the-output" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#postprocessing-the-output"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 
56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Output ကို Postprocessing ပြုလုပ်ခြင်း</span></h2> <p data-svelte-h="svelte-ec20s8">ကျွန်တော်တို့ model ကနေ output အဖြစ် ရရှိတဲ့ တန်ဖိုးတွေက သူ့ဘာသာသူ အဓိပ္ပာယ်ရှိတာ မဟုတ်ပါဘူး။ ကြည့်ကြည့်ရအောင်။</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">print</span>(outputs.logits)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code 
excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tensor([[-<span class="hljs-number">1.5607</span>, <span class="hljs-number">1.6123</span>],
	[ <span class="hljs-number">4.1692</span>, -<span class="hljs-number">3.3464</span>]], grad_fn=&lt;AddmmBackward&gt;)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-bhjel4">ကျွန်တော်တို့ရဲ့ model က ပထမစာကြောင်းအတွက် <code>[-1.5607, 1.6123]</code> ကို ခန့်မှန်းခဲ့ပြီး၊ ဒုတိယစာကြောင်းအတွက် <code>[ 4.1692, -3.3464]</code> ကို ခန့်မှန်းခဲ့ပါတယ်။ ဒါတွေက ဖြစ်နိုင်ခြေတွေ မဟုတ်ဘဲ <em>logits</em> တွေ ဖြစ်ပါတယ်။ ၎င်းတို့က model ရဲ့ နောက်ဆုံး layer ကနေ ထုတ်ပေးတဲ့ raw, unnormalized scores တွေပါ။ ဖြစ်နိုင်ခြေတွေအဖြစ် ပြောင်းလဲဖို့အတွက် <a href="https://en.wikipedia.org/wiki/Softmax_function" rel="nofollow">SoftMax</a> layer ကို ဖြတ်သန်းဖို့ လိုအပ်ပါတယ် (🤗 Transformers model အားလုံးက logits တွေကို ထုတ်ပေးပါတယ်၊ ဘာလို့လဲဆိုတော့ training အတွက် loss function က SoftMax လိုမျိုး နောက်ဆုံး activation function နဲ့ cross entropy လိုမျိုး loss function အမှန်တကယ်ကို ပေါင်းစပ်ထားတာ ဖြစ်လေ့ရှိပါတယ်)။</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; 
border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch

	predictions = torch.nn.functional.softmax(outputs.logits, dim=-<span class="hljs-number">1</span>)
	<span class="hljs-built_in">print</span>(predictions)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tensor([[<span class="hljs-number">4.0195e-02</span>, <span class="hljs-number">9.5980e-01</span>],
	[<span class="hljs-number">9.9946e-01</span>, <span class="hljs-number">5.4418e-04</span>]], grad_fn=&lt;SoftmaxBackward&gt;)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-18yrc8">အခု ကျွန်တော်တို့ model က ပထမစာကြောင်းအတွက် <code>[0.0402, 0.9598]</code> ကို ခန့်မှန်းခဲ့ပြီး၊ ဒုတိယစာကြောင်းအတွက် <code>[0.9995, 0.0005]</code> ကို ခန့်မှန်းခဲ့တယ်ဆိုတာ တွေ့ရပါပြီ။ ဒါတွေက အသိအမှတ်ပြုနိုင်တဲ့ ဖြစ်နိုင်ခြေ scores တွေ ဖြစ်ပါတယ်။</p> <p data-svelte-h="svelte-s1m8fi">position တစ်ခုစီနဲ့ ကိုက်ညီတဲ့ labels တွေကို ရယူဖို့အတွက် model config ရဲ့ <code>id2label</code> attribute ကို စစ်ဆေးနိုင်ပါတယ် (ဒီအကြောင်းကို နောက်အပိုင်းမှာ ပိုမိုသိရှိရပါလိမ့်မယ်)။</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->model.config.id2label<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button 
class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->{<span class="hljs-number">0</span>: <span class="hljs-string">'NEGATIVE'</span>, <span class="hljs-number">1</span>: <span class="hljs-string">'POSITIVE'</span>}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1mqutw2">အခု ကျွန်တော်တို့ model က အောက်ပါအတိုင်း ခန့်မှန်းခဲ့တယ်လို့ ကောက်ချက်ချနိုင်ပါပြီ-</p> <ul data-svelte-h="svelte-4k4fe3"><li>ပထမစာကြောင်း - NEGATIVE: 0.0402, POSITIVE: 0.9598</li> <li>ဒုတိယစာကြောင်း - NEGATIVE: 0.9995, POSITIVE: 0.0005</li></ul> <p data-svelte-h="svelte-hzojbo">ကျွန်တော်တို့ pipeline ရဲ့ အဆင့်သုံးဆင့်လုံးကို အောင်မြင်စွာ ပြန်လည်ဖန်တီးနိုင်ခဲ့ပါပြီ- tokenizers တွေနဲ့ preprocessing လုပ်ခြင်း၊ model ကနေတဆင့် inputs တွေကို ပေးပို့ခြင်း၊ နဲ့ postprocessing လုပ်ခြင်းတို့ ဖြစ်ပါတယ်။ အခုတော့ ဒီအဆင့်တစ်ခုချင်းစီကို ပိုပြီး နက်နက်နဲနဲ လေ့လာကြည့်ရအောင်။</p> <blockquote class="tip" data-svelte-h="svelte-1uap3oa"><p>✏️ 
<strong>စမ်းသပ်ကြည့်ပါ။</strong> သင်ကိုယ်တိုင် စာသား (၂) ခု (သို့မဟုတ် ပိုမိုများပြား) ရွေးချယ်ပြီး <code>sentiment-analysis</code> pipeline ကနေတဆင့် run ပါ။ ထို့နောက် ဒီနေရာမှာ သင်တွေ့ခဲ့ရတဲ့ အဆင့်တွေကို ကိုယ်တိုင်ပြန်လုပ်ပြီး တူညီတဲ့ ရလဒ်တွေ ရရှိမရရှိ စစ်ဆေးပါ။</p></blockquote> <h2 class="relative group"><a id="ဝဟရ-ရငလငခက-glossary" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#ဝဟရ-ရငလငခက-glossary"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ဝေါဟာရ ရှင်းလင်းချက် (Glossary)</span></h2> <ul data-svelte-h="svelte-124bvrr"><li><strong>Pipeline</strong>: Hugging Face Transformers library မှာ ပါဝင်တဲ့ လုပ်ဆောင်ချက်တစ်ခုဖြစ်ပြီး မော်ဒယ်တွေကို သီးခြားလုပ်ငန်းတာဝန်များ (ဥပမာ- စာသားခွဲခြားသတ်မှတ်ခြင်း၊ စာသားထုတ်လုပ်ခြင်း) အတွက် အသုံးပြုရလွယ်ကူအောင် ပြုလုပ်ပေးပါတယ်။</li> <li><strong>Preprocessing</strong>: Machine Learning မော်ဒယ်တစ်ခုကို မထည့်သွင်းမီ raw data များကို လုပ်ဆောင်ရန် အသင့်ဖြစ်အောင် ပြင်ဆင်ခြင်း။</li> <li><strong>Postprocessing</strong>: Machine Learning မော်ဒယ်တစ်ခု၏ output များကို ပိုမိုနားလည်လွယ်သော သို့မဟုတ် အသုံးဝင်သော ပုံစံသို့ ပြောင်းလဲခြင်း။</li> <li><strong>Neural Networks</strong>: လူသားဦးနှောက်၏ 
လုပ်ဆောင်မှုပုံစံကို အတုယူထားသော ကွန်ပျူတာစနစ်များ။</li> <li><strong>Transformer Models</strong>: Natural Language Processing (NLP) မှာ အောင်မြင်မှုများစွာရရှိခဲ့တဲ့ deep learning architecture တစ်မျိုးပါ။ ၎င်းတို့ဟာ စာသားတွေထဲက စကားလုံးတွေရဲ့ ဆက်နွယ်မှုတွေကို “attention mechanism” သုံးပြီး နားလည်အောင် သင်ကြားပေးပါတယ်။</li> <li><strong>Raw Text</strong>: မည်သည့်လုပ်ဆောင်မှုမျှ မပြုလုပ်ရသေးသော သို့မဟုတ် ပုံစံမချရသေးသော မူရင်းစာသား။</li> <li><strong>Tokenizer</strong>: စာသား (သို့မဟုတ် အခြားဒေတာ) ကို AI မော်ဒယ်များ စီမံဆောင်ရွက်နိုင်ရန် tokens တွေအဖြစ် ပိုင်းခြားပေးသည့် ကိရိယာ သို့မဟုတ် လုပ်ငန်းစဉ်။</li> <li><strong>Tokens</strong>: စာသားကို ခွဲခြမ်းစိတ်ဖြာရာတွင် အသုံးပြုသော အသေးငယ်ဆုံးယူနစ်များ (ဥပမာ- စကားလုံးများ၊ subwords များ သို့မဟုတ် ပုဒ်ဖြတ်သံများ)။</li> <li><strong>Integer</strong>: အပြည့်အစုံ ကိန်းဂဏန်း။</li> <li><strong>Pretrained</strong>: ဒေတာအမြောက်အမြားပေါ်တွင် ကြိုတင်လေ့ကျင့်ထားပြီးသား Artificial Intelligence (AI) မော်ဒယ်တစ်ခု။</li> <li><strong>Model Hub</strong>: Hugging Face ပေါ်ရှိ pre-trained model များနှင့် datasets များကို ရှာဖွေ၊ မျှဝေပြီး အသုံးပြုနိုင်သော online platform။</li> <li><strong><code>AutoTokenizer</code> Class</strong>: Hugging Face Transformers library မှာ ပါဝင်တဲ့ class တစ်ခုဖြစ်ပြီး မော်ဒယ်အမည်ကို အသုံးပြုပြီး သက်ဆိုင်ရာ tokenizer ကို အလိုအလျောက် load လုပ်ပေးသည်။</li> <li><strong><code>from_pretrained()</code> Method</strong>: Pre-trained model သို့မဟုတ် tokenizer ကို load လုပ်ရန် အသုံးပြုသော method။</li> <li><strong>Checkpoint Name</strong>: အင်တာနက်ပေါ်ရှိ Hugging Face Hub မှ pre-trained model သို့မဟုတ် tokenizer ကို ဖော်ထုတ်ရန် အသုံးပြုသော အမည်။</li> <li><strong>Cache</strong>: မကြာခဏ အသုံးပြုရသော ဒေတာများကို အမြန်ဆုံး ဝင်ရောက်ရယူနိုင်ရန် ယာယီသိုလှောင်ထားသော နေရာ။</li> <li><strong><code>sentiment-analysis</code> pipeline</strong>: စာသားတစ်ခု၏ စိတ်ခံစားမှု (အပြုသဘော သို့မဟုတ် အနုတ်သဘော) ကို ခွဲခြမ်းစိတ်ဖြာပေးသော pipeline။</li> <li><strong><code>distilbert-base-uncased-finetuned-sst-2-english</code></strong>: 
<code>sentiment-analysis</code> pipeline ၏ default checkpoint အဖြစ် အသုံးပြုသော DistilBERT မော်ဒယ်၏ အမည်။ <code>base</code> သည် မော်ဒယ်၏ အရွယ်အစားကို ဖော်ပြပြီး <code>uncased</code> သည် စာလုံးအကြီးအသေး ခွဲခြားခြင်းမရှိဘဲ လေ့ကျင့်ထားကြောင်း ဖော်ပြသည်။ <code>finetuned-sst-2-english</code> က SST-2 dataset တွင် English ဘာသာစကားအတွက် fine-tune လုပ်ထားသည်ကို ဆိုလိုသည်။</li> <li><strong>Model Card</strong>: Hugging Face Hub ပေါ်ရှိ မော်ဒယ်တစ်ခု၏ အချက်အလက်များ၊ အသုံးပြုပုံနှင့် စွမ်းဆောင်ရည်များကို အကျဉ်းချုပ်ဖော်ပြထားသော စာမျက်နှာ။</li> <li><strong>Dictionary</strong>: key-value pair များဖြင့် ဒေတာများကို သိုလှောင်သော ဒေတာဖွဲ့စည်းပုံ။</li> <li><strong>Tensors</strong>: Machine Learning frameworks (PyTorch, TensorFlow) များတွင် ဒေတာများကို ကိုယ်စားပြုသော multi-dimensional array များ။</li> <li><strong>NumPy Arrays</strong>: Python တွင် ဂဏန်းတွက်ချက်မှုများအတွက် အသုံးပြုသော multi-dimensional array များအတွက် library။</li> <li><strong>Scalar (0D)</strong>: Dimension မရှိသော တစ်ခုတည်းသော ကိန်းဂဏန်းတန်ဖိုး။</li> <li><strong>Vector (1D)</strong>: ကိန်းဂဏန်းတန်ဖိုးများ၏ တစ်ကြောင်းတည်းသော sequence။</li> <li><strong>Matrix (2D)</strong>: ကိန်းဂဏန်းတန်ဖိုးများ၏ နှစ်ကြောင်းအတန်းလိုက် စီစဉ်ထားသော အစုအဝေး။</li> <li><strong><code>return_tensors</code> Argument</strong>: tokenizer ကို ခေါ်ဆိုသောအခါ ပြန်လိုချင်သော tensor အမျိုးအစားကို သတ်မှတ်ရန် အသုံးပြုသော argument။</li> <li><strong><code>padding</code></strong>: မတူညီသော အရှည်ရှိသည့် input sequence များကို အရှည်တူညီအောင် သတ်မှတ်ထားသော တန်ဖိုးများဖြင့် ဖြည့်စွက်ခြင်း။</li> <li><strong><code>truncation</code></strong>: အရှည်ကန့်သတ်ချက်ထက် ပိုနေသော input sequence များကို ဖြတ်တောက်ခြင်း။</li> <li><strong><code>input_ids</code></strong>: Tokenizer မှ ထုတ်ပေးသော tokens တစ်ခုစီ၏ ထူးခြားသော ဂဏန်းဆိုင်ရာ ID များ။</li> <li><strong><code>attention_mask</code></strong>: မော်ဒယ်ကို အာရုံစိုက်သင့်သည့် tokens များနှင့် လျစ်လျူရှုသင့်သည့် (padding) tokens များကို ခွဲခြားပေးသည့် binary mask။</li> <li><strong><code>AutoModel</code> Class</strong>: Hugging 
Face Transformers library မှာ ပါဝင်တဲ့ class တစ်ခုဖြစ်ပြီး မော်ဒယ်အမည်ကို အသုံးပြုပြီး Transformer model ကို အလိုအလျောက် load လုပ်ပေးသည်။</li> <li><strong>Hidden States</strong>: Transformer model ၏ အလယ်အလတ် layers များမှ ထုတ်ပေးသော output များ။ ၎င်းတို့သည် input ၏ အကြောင်းအရာဆိုင်ရာ ကိုယ်စားပြုမှုများကို ဖမ်းယူထားသည်။</li> <li><strong>Features</strong>: Hidden states များကို ရည်ညွှန်းသော အခြားအသုံးအနှုန်းတစ်ခု။</li> <li><strong>High-dimensional Vector</strong>: dimension များစွာရှိသော vector တစ်ခု။</li> <li><strong>Batch Size</strong>: မော်ဒယ်က တစ်ပြိုင်နက်တည်း လုပ်ဆောင်သော input sequence အရေအတွက်။</li> <li><strong>Sequence Length</strong>: input sequence ၏ token အရေအတွက်။</li> <li><strong>Hidden Size</strong>: hidden states vector တစ်ခု၏ dimension အရွယ်အစား။</li> <li><strong><code>namedtuple</code>s</strong>: Python တွင် tuple ကဲ့သို့ အလုပ်လုပ်သော်လည်း attribute name များဖြင့် elements များကို ဝင်ရောက်ကြည့်ရှုနိုင်သော data type။</li> <li><strong>Model Heads</strong>: Transformer model ၏ hidden states များကို သီးခြားလုပ်ငန်းတစ်ခုအတွက် လိုအပ်သော output များအဖြစ် ပြောင်းလဲပေးသော အစိတ်အပိုင်း။ များသောအားဖြင့် linear layers များဖြင့် ဖွဲ့စည်းထားသည်။</li> <li><strong>Embeddings Layer</strong>: input IDs များကို vector representations များအဖြစ် ပြောင်းလဲပေးသော model layer။</li> <li><strong><code>Model</code></strong>: base Transformer model (hidden states များကို ပြန်ရယူရန်) ကို ကိုယ်စားပြုသော Hugging Face model class family။</li> <li><strong><code>ForCausalLM</code></strong>: Causal Language Modeling (နောက်ထပ် token ကို ခန့်မှန်းခြင်း) အတွက် ဒီဇိုင်းထုတ်ထားသော model class family။</li> <li><strong><code>ForMaskedLM</code></strong>: Masked Language Modeling (ပျောက်ဆုံးနေသော token များကို ဖြည့်စွက်ခြင်း) အတွက် ဒီဇိုင်းထုတ်ထားသော model class family။</li> <li><strong><code>ForMultipleChoice</code></strong>: Multiple Choice question answering အတွက် ဒီဇိုင်းထုတ်ထားသော model class family။</li> <li><strong><code>ForQuestionAnswering</code></strong>: Question Answering 
လုပ်ငန်းတာဝန်များအတွက် ဒီဇိုင်းထုတ်ထားသော model class family။</li> <li><strong><code>ForSequenceClassification</code></strong>: Sequence Classification လုပ်ငန်းတာဝန်များ (ဥပမာ- sentiment analysis) အတွက် ဒီဇိုင်းထုတ်ထားသော model class family။</li> <li><strong><code>*ForTokenClassification</code></strong>: Token Classification လုပ်ငန်းတာဝန်များ (ဥပမာ- Named Entity Recognition) အတွက် ဒီဇိုင်းထုတ်ထားသော model class family။</li> <li><strong><code>AutoModelForSequenceClassification</code></strong>: Hugging Face Transformers library မှာ ပါဝင်တဲ့ class တစ်ခုဖြစ်ပြီး sequence classification အတွက် pre-trained model ကို အလိုအလျောက် load လုပ်ပေးသည်။</li> <li><strong><code>outputs.logits</code></strong>: မော်ဒယ်၏ နောက်ဆုံး layer မှ ထုတ်ပေးသော raw, unnormalized scores များ။</li> <li><strong>Logits</strong>: မော်ဒယ်၏ နောက်ဆုံး layer မှ ထုတ်ပေးသော raw, unnormalized scores များ။ ၎င်းတို့သည် ဖြစ်နိုင်ခြေများအဖြစ်သို့ ပြောင်းလဲခြင်းမရှိသေးပါ။</li> <li><strong>SoftMax Layer</strong>: input numbers များကို 0 နှင့် 1 ကြားရှိ ဖြစ်နိုင်ခြေများအဖြစ်သို့ ပြောင်းလဲပေးသော activation function တစ်ခု။ ၎င်းတို့၏ စုစုပေါင်းသည် 1 ဖြစ်သည်။</li> <li><strong>Loss Function</strong>: မော်ဒယ်၏ ခန့်မှန်းချက်များနှင့် အမှန်တကယ်တန်ဖိုးများကြား ကွာခြားမှုကို တိုင်းတာသော function တစ်ခု။</li> <li><strong>Cross Entropy</strong>: Classification လုပ်ငန်းများတွင် အသုံးများသော loss function တစ်ခု။</li> <li><strong><code>torch.nn.functional.softmax(outputs.logits, dim=-1)</code></strong>: PyTorch တွင် softmax function ကို <code>outputs.logits</code> ပေါ်တွင် နောက်ဆုံး dimension (dim=-1) အတိုင်း အသုံးပြုခြင်း။</li> <li><strong><code>model.config.id2label</code></strong>: Model configuration ထဲတွင် <code>id</code> (ဂဏန်း) မှ <code>label</code> (စာသား) သို့ တွဲချိတ်ပေးသော dictionary တစ်ခု။</li></ul> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/course/blob/main/chapters/my/chapter2/2.mdx" target="_blank"><span 
data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>

	<script>
	// Client-side bootstrap: publishes path settings on a global, loads the two
	// entry modules, then calls kit.start() with this script's parent element.
	{
	// Assigned without a declaration inside a classic (non-module) <script>, so
	// this becomes a property of the global object. Presumably the imported
	// bundles look it up under this exact name — confirm before renaming.
	__sveltekit_dep9rk = {
	assets: "/docs/course/pr_1107/my",
	base: "/docs/course/pr_1107/my",
	env: {}
	};

	// Must be read synchronously here: document.currentScript is only set while
	// this script is executing and would be null inside the .then() callback.
	const element = document.currentScript.parentElement;

	// Forwarded unchanged to kit.start() as `data`.
	// Presumably one slot per entry in node_ids below — TODO confirm.
	const data = [null,null];

	// Fetch both entry modules in parallel; the callback runs only once both
	// dynamic imports have resolved.
	// NOTE(review): there is no .catch(), so a failed import surfaces only as an
	// unhandled promise rejection.
	Promise.all([
	import("/docs/course/pr_1107/my/_app/immutable/entry/start.5c6233a8.js"),
	import("/docs/course/pr_1107/my/_app/immutable/entry/app.55586789.js")
	]).then(([kit, app]) => {
	// Hand the app module, the target element and the initial options to the
	// start() export of the first module.
	kit.start(app, element, {
	node_ids: [0, 15],
	data,
	form: null,
	error: null
	});
	});
	}
	</script>

Xet Storage Details

Size: 85.4 kB
Xet hash: 7e391b9055c588e04c8f9826c4ca486702d0fed4b5d0ade5de2a6d77ddfca600

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.