Buckets:

hf-doc-build
/

doc-dev

Files

xet

hf-doc-build/doc-dev / course /pr_1095 /my /chapter11 /3.html

rtrm

about 1 month ago

download

raw

112 kB

	<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{"title":"Supervised Fine-Tuning","local":"supervised-fine-tuning","sections":[{"title":"SFT ကို ဘယ်အချိန်မှာ အသုံးပြုသင့်သလဲ","local":"sft-က-ဘယအခနမ-အသပသငသလ","sections":[{"title":"Template Control","local":"template-control","sections":[],"depth":3},{"title":"Domain Adaptation","local":"domain-adaptation","sections":[],"depth":3}],"depth":2},{"title":"Dataset Preparation","local":"dataset-preparation","sections":[],"depth":2},{"title":"Training Configuration","local":"training-configuration","sections":[],"depth":2},{"title":"TRL ဖြင့် Implementation","local":"trl-ဖင-implementation","sections":[],"depth":2},{"title":"Dataset ကို Packing လုပ်ခြင်း","local":"dataset-က-packing-လပခင","sections":[],"depth":2},{"title":"Training Progress ကို Monitoring လုပ်ခြင်း","local":"training-progress-က-monitoring-လပခင","sections":[{"title":"Loss Patterns များကို နားလည်ခြင်း","local":"loss-patterns-မက-နလညခင","sections":[],"depth":3},{"title":"စောင့်ကြည့်ရမည့် Metrics များ","local":"စငကညရမည-metrics-မ","sections":[],"depth":3},{"title":"Convergence သို့ လမ်းကြောင်း","local":"convergence-သ-လမကင","sections":[],"depth":3},{"title":"Training Progress ကို Monitoring လုပ်ခြင်း","local":"training-progress-က-monitoring-လပခင","sections":[],"depth":3},{"title":"စောင့်ကြည့်ရမည့် သတိပေးအမှတ်အသားများ","local":"စငကညရမည-သတပအမတအသမ","sections":[],"depth":3}],"depth":2},{"title":"SFT ပြီးနောက် Evaluation","local":"sft-ပနက-evaluation","sections":[],"depth":2},{"title":"မေးခွန်းများ","local":"မခနမ","sections":[{"title":"၁။ SFT တွင် training ကြာချိန်ကို ထိန်းချုပ်သော parameters များက ဘာတွေလဲ။","local":"၁-sft-တင-training-ကခနက-ထနခပသ-parameters-မက-ဘတလ","sections":[],"depth":3},{"title":"၂။ loss curves တွေမှာ ဘယ်ပုံစံက overfitting ဖြစ်နိုင်ခြေကို ညွှန်ပြသလဲ။","local":"၂-loss-curves-တမ-ဘယပစက-overfitting-ဖစနငခက-ညနပသလ","sections":[],"depth":3},{"title":"၃။ gradient_accumulation_steps ကို ဘာအတွက် 
အသုံးပြုသလဲ။","local":"၃-gradientaccumulationsteps-က-ဘအတက-အသပသလ","sections":[],"depth":3},{"title":"၄။ SFT training လုပ်နေစဉ် ဘာတွေကို စောင့်ကြည့်သင့်သလဲ။","local":"၄-sft-training-လပနစဉ-ဘတက-စငကညသငသလ","sections":[],"depth":3},{"title":"၅။ training လုပ်နေစဉ် ကျန်းမာသော convergence ကို ဘာက ညွှန်ပြသလဲ။","local":"၅-training-လပနစဉ-ကနမသ-convergence-က-ဘက-ညနပသလ","sections":[],"depth":3}],"depth":2},{"title":"💐 ကောင်းပါပြီ!","local":"-ကငပပ","sections":[],"depth":2},{"title":"ထပ်ဆောင်း အရင်းအမြစ်များ","local":"ထပဆင-အရငအမစမ","sections":[],"depth":2},{"title":"ဝေါဟာရ ရှင်းလင်းချက် (Glossary)","local":"ဝဟရ-ရငလငခက-glossary","sections":[],"depth":2}],"depth":1}">
	<link rel="stylesheet" href="/docs/course/pr_1095/my/_app/immutable/assets/0.e3b0c442.css">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/entry/start.8e25cab6.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/scheduler.893fe8c9.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/singletons.ba455c5c.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/index.bce52c8a.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/paths.9a7be869.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/entry/app.b12ce275.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/preload-helper.b5ee8f74.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/index.b1df2166.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/nodes/0.77c840e7.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/each.e59479a4.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/nodes/23.f58153f2.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.e6d31e72.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/CodeBlock.abb4f40e.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/CourseFloatingBanner.c1c08878.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/Question.ea6d4cb0.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/stores.db603902.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Supervised Fine-Tuning","local":"supervised-fine-tuning","sections":[{"title":"SFT ကို ဘယ်အချိန်မှာ အသုံးပြုသင့်သလဲ","local":"sft-က-ဘယအခနမ-အသပသငသလ","sections":[{"title":"Template Control","local":"template-control","sections":[],"depth":3},{"title":"Domain Adaptation","local":"domain-adaptation","sections":[],"depth":3}],"depth":2},{"title":"Dataset Preparation","local":"dataset-preparation","sections":[],"depth":2},{"title":"Training Configuration","local":"training-configuration","sections":[],"depth":2},{"title":"TRL ဖြင့် Implementation","local":"trl-ဖင-implementation","sections":[],"depth":2},{"title":"Dataset ကို Packing လုပ်ခြင်း","local":"dataset-က-packing-လပခင","sections":[],"depth":2},{"title":"Training Progress ကို Monitoring လုပ်ခြင်း","local":"training-progress-က-monitoring-လပခင","sections":[{"title":"Loss Patterns များကို နားလည်ခြင်း","local":"loss-patterns-မက-နလညခင","sections":[],"depth":3},{"title":"စောင့်ကြည့်ရမည့် Metrics များ","local":"စငကညရမည-metrics-မ","sections":[],"depth":3},{"title":"Convergence သို့ လမ်းကြောင်း","local":"convergence-သ-လမကင","sections":[],"depth":3},{"title":"Training Progress ကို Monitoring လုပ်ခြင်း","local":"training-progress-က-monitoring-လပခင","sections":[],"depth":3},{"title":"စောင့်ကြည့်ရမည့် သတိပေးအမှတ်အသားများ","local":"စငကညရမည-သတပအမတအသမ","sections":[],"depth":3}],"depth":2},{"title":"SFT ပြီးနောက် Evaluation","local":"sft-ပနက-evaluation","sections":[],"depth":2},{"title":"မေးခွန်းများ","local":"မခနမ","sections":[{"title":"၁။ SFT တွင် training ကြာချိန်ကို ထိန်းချုပ်သော parameters များက ဘာတွေလဲ။","local":"၁-sft-တင-training-ကခနက-ထနခပသ-parameters-မက-ဘတလ","sections":[],"depth":3},{"title":"၂။ loss curves တွေမှာ ဘယ်ပုံစံက overfitting ဖြစ်နိုင်ခြေကို 
ညွှန်ပြသလဲ။","local":"၂-loss-curves-တမ-ဘယပစက-overfitting-ဖစနငခက-ညနပသလ","sections":[],"depth":3},{"title":"၃။ gradient_accumulation_steps ကို ဘာအတွက် အသုံးပြုသလဲ။","local":"၃-gradientaccumulationsteps-က-ဘအတက-အသပသလ","sections":[],"depth":3},{"title":"၄။ SFT training လုပ်နေစဉ် ဘာတွေကို စောင့်ကြည့်သင့်သလဲ။","local":"၄-sft-training-လပနစဉ-ဘတက-စငကညသငသလ","sections":[],"depth":3},{"title":"၅။ training လုပ်နေစဉ် ကျန်းမာသော convergence ကို ဘာက ညွှန်ပြသလဲ။","local":"၅-training-လပနစဉ-ကနမသ-convergence-က-ဘက-ညနပသလ","sections":[],"depth":3}],"depth":2},{"title":"💐 ကောင်းပါပြီ!","local":"-ကငပပ","sections":[],"depth":2},{"title":"ထပ်ဆောင်း အရင်းအမြစ်များ","local":"ထပဆင-အရငအမစမ","sections":[],"depth":2},{"title":"ဝေါဟာရ ရှင်းလင်းချက် (Glossary)","local":"ဝဟရ-ရငလငခက-glossary","sections":[],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="flex space-x-1 absolute z-10 right-0 top-0" style=""><a href="https://discuss.huggingface.co/t/chapter-2-questions" target="_blank"><img alt="Ask a Question" class="!m-0" 
src="https://img.shields.io/badge/Ask%20a%20question-ffcb4c.svg?logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgLTEgMTA0IDEwNiI+PGRlZnM+PHN0eWxlPi5jbHMtMXtmaWxsOiMyMzFmMjA7fS5jbHMtMntmaWxsOiNmZmY5YWU7fS5jbHMtM3tmaWxsOiMwMGFlZWY7fS5jbHMtNHtmaWxsOiMwMGE5NGY7fS5jbHMtNXtmaWxsOiNmMTVkMjI7fS5jbHMtNntmaWxsOiNlMzFiMjM7fTwvc3R5bGU+PC9kZWZzPjx0aXRsZT5EaXNjb3Vyc2VfbG9nbzwvdGl0bGU+PGcgaWQ9IkxheWVyXzIiPjxnIGlkPSJMYXllcl8zIj48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik01MS44NywwQzIzLjcxLDAsMCwyMi44MywwLDUxYzAsLjkxLDAsNTIuODEsMCw1Mi44MWw1MS44Ni0uMDVjMjguMTYsMCw1MS0yMy43MSw1MS01MS44N1M4MCwwLDUxLjg3LDBaIi8+PHBhdGggY2xhc3M9ImNscy0yIiBkPSJNNTIuMzcsMTkuNzRBMzEuNjIsMzEuNjIsMCwwLDAsMjQuNTgsNjYuNDFsLTUuNzIsMTguNEwzOS40LDgwLjE3YTMxLjYxLDMxLjYxLDAsMSwwLDEzLTYwLjQzWiIvPjxwYXRoIGNsYXNzPSJjbHMtMyIgZD0iTTc3LjQ1LDMyLjEyYTMxLjYsMzEuNiwwLDAsMS0zOC4wNSw0OEwxOC44Niw4NC44MmwyMC45MS0yLjQ3QTMxLjYsMzEuNiwwLDAsMCw3Ny40NSwzMi4xMloiLz48cGF0aCBjbGFzcz0iY2xzLTQiIGQ9Ik03MS42MywyNi4yOUEzMS42LDMxLjYsMCwwLDEsMzguOCw3OEwxOC44Niw4NC44MiwzOS40LDgwLjE3QTMxLjYsMzEuNiwwLDAsMCw3MS42MywyNi4yOVoiLz48cGF0aCBjbGFzcz0iY2xzLTUiIGQ9Ik0yNi40Nyw2Ny4xMWEzMS42MSwzMS42MSwwLDAsMSw1MS0zNUEzMS42MSwzMS42MSwwLDAsMCwyNC41OCw2Ni40MWwtNS43MiwxOC40WiIvPjxwYXRoIGNsYXNzPSJjbHMtNiIgZD0iTTI0LjU4LDY2LjQxQTMxLjYxLDMxLjYxLDAsMCwxLDcxLjYzLDI2LjI5YTMxLjYxLDMxLjYxLDAsMCwwLTQ5LDM5LjYzbC0zLjc2LDE4LjlaIi8+PC9nPjwvZz48L3N2Zz4="></a> <a href="https://colab.research.google.com/github/huggingface/notebooks/blob/main/course/en/chapter11/section3.ipynb" target="_blank"><img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"></a> </div> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 max-sm:gap-0.5 h-6 max-sm:h-5 px-2 max-sm:px-1.5 
text-[11px] max-sm:text-[9px] font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0"><svg class="w-3 h-3 max-sm:w-2.5 max-sm:h-2.5" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-6 max-sm:h-5 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible w-3 h-3 max-sm:w-2.5 max-sm:h-2.5 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="supervised-fine-tuning" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#supervised-fine-tuning"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" 
preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Supervised Fine-Tuning</span></h1> <p data-svelte-h="svelte-jih6ks">Supervised Fine-Tuning (SFT) ဟာ ကြိုတင်လေ့ကျင့်ထားပြီးသား language models တွေကို ညွှန်ကြားချက်တွေကို လိုက်နာဖို့၊ စကားပြောဆိုမှုတွေမှာ ပါဝင်ဖို့နဲ့ သတ်မှတ်ထားတဲ့ output formats တွေကို အသုံးပြုနိုင်ဖို့ အဓိကအသုံးပြုတဲ့ လုပ်ငန်းစဉ်တစ်ခု ဖြစ်ပါတယ်။ pre-trained models တွေမှာ အထင်ကြီးစရာကောင်းတဲ့ အထွေထွေစွမ်းရည်တွေ ရှိပေမယ့်၊ SFT က ၎င်းတို့ကို assistant-like models တွေအဖြစ် ပြောင်းလဲပေးရာမှာ ကူညီပေးပါတယ်။ ဒါမှ အသုံးပြုသူရဲ့ prompts တွေကို ပိုမိုနားလည်ပြီး တုံ့ပြန်နိုင်မှာပါ။ ဒီလိုလုပ်တာက များသောအားဖြင့် လူသားတွေရေးသားထားတဲ့ စကားပြောဆိုမှုတွေနဲ့ ညွှန်ကြားချက် datasets တွေပေါ်မှာ training လုပ်ခြင်းဖြင့် ဆောင်ရွက်ပါတယ်။</p> <p data-svelte-h="svelte-1if1jdb">ဒီစာမျက်နှာက <a href="https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" rel="nofollow"><code>deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B</code></a> model ကို <a href="https://huggingface.co/docs/trl/en/sft_trainer" rel="nofollow"><code>SFTTrainer</code></a> ကို အသုံးပြုပြီး fine-tuning လုပ်ဖို့အတွက် အဆင့်ဆင့် လမ်းညွှန်ချက်တွေကို ပေးထားပါတယ်။ ဒီအဆင့်တွေကို လိုက်နာခြင်းဖြင့် သင်ဟာ model ကို သီးခြား tasks တွေကို ပိုမိုထိရောက်စွာ လုပ်ဆောင်နိုင်အောင် ပြုပြင်နိုင်ပါလိမ့်မယ်။</p> <h2 class="relative group"><a id="sft-က-ဘယအခနမ-အသပသငသလ" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 
with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#sft-က-ဘယအခနမ-အသပသငသလ"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>SFT ကို ဘယ်အချိန်မှာ အသုံးပြုသင့်သလဲ</span></h2> <p data-svelte-h="svelte-1b7grhd">Implementation ထဲကို မဝင်ရောက်ခင်မှာ၊ SFT က သင့် project အတွက် မှန်ကန်တဲ့ ရွေးချယ်မှု ဘယ်အချိန်မှာ ဖြစ်မလဲဆိုတာ နားလည်ဖို့ အရေးကြီးပါတယ်။ ပထမအဆင့်အနေနဲ့၊ လက်ရှိ instruction-tuned model တစ်ခုကို ကောင်းမွန်စွာ ဖန်တီးထားတဲ့ prompts တွေနဲ့ အသုံးပြုတာက သင့် use case အတွက် လုံလောက်မလားဆိုတာ စဉ်းစားသင့်ပါတယ်။ SFT မှာ computational resources နဲ့ engineering effort တွေ အများအပြား ပါဝင်တဲ့အတွက်၊ လက်ရှိ models တွေကို prompting လုပ်တာက မလုံလောက်မှသာ ဒီနည်းလမ်းကို ဆက်လက်လုပ်ဆောင်သင့်ပါတယ်။</p> <blockquote class="tip" data-svelte-h="svelte-1401v16"><p>SFT ကို အောက်ပါအခြေအနေများမှသာ စဉ်းစားပါ။</p> <ul><li>prompting ဖြင့် ရရှိနိုင်သည်ထက် ပိုမိုကောင်းမွန်သော စွမ်းဆောင်ရည် လိုအပ်ခြင်း။</li> <li>large general-purpose model တစ်ခုကို အသုံးပြုခြင်း၏ ကုန်ကျစရိတ်သည် smaller model တစ်ခုကို fine-tuning လုပ်ခြင်း၏ ကုန်ကျစရိတ်ထက် ပိုများသော သီးခြား use case တစ်ခု ရှိခြင်း။</li> <li>လက်ရှိ models များက ကိုင်တွယ်ရခက်ခဲသော specialized output formats များ သို့မဟုတ် domain-specific knowledge များ လိုအပ်ခြင်း။</li></ul></blockquote> <p 
data-svelte-h="svelte-h8gstr">SFT က လိုအပ်တယ်လို့ သင်ဆုံးဖြတ်ပြီဆိုရင်၊ ဆက်လက်လုပ်ဆောင်ဖို့ ဆုံးဖြတ်ချက်က အဓိကအချက်နှစ်ချက်ပေါ်မှာ မူတည်ပါတယ်-</p> <h3 class="relative group"><a id="template-control" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#template-control"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Template Control</span></h3> <p data-svelte-h="svelte-176uc7a">SFT က model ရဲ့ output structure အပေါ်မှာ တိကျတဲ့ control ကို ခွင့်ပြုပါတယ်။ ဒါက model ကို အောက်ပါတို့ကို လုပ်ဆောင်ဖို့ လိုအပ်တဲ့အခါ အထူးအသုံးဝင်ပါတယ်-
	၁။ သီးခြား chat template format တစ်ခုနဲ့ responses တွေ ထုတ်လုပ်ရန်။
	၂။ တင်းကြပ်သော output schemas များကို လိုက်နာရန်။
	၃။ responses များတစ်လျှောက် တသမတ်တည်းဖြစ်သော styling ကို ထိန်းသိမ်းရန်။</p> <h3 class="relative group"><a id="domain-adaptation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#domain-adaptation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Domain Adaptation</span></h3> <p data-svelte-h="svelte-11atu6b">Specialized domains တွေမှာ အလုပ်လုပ်တဲ့အခါ၊ SFT က model ကို domain-specific requirements တွေနဲ့ ကိုက်ညီအောင် ကူညီပေးပါတယ်။ ဒါတွေကတော့…
	၁။ domain terminology နဲ့ concepts တွေ သင်ကြားပေးခြင်း။
	၂။ ပရော်ဖက်ရှင်နယ် စံနှုန်းများ ချမှတ်ခြင်း။
	၃။ နည်းပညာဆိုင်ရာ မေးမြန်းမှုများကို သင့်လျော်စွာ ကိုင်တွယ်ဖြေရှင်းခြင်း။
	၄။ Industry-specific guidelines များကို လိုက်နာခြင်း။</p> <blockquote class="tip" data-svelte-h="svelte-6wr6up"><p>SFT ကို မစတင်မီ၊ သင်၏ use case သည် အောက်ပါတို့ကို လိုအပ်ခြင်းရှိမရှိ အကဲဖြတ်ပါ။</p> <ul><li>တိကျသော output formatting</li> <li>Domain-specific knowledge</li> <li>တသမတ်တည်းသော response patterns</li> <li>သီးခြား guidelines များကို လိုက်နာခြင်း</li></ul> <p>ဤအကဲဖြတ်ခြင်းသည် SFT သည် သင့်လိုအပ်ချက်များအတွက် မှန်ကန်သောချဉ်းကပ်မှုဟုတ်မဟုတ် ဆုံးဖြတ်ရန် ကူညီလိမ့်မည်။</p></blockquote> <h2 class="relative group"><a id="dataset-preparation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dataset-preparation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Dataset Preparation</span></h2> <p data-svelte-h="svelte-1gu5d93">supervised fine-tuning လုပ်ငန်းစဉ်အတွက် input-output pairs များဖြင့် ဖွဲ့စည်းထားသော task-specific dataset တစ်ခု လိုအပ်ပါတယ်။ pair တစ်ခုစီမှာ အောက်ပါတို့ ပါဝင်သင့်ပါတယ်။
	၁။ input prompt တစ်ခု။
	၂။ မျှော်လင့်ထားသော model response။
	၃။ အပိုဆောင်း context သို့မဟုတ် metadata တစ်ခုခု။</p> <p data-svelte-h="svelte-1ow7nia">သင်၏ training data ၏ အရည်အသွေးသည် အောင်မြင်သော fine-tuning အတွက် အရေးကြီးပါတယ်။ သင်၏ dataset ကို ဘယ်လိုပြင်ဆင်ပြီး validate လုပ်ရမလဲဆိုတာ ကြည့်ရအောင်…</p> <iframe src="https://huggingface.co/datasets/HuggingFaceTB/smoltalk/embed/viewer/all/train?row=0" frameborder="0" width="100%" height="360px"></iframe> <h2 class="relative group"><a id="training-configuration" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#training-configuration"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Training Configuration</span></h2> <p data-svelte-h="svelte-3h4jnr">သင်၏ fine-tuning အောင်မြင်မှုသည် မှန်ကန်သော training parameters များကို ရွေးချယ်ခြင်းအပေါ် များစွာမူတည်ပါတယ်။ အရေးကြီးသော parameter တစ်ခုစီနှင့် ၎င်းတို့ကို ထိရောက်စွာ configure လုပ်နည်းကို လေ့လာကြည့်ကြပါစို့။</p> <p data-svelte-h="svelte-1uiw6jx">SFTTrainer configuration အတွက် training လုပ်ငန်းစဉ်ကို ထိန်းချုပ်သော parameters အများအပြားကို ထည့်သွင်းစဉ်းစားရန် လိုအပ်ပါတယ်။ parameter တစ်ခုစီနှင့် ၎င်းတို့၏ ရည်ရွယ်ချက်ကို လေ့လာကြည့်ကြပါစို့။</p> <p data-svelte-h="svelte-2z1bl3">၁။ <strong>Training 
Duration Parameters</strong>:</p> <ul data-svelte-h="svelte-2zjoy3"><li><code>num_train_epochs</code>: စုစုပေါင်း training ကြာချိန်ကို ထိန်းချုပ်သည်။</li> <li><code>max_steps</code>: epochs ၏ အစားထိုးတစ်ခုဖြစ်ပြီး၊ အမြင့်ဆုံး training steps အရေအတွက်ကို သတ်မှတ်သည်။</li> <li>epochs ပိုများလေ၊ သင်ယူမှု ပိုကောင်းလေဖြစ်သော်လည်း overfitting ဖြစ်နိုင်ခြေရှိသည်။</li></ul> <p data-svelte-h="svelte-1bqj9q9">၂။ <strong>Batch Size Parameters</strong>:</p> <ul data-svelte-h="svelte-mpvcfz"><li><code>per_device_train_batch_size</code>: memory အသုံးပြုမှုနှင့် training stability ကို ဆုံးဖြတ်သည်။</li> <li><code>gradient_accumulation_steps</code>: ပိုကြီးမားသော effective batch sizes များကို လုပ်ဆောင်နိုင်စေသည်။</li> <li>batches ပိုကြီးလေ၊ gradients ပိုမိုတည်ငြိမ်လေဖြစ်သော်လည်း memory ပိုလိုအပ်သည်။</li></ul> <p data-svelte-h="svelte-w0ybz">၃။ <strong>Learning Rate Parameters</strong>:</p> <ul data-svelte-h="svelte-1rjsywi"><li><code>learning_rate</code>: weights များကို update လုပ်မည့် အရွယ်အစားကို ထိန်းချုပ်သည်။</li> <li><code>warmup_ratio</code>: training ၏ မည်သည့်အပိုင်းကို learning rate warmup အတွက် အသုံးပြုမည်နည်း။</li> <li>မြင့်လွန်းပါက instability ဖြစ်စေနိုင်ပြီး၊ နိမ့်လွန်းပါက သင်ယူမှု နှေးကွေးစေသည်။</li></ul> <p data-svelte-h="svelte-lljae8">၄။ <strong>Monitoring Parameters</strong>:</p> <ul data-svelte-h="svelte-1ttd10g"><li><code>logging_steps</code>: metrics များကို မှတ်တမ်းတင်သည့် အကြိမ်ရေ။</li> <li><code>eval_steps</code>: validation data ပေါ်တွင် မည်မျှကြာကြာ evaluation လုပ်မည်နည်း။</li> <li><code>save_steps</code>: model checkpoint များကို သိမ်းဆည်းသည့် အကြိမ်ရေ။</li></ul> <blockquote class="tip" data-svelte-h="svelte-10pdq46"><p>monitoring အပေါ် အခြေခံ၍ ကနဦးတန်ဖိုးများဖြင့် စတင်ပြီး ချိန်ညှိပါ။</p> <ul><li>1-3 epochs ဖြင့် စတင်ပါ။</li> <li>ကနဦးတွင် batch sizes ပိုသေးငယ်သည်ကို အသုံးပြုပါ။</li> <li>validation metrics များကို အနီးကပ် စောင့်ကြည့်ပါ။</li> <li>training သည် unstable ဖြစ်ပါက learning rate ကို ချိန်ညှိပါ။</li></ul></blockquote> <h2 class="relative group"><a 
id="trl-ဖင-implementation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl-ဖင-implementation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>TRL ဖြင့် Implementation</span></h2> <p data-svelte-h="svelte-5nhalm">အခု အဓိကအစိတ်အပိုင်းတွေကို ကျွန်တော်တို့ နားလည်ပြီဆိုတော့၊ သင့်လျော်တဲ့ validation နဲ့ monitoring တွေနဲ့ training ကို implement လုပ်ကြရအောင်။ ကျွန်တော်တို့ Transformers Reinforcement Learning (TRL) library မှ <code>SFTTrainer</code> class ကို အသုံးပြုပါမယ်။ ဒီ library ကို <code>transformers</code> library ရဲ့ အပေါ်မှာ တည်ဆောက်ထားတာပါ။ TRL library ကို အသုံးပြုထားတဲ့ ဥပမာအပြည့်အစုံကတော့…</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
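	<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer
	<span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> SFTConfig, SFTTrainer, setup_chat_format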
	<span class="hljs-keyword">import</span> torch

	<span class="hljs-comment"># Set device</span>
	device = <span class="hljs-string">"cuda"</span> <span class="hljs-keyword">if</span> torch.cuda.is_available() <span class="hljs-keyword">else</span> <span class="hljs-string">"cpu"</span>

	<span class="hljs-comment"># Load dataset</span>
	dataset = load_dataset(<span class="hljs-string">"HuggingFaceTB/smoltalk"</span>, <span class="hljs-string">"all"</span>)

	<span class="hljs-comment"># Configure model and tokenizer</span>
	model_name = <span class="hljs-string">"HuggingFaceTB/SmolLM2-135M"</span>
	model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=model_name).to(
	device
	)
	tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name)
	<span class="hljs-comment"># Setup chat template</span>
	model, tokenizer = setup_chat_format(model=model, tokenizer=tokenizer)

	<span class="hljs-comment"># Configure trainer</span>
	training_args = SFTConfig(
	output_dir=<span class="hljs-string">"./sft_output"</span>,
	max_steps=<span class="hljs-number">1000</span>,
	per_device_train_batch_size=<span class="hljs-number">4</span>,
	learning_rate=<span class="hljs-number">5e-5</span>,
	logging_steps=<span class="hljs-number">10</span>,
	save_steps=<span class="hljs-number">100</span>,
	eval_strategy=<span class="hljs-string">"steps"</span>,
	eval_steps=<span class="hljs-number">50</span>,
	)

	<span class="hljs-comment"># Initialize trainer</span>
	trainer = SFTTrainer(
	model=model,
	args=training_args,
	train_dataset=dataset[<span class="hljs-string">"train"</span>],
	eval_dataset=dataset[<span class="hljs-string">"test"</span>],
	processing_class=tokenizer,
	)

	<span class="hljs-comment"># Start training</span>
	trainer.train()<!-- HTML_TAG_END --></pre></div> <blockquote class="tip" data-svelte-h="svelte-qzk3x2"><p>“messages” field ပါဝင်သော dataset (အပေါ်က ဥပမာလိုမျိုး) ကို အသုံးပြုတဲ့အခါ၊ SFTTrainer က model ရဲ့ chat template ကို အလိုအလျောက် အသုံးပြုပါတယ်။ ဒီ template ကို hub ကနေ ပြန်လည်ရယူတာပါ။ ဒါက chat-style conversations တွေကို ကိုင်တွယ်ဖို့ အပို configuration လုပ်ဖို့ မလိုအပ်ဘူးလို့ ဆိုလိုပါတယ်။ trainer က messages တွေကို model ရဲ့ မျှော်လင့်ထားတဲ့ template format အတိုင်း format လုပ်ပါလိမ့်မယ်။</p></blockquote> <h2 class="relative group"><a id="dataset-က-packing-လပခင" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dataset-က-packing-လပခင"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Dataset ကို Packing လုပ်ခြင်း</span></h2> <p data-svelte-h="svelte-15rv33i">SFTTrainer က training ထိရောက်မှုကို အကောင်းဆုံးဖြစ်အောင် example packing ကို ထောက်ပံ့ပေးပါတယ်။ ဒီ feature က short examples များစွာကို တူညီတဲ့ input sequence တစ်ခုထဲကို ထည့်သွင်းနိုင်စေပြီး training လုပ်နေစဉ် GPU အသုံးပြုမှုကို အမြင့်ဆုံးဖြစ်စေပါတယ်။ packing ကို ဖွင့်ဖို့ SFTConfig constructor မှာ <code>packing=True</code> လို့ သတ်မှတ်ပေးရုံပါပဲ။ <code>max_steps</code> နဲ့ 
packed datasets တွေကို အသုံးပြုတဲ့အခါ၊ သင်ရဲ့ packing configuration အပေါ် မူတည်ပြီး မျှော်လင့်ထားသည်ထက် epochs ပိုများများ train လုပ်မိနိုင်ပါတယ်။ examples တွေကို ဘယ်လို ပေါင်းစပ်မလဲဆိုတာကို formatting function တစ်ခု အသုံးပြုပြီး စိတ်ကြိုက်ပြင်ဆင်နိုင်ပါတယ်။ ဒါက question-answer pairs လို multiple fields ပါဝင်တဲ့ datasets တွေနဲ့ အလုပ်လုပ်တဲ့အခါ အထူးအသုံးဝင်ပါတယ်။ evaluation datasets အတွက်၊ SFTConfig မှာ <code>eval_packing=False</code> လို့ သတ်မှတ်ခြင်းဖြင့် packing ကို disable လုပ်နိုင်ပါတယ်။ packing configuration ကို စိတ်ကြိုက်ပြင်ဆင်တဲ့ အခြေခံဥပမာတစ်ခုကတော့-</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># packing ကို configure လုပ်ခြင်း</span>
	training_args = SFTConfig(packing=<span class="hljs-literal">True</span>)

	trainer = SFTTrainer(model=model, train_dataset=dataset, args=training_args)

	trainer.train()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-16pf0iu">multiple fields ပါဝင်တဲ့ dataset ကို packing လုပ်တဲ့အခါ၊ fields တွေကို single input sequence တစ်ခုထဲ ပေါင်းစပ်ဖို့ custom formatting function တစ်ခုကို သတ်မှတ်နိုင်ပါတယ်။ ဒီ function က examples တွေရဲ့ list ကို ယူပြီး packed input sequence ပါဝင်တဲ့ dictionary တစ်ခုကို ပြန်ပေးသင့်ပါတယ်။ custom formatting function ရဲ့ ဥပမာတစ်ခုကတော့…</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">formatting_func</span>(<span class="hljs-params">example</span>):
	text = <span class="hljs-string">f"### Question: <span class="hljs-subst">{example[<span class="hljs-string">'question'</span>]}</span>\n ### Answer: <span class="hljs-subst">{example[<span class="hljs-string">'answer'</span>]}</span>"</span>
	<span class="hljs-keyword">return</span> text


training_args = SFTConfig(packing=<span class="hljs-literal">True</span>)
trainer = SFTTrainer(
	<span class="hljs-string">"facebook/opt-350m"</span>,
	train_dataset=dataset,
	args=training_args,
	formatting_func=formatting_func,
	)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="training-progress-က-monitoring-လပခင" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#training-progress-က-monitoring-လပခင"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Training Progress ကို Monitoring လုပ်ခြင်း</span></h2> <p data-svelte-h="svelte-j6mgt3">ထိရောက်တဲ့ monitoring ဟာ အောင်မြင်သော fine-tuning အတွက် အရေးကြီးပါတယ်။ training လုပ်နေစဉ် ဘာတွေကို စောင့်ကြည့်ရမလဲဆိုတာ လေ့လာကြည့်ကြပါစို့။</p> <h3 class="relative group"><a id="loss-patterns-မက-နလညခင" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#loss-patterns-မက-နလညခင"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 
1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Loss Patterns များကို နားလည်ခြင်း</span></h3> <p data-svelte-h="svelte-1l62emq">Training loss က များသောအားဖြင့် ကွဲပြားတဲ့ အဆင့်သုံးဆင့်ကို လိုက်နာပါတယ်။
	၁။ ကနဦး သိသိသာသာ ကျဆင်းခြင်း (Initial Sharp Drop): data distribution အသစ်နဲ့ လျင်မြန်စွာ လိုက်လျောညီထွေဖြစ်ခြင်း။
	၂။ တဖြည်းဖြည်း တည်ငြိမ်လာခြင်း (Gradual Stabilization): model က fine-tune လုပ်လာတာနဲ့အမျှ သင်ယူမှု နှေးကွေးလာခြင်း။
	၃။ Convergence: Loss values တွေ တည်ငြိမ်လာပြီး training ပြီးဆုံးခြင်းကို ညွှန်ပြခြင်း။</p> <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/nlp_course_sft_loss_graphic.png" alt="SFTTrainer Training"> <h3 class="relative group"><a id="စငကညရမည-metrics-မ" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#စငကညရမည-metrics-မ"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>စောင့်ကြည့်ရမည့် Metrics များ</span></h3> <p data-svelte-h="svelte-1wwtjea">ထိရောက်သော monitoring တွင် quantitative metrics များကို ခြေရာခံခြင်းနှင့် qualitative metrics များကို အကဲဖြတ်ခြင်းတို့ ပါဝင်ပါတယ်။ ရရှိနိုင်သော metrics များကတော့…</p> <ul data-svelte-h="svelte-acv7jp"><li>Training loss</li> <li>Validation loss</li> <li>Learning rate progression</li> <li>Gradient norms</li></ul> <blockquote class="warning" data-svelte-h="svelte-1fwhkap"><p>training လုပ်နေစဉ် ဒီသတိပေးအမှတ်အသားတွေကို ဂရုစိုက်ပါ။
	၁။ training loss က လျော့နည်းနေချိန်မှာ validation loss က တိုးလာခြင်း (overfitting)။
	၂။ loss values တွေမှာ သိသိသာသာ တိုးတက်မှုမရှိခြင်း (underfitting)။
	၃။ အလွန်နိမ့်သော loss values များ (memorization ဖြစ်နိုင်ခြေ)။
	၄။ မတသမတ်တည်းဖြစ်သော output formatting (template learning ပြဿနာများ)။</p></blockquote> <h3 class="relative group"><a id="convergence-သ-လမကင" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#convergence-သ-လမကင"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Convergence သို့ လမ်းကြောင်း</span></h3> <p data-svelte-h="svelte-199trcp">training တိုးတက်လာသည်နှင့်အမျှ loss curve က တဖြည်းဖြည်း တည်ငြိမ်လာသင့်ပါတယ်။ ကျန်းမာသော training ၏ အဓိကညွှန်ပြချက်မှာ training နှင့် validation loss အကြား ကွာဟချက်သေးငယ်ခြင်းဖြစ်ပြီး၊ model က သီးခြားဥပမာများကို မှတ်သားထားခြင်းထက် ယေဘုယျကျသော ပုံစံများကို သင်ယူနေကြောင်း ညွှန်ပြနေသည်။ Absolute loss values တွေကတော့ သင်၏ task နှင့် dataset အပေါ် မူတည်ပြီး ကွဲပြားပါလိမ့်မယ်။</p> <h3 class="relative group"><a id="training-progress-က-monitoring-လပခင" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#training-progress-က-monitoring-လပခင"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" 
height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Training Progress ကို Monitoring လုပ်ခြင်း</span></h3> <p data-svelte-h="svelte-1v4xcyj">အပေါ်က ဂရပ်က ပုံမှန် training တိုးတက်မှုတစ်ခုကို ပြသထားပါတယ်။ training နဲ့ validation loss နှစ်ခုစလုံးက အစပိုင်းမှာ သိသိသာသာ လျော့နည်းပြီး၊ နောက်ပိုင်းမှာ တဖြည်းဖြည်း တည်ငြိမ်လာတာကို သတိပြုပါ။ ဒီပုံစံက model က ထိရောက်စွာ သင်ယူနေပြီး generalization ability ကို ထိန်းသိမ်းထားကြောင်း ညွှန်ပြပါတယ်။</p> <h3 class="relative group"><a id="စငကညရမည-သတပအမတအသမ" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#စငကညရမည-သတပအမတအသမ"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" 
fill="currentColor"></path></svg></span></a> <span>စောင့်ကြည့်ရမည့် သတိပေးအမှတ်အသားများ</span></h3> <p data-svelte-h="svelte-njsqwl">loss curves တွေမှာ ပုံစံအမျိုးမျိုးက ဖြစ်နိုင်ချေရှိတဲ့ ပြဿနာတွေကို ညွှန်ပြနိုင်ပါတယ်။ အောက်မှာ common warning signs တွေနဲ့ ကျွန်တော်တို့ စဉ်းစားနိုင်တဲ့ ဖြေရှင်းနည်းတွေကို သရုပ်ပြထားပါတယ်။</p> <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/sft_loss_1.png" alt="SFTTrainer Training"> <p data-svelte-h="svelte-138tmg7">အကယ်၍ validation loss က training loss ထက် သိသိသာသာ နှေးကွေးစွာ လျော့နည်းနေတယ်ဆိုရင်၊ သင့် model က training data ပေါ်မှာ overfitting ဖြစ်နေနိုင်ပါတယ်။ အောက်ပါတို့ကို စဉ်းစားပါ-</p> <ul data-svelte-h="svelte-1fqzxfw"><li>training steps တွေ လျှော့ချပါ။</li> <li>dataset size ကို တိုးမြှင့်ပါ။</li> <li>dataset အရည်အသွေးနဲ့ မတူကွဲပြားမှုကို validate လုပ်ပါ။</li></ul> <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/sft_loss_2.png" alt="SFTTrainer Training"> <p data-svelte-h="svelte-1uaj957">အကယ်၍ loss က သိသိသာသာ တိုးတက်မှု မပြဘူးဆိုရင်၊ model က…</p> <ul data-svelte-h="svelte-2ll1rl"><li>အလွန်နှေးကွေးစွာ သင်ယူနေခြင်း (learning rate တိုးမြှင့်ကြည့်ပါ)။</li> <li>task နဲ့ ရုန်းကန်နေရခြင်း (data အရည်အသွေးနဲ့ task complexity ကို စစ်ဆေးပါ)။</li> <li>architecture ကန့်သတ်ချက်တွေနဲ့ တိုက်မိခြင်း (မတူညီတဲ့ model တစ်ခုကို စဉ်းစားပါ)။</li></ul> <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/sft_loss_3.png" alt="SFTTrainer Training"> <p data-svelte-h="svelte-b7wgsr">အလွန်နိမ့်သော loss values တွေက သင်ယူခြင်းထက် memorization ဖြစ်နေတာကို ညွှန်ပြနိုင်ပါတယ်။ ဒါက အောက်ပါအခြေအနေတွေမှာ အထူးစိုးရိမ်စရာ ဖြစ်ပါတယ်။</p> <ul data-svelte-h="svelte-15zw37v"><li>model က အသစ်၊ ဆင်တူတဲ့ examples တွေပေါ်မှာ စွမ်းဆောင်ရည် နည်းပါးခြင်း။</li> <li>outputs တွေမှာ မတူကွဲပြားမှု နည်းပါးခြင်း။</li> <li>responses တွေက training examples တွေနဲ့ အလွန်ဆင်တူခြင်း။</li></ul> <blockquote class="warning" data-svelte-h="svelte-1t8qsua"><p>training လုပ်နေစဉ် 
loss values နဲ့ model ရဲ့ actual outputs နှစ်ခုလုံးကို စောင့်ကြည့်ပါ။ တစ်ခါတစ်ရံ loss က ကောင်းမွန်နေပေမယ့် model က မလိုလားအပ်တဲ့ behaviors တွေ ဖြစ်ပေါ်လာနိုင်ပါတယ်။ model ရဲ့ responses တွေကို ပုံမှန် qualitative evaluation လုပ်ခြင်းက metrics တစ်ခုတည်းနဲ့ လွတ်သွားနိုင်တဲ့ ပြဿနာတွေကို ဖမ်းမိအောင် ကူညီပေးပါတယ်။</p></blockquote> <p data-svelte-h="svelte-1gitncg">ကျွန်တော်တို့ ဒီနေရာမှာ ဖော်ပြထားတဲ့ loss values တွေရဲ့ အဓိပ္ပာယ်ဖွင့်ဆိုချက်က အသုံးအများဆုံး အခြေအနေကို ရည်ရွယ်တာဖြစ်ပြီး၊ တကယ်တမ်းမှာတော့ model၊ dataset၊ training parameters စတာတွေအပေါ် မူတည်ပြီး loss values တွေဟာ နည်းလမ်းအမျိုးမျိုးနဲ့ အလုပ်လုပ်နိုင်ပါတယ်။ ဖော်ပြထားတဲ့ ပုံစံတွေအကြောင်း ပိုမိုလေ့လာချင်တယ်ဆိုရင် <a href="https://www.fast.ai/posts/2023-09-04-learning-jumps/" rel="nofollow">Fast AI</a> က လူတွေရဲ့ ဒီ blog post ကို ကြည့်ရှုသင့်ပါတယ်။</p> <h2 class="relative group"><a id="sft-ပနက-evaluation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#sft-ပနက-evaluation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>SFT ပြီးနောက် Evaluation</span></h2> <p data-svelte-h="svelte-5ohbtp">အပိုင်း <a href="/en/chapter11/4">11.4</a> မှာ benchmark datasets တွေကို အသုံးပြုပြီး 
model ကို ဘယ်လို evaluate လုပ်ရမယ်ဆိုတာ ကျွန်တော်တို့ သင်ယူသွားပါမယ်။ အခုလောလောဆယ်မှာတော့ model ရဲ့ qualitative evaluation ကို အာရုံစိုက်ပါမယ်။</p> <p data-svelte-h="svelte-1qtc3i0">SFT ပြီးဆုံးပြီးနောက်၊ အောက်ပါ လုပ်ဆောင်မှုများကို စဉ်းစားပါ။</p> <p data-svelte-h="svelte-100jaaz">၁။ held-out test data ပေါ်တွင် model ကို သေချာစွာ evaluation လုပ်ပါ။
	၂။ မတူညီသော inputs များတစ်လျှောက် template adherence ကို validate လုပ်ပါ။
	၃။ domain-specific knowledge retention ကို စမ်းသပ်ပါ။
	၄။ real-world performance metrics များကို စောင့်ကြည့်ပါ။</p> <blockquote class="tip" data-svelte-h="svelte-grs56l"><p>သင်၏ training လုပ်ငန်းစဉ်ကို မှတ်တမ်းတင်ပါ၊ ၎င်းတွင် အောက်ပါတို့ ပါဝင်သည်-</p> <ul><li>Dataset characteristics</li> <li>Training parameters</li> <li>Performance metrics</li> <li>သိရှိထားသော ကန့်သတ်ချက်များ</li></ul>
	<p>ဤမှတ်တမ်းတင်ခြင်းသည် နောင် model iterations များအတွက် အလွန်အသုံးဝင်ပါလိမ့်မည်။</p></blockquote> <h2 class="relative group"><a id="မခနမ" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#မခနမ"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>မေးခွန်းများ</span></h2> <h3 class="relative group"><a id="၁-sft-တင-training-ကခနက-ထနခပသ-parameters-မက-ဘတလ" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#၁-sft-တင-training-ကခနက-ထနခပသ-parameters-မက-ဘတလ"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 
0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>၁။ SFT တွင် training ကြာချိန်ကို ထိန်းချုပ်သော parameters များက ဘာတွေလဲ။</span></h3> <div><form><label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="0"> <!-- HTML_TAG_START -->num_train_epochs နှင့် max_steps<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="1"> <!-- HTML_TAG_START -->batch_size နှင့် learning_rate<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="2"> <!-- HTML_TAG_START -->gradient_checkpointing နှင့် warmup_ratio<!-- HTML_TAG_END --></label> <div class="flex flex-row items-center mt-3"><button class="btn px-4 mr-4" type="submit" disabled>Submit</button> </div></form></div> <h3 class="relative group"><a id="၂-loss-curves-တမ-ဘယပစက-overfitting-ဖစနငခက-ညနပသလ" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#၂-loss-curves-တမ-ဘယပစက-overfitting-ဖစနငခက-ညနပသလ"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 
0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>၂။ loss curves တွေမှာ ဘယ်ပုံစံက overfitting ဖြစ်နိုင်ခြေကို ညွှန်ပြသလဲ။</span></h3> <div><form><label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="0"> <!-- HTML_TAG_START -->training loss က ဆက်လက်လျော့နည်းနေချိန်မှာ validation loss က တိုးလာခြင်း<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="1"> <!-- HTML_TAG_START -->training နဲ့ validation loss နှစ်ခုစလုံး တည်ငြိမ်စွာ လျော့နည်းခြင်း<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="2"> <!-- HTML_TAG_START -->validation loss က လျော့နည်းနေချိန်မှာ training loss က တသမတ်တည်းရှိနေခြင်း<!-- HTML_TAG_END --></label> <div class="flex flex-row items-center mt-3"><button class="btn px-4 mr-4" type="submit" disabled>Submit</button> </div></form></div> <h3 class="relative group"><a id="၃-gradientaccumulationsteps-က-ဘအတက-အသပသလ" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#၃-gradientaccumulationsteps-က-ဘအတက-အသပသလ"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 
11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>၃။ gradient_accumulation_steps ကို ဘာအတွက် အသုံးပြုသလဲ။</span></h3> <div><form><label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="0"> <!-- HTML_TAG_START -->memory ပိုမိုအသုံးပြုခြင်းမရှိဘဲ effective batch size ကို တိုးမြှင့်ရန်<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="1"> <!-- HTML_TAG_START -->training လုပ်နေစဉ် checkpoints တွေ သိမ်းဆည်းရန်<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="2"> <!-- HTML_TAG_START -->learning rate schedule ကို ထိန်းချုပ်ရန်<!-- HTML_TAG_END --></label> <div class="flex flex-row items-center mt-3"><button class="btn px-4 mr-4" type="submit" disabled>Submit</button> </div></form></div> <h3 class="relative group"><a id="၄-sft-training-လပနစဉ-ဘတက-စငကညသငသလ" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#၄-sft-training-လပနစဉ-ဘတက-စငကညသငသလ"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 
56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>၄။ SFT training လုပ်နေစဉ် ဘာတွေကို စောင့်ကြည့်သင့်သလဲ။</span></h3> <div><form><label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="0"> <!-- HTML_TAG_START -->quantitative metrics နဲ့ qualitative outputs နှစ်ခုလုံး<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="1"> <!-- HTML_TAG_START -->training loss တစ်ခုတည်းကိုသာ<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="2"> <!-- HTML_TAG_START -->model ရဲ့ output quality တစ်ခုတည်းကိုသာ<!-- HTML_TAG_END --></label> <div class="flex flex-row items-center mt-3"><button class="btn px-4 mr-4" type="submit" disabled>Submit</button> </div></form></div> <h3 class="relative group"><a id="၅-training-လပနစဉ-ကနမသ-convergence-က-ဘက-ညနပသလ" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#၅-training-လပနစဉ-ကနမသ-convergence-က-ဘက-ညနပသလ"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 
56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>၅။ training လုပ်နေစဉ် ကျန်းမာသော convergence ကို ဘာက ညွှန်ပြသလဲ။</span></h3> <div><form><label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="0"> <!-- HTML_TAG_START -->training နဲ့ validation loss ကြားက ကွာဟချက်သေးငယ်ခြင်း<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="1"> <!-- HTML_TAG_START -->training loss က သုညသို့ ရောက်ရှိခြင်း<!-- HTML_TAG_END --></label> <label class="block"><input autocomplete="off" class="form-input -mt-1.5 mr-2" name="choice" type="checkbox" value="2"> <!-- HTML_TAG_START -->validation loss က training loss ထက် နိမ့်နေခြင်း<!-- HTML_TAG_END --></label> <div class="flex flex-row items-center mt-3"><button class="btn px-4 mr-4" type="submit" disabled>Submit</button> </div></form></div> <h2 class="relative group"><a id="-ကငပပ" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#-ကငပပ"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>💐 ကောင်းပါပြီ!</span></h2> <p 
data-svelte-h="svelte-1kszi7o">SFT ကို အသုံးပြုပြီး models တွေကို ဘယ်လို fine-tune လုပ်ရမယ်ဆိုတာ သင်ယူခဲ့ပြီးပါပြီ! ဆက်လက်လေ့လာဖို့…
	၁။ notebook ကို မတူညီတဲ့ parameters တွေနဲ့ စမ်းသပ်ကြည့်ပါ။
	၂။ အခြား datasets တွေနဲ့ စမ်းသပ်ကြည့်ပါ။
	၃။ သင်တန်းပစ္စည်းတွေကို တိုးတက်အောင် ပံ့ပိုးကူညီပါ။</p> <h2 class="relative group"><a id="ထပဆင-အရငအမစမ" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#ထပဆင-အရငအမစမ"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ထပ်ဆောင်း အရင်းအမြစ်များ</span></h2> <ul data-svelte-h="svelte-1qwbi4x"><li><a href="https://huggingface.co/docs/trl" rel="nofollow">TRL Documentation</a></li> <li><a href="https://github.com/huggingface/trl/blob/main/trl/scripts/sft.py" rel="nofollow">SFT Examples Repository</a></li> <li><a href="https://huggingface.co/docs/transformers/training" rel="nofollow">Fine-tuning Best Practices</a></li></ul> <h2 class="relative group"><a id="ဝဟရ-ရငလငခက-glossary" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#ဝဟရ-ရငလငခက-glossary"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 
11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ဝေါဟာရ ရှင်းလင်းချက် (Glossary)</span></h2> <ul data-svelte-h="svelte-1f9jeg1"><li><strong>Supervised Fine-Tuning (SFT)</strong>: ကြိုတင်လေ့ကျင့်ထားပြီးသား (pre-trained) မော်ဒယ်တစ်ခုကို တိကျသောလုပ်ငန်းဆောင်တာများ (specific tasks) အတွက် label ပါသော ဒေတာများကို အသုံးပြု၍ ထပ်မံလေ့ကျင့်ခြင်းနည်းလမ်း။ ၎င်းသည် မော်ဒယ်ကို ပိုမိုစွမ်းဆောင်နိုင်ပြီး ဘက်စုံသုံးနိုင်စေသည်။</li> <li><strong>Generative Language Models</strong>: စာသားအသစ်များ၊ code သို့မဟုတ် အခြားဒေတာပုံစံများကို ဖန်တီးထုတ်လုပ်နိုင်သော ဘာသာစကားမော်ဒယ်များ။</li> <li><strong>Summarization</strong>: ရှည်လျားသော စာသားတစ်ခု၏ အနှစ်ချုပ်ကို ထုတ်လုပ်ခြင်း။</li> <li><strong>Question Answering</strong>: ပေးထားသော စာသားတစ်ခုမှ မေးခွန်းတစ်ခု၏ အဖြေကို ရှာဖွေခြင်း။</li> <li><strong>Language Models</strong>: လူသားဘာသာစကား၏ ဖြန့်ဝေမှုကို နားလည်ရန် လေ့ကျင့်ထားသော AI မော်ဒယ်တစ်ခု။ ၎င်းသည် စာသားထုတ်လုပ်ခြင်း၊ ဘာသာပြန်ခြင်း စသည့်လုပ်ငန်းများတွင် အသုံးပြုနိုင်သည်။</li> <li><strong>Tasks</strong>: Artificial Intelligence (AI) သို့မဟုတ် Machine Learning (ML) မော်ဒယ်တစ်ခုက လုပ်ဆောင်ရန် ဒီဇိုင်းထုတ်ထားသော သီးခြားအလုပ်။</li> <li><strong>Versatile</strong>: ကွဲပြားသော အလုပ်များ သို့မဟုတ် အခြေအနေများစွာကို ကိုင်တွယ်နိုင်စွမ်းရှိခြင်း။</li> <li><strong>Use Cases</strong>: ထုတ်ကုန် သို့မဟုတ် စနစ်တစ်ခုကို သီးခြားအခြေအနေတစ်ခုတွင် မည်သို့အသုံးပြုသည်ကို ဖော်ပြခြင်း။</li> <li><strong>LLMs (Large Language Models)</strong>: လူသားဘာသာစကားကို နားလည်ပြီး ထုတ်လုပ်ပေးနိုင်တဲ့ အလွန်ကြီးမားတဲ့ Artificial Intelligence (AI) မော်ဒယ်တွေ 
ဖြစ်ပါတယ်။</li> <li><strong>ChatGPT</strong>: OpenAI မှ ဖန်တီးထားသော လူသားနှင့်ဆင်တူသော စာသားများကို ဖန်တီးနိုင်သည့် conversational AI မော်ဒယ်။</li> <li><strong>Human Preferences</strong>: လူသားများ၏ နှစ်သက်မှုများ သို့မဟုတ် ရွေးချယ်မှုများ။</li> <li><strong>Chat Templates</strong>: အသုံးပြုသူနှင့် AI မော်ဒယ်များကြား အပြန်အလှန်ဆက်သွယ်မှုများကို စနစ်တကျ ပြုလုပ်ပေးသည့် ဖွဲ့စည်းပုံများ။ ၎င်းတို့သည် တသမတ်တည်းဖြစ်ပြီး အခြေအနေနှင့်ကိုက်ညီသော တုံ့ပြန်မှုများကို သေချာစေသည်။</li> <li><strong>System Prompts</strong>: AI မော်ဒယ်တစ်ခုအား ၎င်း၏ အခန်းကဏ္ဍ၊ ပုံစံ သို့မဟုတ် လုပ်ဆောင်ရမည့်အရာများကို လမ်းညွှန်ပေးသည့် မူလညွှန်ကြားချက်များ။</li> <li><strong>Role-based Messages</strong>: AI model နှင့် အသုံးပြုသူတို့၏ သတ်မှတ်ထားသော အခန်းကဏ္ဍများ (ဥပမာ- user, assistant) အပေါ် အခြေခံ၍ ပေးပို့သော messages များ။</li> <li><strong>Pre-trained Language Models</strong>: အကြီးစား ဒေတာအမြောက်အမြားဖြင့် ကြိုတင်လေ့ကျင့်ထားပြီးဖြစ်သော ဘာသာစကားမော်ဒယ်များ။</li> <li><strong>Task-specific Dataset</strong>: သီးခြားလုပ်ငန်းတစ်ခု (ဥပမာ- sentiment analysis) အတွက် အထူးပြင်ဆင်ထားသော ဒေတာအစုအဝေး။</li> <li><strong>Labeled Examples</strong>: labels များ သို့မဟုတ် မှန်ကန်သောအဖြေများ ပါဝင်သော training data များ။</li> <li><strong>TRL Documentation</strong>: Hugging Face Transformer Reinforcement Learning (TRL) library ၏ တရားဝင် မှတ်တမ်းများ (documentation)။</li> <li><strong>LoRA (Low-Rank Adaptation)</strong>: Transformer မော်ဒယ်များကဲ့သို့သော large models များကို fine-tuning လုပ်ရာတွင် ထိရောက်မှုရှိစေရန်အတွက် model ၏ layers တွေမှာ low-rank matrices တွေကို ထပ်ထည့်သည့် နည်းပညာ။ ၎င်းသည် memory အသုံးပြုမှုကို သိသိသာသာ လျှော့ချနိုင်သည်။</li> <li><strong>Low-Rank Matrices</strong>: သင်္ချာပိုင်းဆိုင်ရာ matrix တစ်မျိုးဖြစ်ပြီး ၎င်း၏ rank သည် ၎င်း၏ dimensions များထက် သိသိသာသာ နည်းပါးသည်။ Machine Learning တွင် parameters အရေအတွက်ကို လျှော့ချရန် အသုံးပြုသည်။</li> <li><strong>Model’s Layers</strong>: Neural network model တစ်ခု၏ အဆင့်များ။</li> <li><strong>Pre-trained Knowledge</strong>: မော်ဒယ်အား မူလ pre-training 
လုပ်ငန်းစဉ်မှ သင်ယူထားသော ဗဟုသုတများ။</li> <li><strong>Memory Savings</strong>: ကွန်ပျူတာ၏ RAM အသုံးပြုမှုကို လျှော့ချနိုင်ခြင်း။</li> <li><strong>Hardware with Limited Resources</strong>: ကွန်ပျူတာ၏ memory (RAM) သို့မဟုတ် processing power (GPU) အစွမ်းအစ အကန့်အသတ်ရှိသော devices များ။</li> <li><strong>Evaluation</strong>: fine-tuning လုပ်ငန်းစဉ်ပြီးနောက် model ၏ စွမ်းဆောင်ရည်ကို တိုင်းတာခြင်း။ ၎င်းသည် model ၏ ထိရောက်မှုနှင့် တိကျမှုကို ဆုံးဖြတ်ရန် ကူညီပေးသည်။</li> <li><strong>Model Hub</strong>: Hugging Face Hub ကို ရည်ညွှန်းပြီး AI မော်ဒယ်များ ရှာဖွေ၊ မျှဝေ၊ အသုံးပြုနိုင်သော ဗဟို platform။</li> <li><strong>🤗 Transformers</strong>: Hugging Face က ထုတ်လုပ်ထားတဲ့ library တစ်ခုဖြစ်ပြီး Transformer မော်ဒယ်တွေကို အသုံးပြုပြီး Natural Language Processing (NLP), computer vision, audio processing စတဲ့ နယ်ပယ်တွေမှာ အဆင့်မြင့် AI မော်ဒယ်တွေကို တည်ဆောက်ပြီး အသုံးပြုနိုင်စေပါတယ်။</li> <li><strong>Account</strong>: Hugging Face Hub ပေါ်ရှိ သုံးစွဲသူအကောင့်။</li> <li><strong>Script</strong>: အလိုအလျောက်လုပ်ဆောင်ရန် ရေးသားထားသော code များ။</li> <li><strong>SFTTrainer</strong>: TRL library မှ <code>Trainer</code> class ၏ extension တစ်ခုဖြစ်ပြီး Supervised Fine-Tuning လုပ်ငန်းစဉ်ကို ရိုးရှင်းစေသည်။</li> <li><strong>Direct Preference Optimization (DPO)</strong>: Reinforcement Learning from Human Feedback (RLHF) အတွက် simplified algorithm တစ်ခုဖြစ်ပြီး model output များကို လူသားနှစ်သက်မှုနှင့် ပိုမိုကိုက်ညီအောင် လုပ်ဆောင်ပေးသည်။</li> <li><strong>Google Gemma</strong>: Google မှ ထုတ်လုပ်ထားသော open-source LLM တစ်မျိုး။</li> <li><strong>ChatML</strong>: OpenAI မှ ထုတ်လုပ်ထားသော chat conversation များကို ကိုယ်စားပြုရန်အတွက် markup format တစ်ခု။</li> <li><strong>Alignment Handbook</strong>: Hugging Face မှ LLM များကို လူသားနှစ်သက်မှုနှင့် ကိုက်ညီအောင် လေ့ကျင့်ရန်အတွက် လမ်းညွှန်စာတမ်း။</li> <li><strong>Persian Product Catalogs</strong>: ပါရှန်ဘာသာစကားဖြင့် ထုတ်ကုန်စာရင်းများ။</li> <li><strong>JSON Format</strong>: ဒေတာများကို ပေါ့ပေါ့ပါးပါး ဖလှယ်နိုင်သော format ဖြစ်ပြီး လူသားများ 
ဖတ်ရှုရလွယ်ကူပြီး စက်များ စီမံဆောင်ရွက်ရလွယ်ကူသည်။</li> <li><strong>Instruction-tuned Model</strong>: ညွှန်ကြားချက်များကို လိုက်နာရန် အထူးလေ့ကျင့်ထားသော model။</li> <li><strong>Prompts</strong>: AI model သို့ ပေးပို့သော input စာသားများ သို့မဟုတ် ညွှန်ကြားချက်များ။</li> <li><strong>Computational Resources</strong>: Machine Learning လုပ်ငန်းများအတွက် လိုအပ်သော ကွန်ပျူတာစွမ်းအား (CPU, GPU), memory နှင့် storage။</li> <li><strong>Engineering Effort</strong>: စနစ်တစ်ခုကို တည်ဆောက်ရန် သို့မဟုတ် ထိန်းသိမ်းရန် လိုအပ်သော နည်းပညာဆိုင်ရာ အလုပ်ပမာဏ။</li> <li><strong>Template Control</strong>: Model ၏ output structure ကို တိကျစွာ ထိန်းချုပ်နိုင်ခြင်း။</li> <li><strong>Output Structure</strong>: Model မှ ထုတ်လုပ်သော ရလဒ်များ၏ ပုံစံ သို့မဟုတ် ဖွဲ့စည်းပုံ။</li> <li><strong>Chat Template Format</strong>: Chatbot dialogues များကို စနစ်တကျ ကိုယ်စားပြုသော စာသားပုံစံ။</li> <li><strong>Strict Output Schemas</strong>: Model ၏ output သည် သတ်မှတ်ထားသော စည်းမျဉ်းများ သို့မဟုတ် ပုံစံများကို တင်းကြပ်စွာ လိုက်နာရခြင်း။</li> <li><strong>Consistent Styling</strong>: Model ၏ output များတစ်လျှောက် တသမတ်တည်းဖြစ်သော ရေးသားဟန် သို့မဟုတ် ပုံစံ။</li> <li><strong>Domain Adaptation</strong>: Model ကို သီးခြားနယ်ပယ်တစ်ခု၏ အချက်အလက်များနှင့် လိုအပ်ချက်များ (ဥပမာ- ဆေးပညာ) နှင့် လိုက်လျောညီထွေဖြစ်အောင် ပြုလုပ်ခြင်း။</li> <li><strong>Domain Terminology</strong>: သီးခြားနယ်ပယ်တစ်ခုတွင် အသုံးပြုသော စကားလုံးများနှင့် အသုံးအနှုန်းများ။</li> <li><strong>Professional Standards</strong>: သီးခြားလုပ်ငန်းနယ်ပယ်တစ်ခုတွင် မျှော်လင့်ထားသော အရည်အသွေး သို့မဟုတ် စံနှုန်းများ။</li> <li><strong>Technical Queries</strong>: နည်းပညာဆိုင်ရာ မေးခွန်းများ။</li> <li><strong>Industry-specific Guidelines</strong>: သီးခြားလုပ်ငန်းနယ်ပယ်တစ်ခုအတွက် သတ်မှတ်ထားသော စည်းမျဉ်းများ သို့မဟုတ် လမ်းညွှန်ချက်များ။</li> <li><strong>Dataset Preparation</strong>: Training အတွက် dataset ကို ပြင်ဆင်ခြင်း။</li> <li><strong>Input-Output Pairs</strong>: Training dataset တွင် input နှင့် ၎င်း၏ မျှော်လင့်ထားသော output တို့ ပါဝင်သော တွဲဖက်ဒေတာ။</li> 
<li><strong>Input Prompt</strong>: AI model သို့ ပေးပို့သော မူလမေးခွန်း သို့မဟုတ် ညွှန်ကြားချက်။</li> <li><strong>Expected Model Response</strong>: Input prompt အတွက် model မှ မျှော်လင့်ထားသော အဖြေ။</li> <li><strong>Context</strong>: ပေးထားသော အချက်အလက်ကို နားလည်ရန် ကူညီပေးသော နောက်ခံအချက်အလက်။</li> <li><strong>Metadata</strong>: ဒေတာအကြောင်း အချက်အလက်များ (data about data)။</li> <li><strong>Training Data Quality</strong>: training အတွက် အသုံးပြုသော ဒေတာများ၏ သန့်ရှင်းမှု၊ တိကျမှုနှင့် သက်ဆိုင်မှု။</li> <li><strong>Validate Dataset</strong>: Dataset ၏ အရည်အသွေး၊ တိကျမှုနှင့် မျှတမှုကို စစ်ဆေးခြင်း။</li> <li><strong>Training Configuration</strong>: Model ကို လေ့ကျင့်ရန်အတွက် သတ်မှတ်ထားသော parameters နှင့် settings များ။</li> <li><strong>Training Process</strong>: Model ကို ဒေတာများဖြင့် လေ့ကျင့်ပေးသည့် လုပ်ငန်းစဉ်။</li> <li><strong><code>num_train_epochs</code></strong>: Model ကို training dataset တစ်ခုလုံးဖြင့် လေ့ကျင့်သည့် အကြိမ်အရေအတွက်။</li> <li><strong><code>max_steps</code></strong>: Training လုပ်ငန်းစဉ်အတွင်း လုပ်ဆောင်ရမည့် အများဆုံး training steps အရေအတွက်။</li> <li><strong>Overfitting</strong>: Model သည် training data ကို အလွန်အကျွံ သင်ယူသွားပြီး unseen data များနှင့် တွေ့ဆုံသောအခါ စွမ်းဆောင်ရည်ကျဆင်းခြင်း။</li> <li><strong><code>per_device_train_batch_size</code></strong>: GPU သို့မဟုတ် CPU တစ်ခုစီတွင် training လုပ်ငန်းစဉ်တစ်ခုစီအတွက် အသုံးပြုသော samples အရေအတွက်။</li> <li><strong><code>gradient_accumulation_steps</code></strong>: gradients များကို update မလုပ်ခင် batch များစွာမှ gradients များကို စုဆောင်းရန်။ ၎င်းသည် memory ကို ထိထိရောက်ရောက် အသုံးပြုခြင်းဖြင့် effective batch size ကို တိုးမြှင့်နိုင်စေသည်။</li> <li><strong>Effective Batch Size</strong>: <code>per_device_train_batch_size</code> ကို <code>gradient_accumulation_steps</code> ဖြင့် မြှောက်ထားသော တန်ဖိုး။</li> <li><strong>Gradients</strong>: Neural network ၏ parameters များ (weights) ကို update လုပ်ရန် အသုံးပြုသော loss function ၏ ဆင်းသက်လာသော တန်ဖိုးများ။</li> 
<li><strong><code>learning_rate</code></strong>: Training လုပ်ငန်းစဉ်အတွင်း model ၏ weights များကို မည်မျှပြောင်းလဲရမည်ကို ထိန်းချုပ်သော parameter။</li> <li><strong><code>warmup_ratio</code></strong>: training ၏ ကနဦးအပိုင်းတွင် learning rate ကို ဖြည်းဖြည်းချင်း တိုးမြှင့်ပေးသည့် အချိုး။</li> <li><strong>Instability</strong>: Training လုပ်နေစဉ် model ၏ performance တွင် ကြီးမားသော မတည်ငြိမ်မှုများ ဖြစ်ပေါ်ခြင်း။</li> <li><strong>Monitoring Parameters</strong>: training လုပ်ငန်းစဉ်၏ တိုးတက်မှုကို ခြေရာခံရန် အသုံးပြုသော parameters များ။</li> <li><strong><code>logging_steps</code></strong>: Training metrics များကို log လုပ်သော အကြိမ်ရေ။</li> <li><strong><code>eval_steps</code></strong>: Validation dataset ပေါ်တွင် model ကို evaluation လုပ်သော အကြိမ်ရေ။</li> <li><strong><code>save_steps</code></strong>: Model ၏ checkpoint များကို သိမ်းဆည်းသော အကြိမ်ရေ။</li> <li><strong>Validation Metrics</strong>: Validation dataset ပေါ်တွင် model ၏ စွမ်းဆောင်ရည်ကို တိုင်းတာရန် အသုံးပြုသော metrics များ။</li> <li><strong><code>AutoModelForCausalLM</code></strong>: Hugging Face Transformers library မှ class တစ်ခုဖြစ်ပြီး causal language modeling အတွက် model ကို အလိုအလျောက် load လုပ်ပေးသည်။</li> <li><strong><code>AutoTokenizer</code></strong>: Hugging Face Transformers library မှာ ပါဝင်တဲ့ class တစ်ခုဖြစ်ပြီး မော်ဒယ်အမည်ကို အသုံးပြုပြီး သက်ဆိုင်ရာ tokenizer ကို အလိုအလျောက် load လုပ်ပေးသည်။</li> <li><strong><code>setup_chat_format</code></strong>: TRL library မှ function တစ်ခုဖြစ်ပြီး model နှင့် tokenizer အတွက် chat format ကို သတ်မှတ်ပေးသည်။</li> <li><strong><code>SFTConfig</code></strong>: TRL library မှ Supervised Fine-Tuning ၏ training arguments များကို သတ်မှတ်ရန်အတွက် configuration class။</li> <li><strong><code>output_dir</code></strong>: Training outputs (models, logs) များကို သိမ်းဆည်းမည့် directory။</li> <li><strong><code>eval_strategy="steps"</code></strong>: Evaluation ကို steps များအပေါ် အခြေခံပြီး လုပ်ဆောင်ရန် strategy။</li> <li><strong><code>Trainer</code></strong>: Hugging Face 
Transformers library မှ model များကို လေ့ကျင့်ရန်အတွက် မြင့်မားသောအဆင့် (high-level) API။</li> <li><strong><code>trl</code> Library (Transformer Reinforcement Learning)</strong>: Hugging Face မှ Reinforcement Learning from Human Feedback (RLHF) အတွက် ကိရိယာများနှင့် library များ။</li> <li><strong>Packing</strong>: Multiple short examples များကို တစ်ခုတည်းသော input sequence ထဲသို့ ပေါင်းစပ်ထည့်သွင်းခြင်းဖြင့် training efficiency ကို မြှင့်တင်သော နည်းလမ်း။</li> <li><strong>GPU Utilization</strong>: Graphics Processing Unit (GPU) ကို မည်မျှထိရောက်စွာ အသုံးပြုထားသည်ကို ဖော်ပြခြင်း။</li> <li><strong>Formatting Function</strong>: Multiple fields ပါဝင်သော dataset မှ data များကို single input sequence တစ်ခုအဖြစ် ပေါင်းစပ်ရန်အတွက် custom function။</li> <li><strong>Training Loss</strong>: Model training လုပ်နေစဉ်တွင် တွက်ချက်သော loss value။</li> <li><strong>Validation Loss</strong>: Validation dataset ပေါ်တွင် model ၏ စွမ်းဆောင်ရည်ကို တိုင်းတာသော loss value။</li> <li><strong>Learning Rate Progression</strong>: Training လုပ်နေစဉ် learning rate မည်သို့ပြောင်းလဲသည်ကို ပြသခြင်း။</li> <li><strong>Gradient Norms</strong>: Gradients များ၏ အရွယ်အစား။</li> <li><strong>Underfitting</strong>: Model သည် training data မှ အခြေခံပုံစံများကို ကောင်းစွာမသင်ယူနိုင်ခြင်း။</li> <li><strong>Memorization</strong>: Model သည် training data ကို အလွတ်ကျက်မှတ်ထားခြင်းဖြစ်ပြီး ယေဘုယျကျသော နားလည်မှုမရှိခြင်း။</li> <li><strong>Qualitative Evaluation</strong>: Model ၏ output များကို လူသားများက စစ်ဆေးခြင်းဖြင့် အရည်အသွေးကို အကဲဖြတ်ခြင်း။</li> <li><strong>Convergence</strong>: Training လုပ်နေစဉ် model ၏ parameters များ (weights) သည် တည်ငြိမ်သော အခြေအနေသို့ ရောက်ရှိခြင်း။</li> <li><strong>Generalizable Patterns</strong>: Model သည် unseen data များနှင့် တွေ့ဆုံသောအခါ ကောင်းစွာ စွမ်းဆောင်နိုင်ရန် သင်ယူထားသော ပုံစံများ။</li> <li><strong>Held-out Test Data</strong>: Model training သို့မဟုတ် validation တွင် အသုံးမပြုရသေးသော dataset အပိုင်း။</li> <li><strong>Template Adherence</strong>: Model ၏ output သည် သတ်မှတ်ထားသော 
template ပုံစံကို တသမတ်တည်း လိုက်နာခြင်းရှိမရှိ။</li> <li><strong>Domain-specific Knowledge Retention</strong>: Model သည် လေ့ကျင့်ထားသော သီးခြားနယ်ပယ်ဆိုင်ရာ ဗဟုသုတများကို မည်မျှထိန်းသိမ်းထားနိုင်ခြင်း။</li> <li><strong>Real-world Performance Metrics</strong>: လက်တွေ့အခြေအနေများတွင် model ၏ စွမ်းဆောင်ရည်ကို တိုင်းတာရန် အသုံးပြုသော metrics များ။</li> <li><strong>Dataset Characteristics</strong>: Dataset ၏ အရွယ်အစား၊ အမျိုးအစား၊ ဖြန့်ဝေမှု စသည်တို့ကဲ့သို့သော လက္ခဏာများ။</li> <li><strong>Model Iterations</strong>: Model ၏ မတူညီသော ဗားရှင်းများ သို့မဟုတ် နောက်ဆက်တွဲ ဖန်တီးမှုများ။</li></ul> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/course/blob/main/chapters/my/chapter11/3.mdx" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>

	<script>
	// Client-side bootstrap for this server-rendered page: publishes the path
	// configuration as a global, loads the two entry bundles, and starts the app
	// on the element that contains this <script> tag.
	// The surrounding braces form a block so the `const` locals below stay out of
	// the global scope.
	{
	// Assigned without a declaration, so this becomes a global variable.
	// NOTE(review): presumably read by the imported bundles under this exact
	// name — those bundles are not visible here; confirm before renaming.
	__sveltekit_5q47hu = {
	assets: "/docs/course/pr_1095/my",
	base: "/docs/course/pr_1095/my",
	env: {}
	};

	// Must be read synchronously: document.currentScript is only set while this
	// script is executing, not inside the later promise callback.
	const element = document.currentScript.parentElement;

	// Two null entries; NOTE(review): presumably one slot per entry in `node_ids`
	// below — confirm against the framework's start() contract.
	const data = [null,null];

	// Load both entry modules in parallel via dynamic import, then hand control
	// to the first module's start() once both have resolved.
	Promise.all([
	import("/docs/course/pr_1095/my/_app/immutable/entry/start.8e25cab6.js"),
	import("/docs/course/pr_1095/my/_app/immutable/entry/app.b12ce275.js")
	]).then(([kit, app]) => {
	// start() receives the app module, the target element, and the initial
	// state captured above.
	kit.start(app, element, {
	node_ids: [0, 23],
	data,
	form: null,
	error: null
	});
	});
	}
	</script>

Xet Storage Details

Size: 112 kB
Xet hash: 600dbc72953941c9dbfcd148b3812aa7876346b0df62dbe2dd226e82d42016e1

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.