Buckets:

hf-doc-build
/

doc-dev

Files

xet

hf-doc-build/doc-dev / course /pr_1095 /my /chapter11 /4.html

rtrm

about 1 month ago

download

raw

66 kB

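	<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;LoRA (Low-Rank Adaptation)&quot;,&quot;local&quot;:&quot;lora-low-rank-adaptation&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;LoRA ကို နားလည်ခြင်း&quot;,&quot;local&quot;:&quot;lora-က-နလညခင&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;LoRA ရဲ့ အဓိက အကျိုးကျေးဇူးများ&quot;,&quot;local&quot;:&quot;lora-ရ-အဓက-အကကဇမ&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;PEFT ဖြင့် LoRA Adapters များကို Loading လုပ်ခြင်း&quot;,&quot;local&quot;:&quot;peft-ဖင-lora-adapters-မက-loading-လပခင&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;trl နှင့် SFTTrainer ကို LoRA ဖြင့် အသုံးပြု၍ LLM ကို Fine-tune လုပ်ခြင်း&quot;,&quot;local&quot;:&quot;trl-နင-sfttrainer-က-lora-ဖင-အသပ-llm-က-fine-tune-လပခင&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;LoRA Configuration&quot;,&quot;local&quot;:&quot;lora-configuration&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;PEFT ဖြင့် TRL ကို အသုံးပြုခြင်း&quot;,&quot;local&quot;:&quot;peft-ဖင-trl-က-အသပခင&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;LoRA Adapters များကို ပေါင်းစပ်ခြင်း&quot;,&quot;local&quot;:&quot;lora-adapters-မက-ပငစပခင&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;ပေါင်းစပ်မှု အကောင်အထည်ဖော်ခြင်း (Merging Implementation)&quot;,&quot;local&quot;:&quot;ပငစပမ-အကငအထညဖခင-merging-implementation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">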
	<link href="/docs/course/pr_1095/my/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/entry/start.8e25cab6.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/scheduler.893fe8c9.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/singletons.ba455c5c.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/index.bce52c8a.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/paths.9a7be869.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/entry/app.b12ce275.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/preload-helper.b5ee8f74.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/index.b1df2166.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/nodes/0.77c840e7.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/each.e59479a4.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/nodes/24.2d604c24.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.e6d31e72.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/CodeBlock.abb4f40e.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/CourseFloatingBanner.c1c08878.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;LoRA (Low-Rank Adaptation)&quot;,&quot;local&quot;:&quot;lora-low-rank-adaptation&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;LoRA ကို နားလည်ခြင်း&quot;,&quot;local&quot;:&quot;lora-က-နလညခင&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;LoRA ရဲ့ အဓိက အကျိုးကျေးဇူးများ&quot;,&quot;local&quot;:&quot;lora-ရ-အဓက-အကကဇမ&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;PEFT ဖြင့် LoRA Adapters များကို Loading လုပ်ခြင်း&quot;,&quot;local&quot;:&quot;peft-ဖင-lora-adapters-မက-loading-လပခင&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;trl နှင့် SFTTrainer ကို LoRA ဖြင့် အသုံးပြု၍ LLM ကို Fine-tune လုပ်ခြင်း&quot;,&quot;local&quot;:&quot;trl-နင-sfttrainer-က-lora-ဖင-အသပ-llm-က-fine-tune-လပခင&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;LoRA Configuration&quot;,&quot;local&quot;:&quot;lora-configuration&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;PEFT ဖြင့် TRL ကို အသုံးပြုခြင်း&quot;,&quot;local&quot;:&quot;peft-ဖင-trl-က-အသပခင&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;LoRA Adapters များကို ပေါင်းစပ်ခြင်း&quot;,&quot;local&quot;:&quot;lora-adapters-မက-ပငစပခင&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;ပေါင်းစပ်မှု အကောင်အထည်ဖော်ခြင်း (Merging Implementation)&quot;,&quot;local&quot;:&quot;ပငစပမ-အကငအထညဖခင-merging-implementation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="flex space-x-1 absolute z-10 right-0 top-0" style=""><a href="https://discuss.huggingface.co/t/chapter-2-questions" target="_blank"><img alt="Ask a Question" class="!m-0" 
src="https://img.shields.io/badge/Ask%20a%20question-ffcb4c.svg?logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgLTEgMTA0IDEwNiI+PGRlZnM+PHN0eWxlPi5jbHMtMXtmaWxsOiMyMzFmMjA7fS5jbHMtMntmaWxsOiNmZmY5YWU7fS5jbHMtM3tmaWxsOiMwMGFlZWY7fS5jbHMtNHtmaWxsOiMwMGE5NGY7fS5jbHMtNXtmaWxsOiNmMTVkMjI7fS5jbHMtNntmaWxsOiNlMzFiMjM7fTwvc3R5bGU+PC9kZWZzPjx0aXRsZT5EaXNjb3Vyc2VfbG9nbzwvdGl0bGU+PGcgaWQ9IkxheWVyXzIiPjxnIGlkPSJMYXllcl8zIj48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik01MS44NywwQzIzLjcxLDAsMCwyMi44MywwLDUxYzAsLjkxLDAsNTIuODEsMCw1Mi44MWw1MS44Ni0uMDVjMjguMTYsMCw1MS0yMy43MSw1MS01MS44N1M4MCwwLDUxLjg3LDBaIi8+PHBhdGggY2xhc3M9ImNscy0yIiBkPSJNNTIuMzcsMTkuNzRBMzEuNjIsMzEuNjIsMCwwLDAsMjQuNTgsNjYuNDFsLTUuNzIsMTguNEwzOS40LDgwLjE3YTMxLjYxLDMxLjYxLDAsMSwwLDEzLTYwLjQzWiIvPjxwYXRoIGNsYXNzPSJjbHMtMyIgZD0iTTc3LjQ1LDMyLjEyYTMxLjYsMzEuNiwwLDAsMS0zOC4wNSw0OEwxOC44Niw4NC44MmwyMC45MS0yLjQ3QTMxLjYsMzEuNiwwLDAsMCw3Ny40NSwzMi4xMloiLz48cGF0aCBjbGFzcz0iY2xzLTQiIGQ9Ik03MS42MywyNi4yOUEzMS42LDMxLjYsMCwwLDEsMzguOCw3OEwxOC44Niw4NC44MiwzOS40LDgwLjE3QTMxLjYsMzEuNiwwLDAsMCw3MS42MywyNi4yOVoiLz48cGF0aCBjbGFzcz0iY2xzLTUiIGQ9Ik0yNi40Nyw2Ny4xMWEzMS42MSwzMS42MSwwLDAsMSw1MS0zNUEzMS42MSwzMS42MSwwLDAsMCwyNC41OCw2Ni40MWwtNS43MiwxOC40WiIvPjxwYXRoIGNsYXNzPSJjbHMtNiIgZD0iTTI0LjU4LDY2LjQxQTMxLjYxLDMxLjYxLDAsMCwxLDcxLjYzLDI2LjI5YTMxLjYxLDMxLjYxLDAsMCwwLTQ5LDM5LjYzbC0zLjc2LDE4LjlaIi8+PC9nPjwvZz48L3N2Zz4="></a> <a href="https://colab.research.google.com/github/huggingface/notebooks/blob/main/course/en/chapter11/section4.ipynb" target="_blank"><img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"></a> </div> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 max-sm:gap-0.5 h-6 max-sm:h-5 px-2 max-sm:px-1.5 
text-[11px] max-sm:text-[9px] font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0"><svg class="w-3 h-3 max-sm:w-2.5 max-sm:h-2.5" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-6 max-sm:h-5 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible w-3 h-3 max-sm:w-2.5 max-sm:h-2.5 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="lora-low-rank-adaptation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#lora-low-rank-adaptation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" 
preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>LoRA (Low-Rank Adaptation)</span></h1> <p data-svelte-h="svelte-3uagbn">Large language models တွေကို fine-tuning လုပ်တာဟာ အရင်းအမြစ် (resource) အများကြီး လိုအပ်တဲ့ လုပ်ငန်းစဉ်တစ်ခု ဖြစ်ပါတယ်။ LoRA ဟာ large language models တွေကို parameters နည်းနည်းလေးနဲ့ fine-tune လုပ်နိုင်စေတဲ့ နည်းပညာတစ်ခု ဖြစ်ပါတယ်။ ၎င်းက attention weights တွေမှာ ပိုသေးငယ်တဲ့ matrices တွေကို ထည့်သွင်းပြီး optimization လုပ်ခြင်းဖြင့် အလုပ်လုပ်ပါတယ်၊ ဒါက သင်ယူရမယ့် parameters တွေကို ၉၀% လောက် လျှော့ချပေးနိုင်ပါတယ်။</p> <h2 class="relative group"><a id="lora-က-နလညခင" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#lora-က-နလညခင"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 
56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>LoRA ကို နားလည်ခြင်း</span></h2> <p data-svelte-h="svelte-iqup2t">LoRA (Low-Rank Adaptation) ဟာ parameter-efficient fine-tuning နည်းပညာတစ်ခု ဖြစ်ပါတယ်။ ဒါက pre-trained model weights တွေကို freeze လုပ်ပြီး၊ trainable rank decomposition matrices တွေကို model ရဲ့ layers တွေထဲကို ထည့်သွင်းပါတယ်။ fine-tuning လုပ်စဉ်မှာ model parameters အားလုံးကို train လုပ်မယ့်အစား၊ LoRA က weight updates တွေကို low-rank decomposition မှတစ်ဆင့် ပိုသေးငယ်တဲ့ matrices တွေအဖြစ် ခွဲထုတ်လိုက်ပါတယ်။ ဒါက model ရဲ့ စွမ်းဆောင်ရည်ကို ထိန်းသိမ်းထားရင်း trainable parameters အရေအတွက်ကို သိသိသာသာ လျှော့ချပေးပါတယ်။ ဥပမာ၊ GPT-3 175B မှာ အသုံးပြုတဲ့အခါ၊ LoRA ဟာ trainable parameters ကို 10,000 ဆ လျှော့ချပေးခဲ့ပြီး GPU memory လိုအပ်ချက်ကို full fine-tuning နဲ့ နှိုင်းယှဉ်ပါက ၃ ဆ လျှော့ချပေးခဲ့ပါတယ်။ LoRA အကြောင်းကို <a href="https://arxiv.org/pdf/2106.09685" rel="nofollow">LoRA paper</a> မှာ ပိုမိုဖတ်ရှုနိုင်ပါတယ်။</p> <p data-svelte-h="svelte-woye07">LoRA က transformer layers တွေမှာ rank decomposition matrices အတွဲတွေကို ထည့်သွင်းခြင်းဖြင့် အလုပ်လုပ်ပါတယ်။ အများအားဖြင့် attention weights တွေကို အာရုံစိုက်ပါတယ်။ Inference လုပ်တဲ့အခါ၊ ဒီ adapter weights တွေကို base model နဲ့ ပေါင်းစပ်နိုင်တာကြောင့် latency overhead လုံးဝ မရှိပါဘူး။ LoRA ဟာ large language models တွေကို သီးခြား tasks တွေ ဒါမှမဟုတ် domains တွေနဲ့ လိုက်လျောညီထွေဖြစ်အောင် ပြုလုပ်ရာမှာ အရင်းအမြစ်လိုအပ်ချက်တွေကို စီမံခန့်ခွဲနိုင်အောင် ကူညီပေးတာကြောင့် အထူးအသုံးဝင်ပါတယ်။</p> <h2 class="relative group"><a id="lora-ရ-အဓက-အကကဇမ" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#lora-ရ-အဓက-အကကဇမ"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" 
viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>LoRA ရဲ့ အဓိက အကျိုးကျေးဇူးများ</span></h2> <p data-svelte-h="svelte-e3c2yz">၁။ <strong>Memory ထိရောက်မှု (Memory Efficiency)</strong>:</p> <ul data-svelte-h="svelte-vqbbak"><li>GPU memory ထဲမှာ adapter parameters တွေကိုသာ သိမ်းဆည်းပါတယ်။</li> <li>Base model weights တွေဟာ freeze လုပ်ထားပြီး lower precision နဲ့ load လုပ်နိုင်ပါတယ်။</li> <li>Consumer GPUs တွေမှာတောင် large models တွေကို fine-tune လုပ်နိုင်စေပါတယ်။</li></ul> <p data-svelte-h="svelte-tklckz">၂။ <strong>Training Features များ</strong>:</p> <ul data-svelte-h="svelte-1bddzha"><li>PEFT/LoRA ကို minimal setup နဲ့ တိုက်ရိုက်ပေါင်းစပ်ထားပါတယ်။</li> <li>Memory ထိရောက်မှု ပိုကောင်းအောင် QLoRA (Quantized LoRA) ကို ထောက်ပံ့ပေးပါတယ်။</li></ul> <p data-svelte-h="svelte-1ta0ztl">၃။ <strong>Adapter စီမံခန့်ခွဲမှု (Adapter Management)</strong>:</p> <ul data-svelte-h="svelte-1cw0pz9"><li>Checkpoints လုပ်စဉ် adapter weight တွေကို သိမ်းဆည်းပါတယ်။</li> <li>Adapters တွေကို base model ထဲသို့ ပြန်လည်ပေါင်းစပ်နိုင်တဲ့ features တွေ ပါဝင်ပါတယ်။</li></ul> <h2 class="relative group"><a id="peft-ဖင-lora-adapters-မက-loading-လပခင" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#peft-ဖင-lora-adapters-မက-loading-လပခင"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" 
role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PEFT ဖြင့် LoRA Adapters များကို Loading လုပ်ခြင်း</span></h2> <p data-svelte-h="svelte-fpi32k"><a href="https://github.com/huggingface/peft" rel="nofollow">PEFT</a> ဟာ LoRA အပါအဝင် PEFT နည်းလမ်းတွေကို loading လုပ်ပြီး စီမံခန့်ခွဲဖို့အတွက် ပေါင်းစည်းထားတဲ့ interface တစ်ခုကို ပံ့ပိုးပေးတဲ့ library တစ်ခု ဖြစ်ပါတယ်။ ၎င်းက မတူညီတဲ့ PEFT နည်းလမ်းတွေကြား အလွယ်တကူ load လုပ်ပြီး ပြောင်းလဲနိုင်စေတာကြောင့် မတူညီတဲ့ fine-tuning နည်းပညာတွေကို စမ်းသပ်ဖို့ ပိုမိုလွယ်ကူစေပါတယ်။</p> <p data-svelte-h="svelte-1x6avd5">Adapters တွေကို pretrained model တစ်ခုပေါ်မှာ <code>load_adapter()</code> နဲ့ load လုပ်နိုင်ပါတယ်။ ဒါက weights တွေ ပေါင်းစပ်မထားတဲ့ မတူညီတဲ့ adapters တွေကို စမ်းသပ်ဖို့ အသုံးဝင်ပါတယ်။ <code>set_adapter()</code> function နဲ့ active adapter weights တွေကို သတ်မှတ်ပါ။ base model ကို ပြန်လိုချင်ရင် LoRA modules အားလုံးကို unload လုပ်ဖို့ <code>unload()</code> ကို အသုံးပြုနိုင်ပါတယ်။ ဒါက မတူညီတဲ့ task-specific weights တွေကြား ပြောင်းလဲဖို့ လွယ်ကူစေပါတယ်။</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" 
focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> PeftModel, PeftConfig

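	<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM

	config = PeftConfig.from_pretrained(<span class="hljs-string">"ybelkada/opt-350m-lora"</span>)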
	model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
	lora_model = PeftModel.from_pretrained(model, <span class="hljs-string">"ybelkada/opt-350m-lora"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-bkhm6l"><img src="https://github.com/huggingface/smol-course/raw/main/3_parameter_efficient_finetuning/images/lora_adapter.png" alt="lora_load_adapter"></p> <h2 class="relative group"><a id="trl-နင-sfttrainer-က-lora-ဖင-အသပ-llm-က-fine-tune-လပခင" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trl-နင-sfttrainer-က-lora-ဖင-အသပ-llm-က-fine-tune-လပခင"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>trl နှင့် SFTTrainer ကို LoRA ဖြင့် အသုံးပြု၍ LLM ကို Fine-tune လုပ်ခြင်း</span></h2> <p data-svelte-h="svelte-q8yb2f"><code>trl</code> မှ <a href="https://huggingface.co/docs/trl/sft_trainer" rel="nofollow">SFTTrainer</a> ဟာ <a href="https://huggingface.co/docs/peft/en/index" rel="nofollow">PEFT</a> library မှတစ်ဆင့် LoRA adapters တွေနဲ့ ပေါင်းစပ်ထားပါတယ်။ ဒါက SFT နဲ့ လုပ်ခဲ့သလိုပဲ model ကို fine-tune လုပ်နိုင်တယ်လို့ ဆိုလိုတာပါ။ ဒါပေမယ့် train လုပ်ဖို့ လိုအပ်တဲ့ parameters အရေအတွက်ကို လျှော့ချဖို့ LoRA ကို အသုံးပြုတာပါ။</p> <p data-svelte-h="svelte-d2x65b">ကျွန်တော်တို့ရဲ့ 
ဥပမာမှာ PEFT က <code>LoraConfig</code> class ကို အသုံးပြုပါမယ်။ တည်ဆောက်မှုမှာ configuration steps အနည်းငယ်သာ လိုအပ်ပါတယ်။</p> <p data-svelte-h="svelte-rbp21i">၁။ LoRA configuration (rank, alpha, dropout) ကို သတ်မှတ်ပါ။
	၂။ PEFT config နဲ့ SFTTrainer ကို ဖန်တီးပါ။
	၃။ Adapter weights တွေကို train လုပ်ပြီး သိမ်းဆည်းပါ။</p> <h2 class="relative group"><a id="lora-configuration" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#lora-configuration"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>LoRA Configuration</span></h2> <p data-svelte-h="svelte-1e5gcm8">LoRA configuration နဲ့ အဓိက parameters တွေကို တစ်ဆင့်ချင်းစီ လေ့လာကြည့်ရအောင်။</p> <table data-svelte-h="svelte-awmtw4"><thead><tr><th>Parameter</th> <th>ဖော်ပြချက်</th></tr></thead> <tbody><tr><td><code>r</code> (rank)</td> <td>Weight updates အတွက် အသုံးပြုသော low-rank matrices ၏ dimension။ အများအားဖြင့် ၄-၃၂ ကြား။ တန်ဖိုးနည်းလေ compression ပိုကောင်းလေ၊ သို့သော် expressiveness နည်းပါးနိုင်ခြေရှိလေ။</td></tr> <tr><td><code>lora_alpha</code></td> <td>LoRA layers များအတွက် scaling factor။ ပုံမှန်အားဖြင့် rank တန်ဖိုး၏ ၂ ဆ။ တန်ဖိုးများလေ adaptation effects ပိုအားကောင်းလေ။</td></tr> <tr><td><code>lora_dropout</code></td> <td>LoRA layers များအတွက် dropout probability။ ပုံမှန်အားဖြင့် ၀.၀၅-၀.၁။ တန်ဖိုးများလေ training လုပ်စဉ် overfitting ကို ကာကွယ်ရန် ပိုမိုအထောက်အကူပြုလေ။</td></tr> <tr><td><code>bias</code></td> <td>bias terms များကို 
training လုပ်ခြင်းကို ထိန်းချုပ်သည်။ “none”, “all”, သို့မဟုတ် “lora_only” တို့ ဖြစ်နိုင်သည်။ memory efficiency အတွက် “none” က အသုံးအများဆုံး။</td></tr> <tr><td><code>target_modules</code></td> <td>LoRA ကို မည်သည့် model modules များတွင် အသုံးပြုမည်ကို သတ်မှတ်သည်။ “all-linear” သို့မဟုတ် “q_proj,v_proj” ကဲ့သို့ သီးခြား modules များ ဖြစ်နိုင်သည်။ modules များလေ adaptability ပိုများလေ၊ သို့သော် memory အသုံးပြုမှု ပိုများလေ။</td></tr></tbody></table> <blockquote class="tip" data-svelte-h="svelte-1rz5s22"><p>PEFT နည်းလမ်းတွေကို အကောင်အထည်ဖော်တဲ့အခါ၊ LoRA အတွက် small rank values (၄-၈) ကနေ စတင်ပြီး training loss ကို စောင့်ကြည့်ပါ။ overfitting ကို ကာကွယ်ဖို့ validation sets တွေကို အသုံးပြုပြီး၊ ဖြစ်နိုင်ရင် full fine-tuning baselines တွေနဲ့ ရလဒ်တွေကို နှိုင်းယှဉ်ပါ။ နည်းလမ်းအမျိုးမျိုးရဲ့ ထိရောက်မှုက task အလိုက် ကွာခြားနိုင်တာကြောင့် စမ်းသပ်လေ့လာခြင်းက အဓိကပါပဲ။</p></blockquote> <h2 class="relative group"><a id="peft-ဖင-trl-က-အသပခင" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#peft-ဖင-trl-က-အသပခင"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PEFT ဖြင့် TRL ကို အသုံးပြုခြင်း</span></h2> <p data-svelte-h="svelte-1ytwktk">Memory 
လိုအပ်ချက်များကို လျှော့ချရန်အတွက် PEFT နည်းလမ်းများကို TRL နှင့် ပေါင်းစပ်ပြီး fine-tuning လုပ်နိုင်ပါတယ်။ Model ကို load လုပ်တဲ့အခါ <code>LoraConfig</code> ကို ပေးနိုင်ပါတယ်။</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig

	<span class="hljs-comment"># <span class="hljs-doctag">TODO:</span> LoRA parameters များကို Configure လုပ်ပါ။</span>
	<span class="hljs-comment"># r: LoRA update matrices အတွက် rank dimension (သေးငယ်လေ compression ပိုကောင်းလေ)</span>
	rank_dimension = <span class="hljs-number">6</span>
	<span class="hljs-comment"># lora_alpha: LoRA layers များအတွက် scaling factor (များလေ adaptation ပိုအားကောင်းလေ)</span>
	lora_alpha = <span class="hljs-number">8</span>
	<span class="hljs-comment"># lora_dropout: LoRA layers များအတွက် dropout probability (overfitting ကို ကာကွယ်ရန် ကူညီ)</span>
	lora_dropout = <span class="hljs-number">0.05</span>

	peft_config = LoraConfig(
	r=rank_dimension, <span class="hljs-comment"># Rank dimension - ပုံမှန်အားဖြင့် 4-32 ကြား</span>
	lora_alpha=lora_alpha, <span class="hljs-comment"># LoRA scaling factor - ပုံမှန်အားဖြင့် rank ၏ 2 ဆ</span>
	lora_dropout=lora_dropout, <span class="hljs-comment"># LoRA layers များအတွက် dropout probability</span>
	bias=<span class="hljs-string">"none"</span>, <span class="hljs-comment"># LoRA အတွက် bias အမျိုးအစား။ သက်ဆိုင်ရာ biases များကို training လုပ်စဉ် update လုပ်ပါမည်။</span>
	target_modules=<span class="hljs-string">"all-linear"</span>, <span class="hljs-comment"># LoRA ကို မည်သည့် modules များတွင် အသုံးပြုရန်</span>
	task_type=<span class="hljs-string">"CAUSAL_LM"</span>, <span class="hljs-comment"># Model architecture အတွက် task အမျိုးအစား</span>
	)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-v85ird">အထက်မှာ၊ model ကို မှန်ကန်တဲ့ device ကို အလိုအလျောက် assign လုပ်ဖို့ <code>device_map="auto"</code> ကို အသုံးပြုခဲ့ပါတယ်။ <code>device_map={"": device_index}</code> ကို အသုံးပြုပြီး model ကို သီးခြား device တစ်ခုကို ကိုယ်တိုင် assign လုပ်နိုင်ပါတယ်။</p> <p data-svelte-h="svelte-1kzy85n">LoRA configuration နဲ့ <code>SFTTrainer</code> ကိုလည်း သတ်မှတ်ဖို့ လိုအပ်ပါလိမ့်မယ်။</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># LoRA configuration ဖြင့် SFTTrainer ကို ဖန်တီးပါ။</span>
	trainer = SFTTrainer(
	model=model,
	args=args,
	train_dataset=dataset[<span class="hljs-string">"train"</span>],
	peft_config=peft_config, <span class="hljs-comment"># LoRA configuration</span>
	<span class="hljs-comment"># Maximum sequence length ကို SFTTrainer သို့ တိုက်ရိုက်မပေးဘဲ SFTConfig (args) ထဲတွင် သတ်မှတ်ပါ။</span>
	processing_class=tokenizer,
	)<!-- HTML_TAG_END --></pre></div> <blockquote class="tip" data-svelte-h="svelte-17qn4ft"><p>✏️ <strong>စမ်းသပ်ကြည့်ပါ။</strong> ယခင်အပိုင်းက သင် fine-tune လုပ်ထားတဲ့ model ကို အခြေခံပြီး LoRA နဲ့ ထပ်ပြီး fine-tune လုပ်ကြည့်ပါ။ အထက်မှာ သတ်မှတ်ခဲ့တဲ့ LoRA configuration ကို အသုံးပြုပြီး <code>deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B</code> model ကို fine-tune လုပ်ဖို့ <code>HuggingFaceTB/smoltalk</code> dataset ကို အသုံးပြုပါ။</p></blockquote> <h2 class="relative group"><a id="lora-adapters-မက-ပငစပခင" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#lora-adapters-မက-ပငစပခင"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>LoRA Adapters များကို ပေါင်းစပ်ခြင်း</span></h2> <p data-svelte-h="svelte-19qi6as">LoRA ဖြင့် train လုပ်ပြီးနောက်၊ ပိုမိုလွယ်ကူစွာ deploy လုပ်နိုင်ရန် adapter weights များကို base model ထဲသို့ ပြန်လည်ပေါင်းစပ်ချင်ပါလိမ့်မယ်။ ဒါက ပေါင်းစပ်ထားတဲ့ weights တွေပါဝင်တဲ့ single model တစ်ခုကို ဖန်တီးပေးပြီး inference လုပ်စဉ် adapters တွေကို သီးခြား load လုပ်ဖို့ မလိုအပ်တော့ပါဘူး။</p> <p data-svelte-h="svelte-1kirhj5">ပေါင်းစပ်ခြင်းလုပ်ငန်းစဉ်မှာ memory စီမံခန့်ခွဲမှုနဲ့ precision ကို ဂရုစိုက်ဖို့ လိုအပ်ပါတယ်။ base 
model နဲ့ adapter weights နှစ်ခုလုံးကို တစ်ပြိုင်နက်တည်း load လုပ်ရမှာဖြစ်လို့၊ လုံလောက်တဲ့ GPU/CPU memory ရှိဖို့ သေချာပါစေ။ <code>transformers</code> ထဲက <code>device_map="auto"</code> ကို အသုံးပြုရင် သင့် hardware ပေါ်မူတည်ပြီး model အတွက် မှန်ကန်တဲ့ device ကို ရှာဖွေပေးပါလိမ့်မယ်။</p> <p data-svelte-h="svelte-1pnf3j3">လုပ်ငန်းစဉ်တစ်လျှောက်လုံး consistent precision (ဥပမာ- float16) ကို ထိန်းသိမ်းပြီး၊ training လုပ်စဉ် အသုံးပြုခဲ့တဲ့ precision နဲ့ ကိုက်ညီအောင် လုပ်ဆောင်ကာ၊ ပေါင်းစပ်ထားတဲ့ model ကို deployment အတွက် တူညီတဲ့ format နဲ့ သိမ်းဆည်းပါ။</p> <h2 class="relative group"><a id="ပငစပမ-အကငအထညဖခင-merging-implementation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#ပငစပမ-အကငအထညဖခင-merging-implementation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ပေါင်းစပ်မှု အကောင်အထည်ဖော်ခြင်း (Merging Implementation)</span></h2> <p data-svelte-h="svelte-1fhkptk">LoRA adapter တစ်ခုကို train လုပ်ပြီးနောက်၊ adapter weights တွေကို base model ထဲသို့ ပြန်လည်ပေါင်းစပ်နိုင်ပါတယ်။ ဒါကို ဘယ်လိုလုပ်ရမလဲဆိုတာ အောက်ပါအတိုင်းပါ။</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex 
items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
	<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer
	<span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> PeftModel

	<span class="hljs-comment"># 1. Base model ကို load လုပ်ပါ</span>
	base_model = AutoModelForCausalLM.from_pretrained(
	<span class="hljs-string">"base_model_name"</span>, torch_dtype=torch.float16, device_map=<span class="hljs-string">"auto"</span>
	)

	<span class="hljs-comment"># 2. Adapter ပါဝင်သော PEFT model ကို load လုပ်ပါ</span>
	peft_model = PeftModel.from_pretrained(
	base_model, <span class="hljs-string">"path/to/adapter"</span>, torch_dtype=torch.float16
	)

	<span class="hljs-comment"># 3. Adapter weights များကို base model နှင့် ပေါင်းစပ်ပါ။</span>
	merged_model = peft_model.merge_and_unload()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-l4agdc">saved model မှာ size ကွာခြားမှုတွေ တွေ့ရရင်၊ tokenizer ကိုပါ save လုပ်ထားခြင်းရှိမရှိ သေချာအောင် လုပ်ပါ။</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Model နဲ့ tokenizer နှစ်ခုလုံးကို save လုပ်ပါ</span>
	tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"base_model_name"</span>)
	merged_model.save_pretrained(<span class="hljs-string">"path/to/save/merged_model"</span>)
	tokenizer.save_pretrained(<span class="hljs-string">"path/to/save/merged_model"</span>)<!-- HTML_TAG_END --></pre></div> <blockquote class="tip" data-svelte-h="svelte-13peb1o"><p>✏️ <strong>စမ်းသပ်ကြည့်ပါ။</strong> adapter weights တွေကို base model ထဲသို့ ပြန်လည်ပေါင်းစပ်ကြည့်ပါ။ အထက်မှာ သတ်မှတ်ခဲ့တဲ့ LoRA configuration ကို အသုံးပြုပြီး <code>deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B</code> model ကို fine-tune လုပ်ဖို့ <code>HuggingFaceTB/smoltalk</code> dataset ကို အသုံးပြုပါ။</p></blockquote> <h1 class="relative group"><a id="အရငအမစမ-resources" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#အရငအမစမ-resources"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>အရင်းအမြစ်များ (Resources)</span></h1> <ul data-svelte-h="svelte-1rurwvs"><li><a href="https://arxiv.org/pdf/2106.09685" rel="nofollow">LoRA: Low-Rank Adaptation of Large Language Models</a></li> <li><a href="https://huggingface.co/docs/peft" rel="nofollow">PEFT Documentation</a></li> <li><a href="https://huggingface.co/blog/peft" rel="nofollow">Hugging Face blog post on PEFT</a></li></ul> <h2 class="relative group"><a id="ဝဟရ-ရငလငခက-glossary" class="header-link block 
pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#ဝဟရ-ရငလငခက-glossary"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ဝေါဟာရ ရှင်းလင်းချက် (Glossary)</span></h2> <ul data-svelte-h="svelte-svhn0n"><li><strong>LoRA (Low-Rank Adaptation)</strong>: Transformer မော်ဒယ်များကဲ့သို့သော large models များကို fine-tuning လုပ်ရာတွင် ထိရောက်မှုရှိစေရန်အတွက် model ၏ layers တွေမှာ low-rank matrices တွေကို ထပ်ထည့်သည့် နည်းပညာ။ ၎င်းသည် memory အသုံးပြုမှုကို သိသိသာသာ လျှော့ချနိုင်သည်။</li> <li><strong>Fine-tuning</strong>: ကြိုတင်လေ့ကျင့်ထားပြီးသား (pre-trained) မော်ဒယ်တစ်ခုကို သီးခြားလုပ်ငန်းတစ်ခု (specific task) အတွက် အနည်းငယ်သော ဒေတာနဲ့ ထပ်မံလေ့ကျင့်ပေးခြင်းကို ဆိုလိုပါတယ်။</li> <li><strong>Large Language Models (LLMs)</strong>: လူသားဘာသာစကားကို နားလည်ပြီး ထုတ်လုပ်ပေးနိုင်တဲ့ အလွန်ကြီးမားတဲ့ Artificial Intelligence (AI) မော်ဒယ်တွေ ဖြစ်ပါတယ်။</li> <li><strong>Resource Intensive</strong>: ကွန်ပျူတာ၏ စွမ်းအား (CPU/GPU) နှင့် မှတ်ဉာဏ် (RAM/VRAM) များစွာ လိုအပ်သော လုပ်ငန်းစဉ်။</li> <li><strong>Parameters</strong>: မော်ဒယ်၏ သင်ယူနိုင်သော အစိတ်အပိုင်းများ (weights နှင့် biases)။</li> <li><strong>Attention Weights</strong>: Transformer model တွင် input sequence 
တစ်ခုအတွင်းရှိ token များကြား ဆက်နွယ်မှု၏ အရေးပါပုံကို ဖော်ပြသော weights များ။</li> <li><strong>Optimization</strong>: အကောင်းဆုံးရလဒ်ရရှိရန် parameters များကို ချိန်ညှိခြင်းလုပ်ငန်းစဉ်။</li> <li><strong>Trainable Parameters</strong>: လေ့ကျင့်မှုလုပ်ငန်းစဉ်အတွင်း ပြောင်းလဲနိုင်သော မော်ဒယ်၏ parameters အရေအတွက်။</li> <li><strong>Parameter-Efficient Fine-Tuning (PEFT)</strong>: Large models များကို fine-tuning လုပ်ရာတွင် trainable parameters အရေအတွက်ကို သိသိသာသာ လျှော့ချသည့် နည်းလမ်းများ။</li> <li><strong>Pre-trained Model Weights</strong>: မော်ဒယ်ကို မူလ pre-training လုပ်ငန်းစဉ်မှ သင်ယူထားသော weights များ။</li> <li><strong>Freeze</strong>: Model ၏ အချို့သော layers များ၏ weights များကို training လုပ်စဉ် မပြောင်းလဲစေရန် ပိတ်ဆို့ထားခြင်း။</li> <li><strong>Rank Decomposition Matrices</strong>: သင်္ချာပိုင်းဆိုင်ရာ matrix တစ်မျိုးဖြစ်ပြီး matrix တစ်ခုကို low-rank matrices အဖြစ် ခွဲထုတ်ခြင်းနည်းလမ်း။</li> <li><strong>Model’s Layers</strong>: Neural network model တစ်ခု၏ အဆင့်များ။</li> <li><strong>Weight Updates</strong>: Training လုပ်ငန်းစဉ်အတွင်း model weights များ ပြောင်းလဲခြင်း။</li> <li><strong>GPT-3 175B</strong>: OpenAI မှ ထုတ်လုပ်ထားသော parameters ဘီလီယံ ၁၇၅ ခုပါဝင်သည့် အလွန်ကြီးမားသော Large Language Model (LLM)။</li> <li><strong>GPU Memory Requirements</strong>: GPU တွင် model တစ်ခုကို run ရန် လိုအပ်သော မှတ်ဉာဏ်ပမာဏ။</li> <li><strong>Full Fine-tuning</strong>: model ၏ parameters အားလုံးကို training လုပ်ခြင်း။</li> <li><strong>Transformer Layers</strong>: Transformer architecture တွင် ပါဝင်သော အဓိက layers များ။</li> <li><strong>Adapter Weights</strong>: LoRA မှ ထည့်သွင်းထားသော low-rank matrices ၏ weights များ။</li> <li><strong>Inference</strong>: လေ့ကျင့်ပြီးသား Artificial Intelligence (AI) မော်ဒယ်တစ်ခုကို အသုံးပြုပြီး input data ကနေ ခန့်မှန်းချက်တွေ ဒါမှမဟုတ် output တွေကို ထုတ်လုပ်တဲ့ လုပ်ငန်းစဉ်။</li> <li><strong>Base Model</strong>: Fine-tuning မလုပ်ရသေးသော သို့မဟုတ် adapter weights များ မပေါင်းစပ်ရသေးသော မူရင်း pre-trained model။</li> <li><strong>Latency 
Overhead</strong>: လုပ်ငန်းစဉ်တစ်ခု လုပ်ဆောင်ရန်အတွက် အချိန်ပိုကုန်ကျခြင်း။</li> <li><strong>Domains</strong>: သီးခြားနယ်ပယ်များ (ဥပမာ- medical domain, legal domain)။</li> <li><strong>Memory Efficiency</strong>: ကွန်ပျူတာ၏ RAM အသုံးပြုမှုကို လျှော့ချနိုင်ခြင်း။</li> <li><strong>Lower Precision</strong>: floating-point numbers များကို bits နည်းပါးစွာ အသုံးပြု၍ ကိုယ်စားပြုခြင်း (ဥပမာ- float16)။ ၎င်းသည် memory ကို ချွေတာသည်။</li> <li><strong>Consumer GPUs</strong>: သုံးစွဲသူများအတွက် ထုတ်လုပ်ထားသော GPU များ (ဥပမာ- NVIDIA GeForce, AMD Radeon)။</li> <li><strong>PEFT Integration</strong>: PEFT library ဖြင့် အခြား frameworks များ သို့မဟုတ် libraries များကို ပေါင်းစပ်ခြင်း။</li> <li><strong>QLoRA (Quantized LoRA)</strong>: LoRA ကို quantization နည်းပညာဖြင့် ပေါင်းစပ်ထားခြင်း။ ၎င်းသည် model weights များကို ပိုမိုနိမ့်သော precision ဖြင့် သိမ်းဆည်းခြင်းဖြင့် memory efficiency ကို ပိုမိုတိုးတက်စေသည်။</li> <li><strong>Adapter Parameters</strong>: LoRA adapters တွင် ပါဝင်သော parameters များ။</li> <li><strong>Checkpoints</strong>: မော်ဒယ်၏ weights များနှင့် အခြားဖွဲ့စည်းပုံများ (configuration) ကို သတ်မှတ်ထားသော အချိန်တစ်ခုတွင် သိမ်းဆည်းထားခြင်း။</li> <li><strong><code>load_adapter()</code></strong>: PEFT library မှ method တစ်ခုဖြစ်ပြီး adapter weights များကို model ထဲသို့ load လုပ်ရန်။</li> <li><strong><code>set_adapter()</code></strong>: PEFT library မှ method တစ်ခုဖြစ်ပြီး active adapter weights များကို သတ်မှတ်ရန်။</li> <li><strong><code>unload()</code></strong>: PEFT library မှ method တစ်ခုဖြစ်ပြီး LoRA modules အားလုံးကို model မှ ဖယ်ရှားရန်။</li> <li><strong><code>PeftModel</code></strong>: PEFT library မှ class တစ်ခုဖြစ်ပြီး model တွင် adapter layers များကို ထည့်သွင်းထားသော model ကို ကိုယ်စားပြုသည်။</li> <li><strong><code>PeftConfig</code></strong>: PEFT library မှ class တစ်ခုဖြစ်ပြီး PEFT နည်းလမ်းများ၏ configuration များကို သတ်မှတ်ရန်။</li> <li><strong><code>AutoModelForCausalLM</code></strong>: Hugging Face Transformers library မှ causal language models များကို အလိုအလျောက် 
load လုပ်ရန် class။</li> <li><strong><code>LoraConfig</code></strong>: PEFT library မှ LoRA နည်းလမ်းအတွက် configuration class။</li> <li><strong><code>r</code> (rank dimension)</strong>: LoRA update matrices များအတွက် rank ၏ dimension။</li> <li><strong><code>lora_alpha</code> (scaling factor)</strong>: LoRA layers များအတွက် scaling factor။</li> <li><strong><code>lora_dropout</code> (dropout probability)</strong>: LoRA layers များအတွက် dropout probability။</li> <li><strong><code>bias</code></strong>: LoRA တွင် bias terms များကို train လုပ်ခြင်းရှိမရှိ ထိန်းချုပ်သော parameter။</li> <li><strong><code>target_modules</code></strong>: LoRA ကို မည်သည့် model modules များတွင် အသုံးပြုမည်ကို သတ်မှတ်သော parameter။</li> <li><strong><code>task_type</code></strong>: model architecture အတွက် task အမျိုးအစား (ဥပမာ- “CAUSAL_LM”)။</li> <li><strong>CAUSAL_LM (Causal Language Model)</strong>: စာသား sequence တစ်ခု၏ နောက်ဆက်တွဲ token (စကားလုံး) ကို ခန့်မှန်းရန် လေ့ကျင့်ထားသော language model အမျိုးအစား။</li> <li><strong>Training Loss</strong>: Training လုပ်စဉ် model ၏ loss တန်ဖိုး။</li> <li><strong>Validation Sets</strong>: Training လုပ်နေစဉ် model ၏ စွမ်းဆောင်ရည်ကို အကဲဖြတ်ရန် အသုံးပြုသော dataset အပိုင်း။</li> <li><strong>Overfitting</strong>: Model တစ်ခုသည် training data ကို အလွန်အမင်း သင်ယူထားခြင်းကြောင့် မမြင်ဖူးသေးသော data များပေါ်တွင် စွမ်းဆောင်ရည် နည်းပါးခြင်း။</li> <li><strong>Full Fine-tuning Baselines</strong>: model ၏ parameters အားလုံးကို fine-tuning လုပ်ထားသော model ၏ စွမ်းဆောင်ရည်ကို နှိုင်းယှဉ်ရန်အတွက် အခြေခံရလဒ်များ။</li> <li><strong><code>device_map="auto"</code></strong>: Hugging Face Transformers တွင် model ကို GPU သို့မဟုတ် CPU သို့ အလိုအလျောက် assign လုပ်ရန်။</li> <li><strong><code>device_map={"": device_index}</code></strong>: Model ကို သီးခြား device index တစ်ခုသို့ ကိုယ်တိုင် assign လုပ်ရန်။</li> <li><strong><code>SFTTrainer</code></strong>: TRL library မှ <code>Trainer</code> class ၏ extension တစ်ခုဖြစ်ပြီး Supervised Fine-Tuning လုပ်ငန်းစဉ်ကို 
ရိုးရှင်းစေသည်။</li> <li><strong><code>max_seq_length</code></strong>: Model ၏ input sequence ၏ အမြင့်ဆုံး အရှည်။</li> <li><strong><code>processing_class</code></strong>: Tokenizer class ကို ရည်ညွှန်းသည်။</li> <li><strong><code>HuggingFaceTB/smoltalk</code></strong>: Hugging Face Hub ပေါ်ရှိ dataset တစ်ခု၏ identifier။</li> <li><strong><code>deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B</code></strong>: Hugging Face Hub ပေါ်ရှိ model တစ်ခု၏ identifier။</li> <li><strong>Deployment</strong>: trained model တစ်ခုကို အသုံးပြုသူများ လက်လှမ်းမီနိုင်သော ပတ်ဝန်းကျင်သို့ ဖြန့်ကျက်ခြင်း။</li> <li><strong>Memory Management</strong>: ကွန်ပျူတာ၏ မှတ်ဉာဏ်ကို ထိရောက်စွာ အသုံးပြုခြင်းနှင့် စီမံခန့်ခွဲခြင်း။</li> <li><strong>Precision</strong>: floating-point numbers များကို ကိုယ်စားပြုရာတွင် အသုံးပြုသော bits အရေအတွက် (ဥပမာ- float16, float32)။</li> <li><strong><code>torch_dtype=torch.float16</code></strong>: PyTorch တွင် tensor များ၏ data type ကို float16 အဖြစ် သတ်မှတ်ရန်။</li> <li><strong><code>peft_model.merge_and_unload()</code></strong>: PEFT library မှ method တစ်ခုဖြစ်ပြီး adapter weights များကို base model ထဲသို့ ပေါင်းစပ်ပြီး adapter modules များကို unload လုပ်သည်။</li> <li><strong><code>save_pretrained()</code></strong>: Hugging Face Transformers library မှ method တစ်ခုဖြစ်ပြီး model သို့မဟုတ် tokenizer ကို disk ထဲသို့ သိမ်းဆည်းရန်။</li> <li><strong><code>AutoTokenizer</code></strong>: Hugging Face Transformers library မှာ ပါဝင်တဲ့ class တစ်ခုဖြစ်ပြီး မော်ဒယ်အမည်ကို အသုံးပြုပြီး သက်ဆိုင်ရာ tokenizer ကို အလိုအလျောက် load လုပ်ပေးသည်။</li></ul> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/course/blob/main/chapters/my/chapter11/4.mdx" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>

	<script>
	// SvelteKit client bootstrap: publishes the path configuration, then loads
	// the runtime and app entry modules and starts the app inside the element
	// that contains this <script> tag.
	{
	// Assigned without var/let/const, so in this classic (non-module) script it
	// becomes a global. NOTE(review): presumably the runtime chunks read it by
	// this generated name — confirm before renaming or scoping it.
	__sveltekit_5q47hu = {
	assets: "/docs/course/pr_1095/my",
	base: "/docs/course/pr_1095/my",
	env: {}
	};

	// Captured synchronously: document.currentScript is only set while this
	// script is executing, so it would be null inside the .then() callback.
	const element = document.currentScript.parentElement;

	// NOTE(review): looks like one entry per id in node_ids below (both null
	// here) — confirm against kit.start.
	const data = [null,null];

	// Fetch both entry modules in parallel; start only once both have loaded.
	// There is no .catch, so a failed import surfaces as an unhandled rejection.
	Promise.all([
	import("/docs/course/pr_1095/my/_app/immutable/entry/start.8e25cab6.js"),
	import("/docs/course/pr_1095/my/_app/immutable/entry/app.b12ce275.js")
	]).then(([kit, app]) => {
	kit.start(app, element, {
	node_ids: [0, 24],
	data,
	form: null,
	error: null
	});
	});
	}
	</script>

Xet Storage Details

Size: 66 kB
Xet hash: ac9b8efd31111d0f2a23f171d55a30a4d6ff47f6a5c5e47a8124ee1ac9992c92

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.