Buckets:

hf-doc-build
/

doc-dev

Files

xet

hf-doc-build/doc-dev / course /pr_1095 /my /chapter11 /1.html

rtrm

about 1 month ago

download

raw

31.7 kB

	<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{"title":"Supervised Fine-Tuning","local":"supervised-fine-tuning","sections":[{"title":"၁။ Chat Templates များ","local":"၁-chat-templates-မ","sections":[],"depth":2},{"title":"၂။ Supervised Fine-Tuning","local":"၂-supervised-fine-tuning","sections":[],"depth":2},{"title":"၃။ LoRA (Low-Rank Adaptation)","local":"၃-lora-low-rank-adaptation","sections":[],"depth":2},{"title":"၄။ Evaluation","local":"၄-evaluation","sections":[],"depth":2},{"title":"References","local":"references","sections":[],"depth":2},{"title":"ဝေါဟာရ ရှင်းလင်းချက် (Glossary)","local":"ဝဟရ-ရငလငခက-glossary","sections":[],"depth":2}],"depth":1}">
	<link href="/docs/course/pr_1095/my/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/entry/start.8e25cab6.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/scheduler.893fe8c9.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/singletons.ba455c5c.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/index.bce52c8a.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/paths.9a7be869.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/entry/app.b12ce275.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/preload-helper.b5ee8f74.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/index.b1df2166.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/nodes/0.77c840e7.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/each.e59479a4.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/nodes/21.c63bde04.js">
	<link rel="modulepreload" href="/docs/course/pr_1095/my/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.e6d31e72.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Supervised Fine-Tuning","local":"supervised-fine-tuning","sections":[{"title":"၁။ Chat Templates များ","local":"၁-chat-templates-မ","sections":[],"depth":2},{"title":"၂။ Supervised Fine-Tuning","local":"၂-supervised-fine-tuning","sections":[],"depth":2},{"title":"၃။ LoRA (Low-Rank Adaptation)","local":"၃-lora-low-rank-adaptation","sections":[],"depth":2},{"title":"၄။ Evaluation","local":"၄-evaluation","sections":[],"depth":2},{"title":"References","local":"references","sections":[],"depth":2},{"title":"ဝေါဟာရ ရှင်းလင်းချက် (Glossary)","local":"ဝဟရ-ရငလငခက-glossary","sections":[],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 max-sm:gap-0.5 h-6 max-sm:h-5 px-2 max-sm:px-1.5 text-[11px] max-sm:text-[9px] font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0"><svg class="w-3 h-3 max-sm:w-2.5 max-sm:h-2.5" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-6 max-sm:h-5 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible w-3 h-3 max-sm:w-2.5 max-sm:h-2.5 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="supervised-fine-tuning" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#supervised-fine-tuning"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Supervised Fine-Tuning</span></h1> <p data-svelte-h="svelte-araf28"><a href="/course/chapter2/2">Chapter 2 အပိုင်း ၂</a> မှာ၊ generative language models တွေကို summarization နဲ့ question answering လို သီးခြား tasks တွေမှာ fine-tune လုပ်နိုင်တယ်ဆိုတာကို ကျွန်တော်တို့ တွေ့ခဲ့ရပါတယ်။ ဒါပေမယ့် ဒီနေ့ခေတ်မှာတော့ language models တွေကို tasks အမျိုးမျိုးမှာ တစ်ပြိုင်နက်တည်း fine-tune လုပ်တာက ပိုပြီး အဖြစ်များပါတယ်။ ဒီနည်းလမ်းကို supervised fine-tuning (SFT) လို့ ခေါ်ပါတယ်။ ဒီလုပ်ငန်းစဉ်က models တွေ ပိုမိုစွမ်းဆောင်နိုင်ပြီး မတူညီတဲ့ use cases တွေကို ကိုင်တွယ်ဖြေရှင်းနိုင်ဖို့ ကူညီပေးပါတယ်။ ChatGPT လို platform တွေမှာ လူတွေ အသုံးပြုနေတဲ့ LLMs အများစုဟာ လူသားတွေရဲ့ နှစ်သက်မှုနဲ့ ပိုမိုကိုက်ညီပြီး အထောက်အကူဖြစ်အောင် SFT ကို ဖြတ်သန်းပြီးသားတွေ ဖြစ်ပါတယ်။ ဒီအခန်းကို အပိုင်းလေးပိုင်း ခွဲခြားပြီး လေ့လာသွားပါမယ်။</p> <h2 class="relative group"><a id="၁-chat-templates-မ" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#၁-chat-templates-မ"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>၁။ Chat Templates များ</span></h2> <p data-svelte-h="svelte-15514lk">Chat templates တွေက အသုံးပြုသူတွေနဲ့ AI models တွေကြား အပြန်အလှန်ဆက်သွယ်မှုတွေကို စနစ်တကျ ပြုလုပ်ပေးပါတယ်။ ဒါက တသမတ်တည်းဖြစ်ပြီး အခြေအနေနဲ့ကိုက်ညီတဲ့ တုံ့ပြန်မှုတွေကို သေချာစေပါတယ်။ ၎င်းတို့မှာ system prompts နဲ့ role-based messages တွေလို အစိတ်အပိုင်းတွေ ပါဝင်ပါတယ်။</p> <h2 class="relative group"><a id="၂-supervised-fine-tuning" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#၂-supervised-fine-tuning"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>၂။ Supervised Fine-Tuning</span></h2> <p data-svelte-h="svelte-u5esp">Supervised Fine-Tuning (SFT) ဟာ pre-trained language models တွေကို သီးခြား tasks တွေနဲ့ လိုက်လျောညီထွေဖြစ်အောင် ပြုလုပ်တဲ့ အရေးကြီးတဲ့ လုပ်ငန်းစဉ်တစ်ခု ဖြစ်ပါတယ်။ ဒီအတွက် task-specific dataset တစ်ခုကို labeled examples တွေနဲ့ train လုပ်ရပါတယ်။ SFT အကြောင်း အသေးစိတ်လမ်းညွှန်ချက်တွေ၊ အဓိကအဆင့်တွေနဲ့ အကောင်းဆုံးကျင့်စဉ်တွေအတွက် <a href="https://huggingface.co/docs/trl/en/sft_trainer" rel="nofollow">TRL documentation ရဲ့ supervised fine-tuning အပိုင်း</a> ကို ကြည့်ရှုနိုင်ပါတယ်။</p> <h2 class="relative group"><a id="၃-lora-low-rank-adaptation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#၃-lora-low-rank-adaptation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>၃။ LoRA (Low-Rank Adaptation)</span></h2> <p data-svelte-h="svelte-sos45m">Low Rank Adaptation (LoRA) ဟာ model ရဲ့ layers တွေမှာ low-rank matrices တွေကို ထည့်သွင်းခြင်းဖြင့် language models တွေကို fine-tuning လုပ်တဲ့ နည်းပညာတစ်ခု ဖြစ်ပါတယ်။ ဒါက model ရဲ့ pre-trained knowledge ကို ထိန်းသိမ်းထားရင်း ထိရောက်တဲ့ fine-tuning ကို ခွင့်ပြုပါတယ်။ LoRA ရဲ့ အဓိက အကျိုးကျေးဇူးတွေထဲက တစ်ခုကတော့ memory ကို သိသိသာသာ ချွေတာနိုင်တာပဲ ဖြစ်ပါတယ်။ ဒါကြောင့် limited resources ရှိတဲ့ hardware တွေမှာတောင် large models တွေကို fine-tune လုပ်နိုင်ပါတယ်။</p> <h2 class="relative group"><a id="၄-evaluation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#၄-evaluation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>၄။ Evaluation</span></h2> <p data-svelte-h="svelte-ao9tl6">Evaluation ဟာ fine-tuning လုပ်ငန်းစဉ်ရဲ့ အရေးကြီးတဲ့ အဆင့်တစ်ခု ဖြစ်ပါတယ်။ ဒါက task-specific dataset ပေါ်မှာ model ရဲ့ စွမ်းဆောင်ရည်ကို တိုင်းတာနိုင်စေပါတယ်။</p> <blockquote class="tip" data-svelte-h="svelte-1bepxci"><p>⚠️ Model Hub နဲ့ 🤗 Transformers မှာ ရရှိနိုင်တဲ့ features တွေအားလုံးကနေ အကျိုးအပြည့်အဝရရှိဖို့အတွက် <a href="https://huggingface.co/join" rel="nofollow">account တစ်ခု ဖန်တီးဖို့</a> ကျွန်တော်တို့ အကြံပြုပါတယ်။</p></blockquote> <h2 class="relative group"><a id="references" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#references"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>References</span></h2> <ul data-svelte-h="svelte-iu8cf1"><li><a href="https://huggingface.co/docs/transformers/main/en/chat_templating" rel="nofollow">Transformers documentation on chat templates</a></li> <li><a href="https://github.com/huggingface/trl/blob/main/trl/scripts/sft.py" rel="nofollow">Script for Supervised Fine-Tuning in TRL</a></li> <li><a href="https://huggingface.co/docs/trl/main/en/sft_trainer" rel="nofollow"><code>SFTTrainer</code> in TRL</a></li> <li><a href="https://arxiv.org/abs/2305.18290" rel="nofollow">Direct Preference Optimization Paper</a></li> <li><a href="https://huggingface.co/docs/trl/sft_trainer" rel="nofollow">Supervised Fine-Tuning with TRL</a></li> <li><a href="https://github.com/huggingface/alignment-handbook" rel="nofollow">How to fine-tune Google Gemma with ChatML and Hugging Face TRL</a></li> <li><a href="https://huggingface.co/learn/cookbook/en/fine_tuning_llm_to_generate_persian_product_catalogs_in_json_format" rel="nofollow">Fine-tuning LLM to Generate Persian Product Catalogs in JSON Format</a></li></ul> <h2 class="relative group"><a id="ဝဟရ-ရငလငခက-glossary" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#ဝဟရ-ရငလငခက-glossary"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ဝေါဟာရ ရှင်းလင်းချက် (Glossary)</span></h2> <ul data-svelte-h="svelte-1p0xb62"><li><strong>Supervised Fine-Tuning (SFT)</strong>: ကြိုတင်လေ့ကျင့်ထားပြီးသား (pre-trained) မော်ဒယ်တစ်ခုကို တိကျသောလုပ်ငန်းဆောင်တာများ (specific tasks) အတွက် label ပါသော ဒေတာများကို အသုံးပြု၍ ထပ်မံလေ့ကျင့်ခြင်းနည်းလမ်း။ ၎င်းသည် မော်ဒယ်ကို ပိုမိုစွမ်းဆောင်နိုင်ပြီး ဘက်စုံသုံးနိုင်စေသည်။</li> <li><strong>Generative Language Models</strong>: စာသားအသစ်များ၊ code သို့မဟုတ် အခြားဒေတာပုံစံများကို ဖန်တီးထုတ်လုပ်နိုင်သော ဘာသာစကားမော်ဒယ်များ။</li> <li><strong>Summarization</strong>: ရှည်လျားသော စာသားတစ်ခု၏ အနှစ်ချုပ်ကို ထုတ်လုပ်ခြင်း။</li> <li><strong>Question Answering</strong>: ပေးထားသော စာသားတစ်ခုမှ မေးခွန်းတစ်ခု၏ အဖြေကို ရှာဖွေခြင်း။</li> <li><strong>Language Models</strong>: လူသားဘာသာစကား၏ ဖြန့်ဝေမှုကို နားလည်ရန် လေ့ကျင့်ထားသော AI မော်ဒယ်တစ်ခု။ ၎င်းသည် စာသားထုတ်လုပ်ခြင်း၊ ဘာသာပြန်ခြင်း စသည့်လုပ်ငန်းများတွင် အသုံးပြုနိုင်သည်။</li> <li><strong>Tasks</strong>: Artificial Intelligence (AI) သို့မဟုတ် Machine Learning (ML) မော်ဒယ်တစ်ခုက လုပ်ဆောင်ရန် ဒီဇိုင်းထုတ်ထားသော သီးခြားအလုပ်။</li> <li><strong>Versatile</strong>: ကွဲပြားသော အလုပ်များ သို့မဟုတ် အခြေအနေများစွာကို ကိုင်တွယ်နိုင်စွမ်းရှိခြင်း။</li> <li><strong>Use Cases</strong>: ထုတ်ကုန် သို့မဟုတ် စနစ်တစ်ခုကို သီးခြားအခြေအနေတစ်ခုတွင် မည်သို့အသုံးပြုသည်ကို ဖော်ပြခြင်း။</li> <li><strong>LLMs (Large Language Models)</strong>: လူသားဘာသာစကားကို နားလည်ပြီး ထုတ်လုပ်ပေးနိုင်တဲ့ အလွန်ကြီးမားတဲ့ Artificial Intelligence (AI) မော်ဒယ်တွေ ဖြစ်ပါတယ်။</li> <li><strong>ChatGPT</strong>: OpenAI မှ ဖန်တီးထားသော လူသားနှင့်ဆင်တူသော စာသားများကို ဖန်တီးနိုင်သည့် conversational AI မော်ဒယ်။</li> <li><strong>Human Preferences</strong>: လူသားများ၏ နှစ်သက်မှုများ သို့မဟုတ် ရွေးချယ်မှုများ။</li> <li><strong>Chat Templates</strong>: အသုံးပြုသူနှင့် AI မော်ဒယ်များကြား အပြန်အလှန်ဆက်သွယ်မှုများကို စနစ်တကျ ပြုလုပ်ပေးသည့် ဖွဲ့စည်းပုံများ။ ၎င်းတို့သည် တသမတ်တည်းဖြစ်ပြီး အခြေအနေနှင့်ကိုက်ညီသော တုံ့ပြန်မှုများကို သေချာစေသည်။</li> <li><strong>System Prompts</strong>: AI မော်ဒယ်တစ်ခုအား ၎င်း၏ အခန်းကဏ္ဍ၊ ပုံစံ သို့မဟုတ် လုပ်ဆောင်ရမည့်အရာများကို လမ်းညွှန်ပေးသည့် မူလညွှန်ကြားချက်များ။</li> <li><strong>Role-based Messages</strong>: AI model နှင့် အသုံးပြုသူတို့၏ သတ်မှတ်ထားသော အခန်းကဏ္ဍများ (ဥပမာ- user, assistant) အပေါ် အခြေခံ၍ ပေးပို့သော messages များ။</li> <li><strong>Pre-trained Language Models</strong>: အကြီးစား ဒေတာအမြောက်အမြားဖြင့် ကြိုတင်လေ့ကျင့်ထားပြီးဖြစ်သော ဘာသာစကားမော်ဒယ်များ။</li> <li><strong>Task-specific Dataset</strong>: သီးခြားလုပ်ငန်းတစ်ခု (ဥပမာ- sentiment analysis) အတွက် အထူးပြင်ဆင်ထားသော ဒေတာအစုအဝေး။</li> <li><strong>Labeled Examples</strong>: labels များ သို့မဟုတ် မှန်ကန်သောအဖြေများ ပါဝင်သော training data များ။</li> <li><strong>TRL Documentation</strong>: Hugging Face Transformes Reinforcement Learning (TRL) library ၏ တရားဝင် မှတ်တမ်းများ (documentation)။</li> <li><strong>LoRA (Low-Rank Adaptation)</strong>: Transformer မော်ဒယ်များကဲ့သို့သော large models များကို fine-tuning လုပ်ရာတွင် ထိရောက်မှုရှိစေရန်အတွက် model ၏ layers တွေမှာ low-rank matrices တွေကို ထပ်ထည့်သည့် နည်းပညာ။ ၎င်းသည် memory အသုံးပြုမှုကို သိသိသာသာ လျှော့ချနိုင်သည်။</li> <li><strong>Low-Rank Matrices</strong>: သင်္ချာပိုင်းဆိုင်ရာ matrix တစ်မျိုးဖြစ်ပြီး ၎င်း၏ rank သည် ၎င်း၏ dimensions များထက် သိသိသာသာ နည်းပါးသည်။ Machine Learning တွင် parameters အရေအတွက်ကို လျှော့ချရန် အသုံးပြုသည်။</li> <li><strong>Model’s Layers</strong>: Neural network model တစ်ခု၏ အဆင့်များ။</li> <li><strong>Pre-trained Knowledge</strong>: မော်ဒယ်အား မူလ pre-training လုပ်ငန်းစဉ်မှ သင်ယူထားသော ဗဟုသုတများ။</li> <li><strong>Memory Savings</strong>: ကွန်ပျူတာ၏ RAM အသုံးပြုမှုကို လျှော့ချနိုင်ခြင်း။</li> <li><strong>Hardware with Limited Resources</strong>: ကွန်ပျူတာ၏ memory (RAM) သို့မဟုတ် processing power (GPU) အစွမ်းအစ အကန့်အသတ်ရှိသော devices များ။</li> <li><strong>Evaluation</strong>: fine-tuning လုပ်ငန်းစဉ်ပြီးနောက် model ၏ စွမ်းဆောင်ရည်ကို တိုင်းတာခြင်း။ ၎င်းသည် model ၏ ထိရောက်မှုနှင့် တိကျမှုကို ဆုံးဖြတ်ရန် ကူညီပေးသည်။</li> <li><strong>Model Hub</strong>: Hugging Face Hub ကို ရည်ညွှန်းပြီး AI မော်ဒယ်များ ရှာဖွေ၊ မျှဝေ၊ အသုံးပြုနိုင်သော ဗဟို platform။</li> <li><strong>🤗 Transformers</strong>: Hugging Face က ထုတ်လုပ်ထားတဲ့ library တစ်ခုဖြစ်ပြီး Transformer မော်ဒယ်တွေကို အသုံးပြုပြီး Natural Language Processing (NLP), computer vision, audio processing စတဲ့ နယ်ပယ်တွေမှာ အဆင့်မြင့် AI မော်ဒယ်တွေကို တည်ဆောက်ပြီး အသုံးပြုနိုင်စေပါတယ်။</li> <li><strong>Account</strong>: Hugging Face Hub ပေါ်ရှိ သုံးစွဲသူအကောင့်။</li> <li><strong>Script</strong>: အလိုအလျောက်လုပ်ဆောင်ရန် ရေးသားထားသော code များ။</li> <li><strong>SFTTrainer</strong>: TRL library မှ <code>Trainer</code> class ၏ extension တစ်ခုဖြစ်ပြီး Supervised Fine-Tuning လုပ်ငန်းစဉ်ကို ရိုးရှင်းစေသည်။</li> <li><strong>Direct Preference Optimization (DPO)</strong>: Reinforcement Learning from Human Feedback (RLHF) အတွက် simplified algorithm တစ်ခုဖြစ်ပြီး model output များကို လူသားနှစ်သက်မှုနှင့် ပိုမိုကိုက်ညီအောင် လုပ်ဆောင်ပေးသည်။</li> <li><strong>Google Gemma</strong>: Google မှ ထုတ်လုပ်ထားသော open-source LLM တစ်မျိုး။</li> <li><strong>ChatML</strong>: OpenAI မှ ထုတ်လုပ်ထားသော chat conversation များကို ကိုယ်စားပြုရန်အတွက် markup format တစ်ခု။</li> <li><strong>Alignment Handbook</strong>: Hugging Face မှ LLM များကို လူသားနှစ်သက်မှုနှင့် ကိုက်ညီအောင် လေ့ကျင့်ရန်အတွက် လမ်းညွှန်စာတမ်း။</li> <li><strong>Persian Product Catalogs</strong>: ပါရှန်ဘာသာစကားဖြင့် ထုတ်ကုန်စာရင်းများ။</li> <li><strong>JSON Format</strong>: ဒေတာများကို ပေါ့ပေါ့ပါးပါး ဖလှယ်နိုင်သော format ဖြစ်ပြီး လူသားများ ဖတ်ရှုရလွယ်ကူပြီး စက်များ စီမံဆောင်ရွက်ရလွယ်ကူသည်။</li></ul> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/course/blob/main/chapters/my/chapter11/1.mdx" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>

	<script>
	{
	__sveltekit_5q47hu = {
	assets: "/docs/course/pr_1095/my",
	base: "/docs/course/pr_1095/my",
	env: {}
	};

	const element = document.currentScript.parentElement;

	const data = [null,null];

	Promise.all([
	import("/docs/course/pr_1095/my/_app/immutable/entry/start.8e25cab6.js"),
	import("/docs/course/pr_1095/my/_app/immutable/entry/app.b12ce275.js")
	]).then(([kit, app]) => {
	kit.start(app, element, {
	node_ids: [0, 21],
	data,
	form: null,
	error: null
	});
	});
	}
	</script>

Xet Storage Details

Size:: 31.7 kB
Xet hash:: 637b217e20add268cfdb91d787b138a5e1db14a7e8568d943defe725391cbafa

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.