Buckets:

rtrm's picture
download
raw
25.1 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Open R1 for Students&quot;,&quot;local&quot;:&quot;open-r1-for-students&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;What You’ll Learn&quot;,&quot;local&quot;:&quot;what-youll-learn&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Why This Matters for Students&quot;,&quot;local&quot;:&quot;why-this-matters-for-students&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Chapter Overview&quot;,&quot;local&quot;:&quot;chapter-overview&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;1️⃣ Introduction to Reinforcement Learning and its Role in LLMs&quot;,&quot;local&quot;:&quot;1-introduction-to-reinforcement-learning-and-its-role-in-llms&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;2️⃣ Understanding the DeepSeek R1 Paper&quot;,&quot;local&quot;:&quot;2-understanding-the-deepseek-r1-paper&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;3️⃣ Implementing GRPO in TRL&quot;,&quot;local&quot;:&quot;3-implementing-grpo-in-trl&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;4️⃣ Practical use case to align a model&quot;,&quot;local&quot;:&quot;4-practical-use-case-to-align-a-model&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Prerequisites&quot;,&quot;local&quot;:&quot;prerequisites&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;How to Use This Chapter&quot;,&quot;local&quot;:&quot;how-to-use-this-chapter&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/course/pr_1069/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/course/pr_1069/en/_app/immutable/entry/start.c5306bb2.js">
<link rel="modulepreload" href="/docs/course/pr_1069/en/_app/immutable/chunks/scheduler.37c15a92.js">
<link rel="modulepreload" href="/docs/course/pr_1069/en/_app/immutable/chunks/singletons.bc78d867.js">
<link rel="modulepreload" href="/docs/course/pr_1069/en/_app/immutable/chunks/index.18351ede.js">
<link rel="modulepreload" href="/docs/course/pr_1069/en/_app/immutable/chunks/paths.76894643.js">
<link rel="modulepreload" href="/docs/course/pr_1069/en/_app/immutable/entry/app.4264f5f8.js">
<link rel="modulepreload" href="/docs/course/pr_1069/en/_app/immutable/chunks/index.7cb9c9b8.js">
<link rel="modulepreload" href="/docs/course/pr_1069/en/_app/immutable/nodes/0.f5347c47.js">
<link rel="modulepreload" href="/docs/course/pr_1069/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/course/pr_1069/en/_app/immutable/nodes/28.055a6198.js">
<link rel="modulepreload" href="/docs/course/pr_1069/en/_app/immutable/chunks/Tip.d10b3fc9.js">
<link rel="modulepreload" href="/docs/course/pr_1069/en/_app/immutable/chunks/CodeBlock.abae2786.js">
<link rel="modulepreload" href="/docs/course/pr_1069/en/_app/immutable/chunks/getInferenceSnippets.f9350a3f.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Open R1 for Students&quot;,&quot;local&quot;:&quot;open-r1-for-students&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;What You’ll Learn&quot;,&quot;local&quot;:&quot;what-youll-learn&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Why This Matters for Students&quot;,&quot;local&quot;:&quot;why-this-matters-for-students&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Chapter Overview&quot;,&quot;local&quot;:&quot;chapter-overview&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;1️⃣ Introduction to Reinforcement Learning and its Role in LLMs&quot;,&quot;local&quot;:&quot;1-introduction-to-reinforcement-learning-and-its-role-in-llms&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;2️⃣ Understanding the DeepSeek R1 Paper&quot;,&quot;local&quot;:&quot;2-understanding-the-deepseek-r1-paper&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;3️⃣ Implementing GRPO in TRL&quot;,&quot;local&quot;:&quot;3-implementing-grpo-in-trl&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;4️⃣ Practical use case to align a model&quot;,&quot;local&quot;:&quot;4-practical-use-case-to-align-a-model&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Prerequisites&quot;,&quot;local&quot;:&quot;prerequisites&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;How to Use This Chapter&quot;,&quot;local&quot;:&quot;how-to-use-this-chapter&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="open-r1-for-students" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#open-r1-for-students"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Open R1 for Students</span></h1> <p data-svelte-h="svelte-d5kmhw">Welcome to an exciting journey into the world of open-source AI with reinforcement learning! This chapter is designed to help students understand reinforcement learning and its role in LLMs.</p> <p data-svelte-h="svelte-1qhele0">We will also explore <a href="https://github.com/huggingface/open-r1" rel="nofollow">Open R1</a>, a groundbreaking community project that’s making advanced AI accessible to everyone. Specifically, this course is to help students and learners to use and contribute to <a href="https://github.com/huggingface/open-r1" rel="nofollow">Open R1</a>.</p> <h2 class="relative group"><a id="what-youll-learn" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#what-youll-learn"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>What You’ll Learn</span></h2> <p data-svelte-h="svelte-gyca4k">In this chapter, we’ll break down complex concepts into easy-to-understand pieces and show you how you can be part of this exciting project to make LLMs reason on complex problems.</p> <p data-svelte-h="svelte-1dnu3fz">LLMs have shown excellent performance on many generative tasks. However, up until recently they have struggled on complex problems that require reasoning. For example, they struggle to deal with puzzles or math problems that require multiple steps of reasoning.</p> <p data-svelte-h="svelte-1a9mpqp">Open R1 is a project that aims to make LLMs reason on complex problems. It does this by using reinforcement learning to encourage LLMs to ‘think’ and reason.</p> <p data-svelte-h="svelte-hzj1fi">In simple terms, the model is train to generate thoughts as well as outputs, and to structure these thoughts and outputs so that they can be handled separately by the user.</p> <p data-svelte-h="svelte-13kwili">Let’s take a look at an example. A we gave ourself the task of solving the following problem, we might think like this:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->Problem: <span class="hljs-string">&quot;I have 3 apples and 2 oranges. How many pieces of fruit do I have in total?&quot;</span>
Thought: <span class="hljs-string">&quot;I need to add the number of apples and oranges to get the total number of pieces of fruit.&quot;</span>
Answer: <span class="hljs-string">&quot;5&quot;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-kj6jrw">We can then structure this thought and answer so that they can be handled separately by the user. For reasoning tasks, LLMs can be trained to generate thoughts and answers in the following format:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->&lt;think&gt;I need to add the number of apples and oranges to get the total number of pieces of fruit.&lt;/think&gt;
5<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-cyhm3e">As a user, we can then extract the thought and answer from the model’s output and use them to solve the problem.</p> <h2 class="relative group"><a id="why-this-matters-for-students" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#why-this-matters-for-students"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Why This Matters for Students</span></h2> <p data-svelte-h="svelte-13v0ev8">As a student, understanding Open R1 and the role of reinforcement learning in LLMs is valuable because:</p> <ul data-svelte-h="svelte-1x7fhn4"><li>It shows you how cutting-edge AI is developed</li> <li>It gives you hands-on opportunities to learn and contribute</li> <li>It helps you understand where AI technology is heading</li> <li>It opens doors to future career opportunities in AI</li></ul> <h2 class="relative group"><a id="chapter-overview" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#chapter-overview"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Chapter Overview</span></h2> <p data-svelte-h="svelte-of01q">This chapter is divided into four sections, each focusing on a different aspect of Open R1:</p> <h3 class="relative group"><a id="1-introduction-to-reinforcement-learning-and-its-role-in-llms" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#1-introduction-to-reinforcement-learning-and-its-role-in-llms"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>1️⃣ Introduction to Reinforcement Learning and its Role in LLMs</span></h3> <p data-svelte-h="svelte-cvbn0t">We’ll explore the basics of Reinforcement Learning (RL) and its role in training LLMs.</p> <ul data-svelte-h="svelte-1oq3sgv"><li>What is RL?</li> <li>How is RL used in LLMs?</li> <li>What is DeepSeek R1?</li> <li>What are the key innovations of DeepSeek R1?</li></ul> <h3 class="relative group"><a id="2-understanding-the-deepseek-r1-paper" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#2-understanding-the-deepseek-r1-paper"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>2️⃣ Understanding the DeepSeek R1 Paper</span></h3> <p data-svelte-h="svelte-1o5dwft">We’ll break down the research paper that inspired <a href="https://huggingface.co/open-r1" rel="nofollow">Open R1</a>:</p> <ul data-svelte-h="svelte-qtiizc"><li>Key innovations and breakthroughs</li> <li>The training process and architecture</li> <li>Results and their significance</li></ul> <h3 class="relative group"><a id="3-implementing-grpo-in-trl" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#3-implementing-grpo-in-trl"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>3️⃣ Implementing GRPO in TRL</span></h3> <p data-svelte-h="svelte-weh7pe">We’ll get practical with code examples:</p> <ul data-svelte-h="svelte-h3ghvk"><li>How to use the Transformer Reinforcement Learning (TRL) library</li> <li>Setting up GRPO training</li></ul> <h3 class="relative group"><a id="4-practical-use-case-to-align-a-model" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#4-practical-use-case-to-align-a-model"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>4️⃣ Practical use case to align a model</span></h3> <p data-svelte-h="svelte-1wapewh">We’ll look at a practical use case to align a model using Open R1.</p> <ul data-svelte-h="svelte-3or5pv"><li>How to train a model using GRPO in TRL</li> <li>Share your model on the <a href="https://huggingface.co/models" rel="nofollow">Hugging Face Hub</a></li></ul> <h2 class="relative group"><a id="prerequisites" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#prerequisites"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Prerequisites</span></h2> <p data-svelte-h="svelte-1fp4txr">To get the most out of this chapter, it’s helpful to have:</p> <ul data-svelte-h="svelte-1x6c8jl"><li>Solid understanding of Python programming</li> <li>Familiarity with machine learning concepts</li> <li>Interest in AI and language models</li></ul> <p data-svelte-h="svelte-152yrq3">Don’t worry if you’re missing some of these – we’ll explain key concepts as we go along! 🚀</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-1x0x43v">If you don’t have all the prerequisites, check out this <a href="/course/chapter1/1">course</a> from units 1 to 11</p></div> <h2 class="relative group"><a id="how-to-use-this-chapter" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#how-to-use-this-chapter"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>How to Use This Chapter</span></h2> <ol data-svelte-h="svelte-1sl8uk0"><li><strong>Read Sequentially</strong>: The sections build on each other, so it’s best to read them in order</li> <li><strong>Share Notes</strong>: Write down key concepts and questions and discuss them with in the community in <a href="https://discord.gg/F3vZujJH" rel="nofollow">Discord</a></li> <li><strong>Try the Code</strong>: When we get to practical examples, try them yourself</li> <li><strong>Join the Community</strong>: Use the resources we provide to connect with other learners</li></ol> <p data-svelte-h="svelte-c4dvpf">Let’s begin our exploration of Open R1 and discover how you can be part of making AI more accessible to everyone! 🚀</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/course/blob/main/chapters/en/chapter12/1.mdx" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
__sveltekit_1y0degu = {
assets: "/docs/course/pr_1069/en",
base: "/docs/course/pr_1069/en",
env: {}
};
const element = document.currentScript.parentElement;
const data = [null,null];
Promise.all([
import("/docs/course/pr_1069/en/_app/immutable/entry/start.c5306bb2.js"),
import("/docs/course/pr_1069/en/_app/immutable/entry/app.4264f5f8.js")
]).then(([kit, app]) => {
kit.start(app, element, {
node_ids: [0, 28],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
25.1 kB
·
Xet hash:
4f5a07ca3213579505c85d4ccc4113a126b4703d8df937e2c8e033b567e85c51

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.