Buckets:

hf-doc-build
/

doc-dev

hf-doc-build/doc-dev / course /pr_1021 /en /_app /immutable /nodes /28.5215aa4e.js

rtrm's picture

about 2 months ago

14.6 kB

	// Page chunk for the "Open R1 for Students" course chapter.
	//
	// This appears to be compiler-generated Svelte output (hashed chunk imports,
	// `data-svelte-h` markers, c/l/m/p/i/o/d fragment methods). Identifiers are
	// kept exactly as generated so they keep lining up with the sibling chunks.
	//
	// Repairs relative to the copy this was recovered from:
	//   * `Fe\|\|(` was a markdown-escaped `||` and did not parse.
	//   * Several quoted string literals had been hard-wrapped onto a new line,
	//     which is a syntax error; they are re-joined here.
	//   * The multi-line `highlighted` snippets had picked up a leading tab on
	//     every continuation line; the base64 `code` payloads decode (via
	//     URL-encoding) to the same text without those tabs, so they are removed.
	//
	// The prose strings are intentionally left byte-for-byte (including their
	// typos): each one is paired with a hard-coded `svelte-…` hash below, and the
	// source of truth for the text is the .mdx file named in the `Rt` props.
	import { s as Mt, o as Tt, n as Ct } from "../chunks/scheduler.37c15a92.js";
	import {
		S as Lt,
		i as bt,
		g as a,
		s as i,
		r as u,
		A as It,
		h as o,
		f as n,
		c as s,
		j as vt,
		u as h,
		x as r,
		k as yt,
		y as kt,
		a as l,
		v as m,
		d as f,
		t as d,
		w as g,
	} from "../chunks/index.7cb9c9b8.js";
	import { T as _t } from "../chunks/Tip.d10b3fc9.js";
	import { C as xt } from "../chunks/CodeBlock.abae2786.js";
	import { H as c, E as Rt } from "../chunks/getInferenceSnippets.a2135f3c.js";

	/**
	 * Builds a node with `a(tag)` and assigns `content` to its `prop`.
	 *
	 * @param {string} tag - Tag name handed to `a`.
	 * @param {"textContent"|"innerHTML"} prop - Property that receives the content.
	 * @param {string} content - Static text or markup.
	 * @returns {*} Whatever `a(tag)` returned (presumably a DOM element — confirm
	 *   against the index chunk).
	 */
	function createStatic(tag, prop, content) {
		const node = a(tag);
		node[prop] = content;
		return node;
	}

	/**
	 * Obtains a node with `o(nodes, tag, {"data-svelte-h": true})` and assigns
	 * `content` to `prop` only when `r(node)` differs from the expected hash.
	 *
	 * @param {*} nodes - First argument forwarded to `o`.
	 * @param {string} tag - Upper-case tag name forwarded to `o`.
	 * @param {string} hash - Expected `svelte-…` marker for the static content.
	 * @param {"textContent"|"innerHTML"} prop - Property that receives the content.
	 * @param {string} content - Static text or markup.
	 * @returns {*} The node returned by `o`.
	 */
	function claimStatic(nodes, tag, hash, prop, content) {
		const node = o(nodes, tag, { "data-svelte-h": true });
		if (r(node) !== hash) {
			node[prop] = content;
		}
		return node;
	}

	/**
	 * Fragment for the default slot of the `_t` (Tip) component: a single
	 * paragraph linking to the prerequisite course.
	 *
	 * @param {*} ae - Context argument; not read by this fragment.
	 * @returns {object} Fragment object with `c`, `l`, `m`, `p` and `d` members.
	 */
	function Ut(ae) {
		let p;
		const v = 'If you don’t have all the prerequisites, check out this <a href="/course/chapter1/1">course</a> from units 1 to 11';
		return {
			c() {
				p = createStatic("p", "innerHTML", v);
			},
			l($) {
				p = claimStatic($, "P", "svelte-1x0x43v", "innerHTML", v);
			},
			m($, ie) {
				l($, p, ie);
			},
			p: Ct,
			d($) {
				if ($) {
					n(p);
				}
			},
		};
	}

	/**
	 * Main fragment for the page: instantiates the heading, code-block, tip and
	 * edit-link child components and returns the fragment object that creates,
	 * claims, mounts, updates and destroys the page nodes in document order.
	 *
	 * @param {*} ae - Context, forwarded to the Tip component as `$$scope.ctx`.
	 * @returns {object} Fragment object with `c`, `l`, `h`, `m`, `p`, `i`, `o`, `d`.
	 */
	function Ht(ae) {
		// Node handles. The two-letter names are the values returned by `i()` /
		// `s(e)` between content nodes (presumably whitespace text nodes — confirm).
		let p, v, $, ie, oe, x, re, M, pe, ue, C, he, L, me, b, fe, I, de, k, ge, $e, R, ce, we, H, ve, ye, P, xe, G, Me, Te, W, Ce, Le, S, be, Z, Ie, ke, O, _e, Y, Re, Ue, A, He, F, Je, Pe, E, Ge, D, je, We, X, Be, N, Se, K, Ze, qe, Oe, te, Ye, ne, ze, Ae, se;
		// Set to true at the end of `m` and `i`, false in `o`; gates `i`.
		let Fe;

		// Static page content (runtime strings; do not edit, see header comment).
		const Ve = "Welcome to an exciting journey into the world of open-source AI with reinforcement learning! This chapter is designed to help students understand reinforcement learning and its role in LLMs.";
		const Ee = 'We will also explore <a href="https://github.com/huggingface/open-r1" rel="nofollow">Open R1</a>, a groundbreaking community project that’s making advanced AI accessible to everyone. Specifically, this course is to help students and learners to use and contribute to <a href="https://github.com/huggingface/open-r1" rel="nofollow">Open R1</a>.';
		const De = "In this chapter, we’ll break down complex concepts into easy-to-understand pieces and show you how you can be part of this exciting project to make LLMs reason on complex problems.";
		const Qe = "LLMs have shown excellent performance on many generative tasks. However, up until recently they have struggled on complex problems that require reasoning. For example, they struggle to deal with puzzles or math problems that require multiple steps of reasoning.";
		const Xe = "Open R1 is a project that aims to make LLMs reason on complex problems. It does this by using reinforcement learning to encourage LLMs to ‘think’ and reason.";
		const Ne = "In simple terms, the model is train to generate thoughts as well as outputs, and to structure these thoughts and outputs so that they can be handled separately by the user.";
		const Ke = "Let’s take a look at an example. A we gave ourself the task of solving the following problem, we might think like this:";
		const et = "We can then structure this thought and answer so that they can be handled separately by the user. For reasoning tasks, LLMs can be trained to generate thoughts and answers in the following format:";
		const tt = "As a user, we can then extract the thought and answer from the model’s output and use them to solve the problem.";
		const nt = "As a student, understanding Open R1 and the role of reinforcement learning in LLMs is valuable because:";
		const lt = "<li>It shows you how cutting-edge AI is developed</li> <li>It gives you hands-on opportunities to learn and contribute</li> <li>It helps you understand where AI technology is heading</li> <li>It opens doors to future career opportunities in AI</li>";
		const it = "This chapter is divided into four sections, each focusing on a different aspect of Open R1:";
		const st = "We’ll explore the basics of Reinforcement Learning (RL) and its role in training LLMs.";
		const at = "<li>What is RL?</li> <li>How is RL used in LLMs?</li> <li>What is DeepSeek R1?</li> <li>What are the key innovations of DeepSeek R1?</li>";
		const ot = 'We’ll break down the research paper that inspired <a href="https://huggingface.co/open-r1" rel="nofollow">Open R1</a>:';
		const rt = "<li>Key innovations and breakthroughs</li> <li>The training process and architecture</li> <li>Results and their significance</li>";
		const pt = "We’ll get practical with code examples:";
		const ut = "<li>How to use the Transformer Reinforcement Learning (TRL) library</li> <li>Setting up GRPO training</li>";
		const ht = "We’ll look at a practical use case to align a model using Open R1.";
		const mt = '<li>How to train a model using GRPO in TRL</li> <li>Share your model on the <a href="https://huggingface.co/models" rel="nofollow">Hugging Face Hub</a></li>';
		const ft = "To get the most out of this chapter, it’s helpful to have:";
		const dt = "<li>Solid understanding of Python programming</li> <li>Familiarity with machine learning concepts</li> <li>Interest in AI and language models</li>";
		const gt = "Don’t worry if you’re missing some of these – we’ll explain key concepts as we go along! 🚀";
		const $t = '<li><strong>Read Sequentially</strong>: The sections build on each other, so it’s best to read them in order</li> <li><strong>Share Notes</strong>: Write down key concepts and questions and discuss them with in the community in <a href="https://discord.gg/F3vZujJH" rel="nofollow">Discord</a></li> <li><strong>Try the Code</strong>: When we get to practical examples, try them yourself</li> <li><strong>Join the Community</strong>: Use the resources we provide to connect with other learners</li>';
		const ct = "Let’s begin our exploration of Open R1 and discover how you can be part of making AI more accessible to everyone! 🚀";

		// Child components, in the order they appear on the page.
		const y = new c({ props: { title: "Open R1 for Students", local: "open-r1-for-students", headingTag: "h1" } });
		const T = new c({ props: { title: "What You’ll Learn", local: "what-youll-learn", headingTag: "h2" } });
		// NOTE(review): the markup in the two `highlighted` props looks
		// entity-decoded (literal `"` and `<think>`); presumably the build emitted
		// `&quot;` / `&lt;` — confirm against the real build output.
		const _ = new xt({
			props: {
				code: "UHJvYmxlbSUzQSUyMCUyMkklMjBoYXZlJTIwMyUyMGFwcGxlcyUyMGFuZCUyMDIlMjBvcmFuZ2VzLiUyMEhvdyUyMG1hbnklMjBwaWVjZXMlMjBvZiUyMGZydWl0JTIwZG8lMjBJJTIwaGF2ZSUyMGluJTIwdG90YWwlM0YlMjIlMEElMEFUaG91Z2h0JTNBJTIwJTIySSUyMG5lZWQlMjB0byUyMGFkZCUyMHRoZSUyMG51bWJlciUyMG9mJTIwYXBwbGVzJTIwYW5kJTIwb3JhbmdlcyUyMHRvJTIwZ2V0JTIwdGhlJTIwdG90YWwlMjBudW1iZXIlMjBvZiUyMHBpZWNlcyUyMG9mJTIwZnJ1aXQuJTIyJTBBJTBBQW5zd2VyJTNBJTIwJTIyNSUyMg==",
				highlighted: [
					'Problem: <span class="hljs-string">"I have 3 apples and 2 oranges. How many pieces of fruit do I have in total?"</span>',
					"",
					'Thought: <span class="hljs-string">"I need to add the number of apples and oranges to get the total number of pieces of fruit."</span>',
					"",
					'Answer: <span class="hljs-string">"5"</span>',
				].join("\n"),
				wrap: false,
			},
		});
		const U = new xt({
			props: {
				code: "JTNDdGhpbmslM0VJJTIwbmVlZCUyMHRvJTIwYWRkJTIwdGhlJTIwbnVtYmVyJTIwb2YlMjBhcHBsZXMlMjBhbmQlMjBvcmFuZ2VzJTIwdG8lMjBnZXQlMjB0aGUlMjB0b3RhbCUyMG51bWJlciUyMG9mJTIwcGllY2VzJTIwb2YlMjBmcnVpdC4lM0MlMkZ0aGluayUzRSUwQTU=",
				highlighted: "<think>I need to add the number of apples and oranges to get the total number of pieces of fruit.</think>\n5",
				wrap: false,
			},
		});
		const J = new c({ props: { title: "Why This Matters for Students", local: "why-this-matters-for-students", headingTag: "h2" } });
		const j = new c({ props: { title: "Chapter Overview", local: "chapter-overview", headingTag: "h2" } });
		const B = new c({ props: { title: "1️⃣ Introduction to Reinforcement Learning and its Role in LLMs", local: "1-introduction-to-reinforcement-learning-and-its-role-in-llms", headingTag: "h3" } });
		const q = new c({ props: { title: "2️⃣ Understanding the DeepSeek R1 Paper", local: "2-understanding-the-deepseek-r1-paper", headingTag: "h3" } });
		const z = new c({ props: { title: "3️⃣ Implementing GRPO in TRL", local: "3-implementing-grpo-in-trl", headingTag: "h3" } });
		const V = new c({ props: { title: "4️⃣ Practical use case to align a model", local: "4-practical-use-case-to-align-a-model", headingTag: "h3" } });
		const Q = new c({ props: { title: "Prerequisites", local: "prerequisites", headingTag: "h2" } });
		const w = new _t({ props: { $$slots: { default: [Ut] }, $$scope: { ctx: ae } } });
		const ee = new c({ props: { title: "How to Use This Chapter", local: "how-to-use-this-chapter", headingTag: "h2" } });
		const le = new Rt({ props: { source: "https://github.com/huggingface/course/blob/main/chapters/en/chapter12/1.mdx" } });

		// Same order the generated code walks them in `i`, `o` and `d`.
		const children = [y, T, _, U, J, j, B, q, z, V, Q, w, ee, le];

		return {
			// Create every node from scratch, in document order.
			c() {
				p = a("meta");
				v = i();
				$ = a("p");
				ie = i();
				u(y.$$.fragment);
				oe = i();
				x = createStatic("p", "textContent", Ve);
				re = i();
				M = createStatic("p", "innerHTML", Ee);
				pe = i();
				u(T.$$.fragment);
				ue = i();
				C = createStatic("p", "textContent", De);
				he = i();
				L = createStatic("p", "textContent", Qe);
				me = i();
				b = createStatic("p", "textContent", Xe);
				fe = i();
				I = createStatic("p", "textContent", Ne);
				de = i();
				k = createStatic("p", "textContent", Ke);
				ge = i();
				u(_.$$.fragment);
				$e = i();
				R = createStatic("p", "textContent", et);
				ce = i();
				u(U.$$.fragment);
				we = i();
				H = createStatic("p", "textContent", tt);
				ve = i();
				u(J.$$.fragment);
				ye = i();
				P = createStatic("p", "textContent", nt);
				xe = i();
				G = createStatic("ul", "innerHTML", lt);
				Me = i();
				u(j.$$.fragment);
				Te = i();
				W = createStatic("p", "textContent", it);
				Ce = i();
				u(B.$$.fragment);
				Le = i();
				S = createStatic("p", "textContent", st);
				be = i();
				Z = createStatic("ul", "innerHTML", at);
				Ie = i();
				u(q.$$.fragment);
				ke = i();
				O = createStatic("p", "innerHTML", ot);
				_e = i();
				Y = createStatic("ul", "innerHTML", rt);
				Re = i();
				u(z.$$.fragment);
				Ue = i();
				A = createStatic("p", "textContent", pt);
				He = i();
				F = createStatic("ul", "innerHTML", ut);
				Je = i();
				u(V.$$.fragment);
				Pe = i();
				E = createStatic("p", "textContent", ht);
				Ge = i();
				D = createStatic("ul", "innerHTML", mt);
				je = i();
				u(Q.$$.fragment);
				We = i();
				X = createStatic("p", "textContent", ft);
				Be = i();
				N = createStatic("ul", "innerHTML", dt);
				Se = i();
				K = createStatic("p", "textContent", gt);
				Ze = i();
				u(w.$$.fragment);
				qe = i();
				u(ee.$$.fragment);
				Oe = i();
				te = createStatic("ol", "innerHTML", $t);
				Ye = i();
				ne = createStatic("p", "textContent", ct);
				ze = i();
				u(le.$$.fragment);
				Ae = i();
				se = a("p");
				this.h();
			},

			// Same walk as `c`, but obtaining nodes from `e` via `o` / `s` / `h`
			// instead of creating them (presumably hydration — confirm).
			l(e) {
				const t = It("svelte-u9bgzb", document.head);
				p = o(t, "META", { name: true, content: true });
				t.forEach(n);
				v = s(e);
				$ = o(e, "P", {});
				vt($).forEach(n);
				ie = s(e);
				h(y.$$.fragment, e);
				oe = s(e);
				x = claimStatic(e, "P", "svelte-d5kmhw", "textContent", Ve);
				re = s(e);
				M = claimStatic(e, "P", "svelte-1qhele0", "innerHTML", Ee);
				pe = s(e);
				h(T.$$.fragment, e);
				ue = s(e);
				C = claimStatic(e, "P", "svelte-gyca4k", "textContent", De);
				he = s(e);
				L = claimStatic(e, "P", "svelte-1dnu3fz", "textContent", Qe);
				me = s(e);
				b = claimStatic(e, "P", "svelte-1a9mpqp", "textContent", Xe);
				fe = s(e);
				I = claimStatic(e, "P", "svelte-hzj1fi", "textContent", Ne);
				de = s(e);
				k = claimStatic(e, "P", "svelte-13kwili", "textContent", Ke);
				ge = s(e);
				h(_.$$.fragment, e);
				$e = s(e);
				R = claimStatic(e, "P", "svelte-kj6jrw", "textContent", et);
				ce = s(e);
				h(U.$$.fragment, e);
				we = s(e);
				H = claimStatic(e, "P", "svelte-cyhm3e", "textContent", tt);
				ve = s(e);
				h(J.$$.fragment, e);
				ye = s(e);
				P = claimStatic(e, "P", "svelte-13v0ev8", "textContent", nt);
				xe = s(e);
				G = claimStatic(e, "UL", "svelte-1x7fhn4", "innerHTML", lt);
				Me = s(e);
				h(j.$$.fragment, e);
				Te = s(e);
				W = claimStatic(e, "P", "svelte-of01q", "textContent", it);
				Ce = s(e);
				h(B.$$.fragment, e);
				Le = s(e);
				S = claimStatic(e, "P", "svelte-cvbn0t", "textContent", st);
				be = s(e);
				Z = claimStatic(e, "UL", "svelte-1oq3sgv", "innerHTML", at);
				Ie = s(e);
				h(q.$$.fragment, e);
				ke = s(e);
				O = claimStatic(e, "P", "svelte-1o5dwft", "innerHTML", ot);
				_e = s(e);
				Y = claimStatic(e, "UL", "svelte-qtiizc", "innerHTML", rt);
				Re = s(e);
				h(z.$$.fragment, e);
				Ue = s(e);
				A = claimStatic(e, "P", "svelte-weh7pe", "textContent", pt);
				He = s(e);
				F = claimStatic(e, "UL", "svelte-h3ghvk", "innerHTML", ut);
				Je = s(e);
				h(V.$$.fragment, e);
				Pe = s(e);
				E = claimStatic(e, "P", "svelte-1wapewh", "textContent", ht);
				Ge = s(e);
				D = claimStatic(e, "UL", "svelte-3or5pv", "innerHTML", mt);
				je = s(e);
				h(Q.$$.fragment, e);
				We = s(e);
				X = claimStatic(e, "P", "svelte-1fp4txr", "textContent", ft);
				Be = s(e);
				N = claimStatic(e, "UL", "svelte-1x6c8jl", "innerHTML", dt);
				Se = s(e);
				K = claimStatic(e, "P", "svelte-152yrq3", "textContent", gt);
				Ze = s(e);
				h(w.$$.fragment, e);
				qe = s(e);
				h(ee.$$.fragment, e);
				Oe = s(e);
				te = claimStatic(e, "OL", "svelte-1sl8uk0", "innerHTML", $t);
				Ye = s(e);
				ne = claimStatic(e, "P", "svelte-c4dvpf", "textContent", ct);
				ze = s(e);
				h(le.$$.fragment, e);
				Ae = s(e);
				se = o(e, "P", {});
				vt(se).forEach(n);
				this.h();
			},

			// Attributes for the <meta> node; `Jt` is the table-of-contents JSON
			// declared further down in this module.
			h() {
				yt(p, "name", "hf:doc:metadata");
				yt(p, "content", Jt);
			},

			// Mount: the <meta> goes to document.head, everything else is passed to
			// `l(e, node, t)` / `m(component, e, t)` in document order.
			m(e, t) {
				const insert = (...nodes) => {
					nodes.forEach((node) => l(e, node, t));
				};
				kt(document.head, p);
				insert(v, $, ie);
				m(y, e, t);
				insert(oe, x, re, M, pe);
				m(T, e, t);
				insert(ue, C, he, L, me, b, fe, I, de, k, ge);
				m(_, e, t);
				insert($e, R, ce);
				m(U, e, t);
				insert(we, H, ve);
				m(J, e, t);
				insert(ye, P, xe, G, Me);
				m(j, e, t);
				insert(Te, W, Ce);
				m(B, e, t);
				insert(Le, S, be, Z, Ie);
				m(q, e, t);
				insert(ke, O, _e, Y, Re);
				m(z, e, t);
				insert(Ue, A, He, F, Je);
				m(V, e, t);
				insert(Pe, E, Ge, D, je);
				m(Q, e, t);
				insert(We, X, Be, N, Se, K, Ze);
				m(w, e, t);
				insert(qe);
				m(ee, e, t);
				insert(Oe, te, Ye, ne, ze);
				m(le, e, t);
				insert(Ae, se);
				Fe = true;
			},

			// Update: only the Tip component takes changes; bit 2 of the dirty mask
			// decides whether a fresh `$$scope` is forwarded to it.
			p(e, [t]) {
				const wt = {};
				if (t & 2) {
					wt.$$scope = { dirty: t, ctx: e };
				}
				w.$set(wt);
			},

			// Calls `f` on every child fragment unless `Fe` is already set.
			i(e) {
				if (Fe) {
					return;
				}
				children.forEach((child) => f(child.$$.fragment, e));
				Fe = true;
			},

			// Calls `d` on every child fragment and clears `Fe`.
			o(e) {
				children.forEach((child) => d(child.$$.fragment, e));
				Fe = false;
			},

			// Destroy: page nodes are passed to `n` only when `e` is truthy; the
			// <meta> node and the child components are always cleaned up.
			d(e) {
				if (e) {
					[v, $, ie, oe, x, re, M, pe, ue, C, he, L, me, b, fe, I, de, k, ge, $e, R, ce, we, H, ve, ye, P, xe, G, Me, Te, W, Ce, Le, S, be, Z, Ie, ke, O, _e, Y, Re, Ue, A, He, F, Je, Pe, E, Ge, D, je, We, X, Be, N, Se, K, Ze, qe, Oe, te, Ye, ne, ze, Ae, se].forEach((node) => n(node));
				}
				n(p);
				children.forEach((child) => g(child, e));
			},
		};
	}

	// Table of contents for the page, serialized as JSON; written to the
	// `hf:doc:metadata` <meta> tag by `Ht`'s `h()` method.
	const Jt = '{"title":"Open R1 for Students","local":"open-r1-for-students","sections":[{"title":"What You’ll Learn","local":"what-youll-learn","sections":[],"depth":2},{"title":"Why This Matters for Students","local":"why-this-matters-for-students","sections":[],"depth":2},{"title":"Chapter Overview","local":"chapter-overview","sections":[{"title":"1️⃣ Introduction to Reinforcement Learning and its Role in LLMs","local":"1-introduction-to-reinforcement-learning-and-its-role-in-llms","sections":[],"depth":3},{"title":"2️⃣ Understanding the DeepSeek R1 Paper","local":"2-understanding-the-deepseek-r1-paper","sections":[],"depth":3},{"title":"3️⃣ Implementing GRPO in TRL","local":"3-implementing-grpo-in-trl","sections":[],"depth":3},{"title":"4️⃣ Practical use case to align a model","local":"4-practical-use-case-to-align-a-model","sections":[],"depth":3}],"depth":2},{"title":"Prerequisites","local":"prerequisites","sections":[],"depth":2},{"title":"How to Use This Chapter","local":"how-to-use-this-chapter","sections":[],"depth":2}],"depth":1}';

	/**
	 * Instance initializer passed to `bt`. Registers a `Tt` callback that reads
	 * the `fw` query parameter (the value is discarded in this build) and
	 * exposes no instance context.
	 *
	 * @param {*} ae - Unused.
	 * @returns {Array} Always an empty array.
	 */
	function Pt(ae) {
		Tt(() => {
			new URLSearchParams(window.location.search).get("fw");
		});
		return [];
	}

	/** Page component: wires `Pt` and `Ht` into the `Lt` base class via `bt`. */
	class Zt extends Lt {
		constructor(p) {
			super();
			bt(this, p, Pt, Ht, Mt, {});
		}
	}

	export { Zt as component };

Xet Storage Details

Size: 14.6 kB
Xet hash: 56b5718864464521d437f88c042dd2622c2316de702f288ec9f5a267ab929f5f

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.