Buckets:

hf-doc-build
/

doc-dev

Files

xet

hf-doc-build/doc-dev / course /pr_1213 /es /chapter12 /1.html

rtrm

3 months ago

download

raw

27.6 kB

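	<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Open R1 para estudiantes&quot;,&quot;local&quot;:&quot;open-r1-para-estudiantes&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Lo que aprenderás&quot;,&quot;local&quot;:&quot;lo-que-aprenderás&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Por qué esto importa para estudiantes&quot;,&quot;local&quot;:&quot;por-qué-esto-importa-para-estudiantes&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Resumen del capítulo&quot;,&quot;local&quot;:&quot;resumen-del-capítulo&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;1️⃣ Introducción al aprendizaje por refuerzo y su papel en los LLM&quot;,&quot;local&quot;:&quot;1-introducción-al-aprendizaje-por-refuerzo-y-su-papel-en-los-llm&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;2️⃣ Entender el paper de DeepSeek R1&quot;,&quot;local&quot;:&quot;2-entender-el-paper-de-deepseek-r1&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;3️⃣ Implementar GRPO en TRL&quot;,&quot;local&quot;:&quot;3-implementar-grpo-en-trl&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;4️⃣ Caso práctico para alinear un modelo&quot;,&quot;local&quot;:&quot;4-caso-práctico-para-alinear-un-modelo&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Requisitos previos&quot;,&quot;local&quot;:&quot;requisitos-previos&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Cómo usar este capítulo&quot;,&quot;local&quot;:&quot;cómo-usar-este-capítulo&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">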
	<link href="/docs/course/pr_1213/es/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet">
	<link rel="modulepreload" href="/docs/course/pr_1213/es/_app/immutable/entry/start.36d27295.js">
	<link rel="modulepreload" href="/docs/course/pr_1213/es/_app/immutable/chunks/scheduler.505acc25.js">
	<link rel="modulepreload" href="/docs/course/pr_1213/es/_app/immutable/chunks/singletons.6865fa96.js">
	<link rel="modulepreload" href="/docs/course/pr_1213/es/_app/immutable/chunks/index.001f95d5.js">
	<link rel="modulepreload" href="/docs/course/pr_1213/es/_app/immutable/chunks/paths.ec28c642.js">
	<link rel="modulepreload" href="/docs/course/pr_1213/es/_app/immutable/entry/app.3b43d7f3.js">
	<link rel="modulepreload" href="/docs/course/pr_1213/es/_app/immutable/chunks/preload-helper.8c2bab6b.js">
	<link rel="modulepreload" href="/docs/course/pr_1213/es/_app/immutable/chunks/index.e22abd30.js">
	<link rel="modulepreload" href="/docs/course/pr_1213/es/_app/immutable/nodes/0.e2c0ea78.js">
	<link rel="modulepreload" href="/docs/course/pr_1213/es/_app/immutable/chunks/each.e59479a4.js">
	<link rel="modulepreload" href="/docs/course/pr_1213/es/_app/immutable/nodes/28.fe7d6f13.js">
	<link rel="modulepreload" href="/docs/course/pr_1213/es/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.a144e953.js">
	<link rel="modulepreload" href="/docs/course/pr_1213/es/_app/immutable/chunks/CodeBlock.f6688f67.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Open R1 para estudiantes&quot;,&quot;local&quot;:&quot;open-r1-para-estudiantes&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Lo que aprenderás&quot;,&quot;local&quot;:&quot;lo-que-aprenderás&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Por qué esto importa para estudiantes&quot;,&quot;local&quot;:&quot;por-qué-esto-importa-para-estudiantes&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Resumen del capítulo&quot;,&quot;local&quot;:&quot;resumen-del-capítulo&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;1️⃣ Introducción al aprendizaje por refuerzo y su papel en los LLM&quot;,&quot;local&quot;:&quot;1-introducción-al-aprendizaje-por-refuerzo-y-su-papel-en-los-llm&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;2️⃣ Entender el paper de DeepSeek R1&quot;,&quot;local&quot;:&quot;2-entender-el-paper-de-deepseek-r1&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;3️⃣ Implementar GRPO en TRL&quot;,&quot;local&quot;:&quot;3-implementar-grpo-en-trl&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;4️⃣ Caso práctico para alinear un modelo&quot;,&quot;local&quot;:&quot;4-caso-práctico-para-alinear-un-modelo&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Requisitos previos&quot;,&quot;local&quot;:&quot;requisitos-previos&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Cómo usar este capítulo&quot;,&quot;local&quot;:&quot;cómo-usar-este-capítulo&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg 
class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="open-r1-para-estudiantes" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#open-r1-para-estudiantes"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 
0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Open R1 para estudiantes</span></h1> <p data-svelte-h="svelte-1ozg3v9">Bienvenido a un recorrido por el mundo de la IA de código abierto con aprendizaje por refuerzo. Este capítulo está pensado para ayudarte a entender el aprendizaje por refuerzo y su papel en los LLM.</p> <p data-svelte-h="svelte-1ga1i96">También exploraremos <a href="https://github.com/huggingface/open-r1" rel="nofollow">Open R1</a>, un proyecto comunitario que busca hacer más accesible la IA avanzada. En concreto, este curso pretende ayudar a estudiantes y personas que están aprendiendo a usar y contribuir a <a href="https://github.com/huggingface/open-r1" rel="nofollow">Open R1</a>.</p> <h2 class="relative group"><a id="lo-que-aprenderás" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#lo-que-aprenderás"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Lo que aprenderás</span></h2> <p 
data-svelte-h="svelte-1iq6bzf">En este capítulo desglosaremos conceptos complejos en partes manejables y veremos cómo puedes formar parte de este proyecto para conseguir que los LLM razonen sobre problemas complejos.</p> <p data-svelte-h="svelte-1ip4gfc">Los LLM han mostrado un rendimiento excelente en muchas tareas generativas. Sin embargo, hasta hace poco les costaba resolver problemas complejos que requieren razonamiento. Por ejemplo, tienen más dificultades con acertijos o problemas matemáticos que exigen varios pasos.</p> <p data-svelte-h="svelte-xaha2d">Open R1 es un proyecto que busca hacer que los LLM razonen sobre problemas complejos. Para ello usa aprendizaje por refuerzo y anima a los LLM a “pensar” y razonar.</p> <p data-svelte-h="svelte-wz4v3x">En términos sencillos, el modelo se entrena para generar pensamientos además de respuestas, y para estructurar esos pensamientos y respuestas de forma que el usuario pueda manejarlos por separado.</p> <p data-svelte-h="svelte-pyrm32">Veamos un ejemplo. 
Si tuviéramos que resolver el siguiente problema, podríamos pensar así:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->Problem: <span class="hljs-string">"I have 3 apples and 2 oranges. How many pieces of fruit do I have in total?"</span>

	Thought: <span class="hljs-string">"I need to add the number of apples and oranges to get the total number of pieces of fruit."</span>

	Answer: <span class="hljs-string">"5"</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1x33itm">Luego podemos estructurar ese pensamiento y esa respuesta para que el usuario los gestione por separado. En tareas de razonamiento, se puede entrenar a los LLM para generar pensamientos y respuestas con el siguiente formato:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->&lt;think&gt;I need to add the number of apples and oranges to get the total number of pieces of fruit.&lt;/think&gt;
	5<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1jkrljq">Como usuarios, después podemos extraer el pensamiento y la respuesta de la salida del modelo y usarlos para resolver el problema.</p> <h2 class="relative group"><a id="por-qué-esto-importa-para-estudiantes" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#por-qué-esto-importa-para-estudiantes"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Por qué esto importa para estudiantes</span></h2> <p data-svelte-h="svelte-jennwe">Como estudiante, entender Open R1 y el papel del aprendizaje por refuerzo en los LLM es valioso porque:</p> <ul data-svelte-h="svelte-1svanzn"><li>Muestra cómo se desarrolla la IA de vanguardia.</li> <li>Te da oportunidades prácticas para aprender y contribuir.</li> <li>Te ayuda a entender hacia dónde se dirige la tecnología de IA.</li> <li>Abre puertas a futuras oportunidades profesionales en IA.</li></ul> <h2 class="relative group"><a id="resumen-del-capítulo" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" 
href="#resumen-del-capítulo"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Resumen del capítulo</span></h2> <p data-svelte-h="svelte-1lvgmb4">Este capítulo se divide en cuatro secciones, cada una centrada en un aspecto distinto de Open R1:</p> <h3 class="relative group"><a id="1-introducción-al-aprendizaje-por-refuerzo-y-su-papel-en-los-llm" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#1-introducción-al-aprendizaje-por-refuerzo-y-su-papel-en-los-llm"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 
11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>1️⃣ Introducción al aprendizaje por refuerzo y su papel en los LLM</span></h3> <p data-svelte-h="svelte-jppug6">Exploraremos los conceptos básicos de Reinforcement Learning (RL, aprendizaje por refuerzo) y su papel en el entrenamiento de LLM.</p> <ul data-svelte-h="svelte-hkjj56"><li>¿Qué es RL?</li> <li>¿Cómo se usa RL en los LLM?</li> <li>¿Qué es DeepSeek R1?</li> <li>¿Cuáles son las innovaciones clave de DeepSeek R1?</li></ul> <h3 class="relative group"><a id="2-entender-el-paper-de-deepseek-r1" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#2-entender-el-paper-de-deepseek-r1"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>2️⃣ Entender el paper de DeepSeek R1</span></h3> <p data-svelte-h="svelte-ehdqkp">Desglosaremos el paper de investigación que inspiró a <a href="https://huggingface.co/open-r1" rel="nofollow">Open R1</a>:</p> <ul data-svelte-h="svelte-1pvm8xm"><li>Innovaciones y avances clave.</li> <li>El proceso de entrenamiento y la arquitectura.</li> <li>Los resultados y su importancia.</li></ul> <h3 
class="relative group"><a id="3-implementar-grpo-en-trl" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#3-implementar-grpo-en-trl"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>3️⃣ Implementar GRPO en TRL</span></h3> <p data-svelte-h="svelte-rulp29">Pasaremos a la práctica con ejemplos de código:</p> <ul data-svelte-h="svelte-qcs3ft"><li>Cómo usar la librería Transformer Reinforcement Learning (TRL).</li> <li>Configurar el entrenamiento con GRPO.</li></ul> <h3 class="relative group"><a id="4-caso-práctico-para-alinear-un-modelo" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#4-caso-práctico-para-alinear-un-modelo"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 
1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>4️⃣ Caso práctico para alinear un modelo</span></h3> <p data-svelte-h="svelte-fq631f">Veremos un caso práctico para alinear un modelo usando Open R1.</p> <ul data-svelte-h="svelte-1vz7py5"><li>Cómo entrenar un modelo usando GRPO en TRL.</li> <li>Compartir tu modelo en el <a href="https://huggingface.co/models" rel="nofollow">Hugging Face Hub</a>.</li></ul> <h2 class="relative group"><a id="requisitos-previos" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#requisitos-previos"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Requisitos previos</span></h2> <p data-svelte-h="svelte-5vwp88">Para aprovechar mejor este capítulo, ayuda tener:</p> <ul data-svelte-h="svelte-1m7k029"><li>Buen dominio de programación en Python.</li> <li>Familiaridad con 
conceptos de machine learning.</li> <li>Interés por la IA y los modelos de lenguaje.</li></ul> <p data-svelte-h="svelte-1rwk7to">No te preocupes si no tienes todo eso todavía: explicaremos los conceptos clave a medida que avancemos.</p> <blockquote class="tip" data-svelte-h="svelte-1ibcq5j"><p>Si no tienes todos los requisitos previos, revisa este <a href="/course/chapter1/1">curso</a> desde las unidades 1 hasta la 11.</p></blockquote> <h2 class="relative group"><a id="cómo-usar-este-capítulo" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#cómo-usar-este-capítulo"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Cómo usar este capítulo</span></h2> <ol data-svelte-h="svelte-d9rola"><li><strong>Lee en orden</strong>: Las secciones se apoyan unas en otras, así que conviene seguir la secuencia.</li> <li><strong>Toma notas</strong>: Anota conceptos clave y preguntas, y compártelos con la comunidad en <a href="https://discord.gg/UrrTSsSyjb" rel="nofollow">Discord</a>.</li> <li><strong>Prueba el código</strong>: Cuando lleguemos a los ejemplos prácticos, ejecútalos por tu cuenta.</li> <li><strong>Únete a la comunidad</strong>: Usa 
los recursos que proporcionamos para conectar con otras personas que están aprendiendo.</li></ol> <p data-svelte-h="svelte-18olqxa">Empecemos a explorar Open R1 y a ver cómo puedes participar en hacer que la IA sea más accesible para todo el mundo.</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/course/blob/main/chapters/es/chapter12/1.mdx" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>

	<script>
	{
		// Page-wide settings published on an implicit global: there is no
		// declaration keyword, so the assignment escapes this block.
		// Presumably read by the bundles imported below, so the generated
		// name must stay exactly as it is.
		__sveltekit_1nznq34 = {
			assets: "/docs/course/pr_1213/es",
			base: "/docs/course/pr_1213/es",
			env: {}
		};

		// The element that contains this <script> tag; it is handed to start().
		const mountPoint = document.currentScript.parentElement;

		// Two null entries, matching the two ids in node_ids below
		// (presumably one data slot per node; verify against the kit).
		const nodeData = [null, null];

		// Both entry modules are requested up front so they load in parallel.
		const entryModules = [
			import("/docs/course/pr_1213/es/_app/immutable/entry/start.36d27295.js"),
			import("/docs/course/pr_1213/es/_app/immutable/entry/app.3b43d7f3.js")
		];

		Promise.all(entryModules).then((loaded) => {
			const kit = loaded[0];
			const app = loaded[1];
			const startOptions = {
				node_ids: [0, 28],
				data: nodeData,
				form: null,
				error: null
			};
			kit.start(app, mountPoint, startOptions);
		});
	}
	</script>

Xet Storage Details

Size: 27.6 kB
Xet hash: 550218d3d2d4ad41687774482a3bd684b22e808e7df7e7a536bb71c9d8de944d

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.