Buckets:

HuggingFaceDocBuilder's picture
download
raw
31.9 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Evaluación Personalizada en un Dominio&quot;,&quot;local&quot;:&quot;evaluación-personalizada-en-un-dominio&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Diseñando Tu Estrategia de Evaluación&quot;,&quot;local&quot;:&quot;diseñando-tu-estrategia-de-evaluación&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Implementación con LightEval&quot;,&quot;local&quot;:&quot;implementación-con-lighteval&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Métricas Personalizadas&quot;,&quot;local&quot;:&quot;métricas-personalizadas&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Creación de Conjuntos de Datos&quot;,&quot;local&quot;:&quot;creación-de-conjuntos-de-datos&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Mejores Prácticas&quot;,&quot;local&quot;:&quot;mejores-prácticas&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Referencias&quot;,&quot;local&quot;:&quot;referencias&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link rel="stylesheet" href="/docs/smol-course/pr_296/es/_app/immutable/assets/0.e3b0c442.css">
<link rel="modulepreload" href="/docs/smol-course/pr_296/es/_app/immutable/entry/start.cd6905d6.js">
<link rel="modulepreload" href="/docs/smol-course/pr_296/es/_app/immutable/chunks/scheduler.f38f3a0e.js">
<link rel="modulepreload" href="/docs/smol-course/pr_296/es/_app/immutable/chunks/singletons.114ea3ca.js">
<link rel="modulepreload" href="/docs/smol-course/pr_296/es/_app/immutable/chunks/paths.88e2d765.js">
<link rel="modulepreload" href="/docs/smol-course/pr_296/es/_app/immutable/entry/app.d412b8f0.js">
<link rel="modulepreload" href="/docs/smol-course/pr_296/es/_app/immutable/chunks/preload-helper.6811d58e.js">
<link rel="modulepreload" href="/docs/smol-course/pr_296/es/_app/immutable/chunks/index.a03143fc.js">
<link rel="modulepreload" href="/docs/smol-course/pr_296/es/_app/immutable/nodes/0.c409a4e3.js">
<link rel="modulepreload" href="/docs/smol-course/pr_296/es/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/smol-course/pr_296/es/_app/immutable/nodes/15.68d1c497.js">
<link rel="modulepreload" href="/docs/smol-course/pr_296/es/_app/immutable/chunks/CopyLLMTxtMenu.1ddc8e76.js">
<link rel="modulepreload" href="/docs/smol-course/pr_296/es/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.36c7bd5b.js">
<link rel="modulepreload" href="/docs/smol-course/pr_296/es/_app/immutable/chunks/CodeBlock.d5e3cacc.js">
<link rel="modulepreload" href="/docs/smol-course/pr_296/es/_app/immutable/chunks/CourseFloatingBanner.9d00122a.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Evaluación Personalizada en un Dominio&quot;,&quot;local&quot;:&quot;evaluación-personalizada-en-un-dominio&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Diseñando Tu Estrategia de Evaluación&quot;,&quot;local&quot;:&quot;diseñando-tu-estrategia-de-evaluación&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Implementación con LightEval&quot;,&quot;local&quot;:&quot;implementación-con-lighteval&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Métricas Personalizadas&quot;,&quot;local&quot;:&quot;métricas-personalizadas&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Creación de Conjuntos de Datos&quot;,&quot;local&quot;:&quot;creación-de-conjuntos-de-datos&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Mejores Prácticas&quot;,&quot;local&quot;:&quot;mejores-prácticas&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Referencias&quot;,&quot;local&quot;:&quot;referencias&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 
hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="evaluación-personalizada-en-un-dominio" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#evaluación-personalizada-en-un-dominio"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 
0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Evaluación Personalizada en un Dominio</span></h1> <div class="flex space-x-1 absolute z-10 right-0 top-0" style=""><a href="https://discuss.huggingface.co/t/chapter-10-questions" target="_blank"><img alt="Ask a Question" class="!m-0" src="https://img.shields.io/badge/Ask%20a%20question-ffcb4c.svg?logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgLTEgMTA0IDEwNiI+PGRlZnM+PHN0eWxlPi5jbHMtMXtmaWxsOiMyMzFmMjA7fS5jbHMtMntmaWxsOiNmZmY5YWU7fS5jbHMtM3tmaWxsOiMwMGFlZWY7fS5jbHMtNHtmaWxsOiMwMGE5NGY7fS5jbHMtNXtmaWxsOiNmMTVkMjI7fS5jbHMtNntmaWxsOiNlMzFiMjM7fTwvc3R5bGU+PC9kZWZzPjx0aXRsZT5EaXNjb3Vyc2VfbG9nbzwvdGl0bGU+PGcgaWQ9IkxheWVyXzIiPjxnIGlkPSJMYXllcl8zIj48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik01MS44NywwQzIzLjcxLDAsMCwyMi44MywwLDUxYzAsLjkxLDAsNTIuODEsMCw1Mi44MWw1MS44Ni0uMDVjMjguMTYsMCw1MS0yMy43MSw1MS01MS44N1M4MCwwLDUxLjg3LDBaIi8+PHBhdGggY2xhc3M9ImNscy0yIiBkPSJNNTIuMzcsMTkuNzRBMzEuNjIsMzEuNjIsMCwwLDAsMjQuNTgsNjYuNDFsLTUuNzIsMTguNEwzOS40LDgwLjE3YTMxLjYxLDMxLjYxLDAsMSwwLDEzLTYwLjQzWiIvPjxwYXRoIGNsYXNzPSJjbHMtMyIgZD0iTTc3LjQ1LDMyLjEyYTMxLjYsMzEuNiwwLDAsMS0zOC4wNSw0OEwxOC44Niw4NC44MmwyMC45MS0yLjQ3QTMxLjYsMzEuNiwwLDAsMCw3Ny40NSwzMi4xMloiLz48cGF0aCBjbGFzcz0iY2xzLTQiIGQ9Ik03MS42MywyNi4yOUEzMS42LDMxLjYsMCwwLDEsMzguOCw3OEwxOC44Niw4NC44MiwzOS40LDgwLjE3QTMxLjYsMzEuNiwwLDAsMCw3MS42MywyNi4yOVoiLz48cGF0aCBjbGFzcz0iY2xzLTUiIGQ9Ik0yNi40Nyw2Ny4xMWEzMS42MSwzMS42MSwwLDAsMSw1MS0zNUEzMS42MSwzMS42MSwwLDAsMCwyNC41OCw2Ni40MWwtNS43MiwxOC40WiIvPjxwYXRoIGNsYXNzPSJjbHMtNiIgZD0iTTI0LjU4LDY2LjQxQTMxLjYxLDMxLjYxLDAsMCwxLDcxLjYzLDI2LjI5YTMxLjYxLDMxLjYxLDAsMCwwLTQ5LDM5LjYzbC0zLjc2LDE4LjlaIi8+PC9nPjwvZz48L3N2Zz4="></a> <a 
href="https://colab.research.google.com/github/huggingface/smol-course/blob/main/notebooks/es/4_evaluation/lighteval_evaluate_and_analyse_your_LLM.ipynb" target="_blank"><img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"></a> </div>
<p>Aunque los &quot;benchmarks&quot; estándares proporcionan conocimiento relevante, muchas aplicaciones requieren enfoques de evaluación especializados que se adapten a dominios específicos o a casos de uso particulares. Esta guía te ayudará a crear flujos de evaluación personalizados que evalúen con precisión el rendimiento de tu modelo en tu dominio objetivo.</p>
<h2 class="relative group"><a id="diseñando-tu-estrategia-de-evaluación" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#diseñando-tu-estrategia-de-evaluación"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Diseñando Tu Estrategia de Evaluación</span></h2> <p data-svelte-h="svelte-1uv1366">Una estrategia de evaluación personalizada exitosa comienza con objetivos claros. Es fundamental considerar qué capacidades específicas necesita demostrar tu modelo en tu dominio. Esto podría incluir conocimientos técnicos, patrones de razonamiento o formatos específicos del dominio. Documenta estos requisitos cuidadosamente; ellos guiarán tanto el diseño de tus tareas como la selección de métricas.</p> <p data-svelte-h="svelte-1ezebxq">Tu evaluación debe probar tanto casos de uso estándar como casos límite. Por ejemplo, en un dominio médico, podrías evaluar tanto escenarios comunes de diagnóstico como condiciones raras. 
En aplicaciones financieras, podrías probar tanto transacciones rutinarias como casos complejos que involucren múltiples monedas o condiciones especiales.</p> <h2 class="relative group"><a id="implementación-con-lighteval" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#implementación-con-lighteval"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Implementación con LightEval</span></h2> <p data-svelte-h="svelte-1t9nqft">LightEval proporciona un marco flexible para implementar evaluaciones personalizadas. 
Así es como puedes crear una tarea personalizada:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> lighteval.tasks <span class="hljs-keyword">import</span> Task, Doc
<span class="hljs-keyword">from</span> lighteval.metrics <span class="hljs-keyword">import</span> SampleLevelMetric, MetricCategory, MetricUseCase
<span class="hljs-keyword">class</span> <span class="hljs-title class_">CustomEvalTask</span>(<span class="hljs-title class_ inherited__">Task</span>):
    <span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self</span>):
        <span class="hljs-built_in">super</span>().__init__(
            name=<span class="hljs-string">&quot;custom_task&quot;</span>,
            version=<span class="hljs-string">&quot;0.0.1&quot;</span>,
            metrics=[<span class="hljs-string">&quot;accuracy&quot;</span>, <span class="hljs-string">&quot;f1&quot;</span>], <span class="hljs-comment"># Tus métricas elegidas</span>
            description=<span class="hljs-string">&quot;Descripción de tu tarea de evaluación personalizada&quot;</span>
        )
    <span class="hljs-keyword">def</span> <span class="hljs-title function_">get_prompt</span>(<span class="hljs-params">self, sample</span>):
        <span class="hljs-comment"># Formatea tu entrada en un &quot;prompt&quot;</span>
        <span class="hljs-keyword">return</span> <span class="hljs-string">f&quot;Question: <span class="hljs-subst">{sample[<span class="hljs-string">&#x27;question&#x27;</span>]}</span>\nAnswer:&quot;</span>
    <span class="hljs-keyword">def</span> <span class="hljs-title function_">process_response</span>(<span class="hljs-params">self, response, ref</span>):
        <span class="hljs-comment"># Procesa el &quot;output&quot; del modelo y compáralo con la referencia</span>
        <span class="hljs-keyword">return</span> response.strip() == ref.strip()<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="métricas-personalizadas" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#métricas-personalizadas"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Métricas Personalizadas</span></h2> <p data-svelte-h="svelte-1lef47m">Las tareas específicas de dominio a menudo requieren métricas especializadas. 
LightEval proporciona un marco flexible para crear métricas personalizadas que capturen aspectos relevantes del rendimiento del dominio:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> aenum <span class="hljs-keyword">import</span> extend_enum
<span class="hljs-keyword">from</span> lighteval.metrics <span class="hljs-keyword">import</span> Metrics, SampleLevelMetric, SampleLevelMetricGrouping
<span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
<span class="hljs-comment"># Definir una función de métrica a nivel de muestra</span>
<span class="hljs-keyword">def</span> <span class="hljs-title function_">custom_metric</span>(<span class="hljs-params">predictions: <span class="hljs-built_in">list</span>[<span class="hljs-built_in">str</span>], formatted_doc: Doc, **kwargs</span>) -&gt; <span class="hljs-built_in">dict</span>:
    <span class="hljs-string">&quot;&quot;&quot;Ejemplo de métrica que genera múltiples puntuaciones por muestra&quot;&quot;&quot;</span>
    response = predictions[<span class="hljs-number">0</span>]
    <span class="hljs-keyword">return</span> {
        <span class="hljs-string">&quot;accuracy&quot;</span>: response == formatted_doc.choices[formatted_doc.gold_index],
        <span class="hljs-string">&quot;length_match&quot;</span>: <span class="hljs-built_in">len</span>(response) == <span class="hljs-built_in">len</span>(formatted_doc.reference)
    }
<span class="hljs-comment"># Crear una métrica que genere múltiples valores por muestra</span>
custom_metric_group = SampleLevelMetricGrouping(
    metric_name=[<span class="hljs-string">&quot;accuracy&quot;</span>, <span class="hljs-string">&quot;length_match&quot;</span>], <span class="hljs-comment"># Nombres de submétricas</span>
    higher_is_better={ <span class="hljs-comment"># define si valores más altos son mejores para cada métrica</span>
        <span class="hljs-string">&quot;accuracy&quot;</span>: <span class="hljs-literal">True</span>,
        <span class="hljs-string">&quot;length_match&quot;</span>: <span class="hljs-literal">True</span>
    },
    category=MetricCategory.CUSTOM,
    use_case=MetricUseCase.SCORING,
    sample_level_fn=custom_metric,
    corpus_level_fn={ <span class="hljs-comment"># define cómo agregar cada métrica</span>
        <span class="hljs-string">&quot;accuracy&quot;</span>: np.mean,
        <span class="hljs-string">&quot;length_match&quot;</span>: np.mean
    }
)
<span class="hljs-comment"># Registrar la métrica en LightEval</span>
extend_enum(Metrics, <span class="hljs-string">&quot;custom_metric_name&quot;</span>, custom_metric_group)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-90rrea">Para casos más simples donde solo necesitas un valor por muestra:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">simple_metric</span>(<span class="hljs-params">predictions: <span class="hljs-built_in">list</span>[<span class="hljs-built_in">str</span>], formatted_doc: Doc, **kwargs</span>) -&gt; <span class="hljs-built_in">bool</span>:
    <span class="hljs-string">&quot;&quot;&quot;Ejemplo de métrica que genera una única puntuación por muestra&quot;&quot;&quot;</span>
    response = predictions[<span class="hljs-number">0</span>]
    <span class="hljs-keyword">return</span> response == formatted_doc.choices[formatted_doc.gold_index]
simple_metric_obj = SampleLevelMetric(
    metric_name=<span class="hljs-string">&quot;simple_accuracy&quot;</span>,
    higher_is_better=<span class="hljs-literal">True</span>,
    category=MetricCategory.CUSTOM,
    use_case=MetricUseCase.SCORING,
    sample_level_fn=simple_metric,
    corpus_level_fn=np.mean <span class="hljs-comment"># define cómo agregar resultados entre muestras</span>
)
extend_enum(Metrics, <span class="hljs-string">&quot;simple_metric&quot;</span>, simple_metric_obj)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-m8etzu">Una vez que definas tus métricas personalizadas, puedes usarlas luego en tus tareas de evaluación haciendo referencia a ellas en la configuración de la tarea. Las métricas se calcularán automáticamente en todas las muestras y se agregarán según las funciones que especifiques.</p> <p data-svelte-h="svelte-ftiqlb">Para métricas más complejas, considera:</p> <ul data-svelte-h="svelte-1tdkzgr"><li>Usar metadatos en tus documentos formateados para ponderar o ajustar puntuaciones</li> <li>Implementar funciones de agregación personalizadas para estadísticas a nivel de corpus</li> <li>Agregar verificaciones de validación para las entradas de tus métricas</li> <li>Documentar casos límite y comportamientos esperados</li></ul> <p data-svelte-h="svelte-12qxjwn">Para un ejemplo completo de métricas personalizadas en acción, consulta nuestro <a href="./project/README">proyecto de evaluación de dominio</a>.</p> <h2 class="relative group"><a id="creación-de-conjuntos-de-datos" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#creación-de-conjuntos-de-datos"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 
28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Creación de Conjuntos de Datos</span></h2> <p data-svelte-h="svelte-fbvi9q">La evaluación de alta calidad requiere conjuntos de datos cuidadosamente curados. Considera estos enfoques para la creación de conjuntos de datos:</p> <ol data-svelte-h="svelte-lk7ogq"><li><p><strong>Anotación por Expertos</strong>: Trabaja con expertos del dominio para crear y validar ejemplos de evaluación. Herramientas como <a href="https://github.com/argilla-io/argilla" rel="nofollow">Argilla</a> hacen este proceso más eficiente.</p></li> <li><p><strong>Datos del Mundo Real</strong>: Recopila y anonimiza datos de uso real, asegurándote de que representen escenarios reales de despliegue del modelo.</p></li> <li><p><strong>Generación Sintética</strong>: Usa LLMs para generar ejemplos iniciales y luego permite que expertos los validen y refinen.</p></li></ol> <h2 class="relative group"><a id="mejores-prácticas" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#mejores-prácticas"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> 
<span>Mejores Prácticas</span></h2> <ul data-svelte-h="svelte-obc85i"><li>Documenta tu metodología de evaluación a fondo, incluidos los supuestos o las limitaciones</li> <li>Incluye casos de prueba diversos que cubran diferentes aspectos de tu dominio</li> <li>Considera tanto métricas automatizadas como evaluaciones humanas donde sea apropiado</li> <li>Controla las versiones de tus conjuntos de datos y código de evaluación</li> <li>Actualiza regularmente tu conjunto de evaluaciones a medida que descubras nuevos casos límite o requisitos</li></ul> <h2 class="relative group"><a id="referencias" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#referencias"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Referencias</span></h2> <ul data-svelte-h="svelte-58u1ku"><li><a href="https://github.com/huggingface/lighteval/wiki/Adding-a-Custom-Task" rel="nofollow">Guía de Tareas Personalizadas en LightEval</a></li> <li><a href="https://github.com/huggingface/lighteval/wiki/Adding-a-New-Metric" rel="nofollow">Métricas Personalizadas en LightEval</a></li> <li><a href="https://docs.argilla.io" rel="nofollow">Documentación de Argilla</a> para 
anotación de conjuntos de datos</li> <li><a href="https://github.com/huggingface/evaluation-guidebook" rel="nofollow">Guía de Evaluación</a> para principios generales de evaluación</li></ul> <h1 class="relative group"><a id="próximos-pasos" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#próximos-pasos"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Próximos Pasos</span></h1> <p data-svelte-h="svelte-1tpinaa">⏩ Para un ejemplo completo de cómo implementar estos conceptos, consulta nuestro <a href="./project/README">proyecto de evaluación de dominio</a>.</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/smol-course/blob/main/units/es/unit4/2.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path 
d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>
<script>
{
	// Bare block: keeps the `const` bindings below out of the global scope,
	// while the un-declared assignment deliberately creates a global.
	__sveltekit_fiigkd = {
		assets: "/docs/smol-course/pr_296/es",
		base: "/docs/smol-course/pr_296/es",
		env: {}
	};

	// Must be read synchronously: `document.currentScript` is only set while
	// this <script> element is executing.
	const mountPoint = document.currentScript.parentElement;

	// One entry per id in `node_ids` below; both are null on this page.
	const nodeData = [null, null];

	// Request both entry modules at once and wait for the pair.
	const entryModules = [
		import("/docs/smol-course/pr_296/es/_app/immutable/entry/start.cd6905d6.js"),
		import("/docs/smol-course/pr_296/es/_app/immutable/entry/app.d412b8f0.js")
	];

	Promise.all(entryModules).then((loaded) => {
		const kit = loaded[0];
		const app = loaded[1];
		const startOptions = {
			node_ids: [0, 15],
			data: nodeData,
			form: null,
			error: null
		};
		// Hand the app module, the mount element and the options to the kit entry.
		kit.start(app, mountPoint, startOptions);
	});
}
</script>

Xet Storage Details

Size:
31.9 kB
·
Xet hash:
50591810047eeee1bcc91469d008a578be1aa2a7d21742b12931940ec3beee32

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.