Buckets:

hf-doc-build/doc / evaluate /main /en /index.html
rtrm's picture
download
raw
23.3 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Evaluate on the Hub&quot;,&quot;local&quot;:&quot;evaluate-on-the-hub&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Eval Results on the Hub&quot;,&quot;local&quot;:&quot;eval-results-on-the-hub&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Community Managed Leaderboards&quot;,&quot;local&quot;:&quot;community-managed-leaderboards&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Model Cards&quot;,&quot;local&quot;:&quot;model-cards&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Libraries and packages&quot;,&quot;local&quot;:&quot;libraries-and-packages&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;LightEval&quot;,&quot;local&quot;:&quot;lighteval&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;🤗 Evaluate&quot;,&quot;local&quot;:&quot;-evaluate&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<!-- Stylesheet, not an ES module: it must be linked with rel="stylesheet" for its rules to apply. -->
<link href="/docs/evaluate/main/en/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet">
<link rel="modulepreload" href="/docs/evaluate/main/en/_app/immutable/entry/start.138f3e02.js">
<link rel="modulepreload" href="/docs/evaluate/main/en/_app/immutable/chunks/scheduler.5f3e6389.js">
<link rel="modulepreload" href="/docs/evaluate/main/en/_app/immutable/chunks/singletons.3420a244.js">
<link rel="modulepreload" href="/docs/evaluate/main/en/_app/immutable/chunks/paths.65ada1b9.js">
<link rel="modulepreload" href="/docs/evaluate/main/en/_app/immutable/entry/app.ad076786.js">
<link rel="modulepreload" href="/docs/evaluate/main/en/_app/immutable/chunks/preload-helper.0572feea.js">
<link rel="modulepreload" href="/docs/evaluate/main/en/_app/immutable/chunks/index.62df735e.js">
<link rel="modulepreload" href="/docs/evaluate/main/en/_app/immutable/nodes/0.11bd9b89.js">
<link rel="modulepreload" href="/docs/evaluate/main/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/evaluate/main/en/_app/immutable/nodes/9.ceb44410.js">
<link rel="modulepreload" href="/docs/evaluate/main/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.e306bd5b.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Evaluate on the Hub&quot;,&quot;local&quot;:&quot;evaluate-on-the-hub&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Eval Results on the Hub&quot;,&quot;local&quot;:&quot;eval-results-on-the-hub&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Community Managed Leaderboards&quot;,&quot;local&quot;:&quot;community-managed-leaderboards&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Model Cards&quot;,&quot;local&quot;:&quot;model-cards&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Libraries and packages&quot;,&quot;local&quot;:&quot;libraries-and-packages&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;LightEval&quot;,&quot;local&quot;:&quot;lighteval&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;🤗 Evaluate&quot;,&quot;local&quot;:&quot;-evaluate&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" 
aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="evaluate-on-the-hub" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#evaluate-on-the-hub"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 
11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Evaluate on the Hub</span></h1> <p align="center" data-svelte-h="svelte-17q3qfh"><br> <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/evaluate-docs/evaluate-on-hub-banner.png" alt="Evaluate on the Hub banner" width="400"> <br></p> <p data-svelte-h="svelte-199ec9q">You can evaluate AI models on the Hub in multiple ways and this page will guide you through the different options:</p> <ul data-svelte-h="svelte-dvv1kk"><li><strong>Community Leaderboards</strong> bring together the best models for a given task or domain and make them accessible to everyone by ranking them.</li> <li><strong>Model Cards</strong> provide a comprehensive overview of a model’s capabilities from the author’s perspective.</li> <li><strong>Libraries and Packages</strong> give you the tools to evaluate your models on the Hub.</li></ul> <h2 class="relative group"><a id="eval-results-on-the-hub" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#eval-results-on-the-hub"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 
56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Eval Results on the Hub</span></h2> <p data-svelte-h="svelte-jwf194">The Hub provides a decentralized system for tracking model evaluation results. Benchmark datasets can host leaderboards, and model repos store evaluation scores that automatically appear on both the model page and the benchmark’s leaderboard.</p> <p data-svelte-h="svelte-l4y0je"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/evaluation-results/benchmark-preview.png" alt="Eval Results on the Hub"></p> <p data-svelte-h="svelte-yan17c">You can add evaluation results to any model by submitting a YAML file to the <code>.eval_results/</code> folder in the model repo. These results display with badges indicating whether they are verified, community-provided, or linked to a benchmark leaderboard.</p> <p data-svelte-h="svelte-15f0adz">For full details on adding evaluation results to models and registering benchmark datasets, see the <a href="https://huggingface.co/docs/hub/eval-results" rel="nofollow">Evaluation Results documentation</a>.</p> <h2 class="relative group"><a id="community-managed-leaderboards" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#community-managed-leaderboards"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 
11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Community Managed Leaderboards</span></h2> <p data-svelte-h="svelte-glijqz">Community leaderboards show how a model performs on a given task or domain. For example, there are leaderboards for question answering, reasoning, classification, vision, and audio. If you’re tackling a new task, you can use a leaderboard to see how a model performs on it.</p> <p data-svelte-h="svelte-1knk97f">Here are some examples of community leaderboards:</p> <table data-svelte-h="svelte-1jx506d"><thead><tr><th>Leaderboard</th> <th>Model Type</th> <th>Description</th></tr></thead> <tbody><tr><td><a href="https://huggingface.co/spaces/mteb/leaderboard" rel="nofollow">MTEB</a></td> <td>Embedding</td> <td>The Massive Text Embedding Benchmark leaderboard compares 100+ text and image embedding models across 1000+ languages. Refer to the publication of each selectable benchmark for details on metrics, languages, tasks, and task types. Anyone is welcome to add a model, add benchmarks, help improve zero-shot annotations, or propose other changes to the leaderboard.</td></tr> <tr><td><a href="https://huggingface.co/spaces/gaia-benchmark/leaderboard" rel="nofollow">GAIA</a></td> <td>Agentic</td> <td>GAIA is a benchmark which aims at evaluating next-generation LLMs (LLMs with augmented capabilities due to added tooling, efficient prompting, access to search, etc). (See <a href="https://arxiv.org/abs/2311.12983" rel="nofollow">the paper</a> for more details.)</td></tr> <tr><td><a href="https://huggingface.co/spaces/opencompass/open_vlm_leaderboard" rel="nofollow">OpenVLM Leaderboard</a></td> <td>Vision Language Models</td> <td>The OpenVLM Leaderboard evaluates 272+ Vision-Language Models (including GPT-4v, Gemini, QwenVLPlus, LLaVA) across 31 different multi-modal benchmarks using the VLMEvalKit framework. 
It focuses on open-source VLMs and publicly available API models.</td></tr> <tr><td><a href="https://huggingface.co/spaces/hf-audio/open_asr_leaderboard" rel="nofollow">Open ASR Leaderboard</a></td> <td>Audio</td> <td>The Open ASR Leaderboard ranks and evaluates speech recognition models on the Hugging Face Hub. Models are ranked based on their Average WER, from lowest to highest.</td></tr> <tr><td><a href="https://huggingface.co/spaces/llm-perf/leaderboard" rel="nofollow">LLM-Perf Leaderboard</a></td> <td>LLM Performance</td> <td>The 🤗 LLM-Perf Leaderboard 🏋️ is a leaderboard at the intersection of quality and performance. Its aim is to benchmark the performance (latency, throughput, memory &amp; energy) of Large Language Models (LLMs) with different hardware, backends and optimizations using Optimum-Benchmark.</td></tr></tbody></table> <p data-svelte-h="svelte-1f5x9ig">There are many more leaderboards on the Hub. Check out all the leaderboards via this <a href="https://huggingface.co/spaces?category=model-benchmarking" rel="nofollow">search</a> or use this <a href="https://huggingface.co/spaces/OpenEvals/find-a-leaderboard" rel="nofollow">dedicated Space</a> to find a leaderboard for your task.</p> <h2 class="relative group"><a id="model-cards" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#model-cards"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 
56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Model Cards</span></h2> <p data-svelte-h="svelte-emu6rw">Model cards provide an overview of a model’s capabilities evaluated by the community or the model’s author. They are a great way to understand a model’s capabilities and limitations.</p> <p data-svelte-h="svelte-ch4llb"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/evaluate-docs/qwen-model-card.png" alt="Qwen model card"></p> <p data-svelte-h="svelte-419yrl">Unlike leaderboards, model card evaluation scores are often created by the author, rather than by the community.</p> <blockquote class="tip" data-svelte-h="svelte-1lt94s6"><p>For information on reporting results, see details on <a href="https://huggingface.co/docs/hub/en/model-cards#evaluation-results" rel="nofollow">the Model Card Evaluation Results metadata</a>.</p></blockquote> <h2 class="relative group"><a id="libraries-and-packages" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#libraries-and-packages"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 
0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Libraries and packages</span></h2> <p data-svelte-h="svelte-1fjf3yw">There are a number of open-source libraries and packages that you can use to evaluate your models on the Hub. These are useful if you want to evaluate a custom model or performance on a custom evaluation task.</p> <h3 class="relative group"><a id="lighteval" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#lighteval"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>LightEval</span></h3> <p data-svelte-h="svelte-hp2it2">LightEval is a library for evaluating LLMs. It is designed to be comprehensive and customizable. 
Visit the LightEval <a href="https://github.com/huggingface/lighteval" rel="nofollow">repository</a> for more information.</p> <blockquote class="tip" data-svelte-h="svelte-1v5w5v8"><p>For more recent evaluation approaches that are popular on the Hugging Face Hub that are currently more actively maintained, check out <a href="https://github.com/huggingface/lighteval" rel="nofollow">LightEval</a>.</p></blockquote> <h3 class="relative group"><a id="-evaluate" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#-evaluate"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>🤗 Evaluate</span></h3> <p data-svelte-h="svelte-39j43p">A library for easily evaluating machine learning models and datasets.</p> <p data-svelte-h="svelte-1jpimtx">With a single line of code, you get access to dozens of evaluation methods for different domains (NLP, Computer Vision, Reinforcement Learning, and more!). 
Be it on your local machine or in a distributed training setup, you can evaluate your models in a consistent and reproducible way!</p> <p data-svelte-h="svelte-16w2hj7">Visit the 🤗 Evaluate <a href="https://huggingface.co/evaluate-metric" rel="nofollow">organization</a> for a full list of available metrics. Each metric has a dedicated Space with an interactive demo for how to use the metric, and a documentation card detailing the metric’s limitations and usage.</p> <div class="mt-10" data-svelte-h="svelte-uvvmxj"><div class="w-full flex flex-col space-y-4 md:space-y-0 md:grid md:grid-cols-2 md:gap-y-4 md:gap-x-5"><a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./installation"><div class="w-full text-center bg-gradient-to-br from-blue-400 to-blue-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Tutorials</div> <p class="text-gray-700">Learn the basics and become familiar with loading, computing, and saving with 🤗 Evaluate. Start here if you are using 🤗 Evaluate for the first time!</p></a> <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./choosing_a_metric"><div class="w-full text-center bg-gradient-to-br from-indigo-400 to-indigo-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">How-to guides</div> <p class="text-gray-700">Practical guides to help you achieve a specific goal. 
Take a look at these guides to learn how to use 🤗 Evaluate to solve real-world problems.</p></a> <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./types_of_evaluations"><div class="w-full text-center bg-gradient-to-br from-pink-400 to-pink-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Conceptual guides</div> <p class="text-gray-700">High-level explanations for building a better understanding of important topics such as considerations going into evaluating a model or dataset and the difference between metrics, measurements, and comparisons.</p></a> <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./package_reference/main_classes"><div class="w-full text-center bg-gradient-to-br from-purple-400 to-purple-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Reference</div> <p class="text-gray-700">Technical descriptions of how 🤗 Evaluate classes and methods work.</p></a></div></div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/evaluate/blob/main/docs/source/index.mdx" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>
<script>
{
  // Bootstrap for the pre-rendered page: publish the path settings, then load
  // the two entry modules and hand control to the client runtime.

  // No declaration keyword: this assigns to a global binding rather than a
  // block-scoped one. Presumably the imported bundles read it (confirm in paths.*.js).
  __sveltekit_95ljrq = { assets: "/docs/evaluate/main/en", base: "/docs/evaluate/main/en", env: {} };

  // document.currentScript is only set while this script runs synchronously,
  // so the mount target has to be captured before any async callback fires.
  const mountTarget = document.currentScript.parentElement;
  const data = [null, null];

  const startModule = import("/docs/evaluate/main/en/_app/immutable/entry/start.138f3e02.js");
  const appModule = import("/docs/evaluate/main/en/_app/immutable/entry/app.ad076786.js");

  Promise.all([startModule, appModule]).then((loaded) => {
    const kit = loaded[0];
    const app = loaded[1];
    // node_ids name the route nodes to start with (0 and 9 here).
    kit.start(app, mountTarget, { node_ids: [0, 9], data, form: null, error: null });
  });
}
</script>

Xet Storage Details

Size:
23.3 kB
·
Xet hash:
79629feb43b28f38a97e6a016b6fb24b23fe10b3fa575db31e430e61baadcb2d

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.