Buckets:
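| <meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Saving and Reading Results&quot;,&quot;local&quot;:&quot;saving-and-reading-results&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Saving Results Locally&quot;,&quot;local&quot;:&quot;saving-results-locally&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Pushing Results to the Hugging Face Hub&quot;,&quot;local&quot;:&quot;pushing-results-to-the-hugging-face-hub&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Pushing Results to TensorBoard&quot;,&quot;local&quot;:&quot;pushing-results-to-tensorboard&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Pushing Results to Weights &amp; Biases or Trackio&quot;,&quot;local&quot;:&quot;pushing-results-to-weights--biases-or-trackio&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;How to Load and Investigate Details&quot;,&quot;local&quot;:&quot;how-to-load-and-investigate-details&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Loading from Local Detail Files&quot;,&quot;local&quot;:&quot;loading-from-local-detail-files&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Loading from the Hugging Face Hub&quot;,&quot;local&quot;:&quot;loading-from-the-hugging-face-hub&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Detail File Structure&quot;,&quot;local&quot;:&quot;detail-file-structure&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;EvaluationTracker Configuration&quot;,&quot;local&quot;:&quot;evaluationtracker-configuration&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Basic Configuration&quot;,&quot;local&quot;:&quot;basic-configuration&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Advanced Configuration&quot;,&quot;local&quot;:&quot;advanced-configuration&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Key Parameters&quot;,&quot;local&quot;:&quot;key-parameters&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Result File Structure&quot;,&quot;local&quot;:&quot;result-file-structure&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;General Configuration&quot;,&quot;local&quot;:&quot;general-configuration&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Task-Specific Information&quot;,&quot;local&quot;:&quot;task-specific-information&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Results&quot;,&quot;local&quot;:&quot;results&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Example of a Result File&quot;,&quot;local&quot;:&quot;example-of-a-result-file&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"> | |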
| <link href="/docs/lighteval/pr_1221/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload"> | |
| <link rel="modulepreload" href="/docs/lighteval/pr_1221/en/_app/immutable/entry/start.86845e89.js"> | |
| <link rel="modulepreload" href="/docs/lighteval/pr_1221/en/_app/immutable/chunks/scheduler.3a17fb72.js"> | |
| <link rel="modulepreload" href="/docs/lighteval/pr_1221/en/_app/immutable/chunks/singletons.9670729e.js"> | |
| <link rel="modulepreload" href="/docs/lighteval/pr_1221/en/_app/immutable/chunks/paths.dc4e91fb.js"> | |
| <link rel="modulepreload" href="/docs/lighteval/pr_1221/en/_app/immutable/entry/app.2df9be51.js"> | |
| <link rel="modulepreload" href="/docs/lighteval/pr_1221/en/_app/immutable/chunks/preload-helper.3f9be9c8.js"> | |
| <link rel="modulepreload" href="/docs/lighteval/pr_1221/en/_app/immutable/chunks/index.093f8863.js"> | |
| <link rel="modulepreload" href="/docs/lighteval/pr_1221/en/_app/immutable/nodes/0.15740f19.js"> | |
| <link rel="modulepreload" href="/docs/lighteval/pr_1221/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/lighteval/pr_1221/en/_app/immutable/nodes/22.442929a5.js"> | |
| <link rel="modulepreload" href="/docs/lighteval/pr_1221/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.5e7ea2bd.js"> | |
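| <link rel="modulepreload" href="/docs/lighteval/pr_1221/en/_app/immutable/chunks/CodeBlock.09235327.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Saving and Reading Results&quot;,&quot;local&quot;:&quot;saving-and-reading-results&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Saving Results Locally&quot;,&quot;local&quot;:&quot;saving-results-locally&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Pushing Results to the Hugging Face Hub&quot;,&quot;local&quot;:&quot;pushing-results-to-the-hugging-face-hub&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Pushing Results to TensorBoard&quot;,&quot;local&quot;:&quot;pushing-results-to-tensorboard&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Pushing Results to Weights &amp; Biases or Trackio&quot;,&quot;local&quot;:&quot;pushing-results-to-weights--biases-or-trackio&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;How to Load and Investigate Details&quot;,&quot;local&quot;:&quot;how-to-load-and-investigate-details&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Loading from Local Detail Files&quot;,&quot;local&quot;:&quot;loading-from-local-detail-files&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Loading from the Hugging Face Hub&quot;,&quot;local&quot;:&quot;loading-from-the-hugging-face-hub&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Detail File Structure&quot;,&quot;local&quot;:&quot;detail-file-structure&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;EvaluationTracker Configuration&quot;,&quot;local&quot;:&quot;evaluationtracker-configuration&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Basic Configuration&quot;,&quot;local&quot;:&quot;basic-configuration&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Advanced Configuration&quot;,&quot;local&quot;:&quot;advanced-configuration&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Key Parameters&quot;,&quot;local&quot;:&quot;key-parameters&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Result File Structure&quot;,&quot;local&quot;:&quot;result-file-structure&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;General Configuration&quot;,&quot;local&quot;:&quot;general-configuration&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Task-Specific Information&quot;,&quot;local&quot;:&quot;task-specific-information&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Results&quot;,&quot;local&quot;:&quot;results&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Example of a Result File&quot;,&quot;local&quot;:&quot;example-of-a-result-file&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END 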
--> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="saving-and-reading-results" class="header-link block 
pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#saving-and-reading-results"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Saving and Reading Results</span></h1> <p data-svelte-h="svelte-9aqcbu">Lighteval provides comprehensive logging and result management through the <code>EvaluationTracker</code> class. 
This system allows you to save results locally and optionally push them to various platforms for collaboration and analysis.</p> <h2 class="relative group"><a id="saving-results-locally" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#saving-results-locally"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Saving Results Locally</span></h2> <p data-svelte-h="svelte-nzo4lv">Lighteval automatically saves results and evaluation details in the | |
| directory specified with the <code>--output-dir</code> option. The results are saved in | |
| <code>{output_dir}/results/{model_name}/results_{timestamp}.json</code>. <a href="#example-of-a-result-file">Here is an | |
| example of a result file</a>. The output path can be | |
| any <a href="https://filesystem-spec.readthedocs.io/en/latest/index.html" rel="nofollow">fsspec</a> | |
| compliant path (local, S3, Hugging Face Hub, Google Drive, FTP, etc.).</p> <p data-svelte-h="svelte-103anxe">To save detailed evaluation information, you can use the <code>--save-details</code> | |
| option. The details are saved in Parquet files at | |
| <code>{output_dir}/details/{model_name}/{timestamp}/details_{task}_{timestamp}.parquet</code>.</p> <p data-svelte-h="svelte-h64e6b">If you want results to be saved in a custom path structure, you can set the <code>--results-path-template</code> option. | |
| This allows you to specify a string template for the path. The template must contain the following | |
| variables: <code>output_dir</code>, <code>model</code>, <code>org</code>. For example: | |
| <code>{output_dir}/{org}_{model}</code>. The template will be used to create the path for the results file.</p> <h2 class="relative group"><a id="pushing-results-to-the-hugging-face-hub" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pushing-results-to-the-hugging-face-hub"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Pushing Results to the Hugging Face Hub</span></h2> <p data-svelte-h="svelte-1cjd5zg">You can push results and evaluation details to the Hugging Face Hub. To do | |
| so, you need to set the <code>--push-to-hub</code> option as well as the <code>--results-org</code> | |
| option. The results are saved in a dataset with the name | |
| <code>{results_org}/{model_org}/{model_name}</code>. To push the details, you need to set | |
| the <code>--save-details</code> option.</p> <p data-svelte-h="svelte-icxxh7">The dataset created will be private by default. You can make it public by | |
| setting the <code>--public-run</code> option.</p> <h2 class="relative group"><a id="pushing-results-to-tensorboard" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pushing-results-to-tensorboard"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Pushing Results to TensorBoard</span></h2> <p data-svelte-h="svelte-1bci1gp">You can push results to TensorBoard by setting <code>--push-to-tensorboard</code>. | |
| This creates a TensorBoard dashboard in a Hugging Face organization specified with the <code>--results-org</code> | |
| option.</p> <h2 class="relative group"><a id="pushing-results-to-weights--biases-or-trackio" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pushing-results-to-weights--biases-or-trackio"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Pushing Results to Weights & Biases or Trackio</span></h2> <p data-svelte-h="svelte-17ozben">You can push results to Weights & Biases by setting <code>--wandb</code>. This initializes a W&B | |
| run and logs the results.</p> <p data-svelte-h="svelte-qmfp1v">W&B arguments need to be set in your environment variables:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> WANDB_PROJECT=<span class="hljs-string">"lighteval"</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-407scl">You can find a complete list of variables in the <a href="https://docs.wandb.ai/guides/track/environment-variables/" rel="nofollow">W&B documentation</a>.</p> <p data-svelte-h="svelte-174dmv1">If Trackio is available in your environment (<code>pip install lighteval[trackio]</code>), it will be used to log and push results to a | |
| Hugging Face dataset. Choose the dataset name and organization with:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> WANDB_SPACE_ID=<span class="hljs-string">"org/name"</span><!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="how-to-load-and-investigate-details" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#how-to-load-and-investigate-details"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 
1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>How to Load and Investigate Details</span></h2> <h3 class="relative group"><a id="loading-from-local-detail-files" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#loading-from-local-detail-files"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Loading from Local Detail Files</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" 
fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-keyword">import</span> os | |
| <span class="hljs-keyword">import</span> glob | |
| output_dir = <span class="hljs-string">"evals_doc"</span> | |
| model_name = <span class="hljs-string">"HuggingFaceH4/zephyr-7b-beta"</span> | |
| timestamp = <span class="hljs-string">"latest"</span> | |
| task = <span class="hljs-string">"gsm8k"</span> | |
| <span class="hljs-keyword">if</span> timestamp == <span class="hljs-string">"latest"</span>: | |
|     path = <span class="hljs-string">f"<span class="hljs-subst">{output_dir}</span>/details/<span class="hljs-subst">{model_name}</span>/*/"</span> | |
|     timestamps = glob.glob(path) | |
|     timestamp = <span class="hljs-built_in">sorted</span>(timestamps)[-<span class="hljs-number">1</span>].split(<span class="hljs-string">"/"</span>)[-<span class="hljs-number">2</span>] | |
|     <span class="hljs-built_in">print</span>(<span class="hljs-string">f"Latest timestamp: <span class="hljs-subst">{timestamp}</span>"</span>) | |
| details_path = <span class="hljs-string">f"<span class="hljs-subst">{output_dir}</span>/details/<span class="hljs-subst">{model_name}</span>/<span class="hljs-subst">{timestamp}</span>/details_<span class="hljs-subst">{task}</span>_<span class="hljs-subst">{timestamp}</span>.parquet"</span> | |
| <span class="hljs-comment"># Load the details</span> | |
| details = load_dataset(<span class="hljs-string">"parquet"</span>, data_files=details_path, split=<span class="hljs-string">"train"</span>) | |
| <span class="hljs-keyword">for</span> detail <span class="hljs-keyword">in</span> details: | |
| <span class="hljs-built_in">print</span>(detail)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="loading-from-the-hugging-face-hub" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#loading-from-the-hugging-face-hub"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Loading from the Hugging Face Hub</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity 
bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| results_org = <span class="hljs-string">"SaylorTwift"</span> | |
| model_name = <span class="hljs-string">"HuggingFaceH4/zephyr-7b-beta"</span> | |
| sanitized_model_name = model_name.replace(<span class="hljs-string">"/"</span>, <span class="hljs-string">"__"</span>) | |
| task = <span class="hljs-string">"gsm8k"</span> | |
| public_run = <span class="hljs-literal">False</span> | |
| dataset_path = <span class="hljs-string">f"<span class="hljs-subst">{results_org}</span>/details_<span class="hljs-subst">{sanitized_model_name}</span><span class="hljs-subst">{<span class="hljs-string">'_private'</span> <span class="hljs-keyword">if</span> <span class="hljs-keyword">not</span> public_run <span class="hljs-keyword">else</span> <span class="hljs-string">''</span>}</span>"</span> | |
| details = load_dataset(dataset_path, task.replace(<span class="hljs-string">"|"</span>, <span class="hljs-string">"_"</span>), split=<span class="hljs-string">"latest"</span>) | |
| <span class="hljs-keyword">for</span> detail <span class="hljs-keyword">in</span> details: | |
| <span class="hljs-built_in">print</span>(detail)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="detail-file-structure" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#detail-file-structure"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Detail File Structure</span></h2> <p data-svelte-h="svelte-1w33ibm">The detail file contains the following columns:</p> <ul data-svelte-h="svelte-1wz4paq"><li><strong><code>__doc__</code></strong>: The document used for evaluation, containing the gold reference, few-shot examples, and other hyperparameters used for the task.</li> <li><strong><code>__model_response__</code></strong>: Contains model generations, log probabilities, and the input that was sent to the model.</li> <li><strong><code>__metric__</code></strong>: The value of the metrics for this sample.</li></ul> <h2 class="relative group"><a id="evaluationtracker-configuration" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#evaluationtracker-configuration"><span><svg class="" 
xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>EvaluationTracker Configuration</span></h2> <p data-svelte-h="svelte-vgm6ps">The <code>EvaluationTracker</code> class provides several configuration options for customizing how results are saved and pushed:</p> <h3 class="relative group"><a id="basic-configuration" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#basic-configuration"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Basic 
Configuration</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> lighteval.logging.evaluation_tracker <span class="hljs-keyword">import</span> EvaluationTracker | |
| tracker = EvaluationTracker( | |
| output_dir=<span class="hljs-string">"./results"</span>, | |
| save_details=<span class="hljs-literal">True</span>, | |
| push_to_hub=<span class="hljs-literal">True</span>, | |
| hub_results_org=<span class="hljs-string">"your_username"</span>, | |
| public=<span class="hljs-literal">False</span> | |
| )<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="advanced-configuration" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#advanced-configuration"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Advanced Configuration</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 
top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tracker = EvaluationTracker( | |
| output_dir=<span class="hljs-string">"./results"</span>, | |
| results_path_template=<span class="hljs-string">"{output_dir}/custom/{org}_{model}"</span>, | |
| save_details=<span class="hljs-literal">True</span>, | |
| push_to_hub=<span class="hljs-literal">True</span>, | |
| push_to_tensorboard=<span class="hljs-literal">True</span>, | |
| hub_results_org=<span class="hljs-string">"my-org"</span>, | |
| tensorboard_metric_prefix=<span class="hljs-string">"eval"</span>, | |
| public=<span class="hljs-literal">True</span>, | |
| use_wandb=<span class="hljs-literal">True</span> | |
| )<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="key-parameters" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#key-parameters"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Key Parameters</span></h3> <ul data-svelte-h="svelte-n2g9hd"><li><strong><code>output_dir</code></strong>: Local directory to save evaluation results and logs</li> <li><strong><code>results_path_template</code></strong>: Template for results directory structure</li> <li><strong><code>save_details</code></strong>: Whether to save detailed evaluation records (default: True)</li> <li><strong><code>push_to_hub</code></strong>: Whether to push results to Hugging Face Hub (default: False)</li> <li><strong><code>push_to_tensorboard</code></strong>: Whether to push metrics to TensorBoard (default: False)</li> <li><strong><code>hub_results_org</code></strong>: Hugging Face Hub organization to push results to</li> <li><strong><code>tensorboard_metric_prefix</code></strong>: Prefix for TensorBoard metrics (default: “eval”)</li> <li><strong><code>public</code></strong>: Whether to make Hub datasets public (default: False)</li> 
<li><strong><code>use_wandb</code></strong>: Whether to log to Weights & Biases or Trackio (default: False)</li></ul> <h2 class="relative group"><a id="result-file-structure" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#result-file-structure"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Result File Structure</span></h2> <p data-svelte-h="svelte-1ieqcct">The main results file contains several sections:</p> <h3 class="relative group"><a id="general-configuration" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#general-configuration"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 
79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>General Configuration</span></h3> <ul data-svelte-h="svelte-r2q4i7"><li><strong><code>config_general</code></strong>: Overall evaluation configuration including model information, timing, and system details</li> <li><strong><code>summary_general</code></strong>: General statistics about the evaluation run</li></ul> <h3 class="relative group"><a id="task-specific-information" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#task-specific-information"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Task-Specific Information</span></h3> <ul data-svelte-h="svelte-a95mdm"><li><strong><code>config_tasks</code></strong>: Configuration details for each evaluated task</li> <li><strong><code>summary_tasks</code></strong>: Task-specific statistics and metadata</li> <li><strong><code>versions</code></strong>: Version information for 
tasks and datasets</li></ul> <h3 class="relative group"><a id="results" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#results"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Results</span></h3> <ul data-svelte-h="svelte-1iqqkon"><li><strong><code>results</code></strong>: Actual evaluation metrics and scores for each task</li></ul> <h2 class="relative group"><a id="example-of-a-result-file" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#example-of-a-result-file"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 
56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Example of a Result File</span></h2> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"config_general"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"lighteval_sha"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"203045a8431bc9b77245c9998e05fc54509ea07f"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"num_fewshot_seeds"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">1</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"max_samples"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">1</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"job_id"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">""</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"start_time"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">620979.879320166</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"end_time"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">621004.632108041</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"total_evaluation_time_secondes"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"24.752787875011563"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"model_name"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"gpt2"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"model_sha"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"607a30d783dfa663caf39e06633721c8d4cfcd7e"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"model_dtype"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">null</span></span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"model_size"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"476.2 MB"</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"results"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"gsm8k|0"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"em"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">0.0</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"em_stderr"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">0.0</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"maj@8"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">0.0</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"maj@8_stderr"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">0.0</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"all"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"em"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">0.0</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"em_stderr"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">0.0</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"maj@8"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">0.0</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"maj@8_stderr"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">0.0</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"versions"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"gsm8k|0"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">0</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"config_tasks"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"lighteval|gsm8k"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"name"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"gsm8k"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"prompt_function"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"gsm8k"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"hf_repo"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"gsm8k"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"hf_subset"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"main"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"metric"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span> | |
| <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"metric_name"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"em"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"higher_is_better"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">true</span></span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"category"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"3"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"use_case"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"5"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"sample_level_fn"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"compute"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"corpus_level_fn"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"mean"</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"metric_name"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"maj@8"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"higher_is_better"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">true</span></span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"category"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"5"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"use_case"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"5"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"sample_level_fn"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"compute"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"corpus_level_fn"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"mean"</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">]</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"hf_avail_splits"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span> | |
| <span class="hljs-string">"train"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-string">"test"</span> | |
| <span class="hljs-punctuation">]</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"evaluation_splits"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span> | |
| <span class="hljs-string">"test"</span> | |
| <span class="hljs-punctuation">]</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"few_shots_split"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">null</span></span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"few_shots_select"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"random_sampling_from_train"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"generation_size"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">256</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"generation_grammar"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">null</span></span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"stop_sequence"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span> | |
| <span class="hljs-string">"Question="</span> | |
| <span class="hljs-punctuation">]</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"num_samples"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">null</span></span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"original_num_docs"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">1319</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"effective_num_docs"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">1</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"must_remove_duplicate_docs"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">null</span></span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"version"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">0</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"summary_tasks"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"gsm8k|0"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"hashes"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"hash_examples"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"8517d5bf7e880086"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"hash_full_prompts"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"8517d5bf7e880086"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"hash_input_tokens"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"29916e7afe5cb51d"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"hash_cont_tokens"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"37f91ce23ef6d435"</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"padded"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">0</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"non_padded"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">2</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"effective_few_shots"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">0.0</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"summary_general"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"hashes"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"hash_examples"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"5f383c395f01096e"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"hash_full_prompts"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"5f383c395f01096e"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"hash_input_tokens"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"ac933feb14f96d7b"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"hash_cont_tokens"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"9d03fb26f8da7277"</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"padded"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">0</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"non_padded"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">2</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span><!-- HTML_TAG_END --></pre></div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/lighteval/blob/main/docs/source/saving-and-reading-results.mdx" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
| { | |
| __sveltekit_13vx2u = { | |
| assets: "/docs/lighteval/pr_1221/en", | |
| base: "/docs/lighteval/pr_1221/en", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/lighteval/pr_1221/en/_app/immutable/entry/start.86845e89.js"), | |
| import("/docs/lighteval/pr_1221/en/_app/immutable/entry/app.2df9be51.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 22], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size: 63.5 kB
- Xet hash: 987bf938397fe5aff51ca658bb8a5decda87d3e1843d9685fc46987c47bf5c2b
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.