Buckets:

rtrm's picture
download
raw
14.4 kB
import{s as Ce,n as Ie,o as $e}from"../chunks/scheduler.5f3e6389.js";import{S as ke,i as ve,e as c,s,c as i,h as Ge,a as y,d as t,b as n,f as ge,g as p,j as h,k as be,l as Ae,m as a,n as M,t as r,o,p as m}from"../chunks/index.373ab25c.js";import{C as Ze,H as T,E as Be}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.c2e0d06d.js";import{C as ce}from"../chunks/CodeBlock.cd35d790.js";function Ee(ye){let u,Y,V,x,j,N,J,P,d,ue=`Lighteval can be used from a custom Python script. To evaluate a model, you will need to set up an
<a href="/docs/lighteval/pr_1027/en/package_reference/evaluation_tracker#lighteval.logging.evaluation_tracker.EvaluationTracker">EvaluationTracker</a>, <a href="/docs/lighteval/pr_1027/en/package_reference/pipeline#lighteval.pipeline.PipelineParameters">PipelineParameters</a>,
a <a href="package_reference/models"><code>model</code></a> or a <a href="package_reference/model_config"><code>model_config</code></a>,
and a <a href="/docs/lighteval/pr_1027/en/package_reference/pipeline#lighteval.pipeline.Pipeline">Pipeline</a>.`,H,w,he="After that, simply run the pipeline and save the results.",S,U,Q,f,z,g,L,b,Te="The <code>EvaluationTracker</code> handles logging and saving evaluation results. It can save results locally and optionally push them to the Hugging Face Hub.",q,C,K,I,je="<code>PipelineParameters</code> configures how the evaluation pipeline runs, including parallelism settings and task configuration.",D,$,O,k,Je="Model configurations define the model to be evaluated, including the model name, data type, and other model-specific parameters. Different backends (VLLM, Transformers, etc.) have their own configuration classes.",ee,v,le,G,de="The <code>Pipeline</code> orchestrates the entire evaluation process, taking the tasks, model configuration, and parameters to run the evaluation.",te,A,ae,Z,we="You can evaluate on multiple tasks by providing a comma-separated list or a file path:",se,B,ne,E,ie,_,Ue="To use custom tasks, set the <code>custom_tasks_directory</code> parameter to the path containing your custom task definitions:",pe,X,Me,W,fe='For more information on creating custom tasks, see the <a href="adding-a-custom-task">Adding a Custom Task</a> guide.',re,R,oe,F,me;return j=new Ze({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),J=new T({props:{title:"Using the Python API",local:"using-the-python-api",headingTag:"h1"}}),U=new ce({props:{code:"aW1wb3J0JTIwbGlnaHRldmFsJTBBZnJvbSUyMGxpZ2h0ZXZhbC5sb2dnaW5nLmV2YWx1YXRpb25fdHJhY2tlciUyMGltcG9ydCUyMEV2YWx1YXRpb25UcmFja2VyJTBBZnJvbSUyMGxpZ2h0ZXZhbC5tb2RlbHMudmxsbS52bGxtX21vZGVsJTIwaW1wb3J0JTIwVkxMTU1vZGVsQ29uZmlnJTBBZnJvbSUyMGxpZ2h0ZXZhbC5waXBlbGluZSUyMGltcG9ydCUyMFBhcmFsbGVsaXNtTWFuYWdlciUyQyUyMFBpcGVsaW5lJTJDJTIwUGlwZWxpbmVQYXJhbWV0ZXJzJTBBZnJvbSUyMGxpZ2h0ZXZhbC51dGlscy5pbXBvcnRzJTIwaW1wb3J0JTIwaXNfcGFja2FnZV9hdmFpbGFibGUlMEElMEFpZiUyMGlzX3BhY2thZ2VfYXZhaWxhYmxlKCUyMmFjY2VsZXJhdGUlMjIpJTNBJTBBJTIwJTIwJTIwJTIwZnJvbSUyMGRhdGV0aW1lJTIwaW1wb3J0JTIwdGltZWRlbHRhJTBBJTIwJTIwJTIwJTIwZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUyQyUyMEluaXRQcm9jZXNzR3JvdXBLd2FyZ3MlMEElMjAlMjAlMjAlMjBhY2NlbGVyYXRvciUyMCUzRCUyMEFjY2VsZXJhdG9yKGt3YXJnc19oYW5kbGVycyUzRCU1QkluaXRQcm9jZXNzR3JvdXBLd2FyZ3ModGltZW91dCUzRHRpbWVkZWx0YShzZWNvbmRzJTNEMzAwMCkpJTVEKSUwQWVsc2UlM0ElMEElMjAlMjAlMjAlMjBhY2NlbGVyYXRvciUyMCUzRCUyME5vbmUlMEElMEFkZWYlMjBtYWluKCklM0ElMEElMjAlMjAlMjAlMjBldmFsdWF0aW9uX3RyYWNrZXIlMjAlM0QlMjBFdmFsdWF0aW9uVHJhY2tlciglMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBvdXRwdXRfZGlyJTNEJTIyLiUyRnJlc3VsdHMlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBzYXZlX2RldGFpbHMlM0RUcnVlJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcHVzaF90b19odWIlM0RUcnVlJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwaHViX3Jlc3VsdHNfb3JnJTNEJTIyeW91cl91c2VybmFtZSUyMiUyQyUyMCUyMCUyMyUyMFJlcGxhY2UlMjB3aXRoJTIweW91ciUyMGFjdHVhbCUyMHVzZXJuYW1lJTBBJTIwJTIwJTIwJTIwKSUwQSUwQSUyMCUyMCUyMCUyMHBpcGVsaW5lX3BhcmFtcyUyMCUzRCUyMFBpcGVsaW5lUGFyYW1ldGVycyglMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBsYXVuY2hlcl90eXBlJTNEUGFyYWxsZWxpc21NYW5hZ2VyLkFDQ0VMRVJBVEUlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBjdXN0b21fdGFza3NfZGlyZWN0b3J5JTNETm9uZSUyQyUyMCUyMCUyMyUyMFNldCUyMHRvJTIwcGF0aCUyMGlmJTIwdXNpbmclMjBjdXN0b20lMjB0YXNrcyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMyUyMFJlbW92ZSUyMHRoZSUyMHBhcmFtZXRlciUyMGJlbG93JTIwb25jZSUyMHlvdXIlMjBjb25maWd1cmF0aW9uJTIwaXMlMjB0ZXN0ZWQlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBtYXhfc2FtcGxlcyUzRDEwJTBBJTIwJTIwJTIwJTIwKSUwQSUwQSUyMCUyMCUyMCUyMG1vZGVsX2NvbmZpZyUyMCUzRCUyMFZMTE1Nb2RlbENvbmZpZyglMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBtb2RlbF9uYW1lJTNEJTIySHVnZ2luZ0ZhY2VINCUyRnplcGh5ci03Yi1iZXRhJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZHR5cGUlM0QlMjJmbG9hdDE2JTIyJTJDJTBBJTIwJTIwJTIwJTIwKSUwQSUwQSUyMCUyMCUyMCUyMHRhc2slMjAlM0QlMjAlMjJsaWdodGV2YWwlN0Nnc204ayU3QzUlMjIlMEElMEElMjAlMjAlMjAlMjBwaXBlbGluZSUyMCUzRCUyMFBpcGVsaW5lKCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHRhc2tzJTNEdGFzayUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHBpcGVsaW5lX3BhcmFtZXRlcnMlM0RwaXBlbGluZV9wYXJhbXMlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBldmFsdWF0aW9uX3RyYWNrZXIlM0RldmFsdWF0aW9uX3RyYWNrZXIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBtb2RlbF9jb25maWclM0Rtb2RlbF9jb25maWclMkMlMEElMjAlMjAlMjAlMjApJTBBJTBBJTIwJTIwJTIwJTIwcGlwZWxpbmUuZXZhbHVhdGUoKSUwQSUyMCUyMCUyMCUyMHBpcGVsaW5lLnNhdmVfYW5kX3B1c2hfcmVzdWx0cygpJTBBJTIwJTIwJTIwJTIwcGlwZWxpbmUuc2hvd19yZXN1bHRzKCklMEElMEFpZiUyMF9fbmFtZV9fJTIwJTNEJTNEJTIwJTIyX19tYWluX18lMjIlM0ElMEElMjAlMjAlMjAlMjBtYWluKCk=",highlighted:`<span class="hljs-keyword">import</span> lighteval
<span class="hljs-keyword">from</span> lighteval.logging.evaluation_tracker <span class="hljs-keyword">import</span> EvaluationTracker
<span class="hljs-keyword">from</span> lighteval.models.vllm.vllm_model <span class="hljs-keyword">import</span> VLLMModelConfig
<span class="hljs-keyword">from</span> lighteval.pipeline <span class="hljs-keyword">import</span> ParallelismManager, Pipeline, PipelineParameters
<span class="hljs-keyword">from</span> lighteval.utils.imports <span class="hljs-keyword">import</span> is_package_available
<span class="hljs-keyword">if</span> is_package_available(<span class="hljs-string">&quot;accelerate&quot;</span>):
<span class="hljs-keyword">from</span> datetime <span class="hljs-keyword">import</span> timedelta
<span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> Accelerator, InitProcessGroupKwargs
accelerator = Accelerator(kwargs_handlers=[InitProcessGroupKwargs(timeout=timedelta(seconds=<span class="hljs-number">3000</span>))])
<span class="hljs-keyword">else</span>:
accelerator = <span class="hljs-literal">None</span>
<span class="hljs-keyword">def</span> <span class="hljs-title function_">main</span>():
evaluation_tracker = EvaluationTracker(
output_dir=<span class="hljs-string">&quot;./results&quot;</span>,
save_details=<span class="hljs-literal">True</span>,
push_to_hub=<span class="hljs-literal">True</span>,
hub_results_org=<span class="hljs-string">&quot;your_username&quot;</span>, <span class="hljs-comment"># Replace with your actual username</span>
)
pipeline_params = PipelineParameters(
launcher_type=ParallelismManager.ACCELERATE,
custom_tasks_directory=<span class="hljs-literal">None</span>, <span class="hljs-comment"># Set to path if using custom tasks</span>
<span class="hljs-comment"># Remove the parameter below once your configuration is tested</span>
max_samples=<span class="hljs-number">10</span>
)
model_config = VLLMModelConfig(
model_name=<span class="hljs-string">&quot;HuggingFaceH4/zephyr-7b-beta&quot;</span>,
dtype=<span class="hljs-string">&quot;float16&quot;</span>,
)
task = <span class="hljs-string">&quot;lighteval|gsm8k|5&quot;</span>
pipeline = Pipeline(
tasks=task,
pipeline_parameters=pipeline_params,
evaluation_tracker=evaluation_tracker,
model_config=model_config,
)
pipeline.evaluate()
pipeline.save_and_push_results()
pipeline.show_results()
<span class="hljs-keyword">if</span> __name__ == <span class="hljs-string">&quot;__main__&quot;</span>:
main()`,wrap:!1}}),f=new T({props:{title:"Key Components",local:"key-components",headingTag:"h2"}}),g=new T({props:{title:"EvaluationTracker",local:"evaluationtracker",headingTag:"h3"}}),C=new T({props:{title:"PipelineParameters",local:"pipelineparameters",headingTag:"h3"}}),$=new T({props:{title:"Model Configuration",local:"model-configuration",headingTag:"h3"}}),v=new T({props:{title:"Pipeline",local:"pipeline",headingTag:"h3"}}),A=new T({props:{title:"Running Multiple Tasks",local:"running-multiple-tasks",headingTag:"h2"}}),B=new ce({props:{code:"JTIzJTIwTXVsdGlwbGUlMjB0YXNrcyUyMGFzJTIwY29tbWEtc2VwYXJhdGVkJTIwc3RyaW5nJTBBdGFza3MlMjAlM0QlMjAlMjJsaWdodGV2YWwlN0NhaW1lMjQlN0MwJTJDbGlnaHRldmFsJTdDYWltZTI1JTdDMCUyMiUwQSUwQSUyMyUyME9yJTIwbG9hZCUyMGZyb20lMjBhJTIwZmlsZSUwQXRhc2tzJTIwJTNEJTIwJTIyLiUyRnBhdGglMkZ0byUyRnRhc2tzLnR4dCUyMiUwQSUwQXBpcGVsaW5lJTIwJTNEJTIwUGlwZWxpbmUoJTBBJTIwJTIwJTIwJTIwdGFza3MlM0R0YXNrcyUyQyUwQSUyMCUyMCUyMCUyMCUyMyUyMC4uLiUyMG90aGVyJTIwcGFyYW1ldGVycyUwQSk=",highlighted:`<span class="hljs-comment"># Multiple tasks as comma-separated string</span>
tasks = <span class="hljs-string">&quot;lighteval|aime24|0,lighteval|aime25|0&quot;</span>
<span class="hljs-comment"># Or load from a file</span>
tasks = <span class="hljs-string">&quot;./path/to/tasks.txt&quot;</span>
pipeline = Pipeline(
tasks=tasks,
<span class="hljs-comment"># ... other parameters</span>
)`,wrap:!1}}),E=new T({props:{title:"Custom Tasks",local:"custom-tasks",headingTag:"h2"}}),X=new ce({props:{code:"cGlwZWxpbmVfcGFyYW1zJTIwJTNEJTIwUGlwZWxpbmVQYXJhbWV0ZXJzKCUwQSUyMCUyMCUyMCUyMGN1c3RvbV90YXNrc19kaXJlY3RvcnklM0QlMjIuJTJGcGF0aCUyRnRvJTJGY3VzdG9tJTJGdGFza3MlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjMlMjAuLi4lMjBvdGhlciUyMHBhcmFtZXRlcnMlMEEp",highlighted:`pipeline_params = PipelineParameters(
custom_tasks_directory=<span class="hljs-string">&quot;./path/to/custom/tasks&quot;</span>,
<span class="hljs-comment"># ... other parameters</span>
)`,wrap:!1}}),R=new Be({props:{source:"https://github.com/huggingface/lighteval/blob/main/docs/source/using-the-python-api.mdx"}}),{c(){u=c("meta"),Y=s(),V=c("p"),x=s(),i(j.$$.fragment),N=s(),i(J.$$.fragment),P=s(),d=c("p"),d.innerHTML=ue,H=s(),w=c("p"),w.textContent=he,S=s(),i(U.$$.fragment),Q=s(),i(f.$$.fragment),z=s(),i(g.$$.fragment),L=s(),b=c("p"),b.innerHTML=Te,q=s(),i(C.$$.fragment),K=s(),I=c("p"),I.innerHTML=je,D=s(),i($.$$.fragment),O=s(),k=c("p"),k.textContent=Je,ee=s(),i(v.$$.fragment),le=s(),G=c("p"),G.innerHTML=de,te=s(),i(A.$$.fragment),ae=s(),Z=c("p"),Z.textContent=we,se=s(),i(B.$$.fragment),ne=s(),i(E.$$.fragment),ie=s(),_=c("p"),_.innerHTML=Ue,pe=s(),i(X.$$.fragment),Me=s(),W=c("p"),W.innerHTML=fe,re=s(),i(R.$$.fragment),oe=s(),F=c("p"),this.h()},l(e){const l=Ge("svelte-u9bgzb",document.head);u=y(l,"META",{name:!0,content:!0}),l.forEach(t),Y=n(e),V=y(e,"P",{}),ge(V).forEach(t),x=n(e),p(j.$$.fragment,e),N=n(e),p(J.$$.fragment,e),P=n(e),d=y(e,"P",{"data-svelte-h":!0}),h(d)!=="svelte-51qugj"&&(d.innerHTML=ue),H=n(e),w=y(e,"P",{"data-svelte-h":!0}),h(w)!=="svelte-cbze7g"&&(w.textContent=he),S=n(e),p(U.$$.fragment,e),Q=n(e),p(f.$$.fragment,e),z=n(e),p(g.$$.fragment,e),L=n(e),b=y(e,"P",{"data-svelte-h":!0}),h(b)!=="svelte-tdx8vg"&&(b.innerHTML=Te),q=n(e),p(C.$$.fragment,e),K=n(e),I=y(e,"P",{"data-svelte-h":!0}),h(I)!=="svelte-du9tjt"&&(I.innerHTML=je),D=n(e),p($.$$.fragment,e),O=n(e),k=y(e,"P",{"data-svelte-h":!0}),h(k)!=="svelte-q5c85f"&&(k.textContent=Je),ee=n(e),p(v.$$.fragment,e),le=n(e),G=y(e,"P",{"data-svelte-h":!0}),h(G)!=="svelte-iynlxo"&&(G.innerHTML=de),te=n(e),p(A.$$.fragment,e),ae=n(e),Z=y(e,"P",{"data-svelte-h":!0}),h(Z)!=="svelte-1d3iqab"&&(Z.textContent=we),se=n(e),p(B.$$.fragment,e),ne=n(e),p(E.$$.fragment,e),ie=n(e),_=y(e,"P",{"data-svelte-h":!0}),h(_)!=="svelte-hhge1w"&&(_.innerHTML=Ue),pe=n(e),p(X.$$.fragment,e),Me=n(e),W=y(e,"P",{"data-svelte-h":!0}),h(W)!=="svelte-1eelx5z"&&(W.innerHTML=fe),re=n(e),p(R.$$.fragment,e),oe=n(e),F=y(e,"P",{}),ge(F).forEach(t),this.h()},h(){be(u,"name","hf:doc:metadata"),be(u,"content",_e)},m(e,l){Ae(document.head,u),a(e,Y,l),a(e,V,l),a(e,x,l),M(j,e,l),a(e,N,l),M(J,e,l),a(e,P,l),a(e,d,l),a(e,H,l),a(e,w,l),a(e,S,l),M(U,e,l),a(e,Q,l),M(f,e,l),a(e,z,l),M(g,e,l),a(e,L,l),a(e,b,l),a(e,q,l),M(C,e,l),a(e,K,l),a(e,I,l),a(e,D,l),M($,e,l),a(e,O,l),a(e,k,l),a(e,ee,l),M(v,e,l),a(e,le,l),a(e,G,l),a(e,te,l),M(A,e,l),a(e,ae,l),a(e,Z,l),a(e,se,l),M(B,e,l),a(e,ne,l),M(E,e,l),a(e,ie,l),a(e,_,l),a(e,pe,l),M(X,e,l),a(e,Me,l),a(e,W,l),a(e,re,l),M(R,e,l),a(e,oe,l),a(e,F,l),me=!0},p:Ie,i(e){me||(r(j.$$.fragment,e),r(J.$$.fragment,e),r(U.$$.fragment,e),r(f.$$.fragment,e),r(g.$$.fragment,e),r(C.$$.fragment,e),r($.$$.fragment,e),r(v.$$.fragment,e),r(A.$$.fragment,e),r(B.$$.fragment,e),r(E.$$.fragment,e),r(X.$$.fragment,e),r(R.$$.fragment,e),me=!0)},o(e){o(j.$$.fragment,e),o(J.$$.fragment,e),o(U.$$.fragment,e),o(f.$$.fragment,e),o(g.$$.fragment,e),o(C.$$.fragment,e),o($.$$.fragment,e),o(v.$$.fragment,e),o(A.$$.fragment,e),o(B.$$.fragment,e),o(E.$$.fragment,e),o(X.$$.fragment,e),o(R.$$.fragment,e),me=!1},d(e){e&&(t(Y),t(V),t(x),t(N),t(P),t(d),t(H),t(w),t(S),t(Q),t(z),t(L),t(b),t(q),t(K),t(I),t(D),t(O),t(k),t(ee),t(le),t(G),t(te),t(ae),t(Z),t(se),t(ne),t(ie),t(_),t(pe),t(Me),t(W),t(re),t(oe),t(F)),t(u),m(j,e),m(J,e),m(U,e),m(f,e),m(g,e),m(C,e),m($,e),m(v,e),m(A,e),m(B,e),m(E,e),m(X,e),m(R,e)}}}const _e='{"title":"Using the Python API","local":"using-the-python-api","sections":[{"title":"Key Components","local":"key-components","sections":[{"title":"EvaluationTracker","local":"evaluationtracker","sections":[],"depth":3},{"title":"PipelineParameters","local":"pipelineparameters","sections":[],"depth":3},{"title":"Model Configuration","local":"model-configuration","sections":[],"depth":3},{"title":"Pipeline","local":"pipeline","sections":[],"depth":3}],"depth":2},{"title":"Running Multiple Tasks","local":"running-multiple-tasks","sections":[],"depth":2},{"title":"Custom Tasks","local":"custom-tasks","sections":[],"depth":2}],"depth":1}';function Xe(ye){return $e(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Ye extends ke{constructor(u){super(),ve(this,u,Xe,Ee,Ce,{})}}export{Ye as component};

Xet Storage Details

Size:
14.4 kB
·
Xet hash:
654cecf978df1a060a1bd24d74a37ce8d0de99b20ed62191a276ba4d6a40edf7

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.