Buckets:

download
raw
14.3 kB
// Compiled Svelte page chunk for the lighteval "Using the Python API" docs page.
// NOTE(review): every imported helper is minified; the roles mentioned in the
// comments below are inferred from how each helper is called in this file
// (standard Svelte compiled-fragment layout) — confirm against ../chunks/*.
import { s as be, n as $e, o as ke } from "../chunks/scheduler.3a17fb72.js";
import {
  S as ve,
  i as Ae,
  e as c,
  s,
  c as i,
  h as Ie,
  a as y,
  d as t,
  b as n,
  f as ge,
  g as M,
  j,
  k as Ce,
  l as Ee,
  m as a,
  n as p,
  t as r,
  o,
  p as m,
} from "../chunks/index.093f8863.js";
import { C as Ge, H as h, E as Ze } from "../chunks/MermaidChart.svelte_svelte_type_style_lang.5e7ea2bd.js";
import { C as ce } from "../chunks/CodeBlock.09235327.js";

/**
 * Fragment factory for the page.
 *
 * Instantiates the page's child components (one `Ge` widget, `h` headings,
 * `ce` code blocks and a `Ze` footer widget) and returns the fragment object
 * whose methods create (`c`), claim (`l`), finalise (`h`), mount (`m`),
 * transition (`i` / `o`) and destroy (`d`) the page's nodes. The order of the
 * statements inside `c`, `l` and `m` is the document order of the page and
 * must not be changed.
 *
 * @param {*} ye - Fragment context supplied by the component initialiser; it is
 *   never read in this function.
 * @returns {object} Fragment with `c`, `l`, `h`, `m`, `p`, `i`, `o`, `d` members.
 */
function Be(ye) {
  // ---- Static copy rendered into the <p> elements ---------------------------
  const ue = `Lighteval can be used from a custom Python script. To evaluate a model, you will need to set up an
<a href="/docs/lighteval/pr_1221/en/package_reference/logging#lighteval.logging.evaluation_tracker.EvaluationTracker">EvaluationTracker</a>, <a href="/docs/lighteval/pr_1221/en/package_reference/pipeline#lighteval.pipeline.PipelineParameters">PipelineParameters</a>,
a <a href="package_reference/models"><code>model</code></a> or a <a href="package_reference/model_config"><code>model_config</code></a>,
and a <a href="/docs/lighteval/pr_1221/en/package_reference/pipeline#lighteval.pipeline.Pipeline">Pipeline</a>.`;
  const je = "After that, simply run the pipeline and save the results.";
  const he = "The <code>EvaluationTracker</code> handles logging and saving evaluation results. It can save results locally and optionally push them to the Hugging Face Hub.";
  const de = "<code>PipelineParameters</code> configures how the evaluation pipeline runs, including parallelism settings and task configuration.";
  const Te = "Model configurations define the model to be evaluated, including the model name, data type, and other model-specific parameters. Different backends (VLLM, Transformers, etc.) have their own configuration classes.";
  const Je = "The <code>Pipeline</code> orchestrates the entire evaluation process, taking the tasks, model configuration, and parameters to run the evaluation.";
  const Ue = "You can evaluate on multiple tasks by providing a comma-separated list or a file path:";
  const we = "To use custom tasks, set the <code>custom_tasks_directory</code> parameter to the path containing your custom task definitions:";
  const fe = 'For more information on creating custom tasks, see the <a href="adding-a-custom-task">Adding a Custom Task</a> guide.';

  // ---- DOM node handles (assigned in c() or l()) ----------------------------
  // `u` is the <meta> placed in document.head; the rest are the <p> elements
  // and the separator nodes produced by s() / n() between page items.
  let u, Y, F, x, P, H, J, N, U, z, S, Q, L, C, q, K, $, D, O, v, ee, le, I, te,
    ae, G, se, ne, ie, _, Me, pe, W, re, oe, V;
  // Set to true by m() and i(), and to false by o(); i() is a no-op while true.
  let me;

  // ---- Child components, constructed in page order --------------------------
  const d = new Ge({
    props: {
      containerStyle: "float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;",
    },
  });
  const T = new h({
    props: { title: "Using the Python API", local: "using-the-python-api", headingTag: "h1" },
  });
  // `code` is base64 of the URL-encoded sample; `highlighted` is the same sample
  // as highlight.js markup. The whitespace of `highlighted` (blank lines,
  // indentation, two spaces before trailing comments) mirrors the decoded
  // `code` payload so the rendered sample is valid Python.
  const w = new ce({
    props: {
      code: "aW1wb3J0JTIwbGlnaHRldmFsJTBBZnJvbSUyMGxpZ2h0ZXZhbC5sb2dnaW5nLmV2YWx1YXRpb25fdHJhY2tlciUyMGltcG9ydCUyMEV2YWx1YXRpb25UcmFja2VyJTBBZnJvbSUyMGxpZ2h0ZXZhbC5tb2RlbHMudmxsbS52bGxtX21vZGVsJTIwaW1wb3J0JTIwVkxMTU1vZGVsQ29uZmlnJTBBZnJvbSUyMGxpZ2h0ZXZhbC5waXBlbGluZSUyMGltcG9ydCUyMFBhcmFsbGVsaXNtTWFuYWdlciUyQyUyMFBpcGVsaW5lJTJDJTIwUGlwZWxpbmVQYXJhbWV0ZXJzJTBBZnJvbSUyMGxpZ2h0ZXZhbC51dGlscy5pbXBvcnRzJTIwaW1wb3J0JTIwaXNfcGFja2FnZV9hdmFpbGFibGUlMEElMEFpZiUyMGlzX3BhY2thZ2VfYXZhaWxhYmxlKCUyMmFjY2VsZXJhdGUlMjIpJTNBJTBBJTIwJTIwJTIwJTIwZnJvbSUyMGRhdGV0aW1lJTIwaW1wb3J0JTIwdGltZWRlbHRhJTBBJTIwJTIwJTIwJTIwZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUyQyUyMEluaXRQcm9jZXNzR3JvdXBLd2FyZ3MlMEElMjAlMjAlMjAlMjBhY2NlbGVyYXRvciUyMCUzRCUyMEFjY2VsZXJhdG9yKGt3YXJnc19oYW5kbGVycyUzRCU1QkluaXRQcm9jZXNzR3JvdXBLd2FyZ3ModGltZW91dCUzRHRpbWVkZWx0YShzZWNvbmRzJTNEMzAwMCkpJTVEKSUwQWVsc2UlM0ElMEElMjAlMjAlMjAlMjBhY2NlbGVyYXRvciUyMCUzRCUyME5vbmUlMEElMEFkZWYlMjBtYWluKCklM0ElMEElMjAlMjAlMjAlMjBldmFsdWF0aW9uX3RyYWNrZXIlMjAlM0QlMjBFdmFsdWF0aW9uVHJhY2tlciglMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBvdXRwdXRfZGlyJTNEJTIyLiUyRnJlc3VsdHMlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBzYXZlX2RldGFpbHMlM0RUcnVlJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcHVzaF90b19odWIlM0RUcnVlJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwaHViX3Jlc3VsdHNfb3JnJTNEJTIyeW91cl91c2VybmFtZSUyMiUyQyUyMCUyMCUyMyUyMFJlcGxhY2UlMjB3aXRoJTIweW91ciUyMGFjdHVhbCUyMHVzZXJuYW1lJTBBJTIwJTIwJTIwJTIwKSUwQSUwQSUyMCUyMCUyMCUyMHBpcGVsaW5lX3BhcmFtcyUyMCUzRCUyMFBpcGVsaW5lUGFyYW1ldGVycyglMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBsYXVuY2hlcl90eXBlJTNEUGFyYWxsZWxpc21NYW5hZ2VyLkFDQ0VMRVJBVEUlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBjdXN0b21fdGFza3NfZGlyZWN0b3J5JTNETm9uZSUyQyUyMCUyMCUyMyUyMFNldCUyMHRvJTIwcGF0aCUyMGlmJTIwdXNpbmclMjBjdXN0b20lMjB0YXNrcyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMyUyMFJlbW92ZSUyMHRoZSUyMHBhcmFtZXRlciUyMGJlbG93JTIwb25jZSUyMHlvdXIlMjBjb25maWd1cmF0aW9uJTIwaXMlMjB0ZXN0ZWQlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBtYXhfc2FtcGxlcyUzRDEwJTBBJTIwJTIwJTIwJTIwKSUwQSUwQSUyMCUyMCUyMCUyMG1vZGVsX2NvbmZpZyUyMCUzRCUyMFZMTE1Nb2RlbENvbmZpZyglMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBtb2RlbF9uYW1lJTNEJTIySHVnZ2luZ0ZhY2VINCUyRnplcGh5ci03Yi1iZXRhJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZHR5cGUlM0QlMjJmbG9hdDE2JTIyJTJDJTBBJTIwJTIwJTIwJTIwKSUwQSUwQSUyMCUyMCUyMCUyMHRhc2slMjAlM0QlMjAlMjJnc204ayU3QzUlMjIlMEElMEElMjAlMjAlMjAlMjBwaXBlbGluZSUyMCUzRCUyMFBpcGVsaW5lKCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHRhc2tzJTNEdGFzayUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHBpcGVsaW5lX3BhcmFtZXRlcnMlM0RwaXBlbGluZV9wYXJhbXMlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBldmFsdWF0aW9uX3RyYWNrZXIlM0RldmFsdWF0aW9uX3RyYWNrZXIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBtb2RlbF9jb25maWclM0Rtb2RlbF9jb25maWclMkMlMEElMjAlMjAlMjAlMjApJTBBJTBBJTIwJTIwJTIwJTIwcGlwZWxpbmUuZXZhbHVhdGUoKSUwQSUyMCUyMCUyMCUyMHBpcGVsaW5lLnNhdmVfYW5kX3B1c2hfcmVzdWx0cygpJTBBJTIwJTIwJTIwJTIwcGlwZWxpbmUuc2hvd19yZXN1bHRzKCklMEElMEFpZiUyMF9fbmFtZV9fJTIwJTNEJTNEJTIwJTIyX19tYWluX18lMjIlM0ElMEElMjAlMjAlMjAlMjBtYWluKCk=",
      highlighted: `<span class="hljs-keyword">import</span> lighteval
<span class="hljs-keyword">from</span> lighteval.logging.evaluation_tracker <span class="hljs-keyword">import</span> EvaluationTracker
<span class="hljs-keyword">from</span> lighteval.models.vllm.vllm_model <span class="hljs-keyword">import</span> VLLMModelConfig
<span class="hljs-keyword">from</span> lighteval.pipeline <span class="hljs-keyword">import</span> ParallelismManager, Pipeline, PipelineParameters
<span class="hljs-keyword">from</span> lighteval.utils.imports <span class="hljs-keyword">import</span> is_package_available

<span class="hljs-keyword">if</span> is_package_available(<span class="hljs-string">&quot;accelerate&quot;</span>):
    <span class="hljs-keyword">from</span> datetime <span class="hljs-keyword">import</span> timedelta
    <span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> Accelerator, InitProcessGroupKwargs
    accelerator = Accelerator(kwargs_handlers=[InitProcessGroupKwargs(timeout=timedelta(seconds=<span class="hljs-number">3000</span>))])
<span class="hljs-keyword">else</span>:
    accelerator = <span class="hljs-literal">None</span>

<span class="hljs-keyword">def</span> <span class="hljs-title function_">main</span>():
    evaluation_tracker = EvaluationTracker(
        output_dir=<span class="hljs-string">&quot;./results&quot;</span>,
        save_details=<span class="hljs-literal">True</span>,
        push_to_hub=<span class="hljs-literal">True</span>,
        hub_results_org=<span class="hljs-string">&quot;your_username&quot;</span>,  <span class="hljs-comment"># Replace with your actual username</span>
    )

    pipeline_params = PipelineParameters(
        launcher_type=ParallelismManager.ACCELERATE,
        custom_tasks_directory=<span class="hljs-literal">None</span>,  <span class="hljs-comment"># Set to path if using custom tasks</span>
        <span class="hljs-comment"># Remove the parameter below once your configuration is tested</span>
        max_samples=<span class="hljs-number">10</span>
    )

    model_config = VLLMModelConfig(
        model_name=<span class="hljs-string">&quot;HuggingFaceH4/zephyr-7b-beta&quot;</span>,
        dtype=<span class="hljs-string">&quot;float16&quot;</span>,
    )

    task = <span class="hljs-string">&quot;gsm8k|5&quot;</span>

    pipeline = Pipeline(
        tasks=task,
        pipeline_parameters=pipeline_params,
        evaluation_tracker=evaluation_tracker,
        model_config=model_config,
    )

    pipeline.evaluate()
    pipeline.save_and_push_results()
    pipeline.show_results()

<span class="hljs-keyword">if</span> __name__ == <span class="hljs-string">&quot;__main__&quot;</span>:
    main()`,
      wrap: false,
    },
  });
  const f = new h({
    props: { title: "Key Components", local: "key-components", headingTag: "h2" },
  });
  const g = new h({
    props: { title: "EvaluationTracker", local: "evaluationtracker", headingTag: "h3" },
  });
  const b = new h({
    props: { title: "PipelineParameters", local: "pipelineparameters", headingTag: "h3" },
  });
  const k = new h({
    props: { title: "Model Configuration", local: "model-configuration", headingTag: "h3" },
  });
  const A = new h({
    props: { title: "Pipeline", local: "pipeline", headingTag: "h3" },
  });
  const E = new h({
    props: { title: "Running Multiple Tasks", local: "running-multiple-tasks", headingTag: "h2" },
  });
  const Z = new ce({
    props: {
      code: "JTIzJTIwTXVsdGlwbGUlMjB0YXNrcyUyMGFzJTIwY29tbWEtc2VwYXJhdGVkJTIwc3RyaW5nJTBBdGFza3MlMjAlM0QlMjAlMjJhaW1lMjQlMkNhaW1lMjUlMjIlMEElMEElMjMlMjBPciUyMGxvYWQlMjBmcm9tJTIwYSUyMGZpbGUlMEF0YXNrcyUyMCUzRCUyMCUyMi4lMkZwYXRoJTJGdG8lMkZ0YXNrcy50eHQlMjIlMEElMEFwaXBlbGluZSUyMCUzRCUyMFBpcGVsaW5lKCUwQSUyMCUyMCUyMCUyMHRhc2tzJTNEdGFza3MlMkMlMEElMjAlMjAlMjAlMjAlMjMlMjAuLi4lMjBvdGhlciUyMHBhcmFtZXRlcnMlMEEp",
      highlighted: `<span class="hljs-comment"># Multiple tasks as comma-separated string</span>
tasks = <span class="hljs-string">&quot;aime24,aime25&quot;</span>

<span class="hljs-comment"># Or load from a file</span>
tasks = <span class="hljs-string">&quot;./path/to/tasks.txt&quot;</span>

pipeline = Pipeline(
    tasks=tasks,
    <span class="hljs-comment"># ... other parameters</span>
)`,
      wrap: false,
    },
  });
  const B = new h({
    props: { title: "Custom Tasks", local: "custom-tasks", headingTag: "h2" },
  });
  const X = new ce({
    props: {
      code: "cGlwZWxpbmVfcGFyYW1zJTIwJTNEJTIwUGlwZWxpbmVQYXJhbWV0ZXJzKCUwQSUyMCUyMCUyMCUyMGN1c3RvbV90YXNrc19kaXJlY3RvcnklM0QlMjIuJTJGcGF0aCUyRnRvJTJGY3VzdG9tJTJGdGFza3MlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjMlMjAuLi4lMjBvdGhlciUyMHBhcmFtZXRlcnMlMEEp",
      highlighted: `pipeline_params = PipelineParameters(
    custom_tasks_directory=<span class="hljs-string">&quot;./path/to/custom/tasks&quot;</span>,
    <span class="hljs-comment"># ... other parameters</span>
)`,
      wrap: false,
    },
  });
  const R = new Ze({
    props: {
      source: "https://github.com/huggingface/lighteval/blob/main/docs/source/using-the-python-api.mdx",
    },
  });

  // All child components in creation order; shared by i(), o() and d().
  const components = [d, T, w, f, g, b, k, A, E, Z, B, X, R];

  return {
    // Create every node and child fragment from scratch, in document order.
    c() {
      u = c("meta");
      Y = s();
      F = c("p");
      x = s();
      i(d.$$.fragment);
      P = s();
      i(T.$$.fragment);
      H = s();
      J = c("p");
      J.innerHTML = ue;
      N = s();
      U = c("p");
      U.textContent = je;
      z = s();
      i(w.$$.fragment);
      S = s();
      i(f.$$.fragment);
      Q = s();
      i(g.$$.fragment);
      L = s();
      C = c("p");
      C.innerHTML = he;
      q = s();
      i(b.$$.fragment);
      K = s();
      $ = c("p");
      $.innerHTML = de;
      D = s();
      i(k.$$.fragment);
      O = s();
      v = c("p");
      v.textContent = Te;
      ee = s();
      i(A.$$.fragment);
      le = s();
      I = c("p");
      I.innerHTML = Je;
      te = s();
      i(E.$$.fragment);
      ae = s();
      G = c("p");
      G.textContent = Ue;
      se = s();
      i(Z.$$.fragment);
      ne = s();
      i(B.$$.fragment);
      ie = s();
      _ = c("p");
      _.innerHTML = we;
      Me = s();
      i(X.$$.fragment);
      pe = s();
      W = c("p");
      W.innerHTML = fe;
      re = s();
      i(R.$$.fragment);
      oe = s();
      V = c("p");
      this.h();
    },

    // Same sequence as c(), but built from the existing nodes passed in `e`.
    // A <p>'s content is rewritten only when j() does not return the expected
    // "svelte-…" marker for it.
    l(e) {
      const headNodes = Ie("svelte-u9bgzb", document.head);
      u = y(headNodes, "META", { name: true, content: true });
      headNodes.forEach(t);
      Y = n(e);
      F = y(e, "P", {});
      ge(F).forEach(t);
      x = n(e);
      M(d.$$.fragment, e);
      P = n(e);
      M(T.$$.fragment, e);
      H = n(e);
      J = y(e, "P", { "data-svelte-h": true });
      if (j(J) !== "svelte-cqwumb") {
        J.innerHTML = ue;
      }
      N = n(e);
      U = y(e, "P", { "data-svelte-h": true });
      if (j(U) !== "svelte-cbze7g") {
        U.textContent = je;
      }
      z = n(e);
      M(w.$$.fragment, e);
      S = n(e);
      M(f.$$.fragment, e);
      Q = n(e);
      M(g.$$.fragment, e);
      L = n(e);
      C = y(e, "P", { "data-svelte-h": true });
      if (j(C) !== "svelte-tdx8vg") {
        C.innerHTML = he;
      }
      q = n(e);
      M(b.$$.fragment, e);
      K = n(e);
      $ = y(e, "P", { "data-svelte-h": true });
      if (j($) !== "svelte-du9tjt") {
        $.innerHTML = de;
      }
      D = n(e);
      M(k.$$.fragment, e);
      O = n(e);
      v = y(e, "P", { "data-svelte-h": true });
      if (j(v) !== "svelte-q5c85f") {
        v.textContent = Te;
      }
      ee = n(e);
      M(A.$$.fragment, e);
      le = n(e);
      I = y(e, "P", { "data-svelte-h": true });
      if (j(I) !== "svelte-iynlxo") {
        I.innerHTML = Je;
      }
      te = n(e);
      M(E.$$.fragment, e);
      ae = n(e);
      G = y(e, "P", { "data-svelte-h": true });
      if (j(G) !== "svelte-1d3iqab") {
        G.textContent = Ue;
      }
      se = n(e);
      M(Z.$$.fragment, e);
      ne = n(e);
      M(B.$$.fragment, e);
      ie = n(e);
      _ = y(e, "P", { "data-svelte-h": true });
      if (j(_) !== "svelte-hhge1w") {
        _.innerHTML = we;
      }
      Me = n(e);
      M(X.$$.fragment, e);
      pe = n(e);
      W = y(e, "P", { "data-svelte-h": true });
      if (j(W) !== "svelte-1eelx5z") {
        W.innerHTML = fe;
      }
      re = n(e);
      M(R.$$.fragment, e);
      oe = n(e);
      V = y(e, "P", {});
      ge(V).forEach(t);
      this.h();
    },

    // Shared tail of c() and l(): set the <meta> attributes, including the
    // table-of-contents JSON held in `_e`.
    h() {
      Ce(u, "name", "hf:doc:metadata");
      Ce(u, "content", _e);
    },

    // Attach the <meta> to document.head, then insert nodes and child
    // components into `e` (with anchor `l`) in document order.
    m(e, l) {
      Ee(document.head, u);
      a(e, Y, l);
      a(e, F, l);
      a(e, x, l);
      p(d, e, l);
      a(e, P, l);
      p(T, e, l);
      a(e, H, l);
      a(e, J, l);
      a(e, N, l);
      a(e, U, l);
      a(e, z, l);
      p(w, e, l);
      a(e, S, l);
      p(f, e, l);
      a(e, Q, l);
      p(g, e, l);
      a(e, L, l);
      a(e, C, l);
      a(e, q, l);
      p(b, e, l);
      a(e, K, l);
      a(e, $, l);
      a(e, D, l);
      p(k, e, l);
      a(e, O, l);
      a(e, v, l);
      a(e, ee, l);
      p(A, e, l);
      a(e, le, l);
      a(e, I, l);
      a(e, te, l);
      p(E, e, l);
      a(e, ae, l);
      a(e, G, l);
      a(e, se, l);
      p(Z, e, l);
      a(e, ne, l);
      p(B, e, l);
      a(e, ie, l);
      a(e, _, l);
      a(e, Me, l);
      p(X, e, l);
      a(e, pe, l);
      a(e, W, l);
      a(e, re, l);
      p(R, e, l);
      a(e, oe, l);
      a(e, V, l);
      me = true;
    },

    // The update hook is the imported `$e`; this fragment defines no update
    // logic of its own.
    p: $e,

    // Run r() on every child fragment once, guarded by `me`.
    i(e) {
      if (me) {
        return;
      }
      for (const widget of components) {
        r(widget.$$.fragment, e);
      }
      me = true;
    },

    // Run o() on every child fragment and clear the `me` guard.
    o(e) {
      for (const widget of components) {
        o(widget.$$.fragment, e);
      }
      me = false;
    },

    // When `e` is truthy, pass each body node to t(); the head <meta> is always
    // passed to t(), and every child component is always passed to m().
    d(e) {
      if (e) {
        const bodyNodes = [
          Y, F, x, P, H, J, N, U, z, S, Q, L, C, q, K, $, D, O, v, ee, le, I,
          te, ae, G, se, ne, ie, _, Me, pe, W, re, oe, V,
        ];
        for (const node of bodyNodes) {
          t(node);
        }
      }
      t(u);
      for (const widget of components) {
        m(widget, e);
      }
    },
  };
}

// Table-of-contents metadata for the page, serialised as JSON and written to
// the `content` attribute of the "hf:doc:metadata" <meta> tag in h().
const _e = '{"title":"Using the Python API","local":"using-the-python-api","sections":[{"title":"Key Components","local":"key-components","sections":[{"title":"EvaluationTracker","local":"evaluationtracker","sections":[],"depth":3},{"title":"PipelineParameters","local":"pipelineparameters","sections":[],"depth":3},{"title":"Model Configuration","local":"model-configuration","sections":[],"depth":3},{"title":"Pipeline","local":"pipeline","sections":[],"depth":3}],"depth":2},{"title":"Running Multiple Tasks","local":"running-multiple-tasks","sections":[],"depth":2},{"title":"Custom Tasks","local":"custom-tasks","sections":[],"depth":2}],"depth":1}';

/**
 * Instance function handed to the component initialiser.
 *
 * Registers a callback through `ke` that reads the `fw` query-string parameter
 * and discards the value, then returns an empty context array.
 * NOTE(review): `ke` presumably schedules the callback on mount — confirm in
 * ../chunks/scheduler.
 *
 * @param {*} ye - Unused.
 * @returns {Array} Always an empty array.
 */
function Xe(ye) {
  ke(() => {
    new URLSearchParams(window.location.search).get("fw");
  });
  return [];
}

/** Page component: wires `Xe` and `Be` into the imported base class `ve` via `Ae`. */
class Ye extends ve {
  constructor(u) {
    super();
    Ae(this, u, Xe, Be, be, {});
  }
}

export { Ye as component };

Xet Storage Details

Size:
14.3 kB
·
Xet hash:
73a2347205aa6e60168a917e7f3244bedb640313b11b720a0e038c9d7a5ab5d2

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.