Buckets:

rtrm's picture
download
raw
14.1 kB
import{s as ge,n as be,o as Ie}from"../chunks/scheduler.7da89386.js";import{S as Ce,i as ve,g as i,s,r as p,A as ke,h as M,f as t,c as n,j as Ue,u as r,x as h,k as fe,y as $e,a,v as o,d as m,t as c,w as y}from"../chunks/index.20910acc.js";import{C as oe}from"../chunks/CodeBlock.143bd81e.js";import{H as T,E as Ae}from"../chunks/getInferenceSnippets.7cf363b6.js";function Ge(me){let u,F,R,Y,J,N,j,ce=`Lighteval can be used from a custom Python script. To evaluate a model, you will need to set up an
<a href="/docs/lighteval/pr_985/en/package_reference/evaluation_tracker#lighteval.logging.evaluation_tracker.EvaluationTracker">EvaluationTracker</a>, <a href="/docs/lighteval/pr_985/en/package_reference/pipeline#lighteval.pipeline.PipelineParameters">PipelineParameters</a>,
a <a href="package_reference/models"><code>model</code></a> or a <a href="package_reference/model_config"><code>model_config</code></a>,
and a <a href="/docs/lighteval/pr_985/en/package_reference/pipeline#lighteval.pipeline.Pipeline">Pipeline</a>.`,x,d,ye="After that, simply run the pipeline and save the results.",P,w,H,U,S,f,Q,g,ue="The <code>EvaluationTracker</code> handles logging and saving evaluation results. It can save results locally and optionally push them to the Hugging Face Hub.",z,b,L,I,he="<code>PipelineParameters</code> configures how the evaluation pipeline runs, including parallelism settings and task configuration.",q,C,K,v,Te="Model configurations define the model to be evaluated, including the model name, data type, and other model-specific parameters. Different backends (VLLM, Transformers, etc.) have their own configuration classes.",D,k,O,$,Je="The <code>Pipeline</code> orchestrates the entire evaluation process, taking the tasks, model configuration, and parameters to run the evaluation.",ee,A,le,G,je="You can evaluate on multiple tasks by providing a comma-separated list or a file path:",te,Z,ae,B,se,E,de="To use custom tasks, set the <code>custom_tasks_directory</code> parameter to the path containing your custom task definitions:",ne,_,ie,X,we='For more information on creating custom tasks, see the <a href="adding-a-custom-task">Adding a Custom Task</a> guide.',pe,W,Me,V,re;return J=new T({props:{title:"Using the Python API",local:"using-the-python-api",headingTag:"h1"}}),w=new oe({props:{code:"aW1wb3J0JTIwbGlnaHRldmFsJTBBZnJvbSUyMGxpZ2h0ZXZhbC5sb2dnaW5nLmV2YWx1YXRpb25fdHJhY2tlciUyMGltcG9ydCUyMEV2YWx1YXRpb25UcmFja2VyJTBBZnJvbSUyMGxpZ2h0ZXZhbC5tb2RlbHMudmxsbS52bGxtX21vZGVsJTIwaW1wb3J0JTIwVkxMTU1vZGVsQ29uZmlnJTBBZnJvbSUyMGxpZ2h0ZXZhbC5waXBlbGluZSUyMGltcG9ydCUyMFBhcmFsbGVsaXNtTWFuYWdlciUyQyUyMFBpcGVsaW5lJTJDJTIwUGlwZWxpbmVQYXJhbWV0ZXJzJTBBZnJvbSUyMGxpZ2h0ZXZhbC51dGlscy5pbXBvcnRzJTIwaW1wb3J0JTIwaXNfcGFja2FnZV9hdmFpbGFibGUlMEElMEFpZiUyMGlzX3BhY2thZ2VfYXZhaWxhYmxlKCUyMmFjY2VsZXJhdGUlMjIpJTNBJTBBJTIwJTIwJTIwJTIwZnJvbSUyMGRhdGV0aW1lJTIwaW1wb3J0JTIwdGltZWRlbHRhJTBBJTIwJTIwJTIwJTIwZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUyQyUyMEluaXRQcm9jZXNzR3JvdXBLd2FyZ3MlMEElMjAlMjAlMjAlMjBhY2NlbGVyYXRvciUyMCUzRCUyMEFjY2VsZXJhdG9yKGt3YXJnc19oYW5kbGVycyUzRCU1QkluaXRQcm9jZXNzR3JvdXBLd2FyZ3ModGltZW91dCUzRHRpbWVkZWx0YShzZWNvbmRzJTNEMzAwMCkpJTVEKSUwQWVsc2UlM0ElMEElMjAlMjAlMjAlMjBhY2NlbGVyYXRvciUyMCUzRCUyME5vbmUlMEElMEFkZWYlMjBtYWluKCklM0ElMEElMjAlMjAlMjAlMjBldmFsdWF0aW9uX3RyYWNrZXIlMjAlM0QlMjBFdmFsdWF0aW9uVHJhY2tlciglMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBvdXRwdXRfZGlyJTNEJTIyLiUyRnJlc3VsdHMlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBzYXZlX2RldGFpbHMlM0RUcnVlJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcHVzaF90b19odWIlM0RUcnVlJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwaHViX3Jlc3VsdHNfb3JnJTNEJTIyeW91cl91c2VybmFtZSUyMiUyQyUyMCUyMCUyMyUyMFJlcGxhY2UlMjB3aXRoJTIweW91ciUyMGFjdHVhbCUyMHVzZXJuYW1lJTBBJTIwJTIwJTIwJTIwKSUwQSUwQSUyMCUyMCUyMCUyMHBpcGVsaW5lX3BhcmFtcyUyMCUzRCUyMFBpcGVsaW5lUGFyYW1ldGVycyglMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBsYXVuY2hlcl90eXBlJTNEUGFyYWxsZWxpc21NYW5hZ2VyLkFDQ0VMRVJBVEUlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBjdXN0b21fdGFza3NfZGlyZWN0b3J5JTNETm9uZSUyQyUyMCUyMCUyMyUyMFNldCUyMHRvJTIwcGF0aCUyMGlmJTIwdXNpbmclMjBjdXN0b20lMjB0YXNrcyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMyUyMFJlbW92ZSUyMHRoZSUyMHBhcmFtZXRlciUyMGJlbG93JTIwb25jZSUyMHlvdXIlMjBjb25maWd1cmF0aW9uJTIwaXMlMjB0ZXN0ZWQlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBtYXhfc2FtcGxlcyUzRDEwJTBBJTIwJTIwJTIwJTIwKSUwQSUwQSUyMCUyMCUyMCUyMG1vZGVsX2NvbmZpZyUyMCUzRCUyMFZMTE1Nb2RlbENvbmZpZyglMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBtb2RlbF9uYW1lJTNEJTIySHVnZ2luZ0ZhY2VINCUyRnplcGh5ci03Yi1iZXRhJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZHR5cGUlM0QlMjJmbG9hdDE2JTIyJTJDJTBBJTIwJTIwJTIwJTIwKSUwQSUwQSUyMCUyMCUyMCUyMHRhc2slMjAlM0QlMjAlMjJsaWdodGV2YWwlN0Nnc204ayU3QzUlMjIlMEElMEElMjAlMjAlMjAlMjBwaXBlbGluZSUyMCUzRCUyMFBpcGVsaW5lKCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHRhc2tzJTNEdGFzayUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHBpcGVsaW5lX3BhcmFtZXRlcnMlM0RwaXBlbGluZV9wYXJhbXMlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBldmFsdWF0aW9uX3RyYWNrZXIlM0RldmFsdWF0aW9uX3RyYWNrZXIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBtb2RlbF9jb25maWclM0Rtb2RlbF9jb25maWclMkMlMEElMjAlMjAlMjAlMjApJTBBJTBBJTIwJTIwJTIwJTIwcGlwZWxpbmUuZXZhbHVhdGUoKSUwQSUyMCUyMCUyMCUyMHBpcGVsaW5lLnNhdmVfYW5kX3B1c2hfcmVzdWx0cygpJTBBJTIwJTIwJTIwJTIwcGlwZWxpbmUuc2hvd19yZXN1bHRzKCklMEElMEFpZiUyMF9fbmFtZV9fJTIwJTNEJTNEJTIwJTIyX19tYWluX18lMjIlM0ElMEElMjAlMjAlMjAlMjBtYWluKCk=",highlighted:`<span class="hljs-keyword">import</span> lighteval
<span class="hljs-keyword">from</span> lighteval.logging.evaluation_tracker <span class="hljs-keyword">import</span> EvaluationTracker
<span class="hljs-keyword">from</span> lighteval.models.vllm.vllm_model <span class="hljs-keyword">import</span> VLLMModelConfig
<span class="hljs-keyword">from</span> lighteval.pipeline <span class="hljs-keyword">import</span> ParallelismManager, Pipeline, PipelineParameters
<span class="hljs-keyword">from</span> lighteval.utils.imports <span class="hljs-keyword">import</span> is_package_available
<span class="hljs-keyword">if</span> is_package_available(<span class="hljs-string">&quot;accelerate&quot;</span>):
<span class="hljs-keyword">from</span> datetime <span class="hljs-keyword">import</span> timedelta
<span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> Accelerator, InitProcessGroupKwargs
accelerator = Accelerator(kwargs_handlers=[InitProcessGroupKwargs(timeout=timedelta(seconds=<span class="hljs-number">3000</span>))])
<span class="hljs-keyword">else</span>:
accelerator = <span class="hljs-literal">None</span>
<span class="hljs-keyword">def</span> <span class="hljs-title function_">main</span>():
evaluation_tracker = EvaluationTracker(
output_dir=<span class="hljs-string">&quot;./results&quot;</span>,
save_details=<span class="hljs-literal">True</span>,
push_to_hub=<span class="hljs-literal">True</span>,
hub_results_org=<span class="hljs-string">&quot;your_username&quot;</span>, <span class="hljs-comment"># Replace with your actual username</span>
)
pipeline_params = PipelineParameters(
launcher_type=ParallelismManager.ACCELERATE,
custom_tasks_directory=<span class="hljs-literal">None</span>, <span class="hljs-comment"># Set to path if using custom tasks</span>
<span class="hljs-comment"># Remove the parameter below once your configuration is tested</span>
max_samples=<span class="hljs-number">10</span>
)
model_config = VLLMModelConfig(
model_name=<span class="hljs-string">&quot;HuggingFaceH4/zephyr-7b-beta&quot;</span>,
dtype=<span class="hljs-string">&quot;float16&quot;</span>,
)
task = <span class="hljs-string">&quot;lighteval|gsm8k|5&quot;</span>
pipeline = Pipeline(
tasks=task,
pipeline_parameters=pipeline_params,
evaluation_tracker=evaluation_tracker,
model_config=model_config,
)
pipeline.evaluate()
pipeline.save_and_push_results()
pipeline.show_results()
<span class="hljs-keyword">if</span> __name__ == <span class="hljs-string">&quot;__main__&quot;</span>:
main()`,wrap:!1}}),U=new T({props:{title:"Key Components",local:"key-components",headingTag:"h2"}}),f=new T({props:{title:"EvaluationTracker",local:"evaluationtracker",headingTag:"h3"}}),b=new T({props:{title:"PipelineParameters",local:"pipelineparameters",headingTag:"h3"}}),C=new T({props:{title:"Model Configuration",local:"model-configuration",headingTag:"h3"}}),k=new T({props:{title:"Pipeline",local:"pipeline",headingTag:"h3"}}),A=new T({props:{title:"Running Multiple Tasks",local:"running-multiple-tasks",headingTag:"h2"}}),Z=new oe({props:{code:"JTIzJTIwTXVsdGlwbGUlMjB0YXNrcyUyMGFzJTIwY29tbWEtc2VwYXJhdGVkJTIwc3RyaW5nJTBBdGFza3MlMjAlM0QlMjAlMjJsaWdodGV2YWwlN0NhaW1lMjQlN0MwJTJDbGlnaHRldmFsJTdDYWltZTI1JTdDMCUyMiUwQSUwQSUyMyUyME9yJTIwbG9hZCUyMGZyb20lMjBhJTIwZmlsZSUwQXRhc2tzJTIwJTNEJTIwJTIyLiUyRnBhdGglMkZ0byUyRnRhc2tzLnR4dCUyMiUwQSUwQXBpcGVsaW5lJTIwJTNEJTIwUGlwZWxpbmUoJTBBJTIwJTIwJTIwJTIwdGFza3MlM0R0YXNrcyUyQyUwQSUyMCUyMCUyMCUyMCUyMyUyMC4uLiUyMG90aGVyJTIwcGFyYW1ldGVycyUwQSk=",highlighted:`<span class="hljs-comment"># Multiple tasks as comma-separated string</span>
tasks = <span class="hljs-string">&quot;lighteval|aime24|0,lighteval|aime25|0&quot;</span>
<span class="hljs-comment"># Or load from a file</span>
tasks = <span class="hljs-string">&quot;./path/to/tasks.txt&quot;</span>
pipeline = Pipeline(
tasks=tasks,
<span class="hljs-comment"># ... other parameters</span>
)`,wrap:!1}}),B=new T({props:{title:"Custom Tasks",local:"custom-tasks",headingTag:"h2"}}),_=new oe({props:{code:"cGlwZWxpbmVfcGFyYW1zJTIwJTNEJTIwUGlwZWxpbmVQYXJhbWV0ZXJzKCUwQSUyMCUyMCUyMCUyMGN1c3RvbV90YXNrc19kaXJlY3RvcnklM0QlMjIuJTJGcGF0aCUyRnRvJTJGY3VzdG9tJTJGdGFza3MlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjMlMjAuLi4lMjBvdGhlciUyMHBhcmFtZXRlcnMlMEEp",highlighted:`pipeline_params = PipelineParameters(
custom_tasks_directory=<span class="hljs-string">&quot;./path/to/custom/tasks&quot;</span>,
<span class="hljs-comment"># ... other parameters</span>
)`,wrap:!1}}),W=new Ae({props:{source:"https://github.com/huggingface/lighteval/blob/main/docs/source/using-the-python-api.mdx"}}),{c(){u=i("meta"),F=s(),R=i("p"),Y=s(),p(J.$$.fragment),N=s(),j=i("p"),j.innerHTML=ce,x=s(),d=i("p"),d.textContent=ye,P=s(),p(w.$$.fragment),H=s(),p(U.$$.fragment),S=s(),p(f.$$.fragment),Q=s(),g=i("p"),g.innerHTML=ue,z=s(),p(b.$$.fragment),L=s(),I=i("p"),I.innerHTML=he,q=s(),p(C.$$.fragment),K=s(),v=i("p"),v.textContent=Te,D=s(),p(k.$$.fragment),O=s(),$=i("p"),$.innerHTML=Je,ee=s(),p(A.$$.fragment),le=s(),G=i("p"),G.textContent=je,te=s(),p(Z.$$.fragment),ae=s(),p(B.$$.fragment),se=s(),E=i("p"),E.innerHTML=de,ne=s(),p(_.$$.fragment),ie=s(),X=i("p"),X.innerHTML=we,pe=s(),p(W.$$.fragment),Me=s(),V=i("p"),this.h()},l(e){const l=ke("svelte-u9bgzb",document.head);u=M(l,"META",{name:!0,content:!0}),l.forEach(t),F=n(e),R=M(e,"P",{}),Ue(R).forEach(t),Y=n(e),r(J.$$.fragment,e),N=n(e),j=M(e,"P",{"data-svelte-h":!0}),h(j)!=="svelte-lef0cv"&&(j.innerHTML=ce),x=n(e),d=M(e,"P",{"data-svelte-h":!0}),h(d)!=="svelte-cbze7g"&&(d.textContent=ye),P=n(e),r(w.$$.fragment,e),H=n(e),r(U.$$.fragment,e),S=n(e),r(f.$$.fragment,e),Q=n(e),g=M(e,"P",{"data-svelte-h":!0}),h(g)!=="svelte-tdx8vg"&&(g.innerHTML=ue),z=n(e),r(b.$$.fragment,e),L=n(e),I=M(e,"P",{"data-svelte-h":!0}),h(I)!=="svelte-du9tjt"&&(I.innerHTML=he),q=n(e),r(C.$$.fragment,e),K=n(e),v=M(e,"P",{"data-svelte-h":!0}),h(v)!=="svelte-q5c85f"&&(v.textContent=Te),D=n(e),r(k.$$.fragment,e),O=n(e),$=M(e,"P",{"data-svelte-h":!0}),h($)!=="svelte-iynlxo"&&($.innerHTML=Je),ee=n(e),r(A.$$.fragment,e),le=n(e),G=M(e,"P",{"data-svelte-h":!0}),h(G)!=="svelte-1d3iqab"&&(G.textContent=je),te=n(e),r(Z.$$.fragment,e),ae=n(e),r(B.$$.fragment,e),se=n(e),E=M(e,"P",{"data-svelte-h":!0}),h(E)!=="svelte-hhge1w"&&(E.innerHTML=de),ne=n(e),r(_.$$.fragment,e),ie=n(e),X=M(e,"P",{"data-svelte-h":!0}),h(X)!=="svelte-1eelx5z"&&(X.innerHTML=we),pe=n(e),r(W.$$.fragment,e),Me=n(e),V=M(e,"P",{}),Ue(V).forEach(t),this.h()},h(){fe(u,"name","hf:doc:metadata"),fe(u,"content",Ze)},m(e,l){$e(document.head,u),a(e,F,l),a(e,R,l),a(e,Y,l),o(J,e,l),a(e,N,l),a(e,j,l),a(e,x,l),a(e,d,l),a(e,P,l),o(w,e,l),a(e,H,l),o(U,e,l),a(e,S,l),o(f,e,l),a(e,Q,l),a(e,g,l),a(e,z,l),o(b,e,l),a(e,L,l),a(e,I,l),a(e,q,l),o(C,e,l),a(e,K,l),a(e,v,l),a(e,D,l),o(k,e,l),a(e,O,l),a(e,$,l),a(e,ee,l),o(A,e,l),a(e,le,l),a(e,G,l),a(e,te,l),o(Z,e,l),a(e,ae,l),o(B,e,l),a(e,se,l),a(e,E,l),a(e,ne,l),o(_,e,l),a(e,ie,l),a(e,X,l),a(e,pe,l),o(W,e,l),a(e,Me,l),a(e,V,l),re=!0},p:be,i(e){re||(m(J.$$.fragment,e),m(w.$$.fragment,e),m(U.$$.fragment,e),m(f.$$.fragment,e),m(b.$$.fragment,e),m(C.$$.fragment,e),m(k.$$.fragment,e),m(A.$$.fragment,e),m(Z.$$.fragment,e),m(B.$$.fragment,e),m(_.$$.fragment,e),m(W.$$.fragment,e),re=!0)},o(e){c(J.$$.fragment,e),c(w.$$.fragment,e),c(U.$$.fragment,e),c(f.$$.fragment,e),c(b.$$.fragment,e),c(C.$$.fragment,e),c(k.$$.fragment,e),c(A.$$.fragment,e),c(Z.$$.fragment,e),c(B.$$.fragment,e),c(_.$$.fragment,e),c(W.$$.fragment,e),re=!1},d(e){e&&(t(F),t(R),t(Y),t(N),t(j),t(x),t(d),t(P),t(H),t(S),t(Q),t(g),t(z),t(L),t(I),t(q),t(K),t(v),t(D),t(O),t($),t(ee),t(le),t(G),t(te),t(ae),t(se),t(E),t(ne),t(ie),t(X),t(pe),t(Me),t(V)),t(u),y(J,e),y(w,e),y(U,e),y(f,e),y(b,e),y(C,e),y(k,e),y(A,e),y(Z,e),y(B,e),y(_,e),y(W,e)}}}const Ze='{"title":"Using the Python API","local":"using-the-python-api","sections":[{"title":"Key Components","local":"key-components","sections":[{"title":"EvaluationTracker","local":"evaluationtracker","sections":[],"depth":3},{"title":"PipelineParameters","local":"pipelineparameters","sections":[],"depth":3},{"title":"Model Configuration","local":"model-configuration","sections":[],"depth":3},{"title":"Pipeline","local":"pipeline","sections":[],"depth":3}],"depth":2},{"title":"Running Multiple Tasks","local":"running-multiple-tasks","sections":[],"depth":2},{"title":"Custom Tasks","local":"custom-tasks","sections":[],"depth":2}],"depth":1}';function Be(me){return Ie(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Re extends Ce{constructor(u){super(),ve(this,u,Be,Ge,ge,{})}}export{Re as component};

Xet Storage Details

Size:
14.1 kB
·
Xet hash:
ea9dce7f5953c33e00177c703260f6ed55ce2814864f487a56415bd738dc7682

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.