Buckets:
| import{s as He,o as Re,n as ze}from"../chunks/scheduler.7da89386.js";import{S as Le,i as Fe,g as i,s as a,r,A as Se,h as o,f as t,c as n,j as Ye,u as d,x as p,k as Ue,y as Ne,a as s,v as c,d as M,t as u,w as h}from"../chunks/index.20910acc.js";import{T as Qe}from"../chunks/Tip.53e22153.js";import{C as je}from"../chunks/CodeBlock.143bd81e.js";import{H as f,E as Pe}from"../chunks/getInferenceSnippets.fc2ce523.js";function qe(Q){let m,J="You can find a complete example of a custom model implementation in <code>examples/custom_models/google_translate_model.py</code>.";return{c(){m=i("p"),m.innerHTML=J},l(y){m=o(y,"P",{"data-svelte-h":!0}),p(m)!=="svelte-1s5ktfo"&&(m.innerHTML=J)},m(y,S){s(y,m,S)},p:ze,d(y){y&&t(m)}}}function De(Q){let m,J,y,S,g,P,b,Ce="Lighteval allows you to evaluate custom model implementations by creating a custom model class that inherits from <code>LightevalModel</code>. This is useful when you want to evaluate models that aren’t directly supported by the standard backends (transformers, vllm, etc).",q,U,D,j,$e="<li>Create a Python file containing your custom model implementation. The model must inherit from <code>LightevalModel</code> and implement all required methods.</li>",K,C,Ie="Here’s a basic example:",O,$,ee,T,ve="<li>The custom model file should contain exactly one class that inherits from <code>LightevalModel</code>. This class will be automatically detected and instantiated when loading the model.</li>",le,w,te,I,se,v,Ze="You can evaluate your custom model using either the command line interface or the Python API.",ae,Z,ne,_,ie,B,_e="The command takes three required arguments:",oe,W,Be="<li>The model name (used for tracking in results/logs)</li> <li>The path to your model implementation file</li> <li>The tasks to evaluate on (same format as other backends)</li>",me,x,pe,G,re,k,de,X,We="Your custom model must implement these core methods:",ce,V,xe="<li><code>greedy_until</code>: For generating text until a stop sequence or max tokens is reached</li> <li><code>loglikelihood</code>: For computing log probabilities of specific continuations</li> <li><code>loglikelihood_rolling</code>: For computing rolling log probabilities of sequences</li> <li><code>loglikelihood_single_token</code>: For computing log probabilities of single tokens</li>",Me,E,Ge="See the <code>LightevalModel</code> base class documentation for detailed method signatures and requirements.",ue,A,he,Y,ke="<li><p><strong>Error Handling</strong>: Implement robust error handling in your model methods to gracefully handle edge cases.</p></li> <li><p><strong>Batching</strong>: Consider implementing efficient batching in your model methods to improve performance.</p></li> <li><p><strong>Resource Management</strong>: Properly manage any resources (e.g., API connections, model weights) in your model’s <code>__init__</code> and <code>__del__</code> methods.</p></li> <li><p><strong>Documentation</strong>: Add clear docstrings to your model class and methods explaining any specific requirements or limitations.</p></li>",ye,H,Te,R,Xe="Custom models are particularly useful for:",we,z,Ve="<li>Evaluating models accessed through custom APIs</li> <li>Wrapping models with specialized preprocessing/postprocessing</li> <li>Testing novel model architectures</li> <li>Evaluating ensemble models</li> <li>Integrating with external services or tools</li>",fe,L,Ee="For a complete example of a custom model that wraps the Google Translate API, see <code>examples/custom_models/google_translate_model.py</code>.",Je,F,ge,N,be;return g=new f({props:{title:"Evaluating a Custom Model",local:"evaluating-a-custom-model",headingTag:"h1"}}),U=new f({props:{title:"Creating a Custom Model",local:"creating-a-custom-model",headingTag:"h2"}}),$=new je({props:{code:"ZnJvbSUyMGxpZ2h0ZXZhbC5tb2RlbHMuYWJzdHJhY3RfbW9kZWwlMjBpbXBvcnQlMjBMaWdodGV2YWxNb2RlbCUwQSUwQWNsYXNzJTIwTXlDdXN0b21Nb2RlbChMaWdodGV2YWxNb2RlbCklM0ElMEElMjAlMjAlMjAlMjBkZWYlMjBfX2luaXRfXyhzZWxmJTJDJTIwY29uZmlnKSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHN1cGVyKCkuX19pbml0X18oY29uZmlnKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMyUyMEluaXRpYWxpemUlMjB5b3VyJTIwbW9kZWwlMjBoZXJlLi4uJTBBJTBBJTIwJTIwJTIwJTIwZGVmJTIwZ3JlZWR5X3VudGlsKHNlbGYlMkMlMjByZXF1ZXN0cyUyQyUyMG1heF90b2tlbnMlM0ROb25lJTJDJTIwc3RvcF9zZXF1ZW5jZXMlM0ROb25lKSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMyUyMEltcGxlbWVudCUyMGdlbmVyYXRpb24lMjBsb2dpYyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHBhc3MlMEElMEElMjAlMjAlMjAlMjBkZWYlMjBsb2dsaWtlbGlob29kKHNlbGYlMkMlMjByZXF1ZXN0cyUyQyUyMGxvZyUzRFRydWUpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIzJTIwSW1wbGVtZW50JTIwbG9nbGlrZWxpaG9vZCUyMGNvbXB1dGF0aW9uJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcGFzcyUwQSUwQSUyMCUyMCUyMCUyMGRlZiUyMGxvZ2xpa2VsaWhvb2Rfcm9sbGluZyhzZWxmJTJDJTIwcmVxdWVzdHMpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIzJTIwSW1wbGVtZW50JTIwcm9sbGluZyUyMGxvZ2xpa2VsaWhvb2QlMjBjb21wdXRhdGlvbiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHBhc3MlMEElMEElMjAlMjAlMjAlMjBkZWYlMjBsb2dsaWtlbGlob29kX3NpbmdsZV90b2tlbihzZWxmJTJDJTIwcmVxdWVzdHMpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIzJTIwSW1wbGVtZW50JTIwc2luZ2xlJTIwdG9rZW4lMjBsb2dsaWtlbGlob29kJTIwY29tcHV0YXRpb24lMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBwYXNz",highlighted:`<span class="hljs-keyword">from</span> lighteval.models.abstract_model <span class="hljs-keyword">import</span> LightevalModel | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">MyCustomModel</span>(<span class="hljs-title class_ inherited__">LightevalModel</span>): | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self, config</span>): | |
| <span class="hljs-built_in">super</span>().__init__(config) | |
| <span class="hljs-comment"># Initialize your model here...</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">greedy_until</span>(<span class="hljs-params">self, requests, max_tokens=<span class="hljs-literal">None</span>, stop_sequences=<span class="hljs-literal">None</span></span>): | |
| <span class="hljs-comment"># Implement generation logic</span> | |
| <span class="hljs-keyword">pass</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">loglikelihood</span>(<span class="hljs-params">self, requests, log=<span class="hljs-literal">True</span></span>): | |
| <span class="hljs-comment"># Implement loglikelihood computation</span> | |
| <span class="hljs-keyword">pass</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">loglikelihood_rolling</span>(<span class="hljs-params">self, requests</span>): | |
| <span class="hljs-comment"># Implement rolling loglikelihood computation</span> | |
| <span class="hljs-keyword">pass</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">loglikelihood_single_token</span>(<span class="hljs-params">self, requests</span>): | |
| <span class="hljs-comment"># Implement single token loglikelihood computation</span> | |
| <span class="hljs-keyword">pass</span>`,wrap:!1}}),w=new Qe({props:{warning:!1,$$slots:{default:[qe]},$$scope:{ctx:Q}}}),I=new f({props:{title:"Running the Evaluation",local:"running-the-evaluation",headingTag:"h2"}}),Z=new f({props:{title:"Using the Command Line",local:"using-the-command-line",headingTag:"h3"}}),_=new je({props:{code:"bGlnaHRldmFsJTIwY3VzdG9tJTIwJTVDJTBBJTIwJTIwJTIwJTIwJTIyZ29vZ2xlLXRyYW5zbGF0ZSUyMiUyMCU1QyUwQSUyMCUyMCUyMCUyMCUyMmV4YW1wbGVzJTJGY3VzdG9tX21vZGVscyUyRmdvb2dsZV90cmFuc2xhdGVfbW9kZWwucHklMjIlMjAlNUMlMEElMjAlMjAlMjAlMjAlMjJsaWdodGV2YWwlN0N3bXQyMCUzQWZyLWRlJTdDMCU3QzAlMjIlMjAlNUMlMEElMjAlMjAlMjAlMjAtLW1heC1zYW1wbGVzJTIwMTA=",highlighted:`lighteval custom \\ | |
| <span class="hljs-string">"google-translate"</span> \\ | |
| <span class="hljs-string">"examples/custom_models/google_translate_model.py"</span> \\ | |
| <span class="hljs-string">"lighteval|wmt20:fr-de|0|0"</span> \\ | |
| --max-samples 10`,wrap:!1}}),x=new f({props:{title:"Using the Python API",local:"using-the-python-api",headingTag:"h3"}}),G=new je({props:{code:"ZnJvbSUyMGxpZ2h0ZXZhbC5sb2dnaW5nLmV2YWx1YXRpb25fdHJhY2tlciUyMGltcG9ydCUyMEV2YWx1YXRpb25UcmFja2VyJTBBZnJvbSUyMGxpZ2h0ZXZhbC5tb2RlbHMuY3VzdG9tLmN1c3RvbV9tb2RlbCUyMGltcG9ydCUyMEN1c3RvbU1vZGVsQ29uZmlnJTBBZnJvbSUyMGxpZ2h0ZXZhbC5waXBlbGluZSUyMGltcG9ydCUyMFBpcGVsaW5lJTJDJTIwUGlwZWxpbmVQYXJhbWV0ZXJzJTBBJTBBJTIzJTIwU2V0JTIwdXAlMjBldmFsdWF0aW9uJTIwdHJhY2tpbmclMEFldmFsdWF0aW9uX3RyYWNrZXIlMjAlM0QlMjBFdmFsdWF0aW9uVHJhY2tlciglMEElMjAlMjAlMjAlMjBvdXRwdXRfZGlyJTNEJTIycmVzdWx0cyUyMiUyQyUwQSUyMCUyMCUyMCUyMHNhdmVfZGV0YWlscyUzRFRydWUlMEEpJTBBJTBBJTIzJTIwQ29uZmlndXJlJTIwdGhlJTIwcGlwZWxpbmUlMEFwaXBlbGluZV9wYXJhbXMlMjAlM0QlMjBQaXBlbGluZVBhcmFtZXRlcnMoJTBBJTIwJTIwJTIwJTIwbGF1bmNoZXJfdHlwZSUzRFBhcmFsbGVsaXNtTWFuYWdlci5DVVNUT00lMkMlMEEpJTBBJTBBJTIzJTIwQ29uZmlndXJlJTIweW91ciUyMGN1c3RvbSUyMG1vZGVsJTBBbW9kZWxfY29uZmlnJTIwJTNEJTIwQ3VzdG9tTW9kZWxDb25maWcoJTBBJTIwJTIwJTIwJTIwbW9kZWwlM0QlMjJteS1jdXN0b20tbW9kZWwlMjIlMkMlMEElMjAlMjAlMjAlMjBtb2RlbF9kZWZpbml0aW9uX2ZpbGVfcGF0aCUzRCUyMnBhdGglMkZ0byUyRm15X21vZGVsLnB5JTIyJTBBKSUwQSUwQSUyMyUyMENyZWF0ZSUyMGFuZCUyMHJ1biUyMHRoZSUyMHBpcGVsaW5lJTBBcGlwZWxpbmUlMjAlM0QlMjBQaXBlbGluZSglMEElMjAlMjAlMjAlMjB0YXNrcyUzRCUyMmxlYWRlcmJvYXJkJTdDdHJ1dGhmdWxxYSUzQW1jJTdDMCU3QzAlMjIlMkMlMEElMjAlMjAlMjAlMjBwaXBlbGluZV9wYXJhbWV0ZXJzJTNEcGlwZWxpbmVfcGFyYW1zJTJDJTBBJTIwJTIwJTIwJTIwZXZhbHVhdGlvbl90cmFja2VyJTNEZXZhbHVhdGlvbl90cmFja2VyJTJDJTBBJTIwJTIwJTIwJTIwbW9kZWxfY29uZmlnJTNEbW9kZWxfY29uZmlnJTBBKSUwQSUwQXBpcGVsaW5lLmV2YWx1YXRlKCklMEFwaXBlbGluZS5zYXZlX2FuZF9wdXNoX3Jlc3VsdHMoKQ==",highlighted:`<span class="hljs-keyword">from</span> lighteval.logging.evaluation_tracker <span class="hljs-keyword">import</span> EvaluationTracker | |
| <span class="hljs-keyword">from</span> lighteval.models.custom.custom_model <span class="hljs-keyword">import</span> CustomModelConfig | |
| <span class="hljs-keyword">from</span> lighteval.pipeline <span class="hljs-keyword">import</span> Pipeline, PipelineParameters | |
| <span class="hljs-comment"># Set up evaluation tracking</span> | |
| evaluation_tracker = EvaluationTracker( | |
| output_dir=<span class="hljs-string">"results"</span>, | |
| save_details=<span class="hljs-literal">True</span> | |
| ) | |
| <span class="hljs-comment"># Configure the pipeline</span> | |
| pipeline_params = PipelineParameters( | |
| launcher_type=ParallelismManager.CUSTOM, | |
| ) | |
| <span class="hljs-comment"># Configure your custom model</span> | |
| model_config = CustomModelConfig( | |
| model=<span class="hljs-string">"my-custom-model"</span>, | |
| model_definition_file_path=<span class="hljs-string">"path/to/my_model.py"</span> | |
| ) | |
| <span class="hljs-comment"># Create and run the pipeline</span> | |
| pipeline = Pipeline( | |
| tasks=<span class="hljs-string">"leaderboard|truthfulqa:mc|0|0"</span>, | |
| pipeline_parameters=pipeline_params, | |
| evaluation_tracker=evaluation_tracker, | |
| model_config=model_config | |
| ) | |
| pipeline.evaluate() | |
| pipeline.save_and_push_results()`,wrap:!1}}),k=new f({props:{title:"Required Methods",local:"required-methods",headingTag:"h2"}}),A=new f({props:{title:"Best Practices",local:"best-practices",headingTag:"h2"}}),H=new f({props:{title:"Example Use Cases",local:"example-use-cases",headingTag:"h2"}}),F=new Pe({props:{source:"https://github.com/huggingface/lighteval/blob/main/docs/source/evaluating-a-custom-model.mdx"}}),{c(){m=i("meta"),J=a(),y=i("p"),S=a(),r(g.$$.fragment),P=a(),b=i("p"),b.innerHTML=Ce,q=a(),r(U.$$.fragment),D=a(),j=i("ol"),j.innerHTML=$e,K=a(),C=i("p"),C.textContent=Ie,O=a(),r($.$$.fragment),ee=a(),T=i("ol"),T.innerHTML=ve,le=a(),r(w.$$.fragment),te=a(),r(I.$$.fragment),se=a(),v=i("p"),v.textContent=Ze,ae=a(),r(Z.$$.fragment),ne=a(),r(_.$$.fragment),ie=a(),B=i("p"),B.textContent=_e,oe=a(),W=i("ul"),W.innerHTML=Be,me=a(),r(x.$$.fragment),pe=a(),r(G.$$.fragment),re=a(),r(k.$$.fragment),de=a(),X=i("p"),X.textContent=We,ce=a(),V=i("ul"),V.innerHTML=xe,Me=a(),E=i("p"),E.innerHTML=Ge,ue=a(),r(A.$$.fragment),he=a(),Y=i("ol"),Y.innerHTML=ke,ye=a(),r(H.$$.fragment),Te=a(),R=i("p"),R.textContent=Xe,we=a(),z=i("ul"),z.innerHTML=Ve,fe=a(),L=i("p"),L.innerHTML=Ee,Je=a(),r(F.$$.fragment),ge=a(),N=i("p"),this.h()},l(e){const l=Se("svelte-u9bgzb",document.head);m=o(l,"META",{name:!0,content:!0}),l.forEach(t),J=n(e),y=o(e,"P",{}),Ye(y).forEach(t),S=n(e),d(g.$$.fragment,e),P=n(e),b=o(e,"P",{"data-svelte-h":!0}),p(b)!=="svelte-2jwd2x"&&(b.innerHTML=Ce),q=n(e),d(U.$$.fragment,e),D=n(e),j=o(e,"OL",{"data-svelte-h":!0}),p(j)!=="svelte-5nepfb"&&(j.innerHTML=$e),K=n(e),C=o(e,"P",{"data-svelte-h":!0}),p(C)!=="svelte-1frc3d"&&(C.textContent=Ie),O=n(e),d($.$$.fragment,e),ee=n(e),T=o(e,"OL",{start:!0,"data-svelte-h":!0}),p(T)!=="svelte-1uhsqbo"&&(T.innerHTML=ve),le=n(e),d(w.$$.fragment,e),te=n(e),d(I.$$.fragment,e),se=n(e),v=o(e,"P",{"data-svelte-h":!0}),p(v)!=="svelte-1icw64v"&&(v.textContent=Ze),ae=n(e),d(Z.$$.fragment,e),ne=n(e),d(_.$$.fragment,e),ie=n(e),B=o(e,"P",{"data-svelte-h":!0}),p(B)!=="svelte-9ha93v"&&(B.textContent=_e),oe=n(e),W=o(e,"UL",{"data-svelte-h":!0}),p(W)!=="svelte-1dz6vvj"&&(W.innerHTML=Be),me=n(e),d(x.$$.fragment,e),pe=n(e),d(G.$$.fragment,e),re=n(e),d(k.$$.fragment,e),de=n(e),X=o(e,"P",{"data-svelte-h":!0}),p(X)!=="svelte-abp4ax"&&(X.textContent=We),ce=n(e),V=o(e,"UL",{"data-svelte-h":!0}),p(V)!=="svelte-z35vrb"&&(V.innerHTML=xe),Me=n(e),E=o(e,"P",{"data-svelte-h":!0}),p(E)!=="svelte-845vib"&&(E.innerHTML=Ge),ue=n(e),d(A.$$.fragment,e),he=n(e),Y=o(e,"OL",{"data-svelte-h":!0}),p(Y)!=="svelte-10no4pl"&&(Y.innerHTML=ke),ye=n(e),d(H.$$.fragment,e),Te=n(e),R=o(e,"P",{"data-svelte-h":!0}),p(R)!=="svelte-1rlntna"&&(R.textContent=Xe),we=n(e),z=o(e,"UL",{"data-svelte-h":!0}),p(z)!=="svelte-1qdupmh"&&(z.innerHTML=Ve),fe=n(e),L=o(e,"P",{"data-svelte-h":!0}),p(L)!=="svelte-ok3x77"&&(L.innerHTML=Ee),Je=n(e),d(F.$$.fragment,e),ge=n(e),N=o(e,"P",{}),Ye(N).forEach(t),this.h()},h(){Ue(m,"name","hf:doc:metadata"),Ue(m,"content",Ke),Ue(T,"start","2")},m(e,l){Ne(document.head,m),s(e,J,l),s(e,y,l),s(e,S,l),c(g,e,l),s(e,P,l),s(e,b,l),s(e,q,l),c(U,e,l),s(e,D,l),s(e,j,l),s(e,K,l),s(e,C,l),s(e,O,l),c($,e,l),s(e,ee,l),s(e,T,l),s(e,le,l),c(w,e,l),s(e,te,l),c(I,e,l),s(e,se,l),s(e,v,l),s(e,ae,l),c(Z,e,l),s(e,ne,l),c(_,e,l),s(e,ie,l),s(e,B,l),s(e,oe,l),s(e,W,l),s(e,me,l),c(x,e,l),s(e,pe,l),c(G,e,l),s(e,re,l),c(k,e,l),s(e,de,l),s(e,X,l),s(e,ce,l),s(e,V,l),s(e,Me,l),s(e,E,l),s(e,ue,l),c(A,e,l),s(e,he,l),s(e,Y,l),s(e,ye,l),c(H,e,l),s(e,Te,l),s(e,R,l),s(e,we,l),s(e,z,l),s(e,fe,l),s(e,L,l),s(e,Je,l),c(F,e,l),s(e,ge,l),s(e,N,l),be=!0},p(e,[l]){const Ae={};l&2&&(Ae.$$scope={dirty:l,ctx:e}),w.$set(Ae)},i(e){be||(M(g.$$.fragment,e),M(U.$$.fragment,e),M($.$$.fragment,e),M(w.$$.fragment,e),M(I.$$.fragment,e),M(Z.$$.fragment,e),M(_.$$.fragment,e),M(x.$$.fragment,e),M(G.$$.fragment,e),M(k.$$.fragment,e),M(A.$$.fragment,e),M(H.$$.fragment,e),M(F.$$.fragment,e),be=!0)},o(e){u(g.$$.fragment,e),u(U.$$.fragment,e),u($.$$.fragment,e),u(w.$$.fragment,e),u(I.$$.fragment,e),u(Z.$$.fragment,e),u(_.$$.fragment,e),u(x.$$.fragment,e),u(G.$$.fragment,e),u(k.$$.fragment,e),u(A.$$.fragment,e),u(H.$$.fragment,e),u(F.$$.fragment,e),be=!1},d(e){e&&(t(J),t(y),t(S),t(P),t(b),t(q),t(D),t(j),t(K),t(C),t(O),t(ee),t(T),t(le),t(te),t(se),t(v),t(ae),t(ne),t(ie),t(B),t(oe),t(W),t(me),t(pe),t(re),t(de),t(X),t(ce),t(V),t(Me),t(E),t(ue),t(he),t(Y),t(ye),t(Te),t(R),t(we),t(z),t(fe),t(L),t(Je),t(ge),t(N)),t(m),h(g,e),h(U,e),h($,e),h(w,e),h(I,e),h(Z,e),h(_,e),h(x,e),h(G,e),h(k,e),h(A,e),h(H,e),h(F,e)}}}const Ke='{"title":"Evaluating a Custom Model","local":"evaluating-a-custom-model","sections":[{"title":"Creating a Custom Model","local":"creating-a-custom-model","sections":[],"depth":2},{"title":"Running the Evaluation","local":"running-the-evaluation","sections":[{"title":"Using the Command Line","local":"using-the-command-line","sections":[],"depth":3},{"title":"Using the Python API","local":"using-the-python-api","sections":[],"depth":3}],"depth":2},{"title":"Required Methods","local":"required-methods","sections":[],"depth":2},{"title":"Best Practices","local":"best-practices","sections":[],"depth":2},{"title":"Example Use Cases","local":"example-use-cases","sections":[],"depth":2}],"depth":1}';function Oe(Q){return Re(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class nl extends Le{constructor(m){super(),Fe(this,m,Oe,De,He,{})}}export{nl as component}; | |
Xet Storage Details
- Size:
- 16.4 kB
- Xet hash:
- 99cf16727dfc651b71d0c12c8173cc68487fdd8bb2d51e9eb8bf8da37ed3531e
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.