Buckets:
| import{s as Ye,c as He,u as Le,g as De,d as Pe,e as Oe,A as de,o as _e,f as Ke,n as Zl}from"../chunks/scheduler.37c15a92.js";import{S as qe,i as Fe,g as w,s as c,h as I,j as Ol,f as e,c as o,k as Wl,a as t,d as b,t as C,z as lt,m as z,n as N,y as be,D as et,o as tt,r as f,A as st,u as B,x as G,v as g,w as Z}from"../chunks/index.2bf4358c.js";import{T as he}from"../chunks/Tip.363c041f.js";import{C as V}from"../chunks/CodeBlock.4e987730.js";import{H as yl,E as nt}from"../chunks/getInferenceSnippets.24b50994.js";import{e as ke}from"../chunks/each.e59479a4.js";import{s as Ie}from"../chunks/stores.cb4752a8.js";/**
 * NOTE(review): every single-letter import above is a minified alias from the
 * runtime chunks; their exact semantics are not visible in this file. Comments
 * below describe only what the call sites show and hedge anything else.
 *
 * Writes one query-string parameter into the current page URL without navigating.
 * Builds a URL from window.location.href, sets parameter `v` to `m` in its search
 * params, and swaps the address with history.replaceState.
 * @param v - query parameter name
 * @param m - value stored under that name
 */function at(v,m){const p=new URL(window.location.href),j=new URLSearchParams(p.search);j.set(v,m),p.search=j.toString(),history.replaceState(null,"",p.toString())}/**
 * Reads one query-string parameter from the current page URL.
 * @param v - query parameter name
 * @returns whatever URLSearchParams.get(v) yields (null when the parameter is absent)
 */function Mt(v){const m=new URL(window.location.href);return new URLSearchParams(m.search).get(v)}/**
 * Builds the per-item context used by xe: a shallow copy of the context array
 * `v` with slot 7 set to `m[p]` (the list entry at index p).
 */function Re(v,m,p){const j=v.slice();return j[7]=m[p],j}/**
 * Fragment factory for a single option button.
 * Renders a <div> whose text is v[7] followed by a whitespace node. The class
 * string uses the dark filled variant when v[2][v[0]] === v[7] and the gray
 * clickable variant otherwise. A click listener calls v[6](v[7]).
 * Returns an object with c, l, h, m, p and d methods - presumably Svelte's
 * create, claim, hydrate, mount, update and destroy hooks (confirm against the
 * runtime chunk; the helpers w, z, c, I, N, o, Wl, t, be, et, tt, e are opaque here).
 * The p hook refreshes the text when dirty bit 1 is set (i & 2) and recomputes
 * the class when any of bits 0-2 are set (i & 7).
 */function xe(v){let m,p=v[7]+"",j,J,u,y,M;function U(){return v[6](v[7])}return{c(){m=w("div"),j=z(p),J=c(),this.h()},l(r){m=I(r,"DIV",{class:!0});var i=Ol(m);j=N(i,p),J=o(i),i.forEach(e),this.h()},h(){Wl(m,"class",u="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd "+(v[2][v[0]]===v[7]?"border-gray-800 bg-black dark:bg-gray-700 text-white":"text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm"))},m(r,i){t(r,m,i),be(m,j),be(m,J),y||(M=et(m,"click",U),y=!0)},p(r,i){v=r,i&2&&p!==(p=v[7]+"")&&tt(j,p),i&7&&u!==(u="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd "+(v[2][v[0]]===v[7]?"border-gray-800 bg-black dark:bg-gray-700 text-white":"text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm"))&&Wl(m,"class",u)},d(r){r&&e(m),y=!1,M()}}}/**
 * Fragment factory for the whole selector component.
 * Creates one xe fragment per entry of ke(v[1]) inside a flex container <div>,
 * followed by a <div class="language-select"> that hosts the default slot
 * (v[5].default, built through He with scope v[4]).
 * The p hook, when any of dirty bits 0-3 are set (i & 15), updates existing
 * option fragments, creates and mounts missing ones, destroys surplus ones and
 * truncates the fragment array to the new length. The slot is updated when it
 * has not been transitioned in yet or when dirty bit 4 is set (i & 16).
 */function it(v){let m,p,j,J,u=ke(v[1]),y=[];for(let r=0;r<u.length;r+=1)y[r]=xe(Re(v,u,r));const M=v[5].default,U=He(M,v,v[4],null);return{c(){m=w("div");for(let 
r=0;r<y.length;r+=1)y[r].c();p=c(),j=w("div"),U&&U.c(),this.h()},l(r){m=I(r,"DIV",{class:!0});var i=Ol(m);for(let A=0;A<y.length;A+=1)y[A].l(i);i.forEach(e),p=o(r),j=I(r,"DIV",{class:!0});var Q=Ol(j);U&&U.l(Q),Q.forEach(e),this.h()},h(){Wl(m,"class","flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"),Wl(j,"class","language-select")},m(r,i){t(r,m,i);for(let Q=0;Q<y.length;Q+=1)y[Q]&&y[Q].m(m,null);t(r,p,i),t(r,j,i),U&&U.m(j,null),J=!0},p(r,[i]){if(i&15){u=ke(r[1]);let Q;for(Q=0;Q<u.length;Q+=1){const A=Re(r,u,Q);y[Q]?y[Q].p(A,i):(y[Q]=xe(A),y[Q].c(),y[Q].m(m,null))}for(;Q<y.length;Q+=1)y[Q].d(1);y.length=u.length}U&&U.p&&(!J||i&16)&&Le(U,M,r,r[4],J?Pe(M,r[4],i,null):De(r[4]),null)},i(r){J||(b(U,r),J=!0)},o(r){C(U,r),J=!1},d(r){r&&(e(m),e(p),e(j)),lt(y,r),U&&U.d(r)}}}/**
 * Instance function of the selector component (handed to Fe by class Dl).
 * - Registers i => p(2, j = i) against the store Ie through Oe, so `j`
 *   (context slot 2) mirrors the store value.
 * - Reads $$slots, $$scope and the props `id` and `options` from `m`.
 * - Immediately stores options[0] under store[id] as the initial selection.
 * - U(i) stores i under store[id] and writes it to the URL via at(id, i).
 * - The callback given to _e reads the URL parameter named by id via Mt and,
 *   when that value is included in options, stores it under store[id]
 *   (presumably _e is Svelte's onMount - confirm).
 * - v.$$set copies incoming id, options and $$scope into slots 0, 1 and 4.
 * Returns the context array [id, options, storeValue, U, $$scope, $$slots, r],
 * where r forwards its argument to U.
 * NOTE(review): options[0] is read without checking that options is a
 * non-empty array.
 */function pt(v,m,p){let j;Oe(v,Ie,i=>p(2,j=i));let{$$slots:J={},$$scope:u}=m,{id:y}=m,{options:M}=m;de(Ie,j[y]=M[0],j);function U(i){de(Ie,j[y]=i,j),at(y,i)}_e(()=>{const i=Mt(y);i&&M.includes(i)&&de(Ie,j[y]=i,j)});const r=i=>U(i);return v.$$set=i=>{"id"in i&&p(0,y=i.id),"options"in i&&p(1,M=i.options),"$$scope"in i&&p(4,u=i.$$scope)},[y,M,j,U,u,J,r]}/**
 * Selector component class. Extends qe and initialises itself through Fe with
 * pt (instance function), it (fragment factory), Ye (presumably the
 * change-detection comparator - confirm) and the prop index map
 * {id: 0, options: 1}.
 */class Dl extends qe{constructor(m){super(),Fe(this,m,pt,it,Ye,{id:0,options:1})}}/**
 * Fragment factory for the Flash Attention explainer. Emits a text node with
 * the introductory copy followed by two <p> elements whose textContent is the
 * Romanian copy held in j and y. Its p hook is the imported Zl (presumably a
 * no-op - confirm), so the fragment never updates after creation.
 */function rt(v){let m,p,j="Inovația cheie constă în modul în care gestionează transferurile de memorie între High Bandwidth Memory (HBM) și cache-ul SRAM mai rapid. Atenția tradițională transferă în mod repetat date între HBM și SRAM, creând blocaje prin lăsarea GPU-ului inactiv. 
Flash Attention încarcă datele o dată în SRAM și efectuează toate calculele acolo, minimizând transferurile costisitoare de memorie.",J,u,y="Deși beneficiile sunt cele mai semnificative în timpul antrenamentului, utilizarea redusă de VRAM și eficiența îmbunătățită a Flash Attention o fac valoroasă și pentru inferență, permițând servirea LLM mai rapidă și mai scalabilă.";return{c(){m=z(`Flash Attention este o tehnică care optimizează mecanismul de atenție în modelele transformer prin abordarea blocajelor de lățime de bandă a memoriei. Așa cum s-a discutat mai devreme în [Capitolul 1.8](/course/chapter1/8), mecanismul de atenție are complexitate și utilizare de memorie pătratică, făcându-l ineficient pentru secvențe lungi. | |
| `),p=w("p"),p.textContent=j,J=c(),u=w("p"),u.textContent=y},l(M){m=N(M,`Flash Attention este o tehnică care optimizează mecanismul de atenție în modelele transformer prin abordarea blocajelor de lățime de bandă a memoriei. Așa cum s-a discutat mai devreme în [Capitolul 1.8](/course/chapter1/8), mecanismul de atenție are complexitate și utilizare de memorie pătratică, făcându-l ineficient pentru secvențe lungi. | |
| `),p=I(M,"P",{"data-svelte-h":!0}),G(p)!=="svelte-bck4r0"&&(p.textContent=j),J=o(M),u=I(M,"P",{"data-svelte-h":!0}),G(u)!=="svelte-89j1zd"&&(u.textContent=y)},m(M,U){t(M,m,U),t(M,p,U),t(M,J,U),t(M,u,U)},p:Zl,d(M){M&&(e(m),e(p),e(J),e(u))}}}/**
 * Fragment factory for the PagedAttention explainer. Emits a text node with
 * the introductory copy, a <p> whose textContent is j, an <ol> whose innerHTML
 * is the four-item list in y, and a closing <p> whose innerHTML is r (it embeds
 * a link to the vLLM paged-attention design page). Its p hook is the imported
 * Zl (presumably a no-op - confirm), so the content is static after creation.
 * During claiming, each element's content is rewritten only when G(element)
 * differs from the expected "svelte-..." marker.
 */function ct(v){let m,p,j="Inovația cheie a vLLM constă în modul în care gestionează acest cache:",J,u,y="<li><strong>Paginarea Memoriei</strong>: În loc să trateze cache-ul KV ca un bloc mare, este împărțit în “pagini” de dimensiune fixă (similar cu memoria virtuală în sistemele de operare).</li> <li><strong>Stocare Non-Contigua</strong>: Paginile nu trebuie să fie stocate contiguu în memoria GPU, permițând o alocare mai flexibilă a memoriei.</li> <li><strong>Gestionarea Tabelului de Pagini</strong>: Un tabel de pagini urmărește care pagini aparțin cărei secvențe, permițând căutare și acces eficient.</li> <li><strong>Partajarea Memoriei</strong>: Pentru operații precum eșantionarea paralelă, paginile care stochează cache-ul KV pentru prompt pot fi partajate între multiple secvențe.</li>",M,U,r='Abordarea PagedAttention poate duce la un throughput de până la 24 de ori mai mare comparativ cu metodele tradiționale, făcând-o o schimbare de paradigmă pentru implementările LLM de producție. Dacă vrei să înțelegi cu adevărat în profunzime cum funcționează PagedAttention, poți citi <a href="https://docs.vllm.ai/en/latest/design/kernel/paged_attention.html" rel="nofollow">ghidul din documentația vLLM</a>.';return{c(){m=z(`PagedAttention este o tehnică care abordează un alt blocaj critic în inferența LLM: gestionarea memoriei cache KV. Așa cum s-a discutat în [Capitolul 1.8](/course/chapter1/8), în timpul generării de text, modelul stochează cheile și valorile de atenție (cache KV) pentru fiecare token generat pentru a reduce calculele redundante. Cache-ul KV poate deveni enorm, în special cu secvențe lungi sau multiple cereri concurente. | |
| `),p=w("p"),p.textContent=j,J=c(),u=w("ol"),u.innerHTML=y,M=c(),U=w("p"),U.innerHTML=r},l(i){m=N(i,`PagedAttention este o tehnică care abordează un alt blocaj critic în inferența LLM: gestionarea memoriei cache KV. Așa cum s-a discutat în [Capitolul 1.8](/course/chapter1/8), în timpul generării de text, modelul stochează cheile și valorile de atenție (cache KV) pentru fiecare token generat pentru a reduce calculele redundante. Cache-ul KV poate deveni enorm, în special cu secvențe lungi sau multiple cereri concurente. | |
| `),p=I(i,"P",{"data-svelte-h":!0}),G(p)!=="svelte-1q7w70u"&&(p.textContent=j),J=o(i),u=I(i,"OL",{"data-svelte-h":!0}),G(u)!=="svelte-o4cwji"&&(u.innerHTML=y),M=o(i),U=I(i,"P",{"data-svelte-h":!0}),G(U)!=="svelte-1hypydk"&&(U.innerHTML=r)},m(i,Q){t(i,m,Q),t(i,p,Q),t(i,J,Q),t(i,u,Q),t(i,M,Q),t(i,U,Q)},p:Zl,d(i){i&&(e(m),e(p),e(J),e(u),e(M),e(U))}}}/**
 * Fragment factory for the llama.cpp quantization explainer. Emits a text node
 * with the introductory copy, a <p> whose textContent is j, an <ol> whose
 * innerHTML is the four-item feature list in y, and a closing <p> whose
 * textContent is r. Its p hook is the imported Zl (presumably a no-op -
 * confirm), so the content is static after creation.
 */function ot(v){let m,p,j="Caracteristicile cheie de cuantificare în llama.cpp includ:",J,u,y="<li><strong>Multiple Niveluri de Cuantificare</strong>: Suportă cuantificare pe 8-bit, 4-bit, 3-bit și chiar 2-bit</li> <li><strong>Format GGML/GGUF</strong>: Folosește formate tensoriale personalizate optimizate pentru inferența cuantificată</li> <li><strong>Precizie Mixtă</strong>: Poate aplica diferite niveluri de cuantificare la diferite părți ale modelului</li> <li><strong>Optimizări Specifice Hardware</strong>: Include căi de cod optimizate pentru diverse arhitecturi CPU (AVX2, AVX-512, NEON)</li>",M,U,r="Această abordare permite rularea modelelor cu miliarde de parametri pe hardware de consum cu memorie limitată, făcând-o perfectă pentru implementări locale și dispozitive de margine.";return{c(){m=z(`Cuantificarea în llama.cpp reduce precizia ponderilor modelului de la puncte mobile pe 32-bit sau 16-bit la formate de precizie mai mică precum întregi pe 8-bit (INT8), 4-bit sau chiar mai mic. Aceasta reduce semnificativ utilizarea memoriei și îmbunătățește viteza de inferență cu pierderi minime de calitate. | |
| `),p=w("p"),p.textContent=j,J=c(),u=w("ol"),u.innerHTML=y,M=c(),U=w("p"),U.textContent=r},l(i){m=N(i,`Cuantificarea în llama.cpp reduce precizia ponderilor modelului de la puncte mobile pe 32-bit sau 16-bit la formate de precizie mai mică precum întregi pe 8-bit (INT8), 4-bit sau chiar mai mic. Aceasta reduce semnificativ utilizarea memoriei și îmbunătățește viteza de inferență cu pierderi minime de calitate. | |
| `),p=I(i,"P",{"data-svelte-h":!0}),G(p)!=="svelte-1wsf8af"&&(p.textContent=j),J=o(i),u=I(i,"OL",{"data-svelte-h":!0}),G(u)!=="svelte-6dlvtp"&&(u.innerHTML=y),M=o(i),U=I(i,"P",{"data-svelte-h":!0}),G(U)!=="svelte-1fu05c7"&&(U.textContent=r)},m(i,Q){t(i,m,Q),t(i,p,Q),t(i,J,Q),t(i,u,Q),t(i,M,Q),t(i,U,Q)},p:Zl,d(i){i&&(e(m),e(p),e(J),e(u),e(M),e(U))}}}/**
 * Fragment factory for the basic-usage walkthrough of the three servers.
 * Emits the <hfoption value="tgi">, <hfoption value="llama.cpp"> and
 * <hfoption value="vllm"> markers as raw text nodes; between them it places
 * <p> captions (Romanian copy in the string locals below) and ten instances of
 * the imported V component: U, $, X for TGI; R, x, Y, ml for llama.cpp;
 * F, d, cl for vLLM.
 * Each V receives props {code, highlighted, wrap: false}. `highlighted` is
 * pre-rendered hljs HTML; `code` appears to be base64 of the percent-encoded
 * snippet (TODO confirm against the CodeBlock chunk).
 * The p hook is the imported Zl (presumably a no-op - confirm). The i and o
 * hooks pass every V fragment to b and C respectively, and d passes each V to Z.
 */function mt(v){let m,p,j="TGI este ușor de instalat și utilizat, cu integrare profundă în ecosistemul Hugging Face.",J,u,y="Mai întâi, lansează serverul TGI folosind Docker:",M,U,r,i,Q="Apoi interacționează cu acesta folosind InferenceClient de la Hugging Face:",A,$,k,W,ol="Alternativ, poți folosi clientul OpenAI:",E,X,H,T,S="llama.cpp este ușor de instalat și utilizat, necesitând dependențe minime și suportând atât inferența CPU cât și GPU.",ll,L,dl="Mai întâi, instalează și construiește llama.cpp:",el,R,tl,D,hl="Apoi, lansează serverul (cu compatibilitate API OpenAI):",sl,x,nl,P,bl="Interacționează cu serverul folosind InferenceClient de la Hugging Face:",al,Y,Ml,O,Tl="Alternativ, poți folosi clientul OpenAI:",_,ml,il,K,Cl="vLLM este ușor de instalat și utilizat, cu atât compatibilitatea API OpenAI cât și o interfață Python nativă.",pl,q,Jl="Mai întâi, lansează serverul compatibil OpenAI vLLM:",jl,F,ul,rl,Ul="Apoi interacționează cu acesta folosind InferenceClient de la Hugging Face:",n,d,Sl,wl,fl="Alternativ, poți folosi clientul OpenAI:",Bl,cl,gl,Il;return U=new V({props:{code:"ZG9ja2VyJTIwcnVuJTIwLS1ncHVzJTIwYWxsJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1zaG0tc2l6ZSUyMDFnJTIwJTVDJTBBJTIwJTIwJTIwJTIwLXAlMjA4MDgwJTNBODAlMjAlNUMlMEElMjAlMjAlMjAlMjAtdiUyMH4lMkYuY2FjaGUlMkZodWdnaW5nZmFjZSUzQSUyRmRhdGElMjAlNUMlMEElMjAlMjAlMjAlMjBnaGNyLmlvJTJGaHVnZ2luZ2ZhY2UlMkZ0ZXh0LWdlbmVyYXRpb24taW5mZXJlbmNlJTNBbGF0ZXN0JTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1tb2RlbC1pZCUyMEh1Z2dpbmdGYWNlVEIlMkZTbW9sTE0yLTM2ME0tSW5zdHJ1Y3Q=",highlighted:`docker run --gpus all \\ | |
| --shm-size 1g \\ | |
| -p 8080:80 \\ | |
| -v ~/.cache/huggingface:/data \\ | |
| ghcr.io/huggingface/text-generation-inference:latest \\ | |
| --model-id HuggingFaceTB/SmolLM2-360M-Instruct`,wrap:!1}}),$=new V({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMEluZmVyZW5jZUNsaWVudCUwQSUwQSUyMyUyMEluaSVDOCU5QmlhbGl6ZWF6JUM0JTgzJTIwY2xpZW50dWwlMjBwb2ludCVDMyVBMm5kJTIwYyVDNCU4M3RyZSUyMGVuZHBvaW50LXVsJTIwVEdJJTBBY2xpZW50JTIwJTNEJTIwSW5mZXJlbmNlQ2xpZW50KCUwQSUyMCUyMCUyMCUyMG1vZGVsJTNEJTIyaHR0cCUzQSUyRiUyRmxvY2FsaG9zdCUzQTgwODAlMjIlMkMlMjAlMjAlMjMlMjBVUkwlMjBjJUM0JTgzdHJlJTIwc2VydmVydWwlMjBUR0klMEEpJTBBJTBBJTIzJTIwR2VuZXJhcmVhJTIwZGUlMjB0ZXh0JTBBcmVzcG9uc2UlMjAlM0QlMjBjbGllbnQudGV4dF9nZW5lcmF0aW9uKCUwQSUyMCUyMCUyMCUyMCUyMlNwdW5lLW1pJTIwbyUyMHBvdmVzdGUlMjIlMkMlMEElMjAlMjAlMjAlMjBtYXhfbmV3X3Rva2VucyUzRDEwMCUyQyUwQSUyMCUyMCUyMCUyMHRlbXBlcmF0dXJlJTNEMC43JTJDJTBBJTIwJTIwJTIwJTIwdG9wX3AlM0QwLjk1JTJDJTBBJTIwJTIwJTIwJTIwZGV0YWlscyUzRFRydWUlMkMlMEElMjAlMjAlMjAlMjBzdG9wX3NlcXVlbmNlcyUzRCU1QiU1RCUyQyUwQSklMEFwcmludChyZXNwb25zZS5nZW5lcmF0ZWRfdGV4dCklMEElMEElMjMlMjBQZW50cnUlMjBmb3JtYXQlMjBkZSUyMGNoYXQlMEFyZXNwb25zZSUyMCUzRCUyMGNsaWVudC5jaGF0X2NvbXBsZXRpb24oJTBBJTIwJTIwJTIwJTIwbWVzc2FnZXMlM0QlNUIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMjJyb2xlJTIyJTNBJTIwJTIyc3lzdGVtJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMCUyMkUlQzglOTl0aSUyMHVuJTIwYXNpc3RlbnQlMjB1dGlsLiUyMiU3RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QiUyMnJvbGUlMjIlM0ElMjAlMjJ1c2VyJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMCUyMlNwdW5lLW1pJTIwbyUyMHBvdmVzdGUlMjIlN0QlMkMlMEElMjAlMjAlMjAlMjAlNUQlMkMlMEElMjAlMjAlMjAlMjBtYXhfdG9rZW5zJTNEMTAwJTJDJTBBJTIwJTIwJTIwJTIwdGVtcGVyYXR1cmUlM0QwLjclMkMlMEElMjAlMjAlMjAlMjB0b3BfcCUzRDAuOTUlMkMlMEEpJTBBcHJpbnQocmVzcG9uc2UuY2hvaWNlcyU1QjAlNUQubWVzc2FnZS5jb250ZW50KQ==",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> InferenceClient | |
| <span class="hljs-comment"># Inițializează clientul pointând către endpoint-ul TGI</span> | |
| client = InferenceClient( | |
| model=<span class="hljs-string">"http://localhost:8080"</span>, <span class="hljs-comment"># URL către serverul TGI</span> | |
| ) | |
| <span class="hljs-comment"># Generarea de text</span> | |
| response = client.text_generation( | |
| <span class="hljs-string">"Spune-mi o poveste"</span>, | |
| max_new_tokens=<span class="hljs-number">100</span>, | |
| temperature=<span class="hljs-number">0.7</span>, | |
| top_p=<span class="hljs-number">0.95</span>, | |
| details=<span class="hljs-literal">True</span>, | |
| stop_sequences=[], | |
| ) | |
| <span class="hljs-built_in">print</span>(response.generated_text) | |
| <span class="hljs-comment"># Pentru format de chat</span> | |
| response = client.chat_completion( | |
| messages=[ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"system"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Ești un asistent util."</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Spune-mi o poveste"</span>}, | |
| ], | |
| max_tokens=<span class="hljs-number">100</span>, | |
| temperature=<span class="hljs-number">0.7</span>, | |
| top_p=<span class="hljs-number">0.95</span>, | |
| ) | |
| <span class="hljs-built_in">print</span>(response.choices[<span class="hljs-number">0</span>].message.content)`,wrap:!1}}),X=new V({props:{code:"ZnJvbSUyMG9wZW5haSUyMGltcG9ydCUyME9wZW5BSSUwQSUwQSUyMyUyMEluaSVDOCU5QmlhbGl6ZWF6JUM0JTgzJTIwY2xpZW50dWwlMjBwb2ludCVDMyVBMm5kJTIwYyVDNCU4M3RyZSUyMGVuZHBvaW50LXVsJTIwVEdJJTBBY2xpZW50JTIwJTNEJTIwT3BlbkFJKCUwQSUyMCUyMCUyMCUyMGJhc2VfdXJsJTNEJTIyaHR0cCUzQSUyRiUyRmxvY2FsaG9zdCUzQTgwODAlMkZ2MSUyMiUyQyUyMCUyMCUyMyUyMEFzaWd1ciVDNCU4My10ZSUyMHMlQzQlODMlMjBpbmNsdXppJTIwJTJGdjElMEElMjAlMjAlMjAlMjBhcGlfa2V5JTNEJTIybm90LW5lZWRlZCUyMiUyQyUyMCUyMCUyMyUyMFRHSSUyMG51JTIwbmVjZXNpdCVDNCU4MyUyMG8lMjBjaGVpZSUyMEFQSSUyMCVDMyVBRW4lMjBtb2QlMjBpbXBsaWNpdCUwQSklMEElMEElMjMlMjBDb21wbGV0YXJlYSUyMGRlJTIwY2hhdCUwQXJlc3BvbnNlJTIwJTNEJTIwY2xpZW50LmNoYXQuY29tcGxldGlvbnMuY3JlYXRlKCUwQSUyMCUyMCUyMCUyMG1vZGVsJTNEJTIySHVnZ2luZ0ZhY2VUQiUyRlNtb2xMTTItMzYwTS1JbnN0cnVjdCUyMiUyQyUwQSUyMCUyMCUyMCUyMG1lc3NhZ2VzJTNEJTVCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTIycm9sZSUyMiUzQSUyMCUyMnN5c3RlbSUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlMjJFJUM4JTk5dGklMjB1biUyMGFzaXN0ZW50JTIwdXRpbC4lMjIlN0QlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMjJyb2xlJTIyJTNBJTIwJTIydXNlciUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlMjJTcHVuZS1taSUyMG8lMjBwb3Zlc3RlJTIyJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTVEJTJDJTBBJTIwJTIwJTIwJTIwbWF4X3Rva2VucyUzRDEwMCUyQyUwQSUyMCUyMCUyMCUyMHRlbXBlcmF0dXJlJTNEMC43JTJDJTBBJTIwJTIwJTIwJTIwdG9wX3AlM0QwLjk1JTJDJTBBKSUwQXByaW50KHJlc3BvbnNlLmNob2ljZXMlNUIwJTVELm1lc3NhZ2UuY29udGVudCk=",highlighted:`<span class="hljs-keyword">from</span> openai <span class="hljs-keyword">import</span> OpenAI | |
| <span class="hljs-comment"># Inițializează clientul pointând către endpoint-ul TGI</span> | |
| client = OpenAI( | |
| base_url=<span class="hljs-string">"http://localhost:8080/v1"</span>, <span class="hljs-comment"># Asigură-te să incluzi /v1</span> | |
| api_key=<span class="hljs-string">"not-needed"</span>, <span class="hljs-comment"># TGI nu necesită o cheie API în mod implicit</span> | |
| ) | |
| <span class="hljs-comment"># Completarea de chat</span> | |
| response = client.chat.completions.create( | |
| model=<span class="hljs-string">"HuggingFaceTB/SmolLM2-360M-Instruct"</span>, | |
| messages=[ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"system"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Ești un asistent util."</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Spune-mi o poveste"</span>}, | |
| ], | |
| max_tokens=<span class="hljs-number">100</span>, | |
| temperature=<span class="hljs-number">0.7</span>, | |
| top_p=<span class="hljs-number">0.95</span>, | |
| ) | |
| <span class="hljs-built_in">print</span>(response.choices[<span class="hljs-number">0</span>].message.content)`,wrap:!1}}),R=new V({props:{code:"JTIzJTIwQ2xvbmVheiVDNCU4MyUyMGRlcG96aXR1bCUwQWdpdCUyMGNsb25lJTIwaHR0cHMlM0ElMkYlMkZnaXRodWIuY29tJTJGZ2dlcmdhbm92JTJGbGxhbWEuY3BwJTBBY2QlMjBsbGFtYS5jcHAlMEElMEElMjMlMjBDb25zdHJ1aWUlQzglOTl0ZSUyMHByb2llY3R1bCUwQW1ha2UlMEElMEElMjMlMjBEZXNjYXJjJUM0JTgzJTIwbW9kZWx1bCUyMFNtb2xMTTItMS43Qi1JbnN0cnVjdC1HR1VGJTBBY3VybCUyMC1MJTIwLU8lMjBodHRwcyUzQSUyRiUyRmh1Z2dpbmdmYWNlLmNvJTJGSHVnZ2luZ0ZhY2VUQiUyRlNtb2xMTTItMS43Qi1JbnN0cnVjdC1HR1VGJTJGcmVzb2x2ZSUyRm1haW4lMkZzbW9sbG0yLTEuN2ItaW5zdHJ1Y3QuUTRfS19NLmdndWY=",highlighted:`<span class="hljs-comment"># Clonează depozitul</span> | |
| git <span class="hljs-built_in">clone</span> https://github.com/ggerganov/llama.cpp | |
| <span class="hljs-built_in">cd</span> llama.cpp | |
| <span class="hljs-comment"># Construiește proiectul</span> | |
| make | |
| <span class="hljs-comment"># Descarcă modelul SmolLM2-1.7B-Instruct-GGUF</span> | |
| curl -L -O https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF/resolve/main/smollm2-1.7b-instruct.Q4_K_M.gguf`,wrap:!1}}),x=new V({props:{code:"JTIzJTIwUG9ybmUlQzglOTl0ZSUyMHNlcnZlcnVsJTBBLiUyRnNlcnZlciUyMCU1QyUwQSUyMCUyMCUyMCUyMC1tJTIwc21vbGxtMi0xLjdiLWluc3RydWN0LlE0X0tfTS5nZ3VmJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1ob3N0JTIwMC4wLjAuMCUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tcG9ydCUyMDgwODAlMjAlNUMlMEElMjAlMjAlMjAlMjAtYyUyMDQwOTYlMjAlNUMlMEElMjAlMjAlMjAlMjAtLW4tZ3B1LWxheWVycyUyMDAlMjAlMjAlMjMlMjBTZXRlYXolQzQlODMlMjBsYSUyMHVuJTIwbnVtJUM0JTgzciUyMG1haSUyMG1hcmUlMjBwZW50cnUlMjBhJTIwZm9sb3NpJTIwR1BV",highlighted:`<span class="hljs-comment"># Pornește serverul</span> | |
| ./server \\ | |
| -m smollm2-1.7b-instruct.Q4_K_M.gguf \\ | |
| --host 0.0.0.0 \\ | |
| --port 8080 \\ | |
| -c 4096 \\ | |
| --n-gpu-layers 0 <span class="hljs-comment"># Setează la un număr mai mare pentru a folosi GPU</span>`,wrap:!1}}),Y=new V({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMEluZmVyZW5jZUNsaWVudCUwQSUwQSUyMyUyMEluaSVDOCU5QmlhbGl6ZWF6JUM0JTgzJTIwY2xpZW50dWwlMjBwb2ludCVDMyVBMm5kJTIwYyVDNCU4M3RyZSUyMHNlcnZlcnVsJTIwbGxhbWEuY3BwJTBBY2xpZW50JTIwJTNEJTIwSW5mZXJlbmNlQ2xpZW50KCUwQSUyMCUyMCUyMCUyMG1vZGVsJTNEJTIyaHR0cCUzQSUyRiUyRmxvY2FsaG9zdCUzQTgwODAlMkZ2MSUyMiUyQyUyMCUyMCUyMyUyMFVSTCUyMGMlQzQlODN0cmUlMjBzZXJ2ZXJ1bCUyMGxsYW1hLmNwcCUwQSUyMCUyMCUyMCUyMHRva2VuJTNEJTIyc2stbm8ta2V5LXJlcXVpcmVkJTIyJTJDJTIwJTIwJTIzJTIwc2VydmVydWwlMjBsbGFtYS5jcHAlMjBuZWNlc2l0JUM0JTgzJTIwYWNlc3QlMjBwbGFjZWhvbGRlciUwQSklMEElMEElMjMlMjBHZW5lcmFyZWElMjBkZSUyMHRleHQlMEFyZXNwb25zZSUyMCUzRCUyMGNsaWVudC50ZXh0X2dlbmVyYXRpb24oJTBBJTIwJTIwJTIwJTIwJTIyU3B1bmUtbWklMjBvJTIwcG92ZXN0ZSUyMiUyQyUwQSUyMCUyMCUyMCUyMG1heF9uZXdfdG9rZW5zJTNEMTAwJTJDJTBBJTIwJTIwJTIwJTIwdGVtcGVyYXR1cmUlM0QwLjclMkMlMEElMjAlMjAlMjAlMjB0b3BfcCUzRDAuOTUlMkMlMEElMjAlMjAlMjAlMjBkZXRhaWxzJTNEVHJ1ZSUyQyUwQSklMEFwcmludChyZXNwb25zZS5nZW5lcmF0ZWRfdGV4dCklMEElMEElMjMlMjBQZW50cnUlMjBmb3JtYXQlMjBkZSUyMGNoYXQlMEFyZXNwb25zZSUyMCUzRCUyMGNsaWVudC5jaGF0X2NvbXBsZXRpb24oJTBBJTIwJTIwJTIwJTIwbWVzc2FnZXMlM0QlNUIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMjJyb2xlJTIyJTNBJTIwJTIyc3lzdGVtJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMCUyMkUlQzglOTl0aSUyMHVuJTIwYXNpc3RlbnQlMjB1dGlsLiUyMiU3RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QiUyMnJvbGUlMjIlM0ElMjAlMjJ1c2VyJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMCUyMlNwdW5lLW1pJTIwbyUyMHBvdmVzdGUlMjIlN0QlMkMlMEElMjAlMjAlMjAlMjAlNUQlMkMlMEElMjAlMjAlMjAlMjBtYXhfdG9rZW5zJTNEMTAwJTJDJTBBJTIwJTIwJTIwJTIwdGVtcGVyYXR1cmUlM0QwLjclMkMlMEElMjAlMjAlMjAlMjB0b3BfcCUzRDAuOTUlMkMlMEEpJTBBcHJpbnQocmVzcG9uc2UuY2hvaWNlcyU1QjAlNUQubWVzc2FnZS5jb250ZW50KQ==",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> InferenceClient | |
| <span class="hljs-comment"># Inițializează clientul pointând către serverul llama.cpp</span> | |
| client = InferenceClient( | |
| model=<span class="hljs-string">"http://localhost:8080/v1"</span>, <span class="hljs-comment"># URL către serverul llama.cpp</span> | |
| token=<span class="hljs-string">"sk-no-key-required"</span>, <span class="hljs-comment"># serverul llama.cpp necesită acest placeholder</span> | |
| ) | |
| <span class="hljs-comment"># Generarea de text</span> | |
| response = client.text_generation( | |
| <span class="hljs-string">"Spune-mi o poveste"</span>, | |
| max_new_tokens=<span class="hljs-number">100</span>, | |
| temperature=<span class="hljs-number">0.7</span>, | |
| top_p=<span class="hljs-number">0.95</span>, | |
| details=<span class="hljs-literal">True</span>, | |
| ) | |
| <span class="hljs-built_in">print</span>(response.generated_text) | |
| <span class="hljs-comment"># Pentru format de chat</span> | |
| response = client.chat_completion( | |
| messages=[ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"system"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Ești un asistent util."</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Spune-mi o poveste"</span>}, | |
| ], | |
| max_tokens=<span class="hljs-number">100</span>, | |
| temperature=<span class="hljs-number">0.7</span>, | |
| top_p=<span class="hljs-number">0.95</span>, | |
| ) | |
| <span class="hljs-built_in">print</span>(response.choices[<span class="hljs-number">0</span>].message.content)`,wrap:!1}}),ml=new V({props:{code:"ZnJvbSUyMG9wZW5haSUyMGltcG9ydCUyME9wZW5BSSUwQSUwQSUyMyUyMEluaSVDOCU5QmlhbGl6ZWF6JUM0JTgzJTIwY2xpZW50dWwlMjBwb2ludCVDMyVBMm5kJTIwYyVDNCU4M3RyZSUyMHNlcnZlcnVsJTIwbGxhbWEuY3BwJTBBY2xpZW50JTIwJTNEJTIwT3BlbkFJKCUwQSUyMCUyMCUyMCUyMGJhc2VfdXJsJTNEJTIyaHR0cCUzQSUyRiUyRmxvY2FsaG9zdCUzQTgwODAlMkZ2MSUyMiUyQyUwQSUyMCUyMCUyMCUyMGFwaV9rZXklM0QlMjJzay1uby1rZXktcmVxdWlyZWQlMjIlMkMlMjAlMjAlMjMlMjBzZXJ2ZXJ1bCUyMGxsYW1hLmNwcCUyMG5lY2VzaXQlQzQlODMlMjBhY2VzdCUyMHBsYWNlaG9sZGVyJTBBKSUwQSUwQSUyMyUyMENvbXBsZXRhcmVhJTIwZGUlMjBjaGF0JTBBcmVzcG9uc2UlMjAlM0QlMjBjbGllbnQuY2hhdC5jb21wbGV0aW9ucy5jcmVhdGUoJTBBJTIwJTIwJTIwJTIwbW9kZWwlM0QlMjJzbW9sbG0yLTEuN2ItaW5zdHJ1Y3QlMjIlMkMlMjAlMjAlMjMlMjBJZGVudGlmaWNhdG9ydWwlMjBtb2RlbHVsdWklMjBwb2F0ZSUyMGZpJTIwb3JpY2UlMjBkZW9hcmVjZSUyMHNlcnZlcnVsJTIwJUMzJUFFbmNhcmMlQzQlODMlMjBkb2FyJTIwdW4lMjBtb2RlbCUwQSUyMCUyMCUyMCUyMG1lc3NhZ2VzJTNEJTVCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTIycm9sZSUyMiUzQSUyMCUyMnN5c3RlbSUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlMjJFJUM4JTk5dGklMjB1biUyMGFzaXN0ZW50JTIwdXRpbC4lMjIlN0QlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMjJyb2xlJTIyJTNBJTIwJTIydXNlciUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlMjJTcHVuZS1taSUyMG8lMjBwb3Zlc3RlJTIyJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTVEJTJDJTBBJTIwJTIwJTIwJTIwbWF4X3Rva2VucyUzRDEwMCUyQyUwQSUyMCUyMCUyMCUyMHRlbXBlcmF0dXJlJTNEMC43JTJDJTBBJTIwJTIwJTIwJTIwdG9wX3AlM0QwLjk1JTJDJTBBKSUwQXByaW50KHJlc3BvbnNlLmNob2ljZXMlNUIwJTVELm1lc3NhZ2UuY29udGVudCk=",highlighted:`<span class="hljs-keyword">from</span> openai <span class="hljs-keyword">import</span> OpenAI | |
| <span class="hljs-comment"># Inițializează clientul pointând către serverul llama.cpp</span> | |
| client = OpenAI( | |
| base_url=<span class="hljs-string">"http://localhost:8080/v1"</span>, | |
| api_key=<span class="hljs-string">"sk-no-key-required"</span>, <span class="hljs-comment"># serverul llama.cpp necesită acest placeholder</span> | |
| ) | |
| <span class="hljs-comment"># Completarea de chat</span> | |
| response = client.chat.completions.create( | |
| model=<span class="hljs-string">"smollm2-1.7b-instruct"</span>, <span class="hljs-comment"># Identificatorul modelului poate fi orice deoarece serverul încarcă doar un model</span> | |
| messages=[ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"system"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Ești un asistent util."</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Spune-mi o poveste"</span>}, | |
| ], | |
| max_tokens=<span class="hljs-number">100</span>, | |
| temperature=<span class="hljs-number">0.7</span>, | |
| top_p=<span class="hljs-number">0.95</span>, | |
| ) | |
| <span class="hljs-built_in">print</span>(response.choices[<span class="hljs-number">0</span>].message.content)`,wrap:!1}}),F=new V({props:{code:"cHl0aG9uJTIwLW0lMjB2bGxtLmVudHJ5cG9pbnRzLm9wZW5haS5hcGlfc2VydmVyJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1tb2RlbCUyMEh1Z2dpbmdGYWNlVEIlMkZTbW9sTE0yLTM2ME0tSW5zdHJ1Y3QlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWhvc3QlMjAwLjAuMC4wJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1wb3J0JTIwODAwMA==",highlighted:`python -m vllm.entrypoints.openai.api_server \\ | |
| --model HuggingFaceTB/SmolLM2-360M-Instruct \\ | |
| --host 0.0.0.0 \\ | |
| --port 8000`,wrap:!1}}),d=new V({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMEluZmVyZW5jZUNsaWVudCUwQSUwQSUyMyUyMEluaSVDOCU5QmlhbGl6ZWF6JUM0JTgzJTIwY2xpZW50dWwlMjBwb2ludCVDMyVBMm5kJTIwYyVDNCU4M3RyZSUyMGVuZHBvaW50LXVsJTIwdkxMTSUwQWNsaWVudCUyMCUzRCUyMEluZmVyZW5jZUNsaWVudCglMEElMjAlMjAlMjAlMjBtb2RlbCUzRCUyMmh0dHAlM0ElMkYlMkZsb2NhbGhvc3QlM0E4MDAwJTJGdjElMjIlMkMlMjAlMjAlMjMlMjBVUkwlMjBjJUM0JTgzdHJlJTIwc2VydmVydWwlMjB2TExNJTBBKSUwQSUwQSUyMyUyMEdlbmVyYXJlYSUyMGRlJTIwdGV4dCUwQXJlc3BvbnNlJTIwJTNEJTIwY2xpZW50LnRleHRfZ2VuZXJhdGlvbiglMEElMjAlMjAlMjAlMjAlMjJTcHVuZS1taSUyMG8lMjBwb3Zlc3RlJTIyJTJDJTBBJTIwJTIwJTIwJTIwbWF4X25ld190b2tlbnMlM0QxMDAlMkMlMEElMjAlMjAlMjAlMjB0ZW1wZXJhdHVyZSUzRDAuNyUyQyUwQSUyMCUyMCUyMCUyMHRvcF9wJTNEMC45NSUyQyUwQSUyMCUyMCUyMCUyMGRldGFpbHMlM0RUcnVlJTJDJTBBKSUwQXByaW50KHJlc3BvbnNlLmdlbmVyYXRlZF90ZXh0KSUwQSUwQSUyMyUyMFBlbnRydSUyMGZvcm1hdCUyMGRlJTIwY2hhdCUwQXJlc3BvbnNlJTIwJTNEJTIwY2xpZW50LmNoYXRfY29tcGxldGlvbiglMEElMjAlMjAlMjAlMjBtZXNzYWdlcyUzRCU1QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QiUyMnJvbGUlMjIlM0ElMjAlMjJzeXN0ZW0lMjIlMkMlMjAlMjJjb250ZW50JTIyJTNBJTIwJTIyRSVDOCU5OXRpJTIwdW4lMjBhc2lzdGVudCUyMHV0aWwuJTIyJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTIycm9sZSUyMiUzQSUyMCUyMnVzZXIlMjIlMkMlMjAlMjJjb250ZW50JTIyJTNBJTIwJTIyU3B1bmUtbWklMjBvJTIwcG92ZXN0ZSUyMiU3RCUyQyUwQSUyMCUyMCUyMCUyMCU1RCUyQyUwQSUyMCUyMCUyMCUyMG1heF90b2tlbnMlM0QxMDAlMkMlMEElMjAlMjAlMjAlMjB0ZW1wZXJhdHVyZSUzRDAuNyUyQyUwQSUyMCUyMCUyMCUyMHRvcF9wJTNEMC45NSUyQyUwQSklMEFwcmludChyZXNwb25zZS5jaG9pY2VzJTVCMCU1RC5tZXNzYWdlLmNvbnRlbnQp",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> InferenceClient | |
| <span class="hljs-comment"># Inițializează clientul pointând către endpoint-ul vLLM</span> | |
| client = InferenceClient( | |
| model=<span class="hljs-string">"http://localhost:8000/v1"</span>, <span class="hljs-comment"># URL către serverul vLLM</span> | |
| ) | |
| <span class="hljs-comment"># Generarea de text</span> | |
| response = client.text_generation( | |
| <span class="hljs-string">"Spune-mi o poveste"</span>, | |
| max_new_tokens=<span class="hljs-number">100</span>, | |
| temperature=<span class="hljs-number">0.7</span>, | |
| top_p=<span class="hljs-number">0.95</span>, | |
| details=<span class="hljs-literal">True</span>, | |
| ) | |
| <span class="hljs-built_in">print</span>(response.generated_text) | |
| <span class="hljs-comment"># Pentru format de chat</span> | |
| response = client.chat_completion( | |
| messages=[ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"system"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Ești un asistent util."</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Spune-mi o poveste"</span>}, | |
| ], | |
| max_tokens=<span class="hljs-number">100</span>, | |
| temperature=<span class="hljs-number">0.7</span>, | |
| top_p=<span class="hljs-number">0.95</span>, | |
| ) | |
| <span class="hljs-built_in">print</span>(response.choices[<span class="hljs-number">0</span>].message.content)`,wrap:!1}}),cl=new V({props:{code:"ZnJvbSUyMG9wZW5haSUyMGltcG9ydCUyME9wZW5BSSUwQSUwQSUyMyUyMEluaSVDOCU5QmlhbGl6ZWF6JUM0JTgzJTIwY2xpZW50dWwlMjBwb2ludCVDMyVBMm5kJTIwYyVDNCU4M3RyZSUyMGVuZHBvaW50LXVsJTIwdkxMTSUwQWNsaWVudCUyMCUzRCUyME9wZW5BSSglMEElMjAlMjAlMjAlMjBiYXNlX3VybCUzRCUyMmh0dHAlM0ElMkYlMkZsb2NhbGhvc3QlM0E4MDAwJTJGdjElMjIlMkMlMEElMjAlMjAlMjAlMjBhcGlfa2V5JTNEJTIybm90LW5lZWRlZCUyMiUyQyUyMCUyMCUyMyUyMHZMTE0lMjBudSUyMG5lY2VzaXQlQzQlODMlMjBvJTIwY2hlaWUlMjBBUEklMjAlQzMlQUVuJTIwbW9kJTIwaW1wbGljaXQlMEEpJTBBJTBBJTIzJTIwQ29tcGxldGFyZWElMjBkZSUyMGNoYXQlMEFyZXNwb25zZSUyMCUzRCUyMGNsaWVudC5jaGF0LmNvbXBsZXRpb25zLmNyZWF0ZSglMEElMjAlMjAlMjAlMjBtb2RlbCUzRCUyMkh1Z2dpbmdGYWNlVEIlMkZTbW9sTE0yLTM2ME0tSW5zdHJ1Y3QlMjIlMkMlMEElMjAlMjAlMjAlMjBtZXNzYWdlcyUzRCU1QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QiUyMnJvbGUlMjIlM0ElMjAlMjJzeXN0ZW0lMjIlMkMlMjAlMjJjb250ZW50JTIyJTNBJTIwJTIyRSVDOCU5OXRpJTIwdW4lMjBhc2lzdGVudCUyMHV0aWwuJTIyJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTIycm9sZSUyMiUzQSUyMCUyMnVzZXIlMjIlMkMlMjAlMjJjb250ZW50JTIyJTNBJTIwJTIyU3B1bmUtbWklMjBvJTIwcG92ZXN0ZSUyMiU3RCUyQyUwQSUyMCUyMCUyMCUyMCU1RCUyQyUwQSUyMCUyMCUyMCUyMG1heF90b2tlbnMlM0QxMDAlMkMlMEElMjAlMjAlMjAlMjB0ZW1wZXJhdHVyZSUzRDAuNyUyQyUwQSUyMCUyMCUyMCUyMHRvcF9wJTNEMC45NSUyQyUwQSklMEFwcmludChyZXNwb25zZS5jaG9pY2VzJTVCMCU1RC5tZXNzYWdlLmNvbnRlbnQp",highlighted:`<span class="hljs-keyword">from</span> openai <span class="hljs-keyword">import</span> OpenAI | |
| <span class="hljs-comment"># Inițializează clientul pointând către endpoint-ul vLLM</span> | |
| client = OpenAI( | |
| base_url=<span class="hljs-string">"http://localhost:8000/v1"</span>, | |
| api_key=<span class="hljs-string">"not-needed"</span>, <span class="hljs-comment"># vLLM nu necesită o cheie API în mod implicit</span> | |
| ) | |
| <span class="hljs-comment"># Completarea de chat</span> | |
| response = client.chat.completions.create( | |
| model=<span class="hljs-string">"HuggingFaceTB/SmolLM2-360M-Instruct"</span>, | |
| messages=[ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"system"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Ești un asistent util."</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Spune-mi o poveste"</span>}, | |
| ], | |
| max_tokens=<span class="hljs-number">100</span>, | |
| temperature=<span class="hljs-number">0.7</span>, | |
| top_p=<span class="hljs-number">0.95</span>, | |
| ) | |
| <span class="hljs-built_in">print</span>(response.choices[<span class="hljs-number">0</span>].message.content)`,wrap:!1}}),{c(){m=z(`<hfoption value="tgi" label="TGI"> | |
| `),p=w("p"),p.textContent=j,J=c(),u=w("p"),u.textContent=y,M=c(),f(U.$$.fragment),r=c(),i=w("p"),i.textContent=Q,A=c(),f($.$$.fragment),k=c(),W=w("p"),W.textContent=ol,E=c(),f(X.$$.fragment),H=z(` | |
| </hfoption> | |
| <hfoption value="llama.cpp" label="llama.cpp"> | |
| `),T=w("p"),T.textContent=S,ll=c(),L=w("p"),L.textContent=dl,el=c(),f(R.$$.fragment),tl=c(),D=w("p"),D.textContent=hl,sl=c(),f(x.$$.fragment),nl=c(),P=w("p"),P.textContent=bl,al=c(),f(Y.$$.fragment),Ml=c(),O=w("p"),O.textContent=Tl,_=c(),f(ml.$$.fragment),il=z(` | |
| </hfoption> | |
| <hfoption value="vllm" label="vLLM"> | |
| `),K=w("p"),K.textContent=Cl,pl=c(),q=w("p"),q.textContent=Jl,jl=c(),f(F.$$.fragment),ul=c(),rl=w("p"),rl.textContent=Ul,n=c(),f(d.$$.fragment),Sl=c(),wl=w("p"),wl.textContent=fl,Bl=c(),f(cl.$$.fragment),gl=z(` | |
| </hfoption>`)},l(s){m=N(s,`<hfoption value="tgi" label="TGI"> | |
| `),p=I(s,"P",{"data-svelte-h":!0}),G(p)!=="svelte-d3ibl6"&&(p.textContent=j),J=o(s),u=I(s,"P",{"data-svelte-h":!0}),G(u)!=="svelte-5he4zd"&&(u.textContent=y),M=o(s),B(U.$$.fragment,s),r=o(s),i=I(s,"P",{"data-svelte-h":!0}),G(i)!=="svelte-njbfao"&&(i.textContent=Q),A=o(s),B($.$$.fragment,s),k=o(s),W=I(s,"P",{"data-svelte-h":!0}),G(W)!=="svelte-dmbprn"&&(W.textContent=ol),E=o(s),B(X.$$.fragment,s),H=N(s,` | |
| </hfoption> | |
| <hfoption value="llama.cpp" label="llama.cpp"> | |
| `),T=I(s,"P",{"data-svelte-h":!0}),G(T)!=="svelte-910ccc"&&(T.textContent=S),ll=o(s),L=I(s,"P",{"data-svelte-h":!0}),G(L)!=="svelte-yvknv6"&&(L.textContent=dl),el=o(s),B(R.$$.fragment,s),tl=o(s),D=I(s,"P",{"data-svelte-h":!0}),G(D)!=="svelte-1t3nfo6"&&(D.textContent=hl),sl=o(s),B(x.$$.fragment,s),nl=o(s),P=I(s,"P",{"data-svelte-h":!0}),G(P)!=="svelte-15q65ck"&&(P.textContent=bl),al=o(s),B(Y.$$.fragment,s),Ml=o(s),O=I(s,"P",{"data-svelte-h":!0}),G(O)!=="svelte-dmbprn"&&(O.textContent=Tl),_=o(s),B(ml.$$.fragment,s),il=N(s,` | |
| </hfoption> | |
| <hfoption value="vllm" label="vLLM"> | |
| `),K=I(s,"P",{"data-svelte-h":!0}),G(K)!=="svelte-1eczzi3"&&(K.textContent=Cl),pl=o(s),q=I(s,"P",{"data-svelte-h":!0}),G(q)!=="svelte-sv1ope"&&(q.textContent=Jl),jl=o(s),B(F.$$.fragment,s),ul=o(s),rl=I(s,"P",{"data-svelte-h":!0}),G(rl)!=="svelte-njbfao"&&(rl.textContent=Ul),n=o(s),B(d.$$.fragment,s),Sl=o(s),wl=I(s,"P",{"data-svelte-h":!0}),G(wl)!=="svelte-dmbprn"&&(wl.textContent=fl),Bl=o(s),B(cl.$$.fragment,s),gl=N(s,` | |
| </hfoption>`)},m(s,h){t(s,m,h),t(s,p,h),t(s,J,h),t(s,u,h),t(s,M,h),g(U,s,h),t(s,r,h),t(s,i,h),t(s,A,h),g($,s,h),t(s,k,h),t(s,W,h),t(s,E,h),g(X,s,h),t(s,H,h),t(s,T,h),t(s,ll,h),t(s,L,h),t(s,el,h),g(R,s,h),t(s,tl,h),t(s,D,h),t(s,sl,h),g(x,s,h),t(s,nl,h),t(s,P,h),t(s,al,h),g(Y,s,h),t(s,Ml,h),t(s,O,h),t(s,_,h),g(ml,s,h),t(s,il,h),t(s,K,h),t(s,pl,h),t(s,q,h),t(s,jl,h),g(F,s,h),t(s,ul,h),t(s,rl,h),t(s,n,h),g(d,s,h),t(s,Sl,h),t(s,wl,h),t(s,Bl,h),g(cl,s,h),t(s,gl,h),Il=!0},p:Zl,i(s){Il||(b(U.$$.fragment,s),b($.$$.fragment,s),b(X.$$.fragment,s),b(R.$$.fragment,s),b(x.$$.fragment,s),b(Y.$$.fragment,s),b(ml.$$.fragment,s),b(F.$$.fragment,s),b(d.$$.fragment,s),b(cl.$$.fragment,s),Il=!0)},o(s){C(U.$$.fragment,s),C($.$$.fragment,s),C(X.$$.fragment,s),C(R.$$.fragment,s),C(x.$$.fragment,s),C(Y.$$.fragment,s),C(ml.$$.fragment,s),C(F.$$.fragment,s),C(d.$$.fragment,s),C(cl.$$.fragment,s),Il=!1},d(s){s&&(e(m),e(p),e(J),e(u),e(M),e(r),e(i),e(A),e(k),e(W),e(E),e(H),e(T),e(ll),e(L),e(el),e(tl),e(D),e(sl),e(nl),e(P),e(al),e(Ml),e(O),e(_),e(il),e(K),e(pl),e(q),e(jl),e(ul),e(rl),e(n),e(Sl),e(wl),e(Bl),e(gl)),Z(U,s),Z($,s),Z(X,s),Z(R,s),Z(x,s),Z(Y,s),Z(ml,s),Z(F,s),Z(d,s),Z(cl,s)}}}function ut(v){let m,p,j="Mai întâi, implementează TGI cu parametri avansați:",J,u,y,M,U="Folosește InferenceClient pentru generarea flexibilă de text:",r,i,Q,A,$="Sau folosește clientul OpenAI:",k,W,ol,E,X="Pentru llama.cpp, poți seta parametri avansați când lansezi serverul:",H,T,S,ll,L="Folosește InferenceClient:",dl,el,R,tl,D="Sau folosește clientul OpenAI pentru generare cu control asupra parametrilor de eșantionare:",hl,sl,x,nl,P="Poți folosi, de asemenea, biblioteca nativă llama.cpp pentru un control și mai mare:",bl,al,Y,Ml,O="Pentru utilizarea avansată cu vLLM, poți folosi InferenceClient:",Tl,_,ml,il,K="Poți folosi, de asemenea, clientul OpenAI:",Cl,pl,q,Jl,jl="vLLM oferă, de asemenea, o interfață Python nativă cu control fin:",F,ul,rl,Ul;return u=new 
V({props:{code:"ZG9ja2VyJTIwcnVuJTIwLS1ncHVzJTIwYWxsJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1zaG0tc2l6ZSUyMDFnJTIwJTVDJTBBJTIwJTIwJTIwJTIwLXAlMjA4MDgwJTNBODAlMjAlNUMlMEElMjAlMjAlMjAlMjAtdiUyMH4lMkYuY2FjaGUlMkZodWdnaW5nZmFjZSUzQSUyRmRhdGElMjAlNUMlMEElMjAlMjAlMjAlMjBnaGNyLmlvJTJGaHVnZ2luZ2ZhY2UlMkZ0ZXh0LWdlbmVyYXRpb24taW5mZXJlbmNlJTNBbGF0ZXN0JTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1tb2RlbC1pZCUyMEh1Z2dpbmdGYWNlVEIlMkZTbW9sTE0yLTM2ME0tSW5zdHJ1Y3QlMjAlNUMlMEElMjAlMjAlMjAlMjAtLW1heC10b3RhbC10b2tlbnMlMjA0MDk2JTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1tYXgtaW5wdXQtbGVuZ3RoJTIwMzA3MiUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbWF4LWJhdGNoLXRvdGFsLXRva2VucyUyMDgxOTIlMjAlNUMlMEElMjAlMjAlMjAlMjAtLXdhaXRpbmctc2VydmVkLXJhdGlvJTIwMS4y",highlighted:`docker run --gpus all \\ | |
| --shm-size 1g \\ | |
| -p 8080:80 \\ | |
| -v ~/.cache/huggingface:/data \\ | |
| ghcr.io/huggingface/text-generation-inference:latest \\ | |
| --model-id HuggingFaceTB/SmolLM2-360M-Instruct \\ | |
| --max-total-tokens 4096 \\ | |
| --max-input-length 3072 \\ | |
| --max-batch-total-tokens 8192 \\ | |
| --waiting-served-ratio 1.2`,wrap:!1}}),i=new V({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMEluZmVyZW5jZUNsaWVudCUwQSUwQWNsaWVudCUyMCUzRCUyMEluZmVyZW5jZUNsaWVudChtb2RlbCUzRCUyMmh0dHAlM0ElMkYlMkZsb2NhbGhvc3QlM0E4MDgwJTIyKSUwQSUwQSUyMyUyMEV4ZW1wbHUlMjBkZSUyMHBhcmFtZXRyaSUyMGF2YW5zYSVDOCU5QmklMEFyZXNwb25zZSUyMCUzRCUyMGNsaWVudC5jaGF0X2NvbXBsZXRpb24oJTBBJTIwJTIwJTIwJTIwbWVzc2FnZXMlM0QlNUIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMjJyb2xlJTIyJTNBJTIwJTIyc3lzdGVtJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMCUyMkUlQzglOTl0aSUyMHVuJTIwcG92ZXN0aXRvciUyMGNyZWF0aXYuJTIyJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTIycm9sZSUyMiUzQSUyMCUyMnVzZXIlMjIlMkMlMjAlMjJjb250ZW50JTIyJTNBJTIwJTIyU2NyaWUlMjBvJTIwcG92ZXN0ZSUyMGNyZWF0aXYlQzQlODMlMjIlN0QlMkMlMEElMjAlMjAlMjAlMjAlNUQlMkMlMEElMjAlMjAlMjAlMjB0ZW1wZXJhdHVyZSUzRDAuOCUyQyUwQSUyMCUyMCUyMCUyMG1heF90b2tlbnMlM0QyMDAlMkMlMEElMjAlMjAlMjAlMjB0b3BfcCUzRDAuOTUlMkMlMEEpJTBBcHJpbnQocmVzcG9uc2UuY2hvaWNlcyU1QjAlNUQubWVzc2FnZS5jb250ZW50KSUwQSUwQSUyMyUyMEdlbmVyYXJlYSUyMGJydXQlQzQlODMlMjBkZSUyMHRleHQlMEFyZXNwb25zZSUyMCUzRCUyMGNsaWVudC50ZXh0X2dlbmVyYXRpb24oJTBBJTIwJTIwJTIwJTIwJTIyU2NyaWUlMjBvJTIwcG92ZXN0ZSUyMGNyZWF0aXYlQzQlODMlMjBkZXNwcmUlMjBleHBsb3JhcmVhJTIwc3BhJUM4JTlCaXVsdWklMjIlMkMlMEElMjAlMjAlMjAlMjBtYXhfbmV3X3Rva2VucyUzRDIwMCUyQyUwQSUyMCUyMCUyMCUyMHRlbXBlcmF0dXJlJTNEMC44JTJDJTBBJTIwJTIwJTIwJTIwdG9wX3AlM0QwLjk1JTJDJTBBJTIwJTIwJTIwJTIwcmVwZXRpdGlvbl9wZW5hbHR5JTNEMS4xJTJDJTBBJTIwJTIwJTIwJTIwZG9fc2FtcGxlJTNEVHJ1ZSUyQyUwQSUyMCUyMCUyMCUyMGRldGFpbHMlM0RUcnVlJTJDJTBBKSUwQXByaW50KHJlc3BvbnNlLmdlbmVyYXRlZF90ZXh0KQ==",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> InferenceClient | |
| client = InferenceClient(model=<span class="hljs-string">"http://localhost:8080"</span>) | |
| <span class="hljs-comment"># Exemplu de parametri avansați</span> | |
| response = client.chat_completion( | |
| messages=[ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"system"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Ești un povestitor creativ."</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Scrie o poveste creativă"</span>}, | |
| ], | |
| temperature=<span class="hljs-number">0.8</span>, | |
| max_tokens=<span class="hljs-number">200</span>, | |
| top_p=<span class="hljs-number">0.95</span>, | |
| ) | |
| <span class="hljs-built_in">print</span>(response.choices[<span class="hljs-number">0</span>].message.content) | |
| <span class="hljs-comment"># Generarea brută de text</span> | |
| response = client.text_generation( | |
| <span class="hljs-string">"Scrie o poveste creativă despre explorarea spațiului"</span>, | |
| max_new_tokens=<span class="hljs-number">200</span>, | |
| temperature=<span class="hljs-number">0.8</span>, | |
| top_p=<span class="hljs-number">0.95</span>, | |
| repetition_penalty=<span class="hljs-number">1.1</span>, | |
| do_sample=<span class="hljs-literal">True</span>, | |
| details=<span class="hljs-literal">True</span>, | |
| ) | |
| <span class="hljs-built_in">print</span>(response.generated_text)`,wrap:!1}}),W=new V({props:{code:"ZnJvbSUyMG9wZW5haSUyMGltcG9ydCUyME9wZW5BSSUwQSUwQWNsaWVudCUyMCUzRCUyME9wZW5BSShiYXNlX3VybCUzRCUyMmh0dHAlM0ElMkYlMkZsb2NhbGhvc3QlM0E4MDgwJTJGdjElMjIlMkMlMjBhcGlfa2V5JTNEJTIybm90LW5lZWRlZCUyMiklMEElMEElMjMlMjBFeGVtcGx1JTIwZGUlMjBwYXJhbWV0cmklMjBhdmFuc2ElQzglOUJpJTBBcmVzcG9uc2UlMjAlM0QlMjBjbGllbnQuY2hhdC5jb21wbGV0aW9ucy5jcmVhdGUoJTBBJTIwJTIwJTIwJTIwbW9kZWwlM0QlMjJIdWdnaW5nRmFjZVRCJTJGU21vbExNMi0zNjBNLUluc3RydWN0JTIyJTJDJTBBJTIwJTIwJTIwJTIwbWVzc2FnZXMlM0QlNUIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMjJyb2xlJTIyJTNBJTIwJTIyc3lzdGVtJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMCUyMkUlQzglOTl0aSUyMHVuJTIwcG92ZXN0aXRvciUyMGNyZWF0aXYuJTIyJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTIycm9sZSUyMiUzQSUyMCUyMnVzZXIlMjIlMkMlMjAlMjJjb250ZW50JTIyJTNBJTIwJTIyU2NyaWUlMjBvJTIwcG92ZXN0ZSUyMGNyZWF0aXYlQzQlODMlMjIlN0QlMkMlMEElMjAlMjAlMjAlMjAlNUQlMkMlMEElMjAlMjAlMjAlMjB0ZW1wZXJhdHVyZSUzRDAuOCUyQyUyMCUyMCUyMyUyME1haSUyMG1hcmUlMjBwZW50cnUlMjBtYWklMjBtdWx0JUM0JTgzJTIwY3JlYXRpdml0YXRlJTBBKSUwQXByaW50KHJlc3BvbnNlLmNob2ljZXMlNUIwJTVELm1lc3NhZ2UuY29udGVudCk=",highlighted:`<span class="hljs-keyword">from</span> openai <span class="hljs-keyword">import</span> OpenAI | |
| client = OpenAI(base_url=<span class="hljs-string">"http://localhost:8080/v1"</span>, api_key=<span class="hljs-string">"not-needed"</span>) | |
| <span class="hljs-comment"># Exemplu de parametri avansați</span> | |
| response = client.chat.completions.create( | |
| model=<span class="hljs-string">"HuggingFaceTB/SmolLM2-360M-Instruct"</span>, | |
| messages=[ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"system"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Ești un povestitor creativ."</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Scrie o poveste creativă"</span>}, | |
| ], | |
| temperature=<span class="hljs-number">0.8</span>, <span class="hljs-comment"># Mai mare pentru mai multă creativitate</span> | |
| ) | |
| <span class="hljs-built_in">print</span>(response.choices[<span class="hljs-number">0</span>].message.content)`,wrap:!1}}),T=new V({props:{code:"LiUyRnNlcnZlciUyMCU1QyUwQSUyMCUyMCUyMCUyMC1tJTIwc21vbGxtMi0xLjdiLWluc3RydWN0LlE0X0tfTS5nZ3VmJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1ob3N0JTIwMC4wLjAuMCUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tcG9ydCUyMDgwODAlMjAlNUMlMEElMjAlMjAlMjAlMjAtYyUyMDQwOTYlMjAlNUMlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjMlMjBEaW1lbnNpdW5lYSUyMGNvbnRleHR1bHVpJTBBJTIwJTIwJTIwJTIwLS10aHJlYWRzJTIwOCUyMCU1QyUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMyUyMFRocmVhZC11cmklMjBDUFUlMjBkZSUyMGZvbG9zaXQlMEElMjAlMjAlMjAlMjAtLWJhdGNoLXNpemUlMjA1MTIlMjAlNUMlMjAlMjAlMjAlMjMlMjBEaW1lbnNpdW5lYSUyMGxvdHVsdWklMjBwZW50cnUlMjBldmFsdWFyZWElMjBwcm9tcHQtdWx1aSUwQSUyMCUyMCUyMCUyMC0tbi1ncHUtbGF5ZXJzJTIwMCUyMCUyMCUyMCUyMCUyMCUyMyUyMFN0cmF0dXJpJTIwR1BVJTIwKDAlMjAlM0QlMjBkb2FyJTIwQ1BVKQ==",highlighted:`./server \\ | |
| -m smollm2-1.7b-instruct.Q4_K_M.gguf \\ | |
| --host 0.0.0.0 \\ | |
| --port 8080 \\ | |
| -c 4096 \\ <span class="hljs-comment"># Dimensiunea contextului</span> | |
| --threads 8 \\ <span class="hljs-comment"># Thread-uri CPU de folosit</span> | |
| --batch-size 512 \\ <span class="hljs-comment"># Dimensiunea lotului pentru evaluarea prompt-ului</span> | |
| --n-gpu-layers 0 <span class="hljs-comment"># Straturi GPU (0 = doar CPU)</span>`,wrap:!1}}),el=new V({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMEluZmVyZW5jZUNsaWVudCUwQSUwQWNsaWVudCUyMCUzRCUyMEluZmVyZW5jZUNsaWVudChtb2RlbCUzRCUyMmh0dHAlM0ElMkYlMkZsb2NhbGhvc3QlM0E4MDgwJTJGdjElMjIlMkMlMjB0b2tlbiUzRCUyMnNrLW5vLWtleS1yZXF1aXJlZCUyMiklMEElMEElMjMlMjBFeGVtcGx1JTIwZGUlMjBwYXJhbWV0cmklMjBhdmFuc2ElQzglOUJpJTBBcmVzcG9uc2UlMjAlM0QlMjBjbGllbnQuY2hhdF9jb21wbGV0aW9uKCUwQSUyMCUyMCUyMCUyMG1lc3NhZ2VzJTNEJTVCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTIycm9sZSUyMiUzQSUyMCUyMnN5c3RlbSUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlMjJFJUM4JTk5dGklMjB1biUyMHBvdmVzdGl0b3IlMjBjcmVhdGl2LiUyMiU3RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QiUyMnJvbGUlMjIlM0ElMjAlMjJ1c2VyJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMCUyMlNjcmllJTIwbyUyMHBvdmVzdGUlMjBjcmVhdGl2JUM0JTgzJTIyJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTVEJTJDJTBBJTIwJTIwJTIwJTIwdGVtcGVyYXR1cmUlM0QwLjglMkMlMEElMjAlMjAlMjAlMjBtYXhfdG9rZW5zJTNEMjAwJTJDJTBBJTIwJTIwJTIwJTIwdG9wX3AlM0QwLjk1JTJDJTBBKSUwQXByaW50KHJlc3BvbnNlLmNob2ljZXMlNUIwJTVELm1lc3NhZ2UuY29udGVudCklMEElMEElMjMlMjBQZW50cnUlMjBnZW5lcmFyZWElMjBkaXJlY3QlQzQlODMlMjBkZSUyMHRleHQlMEFyZXNwb25zZSUyMCUzRCUyMGNsaWVudC50ZXh0X2dlbmVyYXRpb24oJTBBJTIwJTIwJTIwJTIwJTIyU2NyaWUlMjBvJTIwcG92ZXN0ZSUyMGNyZWF0aXYlQzQlODMlMjBkZXNwcmUlMjBleHBsb3JhcmVhJTIwc3BhJUM4JTlCaXVsdWklMjIlMkMlMEElMjAlMjAlMjAlMjBtYXhfbmV3X3Rva2VucyUzRDIwMCUyQyUwQSUyMCUyMCUyMCUyMHRlbXBlcmF0dXJlJTNEMC44JTJDJTBBJTIwJTIwJTIwJTIwdG9wX3AlM0QwLjk1JTJDJTBBJTIwJTIwJTIwJTIwcmVwZXRpdGlvbl9wZW5hbHR5JTNEMS4xJTJDJTBBJTIwJTIwJTIwJTIwZGV0YWlscyUzRFRydWUlMkMlMEEpJTBBcHJpbnQocmVzcG9uc2UuZ2VuZXJhdGVkX3RleHQp",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> InferenceClient | |
| client = InferenceClient(model=<span class="hljs-string">"http://localhost:8080/v1"</span>, token=<span class="hljs-string">"sk-no-key-required"</span>) | |
| <span class="hljs-comment"># Exemplu de parametri avansați</span> | |
| response = client.chat_completion( | |
| messages=[ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"system"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Ești un povestitor creativ."</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Scrie o poveste creativă"</span>}, | |
| ], | |
| temperature=<span class="hljs-number">0.8</span>, | |
| max_tokens=<span class="hljs-number">200</span>, | |
| top_p=<span class="hljs-number">0.95</span>, | |
| ) | |
| <span class="hljs-built_in">print</span>(response.choices[<span class="hljs-number">0</span>].message.content) | |
| <span class="hljs-comment"># Pentru generarea directă de text</span> | |
| response = client.text_generation( | |
| <span class="hljs-string">"Scrie o poveste creativă despre explorarea spațiului"</span>, | |
| max_new_tokens=<span class="hljs-number">200</span>, | |
| temperature=<span class="hljs-number">0.8</span>, | |
| top_p=<span class="hljs-number">0.95</span>, | |
| repetition_penalty=<span class="hljs-number">1.1</span>, | |
| details=<span class="hljs-literal">True</span>, | |
| ) | |
| <span class="hljs-built_in">print</span>(response.generated_text)`,wrap:!1}}),sl=new V({props:{code:"ZnJvbSUyMG9wZW5haSUyMGltcG9ydCUyME9wZW5BSSUwQSUwQWNsaWVudCUyMCUzRCUyME9wZW5BSShiYXNlX3VybCUzRCUyMmh0dHAlM0ElMkYlMkZsb2NhbGhvc3QlM0E4MDgwJTJGdjElMjIlMkMlMjBhcGlfa2V5JTNEJTIyc2stbm8ta2V5LXJlcXVpcmVkJTIyKSUwQSUwQSUyMyUyMEV4ZW1wbHUlMjBkZSUyMHBhcmFtZXRyaSUyMGF2YW5zYSVDOCU5QmklMEFyZXNwb25zZSUyMCUzRCUyMGNsaWVudC5jaGF0LmNvbXBsZXRpb25zLmNyZWF0ZSglMEElMjAlMjAlMjAlMjBtb2RlbCUzRCUyMnNtb2xsbTItMS43Yi1pbnN0cnVjdCUyMiUyQyUwQSUyMCUyMCUyMCUyMG1lc3NhZ2VzJTNEJTVCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTIycm9sZSUyMiUzQSUyMCUyMnN5c3RlbSUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlMjJFJUM4JTk5dGklMjB1biUyMHBvdmVzdGl0b3IlMjBjcmVhdGl2LiUyMiU3RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QiUyMnJvbGUlMjIlM0ElMjAlMjJ1c2VyJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMCUyMlNjcmllJTIwbyUyMHBvdmVzdGUlMjBjcmVhdGl2JUM0JTgzJTIyJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTVEJTJDJTBBJTIwJTIwJTIwJTIwdGVtcGVyYXR1cmUlM0QwLjglMkMlMjAlMjAlMjMlMjBNYWklMjBtYXJlJTIwcGVudHJ1JTIwbWFpJTIwbXVsdCVDNCU4MyUyMGNyZWF0aXZpdGF0ZSUwQSUyMCUyMCUyMCUyMHRvcF9wJTNEMC45NSUyQyUyMCUyMCUyMyUyMFByb2JhYmlsaXRhdGVhJTIwZSVDOCU5OWFudGlvbiVDNCU4M3JpaSUyMG51Y2xldXMlMEElMjAlMjAlMjAlMjBmcmVxdWVuY3lfcGVuYWx0eSUzRDAuNSUyQyUyMCUyMCUyMyUyMFJlZHVjZSUyMHJlcGV0YXJlYSUyMHRva2VuLXVyaWxvciUyMGZyZWN2ZW50ZSUwQSUyMCUyMCUyMCUyMHByZXNlbmNlX3BlbmFsdHklM0QwLjUlMkMlMjAlMjAlMjMlMjBSZWR1Y2UlMjByZXBldGFyZWElMjBwcmluJTIwcGVuYWxpemFyZWElMjB0b2tlbi11cmlsb3IlMjBkZWphJTIwcHJlemVudGUlMEElMjAlMjAlMjAlMjBtYXhfdG9rZW5zJTNEMjAwJTJDJTIwJTIwJTIzJTIwTHVuZ2ltZWElMjBtYXhpbSVDNCU4MyUyMGRlJTIwZ2VuZXJhcmUlMEEpJTBBcHJpbnQocmVzcG9uc2UuY2hvaWNlcyU1QjAlNUQubWVzc2FnZS5jb250ZW50KQ==",highlighted:`<span class="hljs-keyword">from</span> openai <span class="hljs-keyword">import</span> OpenAI | |
| client = OpenAI(base_url=<span class="hljs-string">"http://localhost:8080/v1"</span>, api_key=<span class="hljs-string">"sk-no-key-required"</span>) | |
| <span class="hljs-comment"># Exemplu de parametri avansați</span> | |
| response = client.chat.completions.create( | |
| model=<span class="hljs-string">"smollm2-1.7b-instruct"</span>, | |
| messages=[ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"system"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Ești un povestitor creativ."</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Scrie o poveste creativă"</span>}, | |
| ], | |
| temperature=<span class="hljs-number">0.8</span>, <span class="hljs-comment"># Mai mare pentru mai multă creativitate</span> | |
| top_p=<span class="hljs-number">0.95</span>, <span class="hljs-comment"># Probabilitatea eșantionării nucleus</span> | |
| frequency_penalty=<span class="hljs-number">0.5</span>, <span class="hljs-comment"># Reduce repetarea token-urilor frecvente</span> | |
| presence_penalty=<span class="hljs-number">0.5</span>, <span class="hljs-comment"># Reduce repetarea prin penalizarea token-urilor deja prezente</span> | |
| max_tokens=<span class="hljs-number">200</span>, <span class="hljs-comment"># Lungimea maximă de generare</span> | |
| ) | |
| <span class="hljs-built_in">print</span>(response.choices[<span class="hljs-number">0</span>].message.content)`,wrap:!1}}),al=new V({props:{code:"JTIzJTIwRm9sb3NpbmQlMjBwYWNoZXR1bCUyMGxsYW1hLWNwcC1weXRob24lMjBwZW50cnUlMjBhY2Nlc3VsJTIwZGlyZWN0JTIwbGElMjBtb2RlbCUwQWZyb20lMjBsbGFtYV9jcHAlMjBpbXBvcnQlMjBMbGFtYSUwQSUwQSUyMyUyMCVDMyU4RW5jYXJjJUM0JTgzJTIwbW9kZWx1bCUwQWxsbSUyMCUzRCUyMExsYW1hKCUwQSUyMCUyMCUyMCUyMG1vZGVsX3BhdGglM0QlMjJzbW9sbG0yLTEuN2ItaW5zdHJ1Y3QuUTRfS19NLmdndWYlMjIlMkMlMEElMjAlMjAlMjAlMjBuX2N0eCUzRDQwOTYlMkMlMjAlMjAlMjMlMjBEaW1lbnNpdW5lYSUyMGZlcmVzdHJlaSUyMGRlJTIwY29udGV4dCUwQSUyMCUyMCUyMCUyMG5fdGhyZWFkcyUzRDglMkMlMjAlMjAlMjMlMjBUaHJlYWQtdXJpJTIwQ1BVJTBBJTIwJTIwJTIwJTIwbl9ncHVfbGF5ZXJzJTNEMCUyQyUyMCUyMCUyMyUyMFN0cmF0dXJpJTIwR1BVJTIwKDAlMjAlM0QlMjBkb2FyJTIwQ1BVKSUwQSklMEElMEElMjMlMjBGb3JtYXRlYXolQzQlODMlMjBwcm9tcHQtdWwlMjBjb25mb3JtJTIwZm9ybWF0dWx1aSUyMGElQzglOTl0ZXB0YXQlMjBhbCUyMG1vZGVsdWx1aSUwQXByb21wdCUyMCUzRCUyMCUyMiUyMiUyMiUzQyU3Q2ltX3N0YXJ0JTdDJTNFc3lzdGVtJTBBRSVDOCU5OXRpJTIwdW4lMjBwb3Zlc3RpdG9yJTIwY3JlYXRpdi4lMEElM0MlN0NpbV9lbmQlN0MlM0UlMEElM0MlN0NpbV9zdGFydCU3QyUzRXVzZXIlMEFTY3JpZSUyMG8lMjBwb3Zlc3RlJTIwY3JlYXRpdiVDNCU4MyUwQSUzQyU3Q2ltX2VuZCU3QyUzRSUwQSUzQyU3Q2ltX3N0YXJ0JTdDJTNFYXNzaXN0YW50JTBBJTIyJTIyJTIyJTBBJTBBJTIzJTIwR2VuZXJlYXolQzQlODMlMjByJUM0JTgzc3B1bnN1bCUyMGN1JTIwY29udHJvbCUyMHByZWNpcyUyMGFsJTIwcGFyYW1ldHJpbG9yJTBBb3V0cHV0JTIwJTNEJTIwbGxtKCUwQSUyMCUyMCUyMCUyMHByb21wdCUyQyUwQSUyMCUyMCUyMCUyMG1heF90b2tlbnMlM0QyMDAlMkMlMEElMjAlMjAlMjAlMjB0ZW1wZXJhdHVyZSUzRDAuOCUyQyUwQSUyMCUyMCUyMCUyMHRvcF9wJTNEMC45NSUyQyUwQSUyMCUyMCUyMCUyMGZyZXF1ZW5jeV9wZW5hbHR5JTNEMC41JTJDJTBBJTIwJTIwJTIwJTIwcHJlc2VuY2VfcGVuYWx0eSUzRDAuNSUyQyUwQSUyMCUyMCUyMCUyMHN0b3AlM0QlNUIlMjIlM0MlN0NpbV9lbmQlN0MlM0UlMjIlNUQlMkMlMEEpJTBBJTBBcHJpbnQob3V0cHV0JTVCJTIyY2hvaWNlcyUyMiU1RCU1QjAlNUQlNUIlMjJ0ZXh0JTIyJTVEKQ==",highlighted:`<span class="hljs-comment"># Folosind pachetul llama-cpp-python pentru accesul direct la model</span> | |
| <span class="hljs-keyword">from</span> llama_cpp <span class="hljs-keyword">import</span> Llama | |
| <span class="hljs-comment"># Încarcă modelul</span> | |
| llm = Llama( | |
| model_path=<span class="hljs-string">"smollm2-1.7b-instruct.Q4_K_M.gguf"</span>, | |
| n_ctx=<span class="hljs-number">4096</span>, <span class="hljs-comment"># Dimensiunea ferestrei de context</span> | |
| n_threads=<span class="hljs-number">8</span>, <span class="hljs-comment"># Thread-uri CPU</span> | |
| n_gpu_layers=<span class="hljs-number">0</span>, <span class="hljs-comment"># Straturi GPU (0 = doar CPU)</span> | |
| ) | |
| <span class="hljs-comment"># Formatează prompt-ul conform formatului așteptat al modelului</span> | |
| prompt = <span class="hljs-string">"""<|im_start|>system | |
| Ești un povestitor creativ. | |
| <|im_end|> | |
| <|im_start|>user | |
| Scrie o poveste creativă | |
| <|im_end|> | |
| <|im_start|>assistant | |
| """</span> | |
| <span class="hljs-comment"># Generează răspunsul cu control precis al parametrilor</span> | |
| output = llm( | |
| prompt, | |
| max_tokens=<span class="hljs-number">200</span>, | |
| temperature=<span class="hljs-number">0.8</span>, | |
| top_p=<span class="hljs-number">0.95</span>, | |
| frequency_penalty=<span class="hljs-number">0.5</span>, | |
| presence_penalty=<span class="hljs-number">0.5</span>, | |
| stop=[<span class="hljs-string">"<|im_end|>"</span>], | |
| ) | |
| <span class="hljs-built_in">print</span>(output[<span class="hljs-string">"choices"</span>][<span class="hljs-number">0</span>][<span class="hljs-string">"text"</span>])`,wrap:!1}}),_=new V({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMEluZmVyZW5jZUNsaWVudCUwQSUwQWNsaWVudCUyMCUzRCUyMEluZmVyZW5jZUNsaWVudChtb2RlbCUzRCUyMmh0dHAlM0ElMkYlMkZsb2NhbGhvc3QlM0E4MDAwJTJGdjElMjIpJTBBJTBBJTIzJTIwRXhlbXBsdSUyMGRlJTIwcGFyYW1ldHJpJTIwYXZhbnNhJUM4JTlCaSUwQXJlc3BvbnNlJTIwJTNEJTIwY2xpZW50LmNoYXRfY29tcGxldGlvbiglMEElMjAlMjAlMjAlMjBtZXNzYWdlcyUzRCU1QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QiUyMnJvbGUlMjIlM0ElMjAlMjJzeXN0ZW0lMjIlMkMlMjAlMjJjb250ZW50JTIyJTNBJTIwJTIyRSVDOCU5OXRpJTIwdW4lMjBwb3Zlc3RpdG9yJTIwY3JlYXRpdi4lMjIlN0QlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMjJyb2xlJTIyJTNBJTIwJTIydXNlciUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlMjJTY3JpZSUyMG8lMjBwb3Zlc3RlJTIwY3JlYXRpdiVDNCU4MyUyMiU3RCUyQyUwQSUyMCUyMCUyMCUyMCU1RCUyQyUwQSUyMCUyMCUyMCUyMHRlbXBlcmF0dXJlJTNEMC44JTJDJTBBJTIwJTIwJTIwJTIwbWF4X3Rva2VucyUzRDIwMCUyQyUwQSUyMCUyMCUyMCUyMHRvcF9wJTNEMC45NSUyQyUwQSklMEFwcmludChyZXNwb25zZS5jaG9pY2VzJTVCMCU1RC5tZXNzYWdlLmNvbnRlbnQpJTBBJTBBJTIzJTIwUGVudHJ1JTIwZ2VuZXJhcmVhJTIwZGlyZWN0JUM0JTgzJTIwZGUlMjB0ZXh0JTBBcmVzcG9uc2UlMjAlM0QlMjBjbGllbnQudGV4dF9nZW5lcmF0aW9uKCUwQSUyMCUyMCUyMCUyMCUyMlNjcmllJTIwbyUyMHBvdmVzdGUlMjBjcmVhdGl2JUM0JTgzJTIwZGVzcHJlJTIwZXhwbG9yYXJlYSUyMHNwYSVDOCU5Qml1bHVpJTIyJTJDJTBBJTIwJTIwJTIwJTIwbWF4X25ld190b2tlbnMlM0QyMDAlMkMlMEElMjAlMjAlMjAlMjB0ZW1wZXJhdHVyZSUzRDAuOCUyQyUwQSUyMCUyMCUyMCUyMHRvcF9wJTNEMC45NSUyQyUwQSUyMCUyMCUyMCUyMGRldGFpbHMlM0RUcnVlJTJDJTBBKSUwQXByaW50KHJlc3BvbnNlLmdlbmVyYXRlZF90ZXh0KQ==",highlighted:`<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> InferenceClient | |
| client = InferenceClient(model=<span class="hljs-string">"http://localhost:8000/v1"</span>) | |
| <span class="hljs-comment"># Exemplu de parametri avansați</span> | |
| response = client.chat_completion( | |
| messages=[ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"system"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Ești un povestitor creativ."</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Scrie o poveste creativă"</span>}, | |
| ], | |
| temperature=<span class="hljs-number">0.8</span>, | |
| max_tokens=<span class="hljs-number">200</span>, | |
| top_p=<span class="hljs-number">0.95</span>, | |
| ) | |
| <span class="hljs-built_in">print</span>(response.choices[<span class="hljs-number">0</span>].message.content) | |
| <span class="hljs-comment"># Pentru generarea directă de text</span> | |
| response = client.text_generation( | |
| <span class="hljs-string">"Scrie o poveste creativă despre explorarea spațiului"</span>, | |
| max_new_tokens=<span class="hljs-number">200</span>, | |
| temperature=<span class="hljs-number">0.8</span>, | |
| top_p=<span class="hljs-number">0.95</span>, | |
| details=<span class="hljs-literal">True</span>, | |
| ) | |
| <span class="hljs-built_in">print</span>(response.generated_text)`,wrap:!1}}),pl=new V({props:{code:"ZnJvbSUyMG9wZW5haSUyMGltcG9ydCUyME9wZW5BSSUwQSUwQWNsaWVudCUyMCUzRCUyME9wZW5BSShiYXNlX3VybCUzRCUyMmh0dHAlM0ElMkYlMkZsb2NhbGhvc3QlM0E4MDAwJTJGdjElMjIlMkMlMjBhcGlfa2V5JTNEJTIybm90LW5lZWRlZCUyMiklMEElMEElMjMlMjBFeGVtcGx1JTIwZGUlMjBwYXJhbWV0cmklMjBhdmFuc2ElQzglOUJpJTBBcmVzcG9uc2UlMjAlM0QlMjBjbGllbnQuY2hhdC5jb21wbGV0aW9ucy5jcmVhdGUoJTBBJTIwJTIwJTIwJTIwbW9kZWwlM0QlMjJIdWdnaW5nRmFjZVRCJTJGU21vbExNMi0zNjBNLUluc3RydWN0JTIyJTJDJTBBJTIwJTIwJTIwJTIwbWVzc2FnZXMlM0QlNUIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMjJyb2xlJTIyJTNBJTIwJTIyc3lzdGVtJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMCUyMkUlQzglOTl0aSUyMHVuJTIwcG92ZXN0aXRvciUyMGNyZWF0aXYuJTIyJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTIycm9sZSUyMiUzQSUyMCUyMnVzZXIlMjIlMkMlMjAlMjJjb250ZW50JTIyJTNBJTIwJTIyU2NyaWUlMjBvJTIwcG92ZXN0ZSUyMGNyZWF0aXYlQzQlODMlMjIlN0QlMkMlMEElMjAlMjAlMjAlMjAlNUQlMkMlMEElMjAlMjAlMjAlMjB0ZW1wZXJhdHVyZSUzRDAuOCUyQyUwQSUyMCUyMCUyMCUyMHRvcF9wJTNEMC45NSUyQyUwQSUyMCUyMCUyMCUyMG1heF90b2tlbnMlM0QyMDAlMkMlMEEpJTBBcHJpbnQocmVzcG9uc2UuY2hvaWNlcyU1QjAlNUQubWVzc2FnZS5jb250ZW50KQ==",highlighted:`<span class="hljs-keyword">from</span> openai <span class="hljs-keyword">import</span> OpenAI | |
| client = OpenAI(base_url=<span class="hljs-string">"http://localhost:8000/v1"</span>, api_key=<span class="hljs-string">"not-needed"</span>) | |
| <span class="hljs-comment"># Exemplu de parametri avansați</span> | |
| response = client.chat.completions.create( | |
| model=<span class="hljs-string">"HuggingFaceTB/SmolLM2-360M-Instruct"</span>, | |
| messages=[ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"system"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Ești un povestitor creativ."</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Scrie o poveste creativă"</span>}, | |
| ], | |
| temperature=<span class="hljs-number">0.8</span>, | |
| top_p=<span class="hljs-number">0.95</span>, | |
| max_tokens=<span class="hljs-number">200</span>, | |
| ) | |
| <span class="hljs-built_in">print</span>(response.choices[<span class="hljs-number">0</span>].message.content)`,wrap:!1}}),ul=new V({props:{code:"ZnJvbSUyMHZsbG0lMjBpbXBvcnQlMjBMTE0lMkMlMjBTYW1wbGluZ1BhcmFtcyUwQSUwQSUyMyUyMEluaSVDOCU5QmlhbGl6ZWF6JUM0JTgzJTIwbW9kZWx1bCUyMGN1JTIwcGFyYW1ldHJpJTIwYXZhbnNhJUM4JTlCaSUwQWxsbSUyMCUzRCUyMExMTSglMEElMjAlMjAlMjAlMjBtb2RlbCUzRCUyMkh1Z2dpbmdGYWNlVEIlMkZTbW9sTE0yLTM2ME0tSW5zdHJ1Y3QlMjIlMkMlMEElMjAlMjAlMjAlMjBncHVfbWVtb3J5X3V0aWxpemF0aW9uJTNEMC44NSUyQyUwQSUyMCUyMCUyMCUyMG1heF9udW1fYmF0Y2hlZF90b2tlbnMlM0Q4MTkyJTJDJTBBJTIwJTIwJTIwJTIwbWF4X251bV9zZXFzJTNEMjU2JTJDJTBBJTIwJTIwJTIwJTIwYmxvY2tfc2l6ZSUzRDE2JTJDJTBBKSUwQSUwQSUyMyUyMENvbmZpZ3VyZWF6JUM0JTgzJTIwcGFyYW1ldHJpaSUyMGRlJTIwZSVDOCU5OWFudGlvbmFyZSUwQXNhbXBsaW5nX3BhcmFtcyUyMCUzRCUyMFNhbXBsaW5nUGFyYW1zKCUwQSUyMCUyMCUyMCUyMHRlbXBlcmF0dXJlJTNEMC44JTJDJTIwJTIwJTIzJTIwTWFpJTIwbWFyZSUyMHBlbnRydSUyMG1haSUyMG11bHQlQzQlODMlMjBjcmVhdGl2aXRhdGUlMEElMjAlMjAlMjAlMjB0b3BfcCUzRDAuOTUlMkMlMjAlMjAlMjMlMjBDb25zaWRlciVDNCU4MyUyMG1hc2ElMjBkZSUyMHByb2JhYmlsaXRhdGUlMjBkZSUyMHRvcCUyMDk1JTI1JTBBJTIwJTIwJTIwJTIwbWF4X3Rva2VucyUzRDEwMCUyQyUyMCUyMCUyMyUyMEx1bmdpbWVhJTIwbWF4aW0lQzQlODMlMEElMjAlMjAlMjAlMjBwcmVzZW5jZV9wZW5hbHR5JTNEMS4xJTJDJTIwJTIwJTIzJTIwUmVkdWNlJTIwcmVwZXRhcmVhJTBBJTIwJTIwJTIwJTIwZnJlcXVlbmN5X3BlbmFsdHklM0QxLjElMkMlMjAlMjAlMjMlMjBSZWR1Y2UlMjByZXBldGFyZWElMEElMjAlMjAlMjAlMjBzdG9wJTNEJTVCJTIyJTVDbiU1Q24lMjIlMkMlMjAlMjIlMjMlMjMlMjMlMjIlNUQlMkMlMjAlMjAlMjMlMjBTZWN2ZW4lQzglOUJlJTIwZGUlMjBvcHJpcmUlMEEpJTBBJTBBJTIzJTIwR2VuZXJlYXolQzQlODMlMjB0ZXh0JTBBcHJvbXB0JTIwJTNEJTIwJTIyU2NyaWUlMjBvJTIwcG92ZXN0ZSUyMGNyZWF0aXYlQzQlODMlMjIlMEFvdXRwdXRzJTIwJTNEJTIwbGxtLmdlbmVyYXRlKHByb21wdCUyQyUyMHNhbXBsaW5nX3BhcmFtcyklMEFwcmludChvdXRwdXRzJTVCMCU1RC5vdXRwdXRzJTVCMCU1RC50ZXh0KSUwQSUwQSUyMyUyMFBlbnRydSUyMGludGVyYWMlQzglOUJpdW5pJTIwJUMzJUFFbiUyMHN0aWwlMjBjaGF0JTBBY2hhdF9wcm9tcHQlMjAlM0QlMjAlNUIlMEElMjAlMjAlMjAlMjAlN0IlMjJyb2xlJTIyJTNBJTIwJTIyc3lzdGVtJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMCUyMkUlQzglOTl0aSUyMHVuJTIwcG92ZXN0
aXRvciUyMGNyZWF0aXYuJTIyJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTdCJTIycm9sZSUyMiUzQSUyMCUyMnVzZXIlMjIlMkMlMjAlMjJjb250ZW50JTIyJTNBJTIwJTIyU2NyaWUlMjBvJTIwcG92ZXN0ZSUyMGNyZWF0aXYlQzQlODMlMjIlN0QlMkMlMEElNUQlMEFmb3JtYXR0ZWRfcHJvbXB0JTIwJTNEJTIwbGxtLmdldF9jaGF0X3RlbXBsYXRlKCkoJTBBJTIwJTIwJTIwJTIwY2hhdF9wcm9tcHQlMEEpJTIwJTIwJTIzJTIwRm9sb3NlJUM4JTk5dGUlMjAlQzglOTlhYmxvbnVsJTIwZGUlMjBjaGF0JTIwYWwlMjBtb2RlbHVsdWklMEFvdXRwdXRzJTIwJTNEJTIwbGxtLmdlbmVyYXRlKGZvcm1hdHRlZF9wcm9tcHQlMkMlMjBzYW1wbGluZ19wYXJhbXMpJTBBcHJpbnQob3V0cHV0cyU1QjAlNUQub3V0cHV0cyU1QjAlNUQudGV4dCk=",highlighted:`<span class="hljs-keyword">from</span> vllm <span class="hljs-keyword">import</span> LLM, SamplingParams | |
| <span class="hljs-comment"># Inițializează modelul cu parametri avansați</span> | |
| llm = LLM( | |
| model=<span class="hljs-string">"HuggingFaceTB/SmolLM2-360M-Instruct"</span>, | |
| gpu_memory_utilization=<span class="hljs-number">0.85</span>, | |
| max_num_batched_tokens=<span class="hljs-number">8192</span>, | |
| max_num_seqs=<span class="hljs-number">256</span>, | |
| block_size=<span class="hljs-number">16</span>, | |
| ) | |
| <span class="hljs-comment"># Configurează parametrii de eșantionare</span> | |
| sampling_params = SamplingParams( | |
| temperature=<span class="hljs-number">0.8</span>, <span class="hljs-comment"># Mai mare pentru mai multă creativitate</span> | |
| top_p=<span class="hljs-number">0.95</span>, <span class="hljs-comment"># Consideră masa de probabilitate de top 95%</span> | |
| max_tokens=<span class="hljs-number">100</span>, <span class="hljs-comment"># Lungimea maximă</span> | |
| presence_penalty=<span class="hljs-number">1.1</span>, <span class="hljs-comment"># Reduce repetarea</span> | |
| frequency_penalty=<span class="hljs-number">1.1</span>, <span class="hljs-comment"># Reduce repetarea</span> | |
| stop=[<span class="hljs-string">"\\n\\n"</span>, <span class="hljs-string">"###"</span>], <span class="hljs-comment"># Secvențe de oprire</span> | |
| ) | |
| <span class="hljs-comment"># Generează text</span> | |
| prompt = <span class="hljs-string">"Scrie o poveste creativă"</span> | |
| outputs = llm.generate(prompt, sampling_params) | |
| <span class="hljs-built_in">print</span>(outputs[<span class="hljs-number">0</span>].outputs[<span class="hljs-number">0</span>].text) | |
| <span class="hljs-comment"># Pentru interacțiuni în stil chat</span> | |
| chat_prompt = [ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"system"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Ești un povestitor creativ."</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Scrie o poveste creativă"</span>}, | |
| ] | |
| formatted_prompt = llm.get_chat_template()( | |
| chat_prompt | |
| ) <span class="hljs-comment"># Folosește șablonul de chat al modelului</span> | |
| outputs = llm.generate(formatted_prompt, sampling_params) | |
| <span class="hljs-built_in">print</span>(outputs[<span class="hljs-number">0</span>].outputs[<span class="hljs-number">0</span>].text)`,wrap:!1}}),{c(){m=z(`<hfoption value="tgi" label="TGI"> | |
| `),p=w("p"),p.textContent=j,J=c(),f(u.$$.fragment),y=c(),M=w("p"),M.textContent=U,r=c(),f(i.$$.fragment),Q=c(),A=w("p"),A.textContent=$,k=c(),f(W.$$.fragment),ol=z(` | |
| </hfoption> | |
| <hfoption value="llama.cpp" label="llama.cpp"> | |
| `),E=w("p"),E.textContent=X,H=c(),f(T.$$.fragment),S=c(),ll=w("p"),ll.textContent=L,dl=c(),f(el.$$.fragment),R=c(),tl=w("p"),tl.textContent=D,hl=c(),f(sl.$$.fragment),x=c(),nl=w("p"),nl.textContent=P,bl=c(),f(al.$$.fragment),Y=z(` | |
| </hfoption> | |
| <hfoption value="vllm" label="vLLM"> | |
| `),Ml=w("p"),Ml.textContent=O,Tl=c(),f(_.$$.fragment),ml=c(),il=w("p"),il.textContent=K,Cl=c(),f(pl.$$.fragment),q=c(),Jl=w("p"),Jl.textContent=jl,F=c(),f(ul.$$.fragment),rl=z(` | |
| </hfoption>`)},l(n){m=N(n,`<hfoption value="tgi" label="TGI"> | |
| `),p=I(n,"P",{"data-svelte-h":!0}),G(p)!=="svelte-1vmaxnf"&&(p.textContent=j),J=o(n),B(u.$$.fragment,n),y=o(n),M=I(n,"P",{"data-svelte-h":!0}),G(M)!=="svelte-1ajrx6o"&&(M.textContent=U),r=o(n),B(i.$$.fragment,n),Q=o(n),A=I(n,"P",{"data-svelte-h":!0}),G(A)!=="svelte-4b84cz"&&(A.textContent=$),k=o(n),B(W.$$.fragment,n),ol=N(n,` | |
| </hfoption> | |
| <hfoption value="llama.cpp" label="llama.cpp"> | |
| `),E=I(n,"P",{"data-svelte-h":!0}),G(E)!=="svelte-gc9vgi"&&(E.textContent=X),H=o(n),B(T.$$.fragment,n),S=o(n),ll=I(n,"P",{"data-svelte-h":!0}),G(ll)!=="svelte-1w27s98"&&(ll.textContent=L),dl=o(n),B(el.$$.fragment,n),R=o(n),tl=I(n,"P",{"data-svelte-h":!0}),G(tl)!=="svelte-17sbnw3"&&(tl.textContent=D),hl=o(n),B(sl.$$.fragment,n),x=o(n),nl=I(n,"P",{"data-svelte-h":!0}),G(nl)!=="svelte-1k6lxxy"&&(nl.textContent=P),bl=o(n),B(al.$$.fragment,n),Y=N(n,` | |
| </hfoption> | |
| <hfoption value="vllm" label="vLLM"> | |
| `),Ml=I(n,"P",{"data-svelte-h":!0}),G(Ml)!=="svelte-vd4z8t"&&(Ml.textContent=O),Tl=o(n),B(_.$$.fragment,n),ml=o(n),il=I(n,"P",{"data-svelte-h":!0}),G(il)!=="svelte-1ns2jx"&&(il.textContent=K),Cl=o(n),B(pl.$$.fragment,n),q=o(n),Jl=I(n,"P",{"data-svelte-h":!0}),G(Jl)!=="svelte-1rrz7ab"&&(Jl.textContent=jl),F=o(n),B(ul.$$.fragment,n),rl=N(n,` | |
| </hfoption>`)},m(n,d){t(n,m,d),t(n,p,d),t(n,J,d),g(u,n,d),t(n,y,d),t(n,M,d),t(n,r,d),g(i,n,d),t(n,Q,d),t(n,A,d),t(n,k,d),g(W,n,d),t(n,ol,d),t(n,E,d),t(n,H,d),g(T,n,d),t(n,S,d),t(n,ll,d),t(n,dl,d),g(el,n,d),t(n,R,d),t(n,tl,d),t(n,hl,d),g(sl,n,d),t(n,x,d),t(n,nl,d),t(n,bl,d),g(al,n,d),t(n,Y,d),t(n,Ml,d),t(n,Tl,d),g(_,n,d),t(n,ml,d),t(n,il,d),t(n,Cl,d),g(pl,n,d),t(n,q,d),t(n,Jl,d),t(n,F,d),g(ul,n,d),t(n,rl,d),Ul=!0},p:Zl,i(n){Ul||(b(u.$$.fragment,n),b(i.$$.fragment,n),b(W.$$.fragment,n),b(T.$$.fragment,n),b(el.$$.fragment,n),b(sl.$$.fragment,n),b(al.$$.fragment,n),b(_.$$.fragment,n),b(pl.$$.fragment,n),b(ul.$$.fragment,n),Ul=!0)},o(n){C(u.$$.fragment,n),C(i.$$.fragment,n),C(W.$$.fragment,n),C(T.$$.fragment,n),C(el.$$.fragment,n),C(sl.$$.fragment,n),C(al.$$.fragment,n),C(_.$$.fragment,n),C(pl.$$.fragment,n),C(ul.$$.fragment,n),Ul=!1},d(n){n&&(e(m),e(p),e(J),e(y),e(M),e(r),e(Q),e(A),e(k),e(ol),e(E),e(H),e(S),e(ll),e(dl),e(R),e(tl),e(hl),e(x),e(nl),e(bl),e(Y),e(Ml),e(Tl),e(ml),e(il),e(Cl),e(q),e(Jl),e(F),e(rl)),Z(u,n),Z(i,n),Z(W,n),Z(T,n),Z(el,n),Z(sl,n),Z(al,n),Z(_,n),Z(pl,n),Z(ul,n)}}}function Ut(v){let m,p,j,J,u,y,M,U;return p=new V({props:{code:"Y2xpZW50LmdlbmVyYXRlKCUwQSUyMCUyMCUyMCUyMCUyMlNjcmllJTIwbyUyMHBvdmVzdGUlMjBjcmVhdGl2JUM0JTgzJTIyJTJDJTBBJTIwJTIwJTIwJTIwdGVtcGVyYXR1cmUlM0QwLjglMkMlMjAlMjAlMjMlMjBNYWklMjBtYXJlJTIwcGVudHJ1JTIwbWFpJTIwbXVsdCVDNCU4MyUyMGNyZWF0aXZpdGF0ZSUwQSUyMCUyMCUyMCUyMHRvcF9wJTNEMC45NSUyQyUyMCUyMCUyMyUyMENvbnNpZGVyJUM0JTgzJTIwbWFzYSUyMGRlJTIwcHJvYmFiaWxpdGF0ZSUyMGRlJTIwdG9wJTIwOTUlMjUlMEElMjAlMjAlMjAlMjB0b3BfayUzRDUwJTJDJTIwJTIwJTIzJTIwQ29uc2lkZXIlQzQlODMlMjB0b3AlMjA1MCUyMGRlJTIwdG9rZW4tdXJpJTBBJTIwJTIwJTIwJTIwbWF4X25ld190b2tlbnMlM0QxMDAlMkMlMjAlMjAlMjMlMjBMdW5naW1lYSUyMG1heGltJUM0JTgzJTBBJTIwJTIwJTIwJTIwcmVwZXRpdGlvbl9wZW5hbHR5JTNEMS4xJTJDJTIwJTIwJTIzJTIwUmVkdWNlJTIwcmVwZXRhcmVhJTBBKQ==",highlighted:`client.generate( | |
| <span class="hljs-string">"Scrie o poveste creativă"</span>, | |
| temperature=<span class="hljs-number">0.8</span>, <span class="hljs-comment"># Mai mare pentru mai multă creativitate</span> | |
| top_p=<span class="hljs-number">0.95</span>, <span class="hljs-comment"># Consideră masa de probabilitate de top 95%</span> | |
| top_k=<span class="hljs-number">50</span>, <span class="hljs-comment"># Consideră top 50 de token-uri</span> | |
| max_new_tokens=<span class="hljs-number">100</span>, <span class="hljs-comment"># Lungimea maximă</span> | |
| repetition_penalty=<span class="hljs-number">1.1</span>, <span class="hljs-comment"># Reduce repetarea</span> | |
| )`,wrap:!1}}),J=new V({props:{code:"JTIzJTIwUHJpbiUyMGNvbXBhdGliaWxpdGF0ZWElMjBBUEklMjBPcGVuQUklMEFyZXNwb25zZSUyMCUzRCUyMGNsaWVudC5jb21wbGV0aW9ucy5jcmVhdGUoJTBBJTIwJTIwJTIwJTIwbW9kZWwlM0QlMjJzbW9sbG0yLTEuN2ItaW5zdHJ1Y3QlMjIlMkMlMjAlMjAlMjMlMjBOdW1lbGUlMjBtb2RlbHVsdWklMjAocG9hdGUlMjBmaSUyMG9yaWNlJTIwc3RyaW5nJTIwcGVudHJ1JTIwc2VydmVydWwlMjBsbGFtYS5jcHApJTBBJTIwJTIwJTIwJTIwcHJvbXB0JTNEJTIyU2NyaWUlMjBvJTIwcG92ZXN0ZSUyMGNyZWF0aXYlQzQlODMlMjIlMkMlMEElMjAlMjAlMjAlMjB0ZW1wZXJhdHVyZSUzRDAuOCUyQyUyMCUyMCUyMyUyME1haSUyMG1hcmUlMjBwZW50cnUlMjBtYWklMjBtdWx0JUM0JTgzJTIwY3JlYXRpdml0YXRlJTBBJTIwJTIwJTIwJTIwdG9wX3AlM0QwLjk1JTJDJTIwJTIwJTIzJTIwQ29uc2lkZXIlQzQlODMlMjBtYXNhJTIwZGUlMjBwcm9iYWJpbGl0YXRlJTIwZGUlMjB0b3AlMjA5NSUyNSUwQSUyMCUyMCUyMCUyMGZyZXF1ZW5jeV9wZW5hbHR5JTNEMS4xJTJDJTIwJTIwJTIzJTIwUmVkdWNlJTIwcmVwZXRhcmVhJTBBJTIwJTIwJTIwJTIwcHJlc2VuY2VfcGVuYWx0eSUzRDAuMSUyQyUyMCUyMCUyMyUyMFJlZHVjZSUyMHJlcGV0YXJlYSUwQSUyMCUyMCUyMCUyMG1heF90b2tlbnMlM0QxMDAlMkMlMjAlMjAlMjMlMjBMdW5naW1lYSUyMG1heGltJUM0JTgzJTBBKSUwQSUwQSUyMyUyMFByaW4lMjBhY2Nlc3VsJTIwZGlyZWN0JTIwbGxhbWEtY3BwLXB5dGhvbiUwQW91dHB1dCUyMCUzRCUyMGxsbSglMEElMjAlMjAlMjAlMjAlMjJTY3JpZSUyMG8lMjBwb3Zlc3RlJTIwY3JlYXRpdiVDNCU4MyUyMiUyQyUwQSUyMCUyMCUyMCUyMHRlbXBlcmF0dXJlJTNEMC44JTJDJTBBJTIwJTIwJTIwJTIwdG9wX3AlM0QwLjk1JTJDJTBBJTIwJTIwJTIwJTIwdG9wX2slM0Q1MCUyQyUwQSUyMCUyMCUyMCUyMG1heF90b2tlbnMlM0QxMDAlMkMlMEElMjAlMjAlMjAlMjByZXBlYXRfcGVuYWx0eSUzRDEuMSUyQyUwQSk=",highlighted:`<span class="hljs-comment"># Prin compatibilitatea API OpenAI</span> | |
| response = client.completions.create( | |
| model=<span class="hljs-string">"smollm2-1.7b-instruct"</span>, <span class="hljs-comment"># Numele modelului (poate fi orice string pentru serverul llama.cpp)</span> | |
| prompt=<span class="hljs-string">"Scrie o poveste creativă"</span>, | |
| temperature=<span class="hljs-number">0.8</span>, <span class="hljs-comment"># Mai mare pentru mai multă creativitate</span> | |
| top_p=<span class="hljs-number">0.95</span>, <span class="hljs-comment"># Consideră masa de probabilitate de top 95%</span> | |
| frequency_penalty=<span class="hljs-number">1.1</span>, <span class="hljs-comment"># Reduce repetarea</span> | |
| presence_penalty=<span class="hljs-number">0.1</span>, <span class="hljs-comment"># Reduce repetarea</span> | |
| max_tokens=<span class="hljs-number">100</span>, <span class="hljs-comment"># Lungimea maximă</span> | |
| ) | |
| <span class="hljs-comment"># Prin accesul direct llama-cpp-python</span> | |
| output = llm( | |
| <span class="hljs-string">"Scrie o poveste creativă"</span>, | |
| temperature=<span class="hljs-number">0.8</span>, | |
| top_p=<span class="hljs-number">0.95</span>, | |
| top_k=<span class="hljs-number">50</span>, | |
| max_tokens=<span class="hljs-number">100</span>, | |
| repeat_penalty=<span class="hljs-number">1.1</span>, | |
| )`,wrap:!1}}),y=new V({props:{code:"cGFyYW1zJTIwJTNEJTIwU2FtcGxpbmdQYXJhbXMoJTBBJTIwJTIwJTIwJTIwdGVtcGVyYXR1cmUlM0QwLjglMkMlMjAlMjAlMjMlMjBNYWklMjBtYXJlJTIwcGVudHJ1JTIwbWFpJTIwbXVsdCVDNCU4MyUyMGNyZWF0aXZpdGF0ZSUwQSUyMCUyMCUyMCUyMHRvcF9wJTNEMC45NSUyQyUyMCUyMCUyMyUyMENvbnNpZGVyJUM0JTgzJTIwbWFzYSUyMGRlJTIwcHJvYmFiaWxpdGF0ZSUyMGRlJTIwdG9wJTIwOTUlMjUlMEElMjAlMjAlMjAlMjB0b3BfayUzRDUwJTJDJTIwJTIwJTIzJTIwQ29uc2lkZXIlQzQlODMlMjB0b3AlMjA1MCUyMGRlJTIwdG9rZW4tdXJpJTBBJTIwJTIwJTIwJTIwbWF4X3Rva2VucyUzRDEwMCUyQyUyMCUyMCUyMyUyMEx1bmdpbWVhJTIwbWF4aW0lQzQlODMlMEElMjAlMjAlMjAlMjBwcmVzZW5jZV9wZW5hbHR5JTNEMC4xJTJDJTIwJTIwJTIzJTIwUmVkdWNlJTIwcmVwZXRhcmVhJTBBKSUwQWxsbS5nZW5lcmF0ZSglMjJTY3JpZSUyMG8lMjBwb3Zlc3RlJTIwY3JlYXRpdiVDNCU4MyUyMiUyQyUyMHNhbXBsaW5nX3BhcmFtcyUzRHBhcmFtcyk=",highlighted:`params = SamplingParams( | |
| temperature=<span class="hljs-number">0.8</span>, <span class="hljs-comment"># Mai mare pentru mai multă creativitate</span> | |
| top_p=<span class="hljs-number">0.95</span>, <span class="hljs-comment"># Consideră masa de probabilitate de top 95%</span> | |
| top_k=<span class="hljs-number">50</span>, <span class="hljs-comment"># Consideră top 50 de token-uri</span> | |
| max_tokens=<span class="hljs-number">100</span>, <span class="hljs-comment"># Lungimea maximă</span> | |
| presence_penalty=<span class="hljs-number">0.1</span>, <span class="hljs-comment"># Reduce repetarea</span> | |
| ) | |
| llm.generate(<span class="hljs-string">"Scrie o poveste creativă"</span>, sampling_params=params)`,wrap:!1}}),{c(){m=z(`<hfoption value="tgi" label="TGI"> | |
| `),f(p.$$.fragment),j=z(` | |
| </hfoption> | |
| <hfoption value="llama.cpp" label="llama.cpp"> | |
| `),f(J.$$.fragment),u=z(` | |
| </hfoption> | |
| <hfoption value="vllm" label="vLLM"> | |
| `),f(y.$$.fragment),M=z(` | |
| </hfoption>`)},l(r){m=N(r,`<hfoption value="tgi" label="TGI"> | |
| `),B(p.$$.fragment,r),j=N(r,` | |
| </hfoption> | |
| <hfoption value="llama.cpp" label="llama.cpp"> | |
| `),B(J.$$.fragment,r),u=N(r,` | |
| </hfoption> | |
| <hfoption value="vllm" label="vLLM"> | |
| `),B(y.$$.fragment,r),M=N(r,` | |
| </hfoption>`)},m(r,i){t(r,m,i),g(p,r,i),t(r,j,i),g(J,r,i),t(r,u,i),g(y,r,i),t(r,M,i),U=!0},p:Zl,i(r){U||(b(p.$$.fragment,r),b(J.$$.fragment,r),b(y.$$.fragment,r),U=!0)},o(r){C(p.$$.fragment,r),C(J.$$.fragment,r),C(y.$$.fragment,r),U=!1},d(r){r&&(e(m),e(j),e(u),e(M)),Z(p,r),Z(J,r),Z(y,r)}}}function Jt(v){let m,p,j,J,u,y;return p=new V({props:{code:"JTIzJTIwUHJpbiUyMEFQSSUyME9wZW5BSSUwQXJlc3BvbnNlJTIwJTNEJTIwY2xpZW50LmNvbXBsZXRpb25zLmNyZWF0ZSglMEElMjAlMjAlMjAlMjBtb2RlbCUzRCUyMnNtb2xsbTItMS43Yi1pbnN0cnVjdCUyMiUyQyUwQSUyMCUyMCUyMCUyMHByb21wdCUzRCUyMlNjcmllJTIwdW4lMjB0ZXh0JTIwdmFyaWF0JTIyJTJDJTBBJTIwJTIwJTIwJTIwZnJlcXVlbmN5X3BlbmFsdHklM0QxLjElMkMlMjAlMjAlMjMlMjBQZW5hbGl6ZWF6JUM0JTgzJTIwdG9rZW4tdXJpbGUlMjBmcmVjdmVudGUlMEElMjAlMjAlMjAlMjBwcmVzZW5jZV9wZW5hbHR5JTNEMC44JTJDJTIwJTIwJTIzJTIwUGVuYWxpemVheiVDNCU4MyUyMHRva2VuLXVyaWxlJTIwZGVqYSUyMHByZXplbnRlJTBBKSUwQSUwQSUyMyUyMFByaW4lMjBiaWJsaW90ZWNhJTIwZGlyZWN0JUM0JTgzJTBBb3V0cHV0JTIwJTNEJTIwbGxtKCUwQSUyMCUyMCUyMCUyMCUyMlNjcmllJTIwdW4lMjB0ZXh0JTIwdmFyaWF0JTIyJTJDJTBBJTIwJTIwJTIwJTIwcmVwZWF0X3BlbmFsdHklM0QxLjElMkMlMjAlMjAlMjMlMjBQZW5hbGl6ZWF6JUM0JTgzJTIwdG9rZW4tdXJpbGUlMjByZXBldGF0ZSUwQSUyMCUyMCUyMCUyMGZyZXF1ZW5jeV9wZW5hbHR5JTNEMC41JTJDJTIwJTIwJTIzJTIwUGVuYWxpdGF0ZSUyMGRlJTIwZnJlY3ZlbiVDOCU5QiVDNCU4MyUyMGFkaSVDOCU5QmlvbmFsJUM0JTgzJTBBJTIwJTIwJTIwJTIwcHJlc2VuY2VfcGVuYWx0eSUzRDAuNSUyQyUyMCUyMCUyMyUyMFBlbmFsaXRhdGUlMjBkZSUyMHByZXplbiVDOCU5QiVDNCU4MyUyMGFkaSVDOCU5QmlvbmFsJUM0JTgzJTBBKQ==",highlighted:`<span class="hljs-comment"># Prin API OpenAI</span> | |
| response = client.completions.create( | |
| model=<span class="hljs-string">"smollm2-1.7b-instruct"</span>, | |
| prompt=<span class="hljs-string">"Scrie un text variat"</span>, | |
| frequency_penalty=<span class="hljs-number">1.1</span>, <span class="hljs-comment"># Penalizează token-urile frecvente</span> | |
| presence_penalty=<span class="hljs-number">0.8</span>, <span class="hljs-comment"># Penalizează token-urile deja prezente</span> | |
| ) | |
| <span class="hljs-comment"># Prin biblioteca directă</span> | |
| output = llm( | |
| <span class="hljs-string">"Scrie un text variat"</span>, | |
| repeat_penalty=<span class="hljs-number">1.1</span>, <span class="hljs-comment"># Penalizează token-urile repetate</span> | |
| frequency_penalty=<span class="hljs-number">0.5</span>, <span class="hljs-comment"># Penalitate de frecvență adițională</span> | |
| presence_penalty=<span class="hljs-number">0.5</span>, <span class="hljs-comment"># Penalitate de prezență adițională</span> | |
| )`,wrap:!1}}),J=new V({props:{code:"cGFyYW1zJTIwJTNEJTIwU2FtcGxpbmdQYXJhbXMoJTBBJTIwJTIwJTIwJTIwcHJlc2VuY2VfcGVuYWx0eSUzRDAuMSUyQyUyMCUyMCUyMyUyMFBlbmFsaXplYXolQzQlODMlMjBwcmV6ZW4lQzglOUJhJTIwdG9rZW4tdXJpbG9yJTBBJTIwJTIwJTIwJTIwZnJlcXVlbmN5X3BlbmFsdHklM0QwLjElMkMlMjAlMjAlMjMlMjBQZW5hbGl6ZWF6JUM0JTgzJTIwZnJlY3ZlbiVDOCU5QmElMjB0b2tlbi11cmlsb3IlMEEp",highlighted:`params = SamplingParams( | |
| presence_penalty=<span class="hljs-number">0.1</span>, <span class="hljs-comment"># Penalizează prezența token-urilor</span> | |
| frequency_penalty=<span class="hljs-number">0.1</span>, <span class="hljs-comment"># Penalizează frecvența token-urilor</span> | |
| )`,wrap:!1}}),{c(){m=z(`<hfoption value="tgi" label="TGI"> | |
| \`\`\`python | |
| client.generate( | |
| "Scrie un text variat", | |
| repetition_penalty=1.1, # Penalizează token-urile repetate | |
| no_repeat_ngram_size=3, # Previne repetarea de 3-grame | |
| ) | |
| \`\`\` | |
| </hfoption> | |
| <hfoption value="llama.cpp" label="llama.cpp"> | |
| `),f(p.$$.fragment),j=z(` | |
| </hfoption> | |
| <hfoption value="vllm" label="vLLM"> | |
| `),f(J.$$.fragment),u=z(` | |
| </hfoption>`)},l(M){m=N(M,`<hfoption value="tgi" label="TGI"> | |
| \`\`\`python | |
| client.generate( | |
| "Scrie un text variat", | |
| repetition_penalty=1.1, # Penalizează token-urile repetate | |
| no_repeat_ngram_size=3, # Previne repetarea de 3-grame | |
| ) | |
| \`\`\` | |
| </hfoption> | |
| <hfoption value="llama.cpp" label="llama.cpp"> | |
| `),B(p.$$.fragment,M),j=N(M,` | |
| </hfoption> | |
| <hfoption value="vllm" label="vLLM"> | |
| `),B(J.$$.fragment,M),u=N(M,` | |
| </hfoption>`)},m(M,U){t(M,m,U),g(p,M,U),t(M,j,U),g(J,M,U),t(M,u,U),y=!0},p:Zl,i(M){y||(b(p.$$.fragment,M),b(J.$$.fragment,M),y=!0)},o(M){C(p.$$.fragment,M),C(J.$$.fragment,M),y=!1},d(M){M&&(e(m),e(j),e(u)),Z(p,M),Z(J,M)}}}function yt(v){let m,p,j,J,u,y;return p=new V({props:{code:"JTIzJTIwUHJpbiUyMEFQSSUyME9wZW5BSSUwQXJlc3BvbnNlJTIwJTNEJTIwY2xpZW50LmNvbXBsZXRpb25zLmNyZWF0ZSglMEElMjAlMjAlMjAlMjBtb2RlbCUzRCUyMnNtb2xsbTItMS43Yi1pbnN0cnVjdCUyMiUyQyUwQSUyMCUyMCUyMCUyMHByb21wdCUzRCUyMkdlbmVyZWF6JUM0JTgzJTIwdW4lMjBwYXJhZ3JhZiUyMHNjdXJ0JTIyJTJDJTBBJTIwJTIwJTIwJTIwbWF4X3Rva2VucyUzRDEwMCUyQyUwQSUyMCUyMCUyMCUyMHN0b3AlM0QlNUIlMjIlNUNuJTVDbiUyMiUyQyUyMCUyMiUyMyUyMyUyMyUyMiU1RCUyQyUwQSklMEElMEElMjMlMjBQcmluJTIwYmlibGlvdGVjYSUyMGRpcmVjdCVDNCU4MyUwQW91dHB1dCUyMCUzRCUyMGxsbSglMjJHZW5lcmVheiVDNCU4MyUyMHVuJTIwcGFyYWdyYWYlMjBzY3VydCUyMiUyQyUyMG1heF90b2tlbnMlM0QxMDAlMkMlMjBzdG9wJTNEJTVCJTIyJTVDbiU1Q24lMjIlMkMlMjAlMjIlMjMlMjMlMjMlMjIlNUQp",highlighted:`<span class="hljs-comment"># Prin API OpenAI</span> | |
| response = client.completions.create( | |
| model=<span class="hljs-string">"smollm2-1.7b-instruct"</span>, | |
| prompt=<span class="hljs-string">"Generează un paragraf scurt"</span>, | |
| max_tokens=<span class="hljs-number">100</span>, | |
| stop=[<span class="hljs-string">"\\n\\n"</span>, <span class="hljs-string">"###"</span>], | |
| ) | |
| <span class="hljs-comment"># Prin biblioteca directă</span> | |
| output = llm(<span class="hljs-string">"Generează un paragraf scurt"</span>, max_tokens=<span class="hljs-number">100</span>, stop=[<span class="hljs-string">"\\n\\n"</span>, <span class="hljs-string">"###"</span>])`,wrap:!1}}),J=new V({props:{code:"cGFyYW1zJTIwJTNEJTIwU2FtcGxpbmdQYXJhbXMoJTBBJTIwJTIwJTIwJTIwbWF4X3Rva2VucyUzRDEwMCUyQyUwQSUyMCUyMCUyMCUyMG1pbl90b2tlbnMlM0QxMCUyQyUwQSUyMCUyMCUyMCUyMHN0b3AlM0QlNUIlMjIlMjMlMjMlMjMlMjIlMkMlMjAlMjIlNUNuJTVDbiUyMiU1RCUyQyUwQSUyMCUyMCUyMCUyMGlnbm9yZV9lb3MlM0RGYWxzZSUyQyUwQSUyMCUyMCUyMCUyMHNraXBfc3BlY2lhbF90b2tlbnMlM0RUcnVlJTJDJTBBKQ==",highlighted:`params = SamplingParams( | |
| max_tokens=<span class="hljs-number">100</span>, | |
| min_tokens=<span class="hljs-number">10</span>, | |
| stop=[<span class="hljs-string">"###"</span>, <span class="hljs-string">"\\n\\n"</span>], | |
| ignore_eos=<span class="hljs-literal">False</span>, | |
| skip_special_tokens=<span class="hljs-literal">True</span>, | |
| )`,wrap:!1}}),{c(){m=z(`<hfoption value="tgi" label="TGI"> | |
| \`\`\`python | |
| client.generate( | |
| "Generează un paragraf scurt", | |
| max_new_tokens=100, | |
| min_new_tokens=10, | |
| stop_sequences=["\\n\\n", "###"], | |
| ) | |
| \`\`\` | |
| </hfoption> | |
| <hfoption value="llama.cpp" label="llama.cpp"> | |
| `),f(p.$$.fragment),j=z(` | |
| </hfoption> | |
| <hfoption value="vllm" label="vLLM"> | |
| `),f(J.$$.fragment),u=z(` | |
| </hfoption>`)},l(M){m=N(M,`<hfoption value="tgi" label="TGI"> | |
| \`\`\`python | |
| client.generate( | |
| "Generează un paragraf scurt", | |
| max_new_tokens=100, | |
| min_new_tokens=10, | |
| stop_sequences=["\\n\\n", "###"], | |
| ) | |
| \`\`\` | |
| </hfoption> | |
| <hfoption value="llama.cpp" label="llama.cpp"> | |
| `),B(p.$$.fragment,M),j=N(M,` | |
| </hfoption> | |
| <hfoption value="vllm" label="vLLM"> | |
| `),B(J.$$.fragment,M),u=N(M,` | |
| </hfoption>`)},m(M,U){t(M,m,U),g(p,M,U),t(M,j,U),g(J,M,U),t(M,u,U),y=!0},p:Zl,i(M){y||(b(p.$$.fragment,M),b(J.$$.fragment,M),y=!0)},o(M){C(p.$$.fragment,M),C(J.$$.fragment,M),y=!1},d(M){M&&(e(m),e(j),e(u)),Z(p,M),Z(J,M)}}}function Tt(v){let m,p,j,J,u="llama.cpp folosește cuantificarea și dispunerea optimată a memoriei:",y,M,U,r,i="Pentru modele prea mari pentru GPU-ul tău, poți folosi offloading-ul CPU:",Q,A,$,k,W="vLLM folosește PagedAttention pentru gestionarea optimă a memoriei:",ol,E,X,H;return p=new V({props:{code:"JTIzJTIwSW1wbGVtZW50YXJlYSUyMERvY2tlciUyMGN1JTIwb3B0aW1pemFyZWElMjBtZW1vcmllaSUwQWRvY2tlciUyMHJ1biUyMC0tZ3B1cyUyMGFsbCUyMC1wJTIwODA4MCUzQTgwJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1zaG0tc2l6ZSUyMDFnJTIwJTVDJTBBJTIwJTIwJTIwJTIwZ2hjci5pbyUyRmh1Z2dpbmdmYWNlJTJGdGV4dC1nZW5lcmF0aW9uLWluZmVyZW5jZSUzQWxhdGVzdCUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbW9kZWwtaWQlMjBIdWdnaW5nRmFjZVRCJTJGU21vbExNMi0xLjdCLUluc3RydWN0JTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1tYXgtYmF0Y2gtdG90YWwtdG9rZW5zJTIwODE5MiUyMCU1QyUwQSUyMCUyMCUyMCUyMC0tbWF4LWlucHV0LWxlbmd0aCUyMDQwOTY=",highlighted:`<span class="hljs-comment"># Implementarea Docker cu optimizarea memoriei</span> | |
| docker run --gpus all -p 8080:80 \\ | |
| --shm-size 1g \\ | |
| ghcr.io/huggingface/text-generation-inference:latest \\ | |
| --model-id HuggingFaceTB/SmolLM2-1.7B-Instruct \\ | |
| --max-batch-total-tokens 8192 \\ | |
| --max-input-length 4096`,wrap:!1}}),M=new V({props:{code:"JTIzJTIwU2VydmVyJTIwY3UlMjBvcHRpbWl6JUM0JTgzcmklMjBkZSUyMG1lbW9yaWUlMEEuJTJGc2VydmVyJTIwJTVDJTBBJTIwJTIwJTIwJTIwLW0lMjBzbW9sbG0yLTEuN2ItaW5zdHJ1Y3QuUTRfS19NLmdndWYlMjAlNUMlMEElMjAlMjAlMjAlMjAtLWhvc3QlMjAwLjAuMC4wJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1wb3J0JTIwODA4MCUyMCU1QyUwQSUyMCUyMCUyMCUyMC1jJTIwMjA0OCUyMCU1QyUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMyUyMERpbWVuc2l1bmVhJTIwY29udGV4dHVsdWklMEElMjAlMjAlMjAlMjAtLXRocmVhZHMlMjA0JTIwJTVDJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIzJTIwVGhyZWFkLXVyaSUyMENQVSUwQSUyMCUyMCUyMCUyMC0tbi1ncHUtbGF5ZXJzJTIwMzIlMjAlNUMlMjAlMjAlMjAlMjAlMjAlMjMlMjBGb2xvc2UlQzglOTl0ZSUyMG1haSUyMG11bHRlJTIwc3RyYXR1cmklMjBHUFUlMjBwZW50cnUlMjBtb2RlbGUlMjBtYWklMjBtYXJpJTBBJTIwJTIwJTIwJTIwLS1tbG9jayUyMCU1QyUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMyUyMEJsb2NoZWF6JUM0JTgzJTIwbWVtb3JpYSUyMHBlbnRydSUyMGElMjBwcmV2ZW5pJTIwc3dhcHBpbmctdWwlMEElMjAlMjAlMjAlMjAtLWNvbnQtYmF0Y2hpbmclMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjMlMjBBY3RpdmVheiVDNCU4MyUyMHByb2Nlc2FyZWElMjBjb250aW51JUM0JTgzJTIwJUMzJUFFbiUyMGxvdHVyaQ==",highlighted:`<span class="hljs-comment"># Server cu optimizări de memorie</span> | |
| ./server \\ | |
| -m smollm2-1.7b-instruct.Q4_K_M.gguf \\ | |
| --host 0.0.0.0 \\ | |
| --port 8080 \\ | |
| -c 2048 \\ <span class="hljs-comment"># Dimensiunea contextului</span> | |
| --threads 4 \\ <span class="hljs-comment"># Thread-uri CPU</span> | |
| --n-gpu-layers 32 \\ <span class="hljs-comment"># Folosește mai multe straturi GPU pentru modele mai mari</span> | |
| --mlock \\ <span class="hljs-comment"># Blochează memoria pentru a preveni swapping-ul</span> | |
| --cont-batching <span class="hljs-comment"># Activează procesarea continuă în loturi</span>`,wrap:!1}}),A=new V({props:{code:"LiUyRnNlcnZlciUyMCU1QyUwQSUyMCUyMCUyMCUyMC1tJTIwc21vbGxtMi0xLjdiLWluc3RydWN0LlE0X0tfTS5nZ3VmJTIwJTVDJTBBJTIwJTIwJTIwJTIwLS1uLWdwdS1sYXllcnMlMjAyMCUyMCU1QyUyMCUyMCUyMCUyMCUyMCUyMyUyMFAlQzQlODNzdHJlYXolQzQlODMlMjBwcmltZWxlJTIwMjAlMjBkZSUyMHN0cmF0dXJpJTIwcGUlMjBHUFUlMEElMjAlMjAlMjAlMjAtLXRocmVhZHMlMjA4JTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIzJTIwRm9sb3NlJUM4JTk5dGUlMjBtYWklMjBtdWx0ZSUyMHRocmVhZC11cmklMjBDUFUlMjBwZW50cnUlMjBzdHJhdHVyaWxlJTIwQ1BV",highlighted:`./server \\ | |
| -m smollm2-1.7b-instruct.Q4_K_M.gguf \\ | |
| --n-gpu-layers 20 \\ <span class="hljs-comment"># Păstrează primele 20 de straturi pe GPU</span> | |
| --threads 8 <span class="hljs-comment"># Folosește mai multe thread-uri CPU pentru straturile CPU</span>`,wrap:!1}}),E=new V({props:{code:"ZnJvbSUyMHZsbG0uZW5naW5lLmFyZ191dGlscyUyMGltcG9ydCUyMEFzeW5jRW5naW5lQXJncyUwQSUwQWVuZ2luZV9hcmdzJTIwJTNEJTIwQXN5bmNFbmdpbmVBcmdzKCUwQSUyMCUyMCUyMCUyMG1vZGVsJTNEJTIySHVnZ2luZ0ZhY2VUQiUyRlNtb2xMTTItMS43Qi1JbnN0cnVjdCUyMiUyQyUwQSUyMCUyMCUyMCUyMGdwdV9tZW1vcnlfdXRpbGl6YXRpb24lM0QwLjg1JTJDJTBBJTIwJTIwJTIwJTIwbWF4X251bV9iYXRjaGVkX3Rva2VucyUzRDgxOTIlMkMlMEElMjAlMjAlMjAlMjBibG9ja19zaXplJTNEMTYlMkMlMEEpJTBBJTBBbGxtJTIwJTNEJTIwTExNKGVuZ2luZV9hcmdzJTNEZW5naW5lX2FyZ3Mp",highlighted:`<span class="hljs-keyword">from</span> vllm.engine.arg_utils <span class="hljs-keyword">import</span> AsyncEngineArgs | |
| engine_args = AsyncEngineArgs( | |
| model=<span class="hljs-string">"HuggingFaceTB/SmolLM2-1.7B-Instruct"</span>, | |
| gpu_memory_utilization=<span class="hljs-number">0.85</span>, | |
| max_num_batched_tokens=<span class="hljs-number">8192</span>, | |
| block_size=<span class="hljs-number">16</span>, | |
| ) | |
| llm = LLM(engine_args=engine_args)`,wrap:!1}}),{c(){m=z(`<hfoption value="tgi" label="TGI"> | |
| TGI folosește Flash Attention 2 și procesarea continuă în loturi: | |
| `),f(p.$$.fragment),j=z(` | |
| </hfoption> | |
| <hfoption value="llama.cpp" label="llama.cpp"> | |
| `),J=w("p"),J.textContent=u,y=c(),f(M.$$.fragment),U=c(),r=w("p"),r.textContent=i,Q=c(),f(A.$$.fragment),$=z(` | |
| </hfoption> | |
| <hfoption value="vllm" label="vLLM"> | |
| `),k=w("p"),k.textContent=W,ol=c(),f(E.$$.fragment),X=z(` | |
| </hfoption>`)},l(T){m=N(T,`<hfoption value="tgi" label="TGI"> | |
| TGI folosește Flash Attention 2 și procesarea continuă în loturi: | |
| `),B(p.$$.fragment,T),j=N(T,` | |
| </hfoption> | |
| <hfoption value="llama.cpp" label="llama.cpp"> | |
| `),J=I(T,"P",{"data-svelte-h":!0}),G(J)!=="svelte-1sfqsd0"&&(J.textContent=u),y=o(T),B(M.$$.fragment,T),U=o(T),r=I(T,"P",{"data-svelte-h":!0}),G(r)!=="svelte-1p18jlx"&&(r.textContent=i),Q=o(T),B(A.$$.fragment,T),$=N(T,` | |
| </hfoption> | |
| <hfoption value="vllm" label="vLLM"> | |
| `),k=I(T,"P",{"data-svelte-h":!0}),G(k)!=="svelte-1p9u0qw"&&(k.textContent=W),ol=o(T),B(E.$$.fragment,T),X=N(T,` | |
| </hfoption>`)},m(T,S){t(T,m,S),g(p,T,S),t(T,j,S),t(T,J,S),t(T,y,S),g(M,T,S),t(T,U,S),t(T,r,S),t(T,Q,S),g(A,T,S),t(T,$,S),t(T,k,S),t(T,ol,S),g(E,T,S),t(T,X,S),H=!0},p:Zl,i(T){H||(b(p.$$.fragment,T),b(M.$$.fragment,T),b(A.$$.fragment,T),b(E.$$.fragment,T),H=!0)},o(T){C(p.$$.fragment,T),C(M.$$.fragment,T),C(A.$$.fragment,T),C(E.$$.fragment,T),H=!1},d(T){T&&(e(m),e(j),e(J),e(y),e(U),e(r),e(Q),e($),e(k),e(ol),e(X)),Z(p,T),Z(M,T),Z(A,T),Z(E,T)}}}function jt(v){let m,p,j,J,u,y,M,U="În această secțiune, vom explora framework-uri avansate pentru optimizarea implementărilor LLM: Text Generation Inference (TGI), vLLM și llama.cpp. Aceste aplicații sunt utilizate în principal în medii de producție pentru a servi LLM-uri utilizatorilor. Această secțiune se concentrează pe modul de implementare a acestor framework-uri în producție, mai degrabă decât pe modul de utilizare pentru inferență pe o singură mașină.",r,i,Q="Vom acoperi modul în care aceste instrumente maximizează eficiența inferenței și simplifică implementările de producție ale Modelelor de Limbaj de Mari Dimensiuni.",A,$,k,W,ol="TGI, vLLM și llama.cpp servesc scopuri similare, dar au caracteristici distincte care le fac mai potrivite pentru diferite cazuri de utilizare. Să ne uităm la diferențele cheie dintre ele, concentrându-ne pe performanță și integrare.",E,X,H,T,S="<strong>TGI</strong> este proiectat să fie stabil și previzibil în producție, folosind lungimi fixe de secvență pentru a menține utilizarea memoriei consistentă. TGI gestionează memoria folosind Flash Attention 2 și tehnici de procesare continuă în loturi. Aceasta înseamnă că poate procesa calculele de atenție foarte eficient și poate menține GPU-ul ocupat prin alimentarea constantă cu lucru. Sistemul poate muta părți ale modelului între CPU și GPU când este necesar, ceea ce ajută la gestionarea modelelor mai mari.",ll,L,dl,el,R,tl,D,hl="<strong>vLLM</strong> adoptă o abordare diferită prin utilizarea PagedAttention. 
La fel cum un computer își gestionează memoria în pagini, vLLM împarte memoria modelului în blocuri mai mici. Acest sistem inteligent înseamnă că poate gestiona cereri de dimensiuni diferite mai flexibil și nu risipește spațiul de memorie. Este deosebit de bun la partajarea memoriei între diferite cereri și reduce fragmentarea memoriei, ceea ce face întregul sistem mai eficient.",sl,x,nl,P,bl="<strong>llama.cpp</strong> este o implementare C/C++ extrem de optimizată proiectată inițial pentru rularea modelelor LLaMA pe hardware de consum. Se concentrează pe eficiența CPU cu accelerație GPU opțională și este ideal pentru medii cu resurse limitate. llama.cpp folosește tehnici de cuantificare pentru a reduce dimensiunea modelului și cerințele de memorie menținând în același timp performanță bună. Implementează kerneluri optimizate pentru diverse arhitecturi CPU și suportă gestionarea de bază a cache-ului KV pentru generarea eficientă de token-uri.",al,Y,Ml,O,Tl,_,ml="Să trecem la diferențele de implementare și integrare între framework-uri.",il,K,Cl="<strong>TGI</strong> excelează în implementarea la nivel enterprise cu caracteristicile sale gata pentru producție. Vine cu suport Kubernetes integrat și include tot ce ai nevoie pentru rularea în producție, precum monitorizarea prin Prometheus și Grafana, scalarea automată și caracteristici cuprinzătoare de siguranță. Sistemul include, de asemenea, logare de nivel enterprise și diverse măsuri de protecție precum filtrarea conținutului și limitarea ratei pentru a menține implementarea sigură și stabilă.",pl,q,Jl="<strong>vLLM</strong> adoptă o abordare mai flexibilă, prietenoasă cu dezvoltatorii pentru implementare. Este construit cu Python în nucleu și poate înlocui cu ușurință API-ul OpenAI în aplicațiile tale existente. Framework-ul se concentrează pe livrarea performanței brute și poate fi personalizat pentru a se potrivi nevoilor tale specifice. 
Funcționează deosebit de bine cu Ray pentru gestionarea clusterelor, făcându-l o alegere excelentă când ai nevoie de performanță înaltă și adaptabilitate.",jl,F,ul="<strong>llama.cpp</strong> prioritizează simplitatea și portabilitatea. Implementarea sa de server este ușoară și poate rula pe o gamă largă de hardware, de la servere puternice la laptopuri de consum și chiar unele dispozitive mobile de înaltă performanță. Cu dependențe minime și un nucleu C/C++ simplu, este ușor de implementat în medii unde instalarea framework-urilor Python ar fi provocatoare. Serverul oferă un API compatibil cu OpenAI menținând în același timp un amprentă de resurse mult mai mică decât alte soluții.",rl,Ul,n,d,Sl="Să explorăm cum să folosim aceste framework-uri pentru implementarea LLM-urilor, începând cu instalarea și configurarea de bază.",wl,fl,Bl,cl,gl,Il,s,h,Ce="Să ne uităm la exemple de generare de text cu framework-urile:",Kl,Gl,le,zl,ee,Nl,te,El,fe="Procesul de generare a textului implică selectarea următorului token la fiecare pas. 
Acest proces de selecție poate fi controlat prin diverși parametri:",se,$l,Be="<li><strong>Logit-uri Brute</strong>: Probabilitățile inițiale de ieșire pentru fiecare token</li> <li><strong>Temperatura</strong>: Controlează aleatoriul în selecție (mai mare = mai creativ)</li> <li><strong>Eșantionarea Top-p (Nucleus)</strong>: Filtrează la token-urile de top care alcătuiesc X% din masa de probabilitate</li> <li><strong>Filtrarea Top-k</strong>: Limitează selecția la k token-uri cele mai probabile</li>",ne,Xl,ge="Iată cum să configurezi acești parametri:",ae,Ql,Me,kl,ie,Rl,Ze="Ambele framework-uri oferă modalități de a preveni generarea repetitivă de text:",pe,vl,re,xl,ce,Yl,Ge="Poți controla lungimea generării și specifica când să se oprească:",oe,Al,me,_l,ue,ql,Qe="Ambele framework-uri implementează tehnici avansate de gestionare a memoriei pentru inferența eficientă.",Ue,Vl,Je,Fl,ye,Hl,ve='<li><a href="https://huggingface.co/docs/text-generation-inference" rel="nofollow">Documentația Text Generation Inference</a></li> <li><a href="https://github.com/huggingface/text-generation-inference" rel="nofollow">Depozitul GitHub TGI</a></li> <li><a href="https://vllm.readthedocs.io/" rel="nofollow">Documentația vLLM</a></li> <li><a href="https://github.com/vllm-project/vllm" rel="nofollow">Depozitul GitHub vLLM</a></li> <li><a href="https://arxiv.org/abs/2309.06180" rel="nofollow">Lucrarea PagedAttention</a></li> <li><a href="https://github.com/ggerganov/llama.cpp" rel="nofollow">Depozitul GitHub llama.cpp</a></li> <li><a href="https://github.com/abetlen/llama-cpp-python" rel="nofollow">Depozitul llama-cpp-python</a></li>',Te,Ll,je,Pl,we;return u=new yl({props:{title:"Implementarea Optimizată a Inferenței",local:"implementarea-optimizată-a-inferenței",headingTag:"h1"}}),$=new yl({props:{title:"Ghid de Selecție a Framework-ului",local:"ghid-de-selecție-a-framework-ului",headingTag:"h2"}}),X=new yl({props:{title:"Gestionarea Memoriei și 
Performanța",local:"gestionarea-memoriei-și-performanța",headingTag:"h3"}}),R=new he({props:{title:"Cum Funcționează Flash Attention",$$slots:{default:[rt]},$$scope:{ctx:v}}}),x=new he({props:{title:"Cum Funcționează PagedAttention",$$slots:{default:[ct]},$$scope:{ctx:v}}}),Y=new he({props:{title:"Cum Funcționează Cuantificarea llama.cpp",$$slots:{default:[ot]},$$scope:{ctx:v}}}),O=new yl({props:{title:"Implementarea și Integrarea",local:"implementarea-și-integrarea",headingTag:"h3"}}),Ul=new yl({props:{title:"Începerea",local:"începerea",headingTag:"h2"}}),fl=new yl({props:{title:"Instalarea și Configurarea de Bază",local:"instalarea-și-configurarea-de-bază",headingTag:"h3"}}),cl=new Dl({props:{id:"inference-frameworks",options:[],$$slots:{default:[mt]},$$scope:{ctx:v}}}),Il=new yl({props:{title:"Generarea de Bază a Textului",local:"generarea-de-bază-a-textului",headingTag:"h3"}}),Gl=new Dl({props:{id:"inference-frameworks",options:[],$$slots:{default:[ut]},$$scope:{ctx:v}}}),zl=new yl({props:{title:"Controlul Avansat al Generării",local:"controlul-avansat-al-generării",headingTag:"h2"}}),Nl=new yl({props:{title:"Selecția Token-urilor și Eșantionarea",local:"selecția-token-urilor-și-eșantionarea",headingTag:"h3"}}),Ql=new Dl({props:{id:"inference-frameworks",options:[],$$slots:{default:[Ut]},$$scope:{ctx:v}}}),kl=new yl({props:{title:"Controlul Repetării",local:"controlul-repetării",headingTag:"h3"}}),vl=new Dl({props:{id:"inference-frameworks",options:[],$$slots:{default:[Jt]},$$scope:{ctx:v}}}),xl=new yl({props:{title:"Controlul Lungimii și Secvențele de Oprire",local:"controlul-lungimii-și-secvențele-de-oprire",headingTag:"h3"}}),Al=new Dl({props:{id:"inference-frameworks",options:[],$$slots:{default:[yt]},$$scope:{ctx:v}}}),_l=new yl({props:{title:"Gestionarea Memoriei",local:"gestionarea-memoriei",headingTag:"h2"}}),Vl=new Dl({props:{id:"inference-frameworks",options:[],$$slots:{default:[Tt]},$$scope:{ctx:v}}}),Fl=new 
yl({props:{title:"Resurse",local:"resurse",headingTag:"h2"}}),Ll=new nt({props:{source:"https://github.com/huggingface/course/blob/main/chapters/rum/chapter2/8.mdx"}}),{c(){m=w("meta"),p=c(),j=w("p"),J=c(),f(u.$$.fragment),y=c(),M=w("p"),M.textContent=U,r=c(),i=w("p"),i.textContent=Q,A=c(),f($.$$.fragment),k=c(),W=w("p"),W.textContent=ol,E=c(),f(X.$$.fragment),H=c(),T=w("p"),T.innerHTML=S,ll=c(),L=w("img"),el=c(),f(R.$$.fragment),tl=c(),D=w("p"),D.innerHTML=hl,sl=c(),f(x.$$.fragment),nl=c(),P=w("p"),P.innerHTML=bl,al=c(),f(Y.$$.fragment),Ml=c(),f(O.$$.fragment),Tl=c(),_=w("p"),_.textContent=ml,il=c(),K=w("p"),K.innerHTML=Cl,pl=c(),q=w("p"),q.innerHTML=Jl,jl=c(),F=w("p"),F.innerHTML=ul,rl=c(),f(Ul.$$.fragment),n=c(),d=w("p"),d.textContent=Sl,wl=c(),f(fl.$$.fragment),Bl=c(),f(cl.$$.fragment),gl=c(),f(Il.$$.fragment),s=c(),h=w("p"),h.textContent=Ce,Kl=c(),f(Gl.$$.fragment),le=c(),f(zl.$$.fragment),ee=c(),f(Nl.$$.fragment),te=c(),El=w("p"),El.textContent=fe,se=c(),$l=w("ol"),$l.innerHTML=Be,ne=c(),Xl=w("p"),Xl.textContent=ge,ae=c(),f(Ql.$$.fragment),Me=c(),f(kl.$$.fragment),ie=c(),Rl=w("p"),Rl.textContent=Ze,pe=c(),f(vl.$$.fragment),re=c(),f(xl.$$.fragment),ce=c(),Yl=w("p"),Yl.textContent=Ge,oe=c(),f(Al.$$.fragment),me=c(),f(_l.$$.fragment),ue=c(),ql=w("p"),ql.textContent=Qe,Ue=c(),f(Vl.$$.fragment),Je=c(),f(Fl.$$.fragment),ye=c(),Hl=w("ul"),Hl.innerHTML=ve,Te=c(),f(Ll.$$.fragment),je=c(),Pl=w("p"),this.h()},l(l){const 
a=st("svelte-u9bgzb",document.head);m=I(a,"META",{name:!0,content:!0}),a.forEach(e),p=o(l),j=I(l,"P",{}),Ol(j).forEach(e),J=o(l),B(u.$$.fragment,l),y=o(l),M=I(l,"P",{"data-svelte-h":!0}),G(M)!=="svelte-l25pl2"&&(M.textContent=U),r=o(l),i=I(l,"P",{"data-svelte-h":!0}),G(i)!=="svelte-ieh7in"&&(i.textContent=Q),A=o(l),B($.$$.fragment,l),k=o(l),W=I(l,"P",{"data-svelte-h":!0}),G(W)!=="svelte-a0brur"&&(W.textContent=ol),E=o(l),B(X.$$.fragment,l),H=o(l),T=I(l,"P",{"data-svelte-h":!0}),G(T)!=="svelte-bi6vas"&&(T.innerHTML=S),ll=o(l),L=I(l,"IMG",{src:!0,alt:!0}),el=o(l),B(R.$$.fragment,l),tl=o(l),D=I(l,"P",{"data-svelte-h":!0}),G(D)!=="svelte-zoy9o2"&&(D.innerHTML=hl),sl=o(l),B(x.$$.fragment,l),nl=o(l),P=I(l,"P",{"data-svelte-h":!0}),G(P)!=="svelte-bz5wux"&&(P.innerHTML=bl),al=o(l),B(Y.$$.fragment,l),Ml=o(l),B(O.$$.fragment,l),Tl=o(l),_=I(l,"P",{"data-svelte-h":!0}),G(_)!=="svelte-rjqdb8"&&(_.textContent=ml),il=o(l),K=I(l,"P",{"data-svelte-h":!0}),G(K)!=="svelte-vkzd9m"&&(K.innerHTML=Cl),pl=o(l),q=I(l,"P",{"data-svelte-h":!0}),G(q)!=="svelte-1gkp50b"&&(q.innerHTML=Jl),jl=o(l),F=I(l,"P",{"data-svelte-h":!0}),G(F)!=="svelte-ej2h50"&&(F.innerHTML=ul),rl=o(l),B(Ul.$$.fragment,l),n=o(l),d=I(l,"P",{"data-svelte-h":!0}),G(d)!=="svelte-k038du"&&(d.textContent=Sl),wl=o(l),B(fl.$$.fragment,l),Bl=o(l),B(cl.$$.fragment,l),gl=o(l),B(Il.$$.fragment,l),s=o(l),h=I(l,"P",{"data-svelte-h":!0}),G(h)!=="svelte-1pfzauy"&&(h.textContent=Ce),Kl=o(l),B(Gl.$$.fragment,l),le=o(l),B(zl.$$.fragment,l),ee=o(l),B(Nl.$$.fragment,l),te=o(l),El=I(l,"P",{"data-svelte-h":!0}),G(El)!=="svelte-vv01ra"&&(El.textContent=fe),se=o(l),$l=I(l,"OL",{"data-svelte-h":!0}),G($l)!=="svelte-tnbogk"&&($l.innerHTML=Be),ne=o(l),Xl=I(l,"P",{"data-svelte-h":!0}),G(Xl)!=="svelte-1to0u51"&&(Xl.textContent=ge),ae=o(l),B(Ql.$$.fragment,l),Me=o(l),B(kl.$$.fragment,l),ie=o(l),Rl=I(l,"P",{"data-svelte-h":!0}),G(Rl)!=="svelte-17cj06z"&&(Rl.textContent=Ze),pe=o(l),B(vl.$$.fragment,l),re=o(l),B(xl.$$.fragment,l),ce=o(l),Yl=I(l,"P",{"data
-svelte-h":!0}),G(Yl)!=="svelte-3631at"&&(Yl.textContent=Ge),oe=o(l),B(Al.$$.fragment,l),me=o(l),B(_l.$$.fragment,l),ue=o(l),ql=I(l,"P",{"data-svelte-h":!0}),G(ql)!=="svelte-m10jfl"&&(ql.textContent=Qe),Ue=o(l),B(Vl.$$.fragment,l),Je=o(l),B(Fl.$$.fragment,l),ye=o(l),Hl=I(l,"UL",{"data-svelte-h":!0}),G(Hl)!=="svelte-1jjsuar"&&(Hl.innerHTML=ve),Te=o(l),B(Ll.$$.fragment,l),je=o(l),Pl=I(l,"P",{}),Ol(Pl).forEach(e),this.h()},h(){Wl(m,"name","hf:doc:metadata"),Wl(m,"content",wt),Ke(L.src,dl="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/tgi/flash-attn.png")||Wl(L,"src",dl),Wl(L,"alt","Flash Attention")},m(l,a){be(document.head,m),t(l,p,a),t(l,j,a),t(l,J,a),g(u,l,a),t(l,y,a),t(l,M,a),t(l,r,a),t(l,i,a),t(l,A,a),g($,l,a),t(l,k,a),t(l,W,a),t(l,E,a),g(X,l,a),t(l,H,a),t(l,T,a),t(l,ll,a),t(l,L,a),t(l,el,a),g(R,l,a),t(l,tl,a),t(l,D,a),t(l,sl,a),g(x,l,a),t(l,nl,a),t(l,P,a),t(l,al,a),g(Y,l,a),t(l,Ml,a),g(O,l,a),t(l,Tl,a),t(l,_,a),t(l,il,a),t(l,K,a),t(l,pl,a),t(l,q,a),t(l,jl,a),t(l,F,a),t(l,rl,a),g(Ul,l,a),t(l,n,a),t(l,d,a),t(l,wl,a),g(fl,l,a),t(l,Bl,a),g(cl,l,a),t(l,gl,a),g(Il,l,a),t(l,s,a),t(l,h,a),t(l,Kl,a),g(Gl,l,a),t(l,le,a),g(zl,l,a),t(l,ee,a),g(Nl,l,a),t(l,te,a),t(l,El,a),t(l,se,a),t(l,$l,a),t(l,ne,a),t(l,Xl,a),t(l,ae,a),g(Ql,l,a),t(l,Me,a),g(kl,l,a),t(l,ie,a),t(l,Rl,a),t(l,pe,a),g(vl,l,a),t(l,re,a),g(xl,l,a),t(l,ce,a),t(l,Yl,a),t(l,oe,a),g(Al,l,a),t(l,me,a),g(_l,l,a),t(l,ue,a),t(l,ql,a),t(l,Ue,a),g(Vl,l,a),t(l,Je,a),g(Fl,l,a),t(l,ye,a),t(l,Hl,a),t(l,Te,a),g(Ll,l,a),t(l,je,a),t(l,Pl,a),we=!0},p(l,[a]){const Ae={};a&2&&(Ae.$$scope={dirty:a,ctx:l}),R.$set(Ae);const Ve={};a&2&&(Ve.$$scope={dirty:a,ctx:l}),x.$set(Ve);const We={};a&2&&(We.$$scope={dirty:a,ctx:l}),Y.$set(We);const Se={};a&2&&(Se.$$scope={dirty:a,ctx:l}),cl.$set(Se);const ze={};a&2&&(ze.$$scope={dirty:a,ctx:l}),Gl.$set(ze);const Ne={};a&2&&(Ne.$$scope={dirty:a,ctx:l}),Ql.$set(Ne);const Ee={};a&2&&(Ee.$$scope={dirty:a,ctx:l}),vl.$set(Ee);const 
$e={};a&2&&($e.$$scope={dirty:a,ctx:l}),Al.$set($e);const Xe={};a&2&&(Xe.$$scope={dirty:a,ctx:l}),Vl.$set(Xe)},i(l){we||(b(u.$$.fragment,l),b($.$$.fragment,l),b(X.$$.fragment,l),b(R.$$.fragment,l),b(x.$$.fragment,l),b(Y.$$.fragment,l),b(O.$$.fragment,l),b(Ul.$$.fragment,l),b(fl.$$.fragment,l),b(cl.$$.fragment,l),b(Il.$$.fragment,l),b(Gl.$$.fragment,l),b(zl.$$.fragment,l),b(Nl.$$.fragment,l),b(Ql.$$.fragment,l),b(kl.$$.fragment,l),b(vl.$$.fragment,l),b(xl.$$.fragment,l),b(Al.$$.fragment,l),b(_l.$$.fragment,l),b(Vl.$$.fragment,l),b(Fl.$$.fragment,l),b(Ll.$$.fragment,l),we=!0)},o(l){C(u.$$.fragment,l),C($.$$.fragment,l),C(X.$$.fragment,l),C(R.$$.fragment,l),C(x.$$.fragment,l),C(Y.$$.fragment,l),C(O.$$.fragment,l),C(Ul.$$.fragment,l),C(fl.$$.fragment,l),C(cl.$$.fragment,l),C(Il.$$.fragment,l),C(Gl.$$.fragment,l),C(zl.$$.fragment,l),C(Nl.$$.fragment,l),C(Ql.$$.fragment,l),C(kl.$$.fragment,l),C(vl.$$.fragment,l),C(xl.$$.fragment,l),C(Al.$$.fragment,l),C(_l.$$.fragment,l),C(Vl.$$.fragment,l),C(Fl.$$.fragment,l),C(Ll.$$.fragment,l),we=!1},d(l){l&&(e(p),e(j),e(J),e(y),e(M),e(r),e(i),e(A),e(k),e(W),e(E),e(H),e(T),e(ll),e(L),e(el),e(tl),e(D),e(sl),e(nl),e(P),e(al),e(Ml),e(Tl),e(_),e(il),e(K),e(pl),e(q),e(jl),e(F),e(rl),e(n),e(d),e(wl),e(Bl),e(gl),e(s),e(h),e(Kl),e(le),e(ee),e(te),e(El),e(se),e($l),e(ne),e(Xl),e(ae),e(Me),e(ie),e(Rl),e(pe),e(re),e(ce),e(Yl),e(oe),e(me),e(ue),e(ql),e(Ue),e(Je),e(ye),e(Hl),e(Te),e(je),e(Pl)),e(m),Z(u,l),Z($,l),Z(X,l),Z(R,l),Z(x,l),Z(Y,l),Z(O,l),Z(Ul,l),Z(fl,l),Z(cl,l),Z(Il,l),Z(Gl,l),Z(zl,l),Z(Nl,l),Z(Ql,l),Z(kl,l),Z(vl,l),Z(xl,l),Z(Al,l),Z(_l,l),Z(Vl,l),Z(Fl,l),Z(Ll,l)}}}const wt='{"title":"Implementarea Optimizată a Inferenței","local":"implementarea-optimizată-a-inferenței","sections":[{"title":"Ghid de Selecție a Framework-ului","local":"ghid-de-selecție-a-framework-ului","sections":[{"title":"Gestionarea Memoriei și Performanța","local":"gestionarea-memoriei-și-performanța","sections":[],"depth":3},{"title":"Implementarea și 
Integrarea","local":"implementarea-și-integrarea","sections":[],"depth":3}],"depth":2},{"title":"Începerea","local":"începerea","sections":[{"title":"Instalarea și Configurarea de Bază","local":"instalarea-și-configurarea-de-bază","sections":[],"depth":3},{"title":"Generarea de Bază a Textului","local":"generarea-de-bază-a-textului","sections":[],"depth":3}],"depth":2},{"title":"Controlul Avansat al Generării","local":"controlul-avansat-al-generării","sections":[{"title":"Selecția Token-urilor și Eșantionarea","local":"selecția-token-urilor-și-eșantionarea","sections":[],"depth":3},{"title":"Controlul Repetării","local":"controlul-repetării","sections":[],"depth":3},{"title":"Controlul Lungimii și Secvențele de Oprire","local":"controlul-lungimii-și-secvențele-de-oprire","sections":[],"depth":3}],"depth":2},{"title":"Gestionarea Memoriei","local":"gestionarea-memoriei","sections":[],"depth":2},{"title":"Resurse","local":"resurse","sections":[],"depth":2}],"depth":1}';function It(v){return _e(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Zt extends qe{constructor(m){super(),Fe(this,m,It,jt,Ye,{})}}export{Zt as component}; | |
Xet Storage Details
- Size:
- 105 kB
- Xet hash:
- 56a6ddf6d7254644c2212ecc59270f48b8eaff324930f9e0d1f5c9efbb53da99
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.