Buckets:
// Compiled Svelte page module for the "Tokenizers, check!" chapter page.
// NOTE(review): the single-letter imports are minified runtime helpers whose
// definitions live in the chunk files; the roles described in the comments
// below are inferred from how they are called here — confirm against the chunks.
import { s as G, n as F, o as K } from "../chunks/scheduler.37c15a92.js";
import {
  S as N,
  i as O,
  g as u,
  s,
  r as C,
  A as R,
  h,
  f as n,
  c as o,
  j as A,
  u as P,
  x as T,
  k as S,
  y as W,
  a,
  v as E,
  d as B,
  t as H,
  w as L,
} from "../chunks/index.7cb9c9b8.js";
import { C as D } from "../chunks/CourseFloatingBanner.df82c153.js";
import { H as I, E as J } from "../chunks/getInferenceSnippets.a2135f3c.js";

/**
 * Builds the fragment object for this page: a set of lifecycle hooks
 * (`c`, `l`, `h`, `m`, `p`, `i`, `o`, `d`) that close over the nodes and
 * child components created here.
 *
 * @param {*} M - Passed in by the component initializer; not read by this fragment.
 * @returns {object} The fragment with its lifecycle hooks.
 */
function Q(M) {
  // Static copy rendered by the page.
  const congratsText = "Great job finishing this chapter!";
  const introText = "After this deep dive into tokenizers, you should:";
  const takeawayHtml =
    "<li>Be able to train a new tokenizer using an old one as a template</li> <li>Understand how to use offsets to map tokens’ positions to their original span of text</li> <li>Know the differences between BPE, WordPiece, and Unigram</li> <li>Be able to mix and match the blocks provided by the 🤗 Tokenizers library to build your own tokenizer</li> <li>Be able to use that tokenizer inside the 🤗 Transformers library</li>";

  // Nodes assigned in `c()` (fresh creation) or `l()` (claiming existing nodes).
  let metaTag;
  let leadParagraph;
  let congratsParagraph;
  let introParagraph;
  let takeawayList;
  let trailingParagraph;

  // Separator nodes produced by `s()` / `o()` between the elements above
  // (presumably whitespace text nodes — confirm in the index chunk).
  let sep0;
  let sep1;
  let sep2;
  let sep3;
  let sep4;
  let sep5;
  let sep6;
  let sep7;

  // Guards `i()` so the child intro calls run only once until `o()` resets it.
  let introStarted;

  // Child components, constructed in the same order as the original.
  const heading = new I({
    props: {
      title: "Tokenizers, check!",
      local: "tokenizers-check",
      headingTag: "h1",
    },
  });
  const banner = new D({
    props: {
      chapter: 6,
      classNames: "absolute z-10 right-0 top-0",
    },
  });
  const sourceLink = new J({
    props: {
      source:
        "https://github.com/huggingface/course/blob/main/chapters/en/chapter6/9.mdx",
    },
  });

  return {
    // Create: build every node from scratch, then apply attributes via `h()`.
    c() {
      metaTag = u("meta");
      sep0 = s();
      leadParagraph = u("p");
      sep1 = s();
      C(heading.$$.fragment);
      sep2 = s();
      C(banner.$$.fragment);
      sep3 = s();
      congratsParagraph = u("p");
      congratsParagraph.textContent = congratsText;
      sep4 = s();
      introParagraph = u("p");
      introParagraph.textContent = introText;
      sep5 = s();
      takeawayList = u("ul");
      takeawayList.innerHTML = takeawayHtml;
      sep6 = s();
      C(sourceLink.$$.fragment);
      sep7 = s();
      trailingParagraph = u("p");
      this.h();
    },

    // Claim: adopt nodes out of `nodes` and `document.head` instead of creating
    // them; static content is rewritten only when the value returned by `T()`
    // differs from the expected "svelte-…" marker.
    l(nodes) {
      const headNodes = R("svelte-u9bgzb", document.head);
      metaTag = h(headNodes, "META", { name: true, content: true });
      headNodes.forEach(n);
      sep0 = o(nodes);
      leadParagraph = h(nodes, "P", {});
      A(leadParagraph).forEach(n);
      sep1 = o(nodes);
      P(heading.$$.fragment, nodes);
      sep2 = o(nodes);
      P(banner.$$.fragment, nodes);
      sep3 = o(nodes);
      congratsParagraph = h(nodes, "P", { "data-svelte-h": true });
      if (T(congratsParagraph) !== "svelte-qrdqcf") {
        congratsParagraph.textContent = congratsText;
      }
      sep4 = o(nodes);
      introParagraph = h(nodes, "P", { "data-svelte-h": true });
      if (T(introParagraph) !== "svelte-ziaxv6") {
        introParagraph.textContent = introText;
      }
      sep5 = o(nodes);
      takeawayList = h(nodes, "UL", { "data-svelte-h": true });
      if (T(takeawayList) !== "svelte-jl1wny") {
        takeawayList.innerHTML = takeawayHtml;
      }
      sep6 = o(nodes);
      P(sourceLink.$$.fragment, nodes);
      sep7 = o(nodes);
      trailingParagraph = h(nodes, "P", {});
      A(trailingParagraph).forEach(n);
      this.h();
    },

    // Attributes shared by the create and claim paths.
    h() {
      S(metaTag, "name", "hf:doc:metadata");
      S(metaTag, "content", V);
    },

    // Mount: the meta tag goes into document.head; everything else is inserted
    // into `target` before `anchor`, in document order.
    m(target, anchor) {
      W(document.head, metaTag);
      a(target, sep0, anchor);
      a(target, leadParagraph, anchor);
      a(target, sep1, anchor);
      E(heading, target, anchor);
      a(target, sep2, anchor);
      E(banner, target, anchor);
      a(target, sep3, anchor);
      a(target, congratsParagraph, anchor);
      a(target, sep4, anchor);
      a(target, introParagraph, anchor);
      a(target, sep5, anchor);
      a(target, takeawayList, anchor);
      a(target, sep6, anchor);
      E(sourceLink, target, anchor);
      a(target, sep7, anchor);
      a(target, trailingParagraph, anchor);
      introStarted = true;
    },

    // Update hook: wired to the imported `F` (this fragment has no update logic of its own).
    p: F,

    // Intro: forward to the three child fragments, once per intro/outro cycle.
    i(local) {
      if (introStarted) {
        return;
      }
      B(heading.$$.fragment, local);
      B(banner.$$.fragment, local);
      B(sourceLink.$$.fragment, local);
      introStarted = true;
    },

    // Outro: forward to the three child fragments and re-arm `i()`.
    o(local) {
      H(heading.$$.fragment, local);
      H(banner.$$.fragment, local);
      H(sourceLink.$$.fragment, local);
      introStarted = false;
    },

    // Destroy: body nodes are passed to `n()` only when `detaching` is truthy;
    // the head meta tag is always passed to `n()`, and the child components are
    // always handed to `L()`.
    d(detaching) {
      if (detaching) {
        const bodyNodes = [
          sep0,
          leadParagraph,
          sep1,
          sep2,
          sep3,
          congratsParagraph,
          sep4,
          introParagraph,
          sep5,
          takeawayList,
          sep6,
          sep7,
          trailingParagraph,
        ];
        for (const node of bodyNodes) {
          n(node);
        }
      }
      n(metaTag);
      L(heading, detaching);
      L(banner, detaching);
      L(sourceLink, detaching);
    },
  };
}

// JSON payload written to the `content` attribute of the "hf:doc:metadata" meta tag.
const V = '{"title":"Tokenizers, check!","local":"tokenizers-check","sections":[],"depth":1}';

/**
 * Instance function handed to the component initializer.
 *
 * Registers a callback through `K` that reads the `fw` query parameter from
 * the current URL; the value read is discarded.
 *
 * @param {*} M - Passed in by the component initializer; not read here.
 * @returns {Array} Always an empty array.
 */
function X(M) {
  K(() => {
    new URLSearchParams(window.location.search).get("fw");
  });
  return [];
}

/** Page component; exported under the name `component`. */
class ne extends N {
  constructor(i) {
    super();
    O(this, i, X, Q, G, {});
  }
}

export { ne as component };
Xet Storage Details
- Size:
- 2.66 kB
- Xet hash:
- 9614a86b4780f69fe07e7e3e908732f4e4a2a04c029c7309532e2883202aa2be
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.