Buckets:
/*
 * Compiled page component for the Korean "fast tokenizers" documentation page
 * (the EditOnGithub link below points at docs/source/ko/fast_tokenizers.md).
 *
 * This is build output; the short identifiers come from the minifier and are
 * kept so the module can be compared against other generated chunks.
 *
 * Repairs applied to this copy:
 *   - All Korean text and the emoji had been decoded with the wrong charset
 *     (UTF-8 bytes read as ISO-8859-7). They are restored to proper UTF-8.
 *   - Table-cell residue ("| " prefixes and " | |" suffixes) that had leaked
 *     into the code and into the multi-line template literals is removed.
 *   - Blank lines inside the highlighted snippets are restored where the
 *     matching base64 `code` payload contains "%0A%0A".
 *
 * NOTE(review): the damaged text cannot distinguish "메소드" from "메서드";
 * "메소드" is used here -- confirm against the markdown source.
 */
import { s as ce, n as Me, o as de } from "../chunks/scheduler.9bc65507.js";
import {
  S as ge,
  i as ke,
  g as r,
  s as n,
  r as m,
  A as ye,
  h as p,
  f as s,
  c as a,
  j as oe,
  u as o,
  x as f,
  k as fe,
  y as ue,
  a as l,
  v as c,
  d as M,
  t as d,
  w as g,
} from "../chunks/index.707bf1b6.js";
import { C as D } from "../chunks/CodeBlock.54a9f38d.js";
import { H as ee, E as je } from "../chunks/EditOnGithub.922df6ba.js";

/**
 * Fragment factory for the page.
 *
 * Builds the heading, code-block and edit-link child components up front and
 * returns an object with the fragment hooks `c`, `l`, `h`, `m`, `p`, `i`, `o`
 * and `d`. The helper functions come from the shared runtime chunk;
 * presumably these are the Svelte internal DOM helpers -- confirm against
 * ../chunks/index.*.js.
 *
 * @param {*} te - Unused by this fragment.
 * @returns {object} The fragment hook object.
 */
function he(te) {
  // Element and text-node handles, assigned in c() or l().
  let i, F, v, N, I, y, P, u, Q, W, h, E, G, T, X, x, z, R, S, Z, q, H, J, L, A, _, Y, O, C;
  // Set to true once the intro pass has run; reset by o().
  let K;

  // Paragraph contents. Strings containing markup are assigned via innerHTML,
  // plain ones via textContent.
  const se = `<code>PreTrainedTokenizerFast</code>는 <a href="https://huggingface.co/docs/tokenizers" rel="nofollow">🤗 Tokenizers</a> 라이브러리에 기반합니다. 🤗 Tokenizers 라이브러리의 토크나이저는
🤗 Transformers로 매우 간단하게 불러올 수 있습니다.`;
  const le = "구체적인 내용에 들어가기 전에, 몇 줄의 코드로 더미 토크나이저를 만들어 보겠습니다:";
  const ne =
    "우리가 정의한 파일을 통해 이제 학습된 토크나이저를 갖게 되었습니다. 이 런타임에서 계속 사용하거나 JSON 파일로 저장하여 나중에 사용할 수 있습니다.";
  const ae = `🤗 Transformers 라이브러리에서 이 토크나이저 객체를 활용하는 방법을 살펴보겠습니다.
<code>PreTrainedTokenizerFast</code> 클래스는 인스턴스화된 <em>토크나이저</em> 객체를 인수로 받아 쉽게 인스턴스화할 수 있습니다:`;
  const re =
    '이제 <code>fast_tokenizer</code> 객체는 🤗 Transformers 토크나이저에서 공유하는 모든 메소드와 함께 사용할 수 있습니다! 자세한 내용은 <a href="main_classes/tokenizer">토크나이저 페이지</a>를 참조하세요.';
  const pe = "JSON 파일에서 토크나이저를 불러오기 위해, 먼저 토크나이저를 저장해 보겠습니다:";
  const ie =
    "JSON 파일을 저장한 경로는 <code>tokenizer_file</code> 매개변수를 사용하여 <code>PreTrainedTokenizerFast</code> 초기화 메소드에 전달할 수 있습니다:";
  const me =
    '이제 <code>fast_tokenizer</code> 객체는 🤗 Transformers 토크나이저에서 공유하는 모든 메소드와 함께 사용할 수 있습니다! 자세한 내용은 <a href="main_classes/tokenizer">토크나이저 페이지</a>를 참조하세요.';

  // Child components, in document order.
  const k = new ee({
    props: {
      title: "🤗 Tokenizers 라이브러리의 토크나이저 사용하기",
      local: "use-tokenizers-from-tokenizers",
      headingTag: "h1",
    },
  });

  const j = new D({
    props: {
      code: "ZnJvbSUyMHRva2VuaXplcnMlMjBpbXBvcnQlMjBUb2tlbml6ZXIlMEFmcm9tJTIwdG9rZW5pemVycy5tb2RlbHMlMjBpbXBvcnQlMjBCUEUlMEFmcm9tJTIwdG9rZW5pemVycy50cmFpbmVycyUyMGltcG9ydCUyMEJwZVRyYWluZXIlMEFmcm9tJTIwdG9rZW5pemVycy5wcmVfdG9rZW5pemVycyUyMGltcG9ydCUyMFdoaXRlc3BhY2UlMEElMEF0b2tlbml6ZXIlMjAlM0QlMjBUb2tlbml6ZXIoQlBFKHVua190b2tlbiUzRCUyMiU1QlVOSyU1RCUyMikpJTBBdHJhaW5lciUyMCUzRCUyMEJwZVRyYWluZXIoc3BlY2lhbF90b2tlbnMlM0QlNUIlMjIlNUJVTkslNUQlMjIlMkMlMjAlMjIlNUJDTFMlNUQlMjIlMkMlMjAlMjIlNUJTRVAlNUQlMjIlMkMlMjAlMjIlNUJQQUQlNUQlMjIlMkMlMjAlMjIlNUJNQVNLJTVEJTIyJTVEKSUwQSUwQXRva2VuaXplci5wcmVfdG9rZW5pemVyJTIwJTNEJTIwV2hpdGVzcGFjZSgpJTBBZmlsZXMlMjAlM0QlMjAlNUIuLi4lNUQlMEF0b2tlbml6ZXIudHJhaW4oZmlsZXMlMkMlMjB0cmFpbmVyKQ==",
      highlighted: `<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> tokenizers <span class="hljs-keyword">import</span> Tokenizer
<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> tokenizers.models <span class="hljs-keyword">import</span> BPE
<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> tokenizers.trainers <span class="hljs-keyword">import</span> BpeTrainer
<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> tokenizers.pre_tokenizers <span class="hljs-keyword">import</span> Whitespace

<span class="hljs-meta">>>> </span>tokenizer = Tokenizer(BPE(unk_token=<span class="hljs-string">"[UNK]"</span>))
<span class="hljs-meta">>>> </span>trainer = BpeTrainer(special_tokens=[<span class="hljs-string">"[UNK]"</span>, <span class="hljs-string">"[CLS]"</span>, <span class="hljs-string">"[SEP]"</span>, <span class="hljs-string">"[PAD]"</span>, <span class="hljs-string">"[MASK]"</span>])

<span class="hljs-meta">>>> </span>tokenizer.pre_tokenizer = Whitespace()
<span class="hljs-meta">>>> </span>files = [...]
<span class="hljs-meta">>>> </span>tokenizer.train(files, trainer)`,
      wrap: false,
    },
  });

  const $ = new ee({
    props: {
      title: "토크나이저 객체로부터 직접 불러오기",
      local: "loading-directly-from-the-tokenizer-object",
      headingTag: "h2",
    },
  });

  const b = new D({
    props: {
      code: "ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMFByZVRyYWluZWRUb2tlbml6ZXJGYXN0JTBBJTBBZmFzdF90b2tlbml6ZXIlMjAlM0QlMjBQcmVUcmFpbmVkVG9rZW5pemVyRmFzdCh0b2tlbml6ZXJfb2JqZWN0JTNEdG9rZW5pemVyKQ==",
      highlighted: `<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> PreTrainedTokenizerFast

<span class="hljs-meta">>>> </span>fast_tokenizer = PreTrainedTokenizerFast(tokenizer_object=tokenizer)`,
      wrap: false,
    },
  });

  const U = new ee({
    props: {
      title: "JSON 파일에서 불러오기",
      local: "loading-from-a-JSON-file",
      headingTag: "h2",
    },
  });

  const w = new D({
    props: {
      code: "dG9rZW5pemVyLnNhdmUoJTIydG9rZW5pemVyLmpzb24lMjIp",
      highlighted:
        '<span class="hljs-meta">>>> </span>tokenizer.save(<span class="hljs-string">"tokenizer.json"</span>)',
      wrap: false,
    },
  });

  const V = new D({
    props: {
      code: "ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMFByZVRyYWluZWRUb2tlbml6ZXJGYXN0JTBBJTBBZmFzdF90b2tlbml6ZXIlMjAlM0QlMjBQcmVUcmFpbmVkVG9rZW5pemVyRmFzdCh0b2tlbml6ZXJfZmlsZSUzRCUyMnRva2VuaXplci5qc29uJTIyKQ==",
      highlighted: `<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> PreTrainedTokenizerFast

<span class="hljs-meta">>>> </span>fast_tokenizer = PreTrainedTokenizerFast(tokenizer_file=<span class="hljs-string">"tokenizer.json"</span>)`,
      wrap: false,
    },
  });

  const B = new je({
    props: {
      source: "https://github.com/huggingface/transformers/blob/main/docs/source/ko/fast_tokenizers.md",
    },
  });

  return {
    // Create: build every node and child fragment from scratch.
    c() {
      i = r("meta");
      F = n();
      v = r("p");
      N = n();
      m(k.$$.fragment);
      I = n();
      y = r("p");
      y.innerHTML = se;
      P = n();
      u = r("p");
      u.textContent = le;
      Q = n();
      m(j.$$.fragment);
      W = n();
      h = r("p");
      h.textContent = ne;
      E = n();
      m($.$$.fragment);
      G = n();
      T = r("p");
      T.innerHTML = ae;
      X = n();
      m(b.$$.fragment);
      x = n();
      z = r("p");
      z.innerHTML = re;
      R = n();
      m(U.$$.fragment);
      S = n();
      Z = r("p");
      Z.textContent = pe;
      q = n();
      m(w.$$.fragment);
      H = n();
      J = r("p");
      J.innerHTML = ie;
      L = n();
      m(V.$$.fragment);
      A = n();
      _ = r("p");
      _.innerHTML = me;
      Y = n();
      m(B.$$.fragment);
      O = n();
      C = r("p");
      this.h();
    },

    // Claim: same structure as c(), but nodes are obtained from the node list
    // `e` (and from document.head for the meta tag). Paragraph content is only
    // (re)assigned when f(node) does not return the expected "svelte-…" token.
    l(e) {
      const t = ye("svelte-u9bgzb", document.head);
      i = p(t, "META", { name: true, content: true });
      t.forEach(s);
      F = a(e);
      v = p(e, "P", {});
      oe(v).forEach(s);
      N = a(e);
      o(k.$$.fragment, e);
      I = a(e);
      y = p(e, "P", { "data-svelte-h": true });
      if (f(y) !== "svelte-1ilwdv2") y.innerHTML = se;
      P = a(e);
      u = p(e, "P", { "data-svelte-h": true });
      if (f(u) !== "svelte-nobjxu") u.textContent = le;
      Q = a(e);
      o(j.$$.fragment, e);
      W = a(e);
      h = p(e, "P", { "data-svelte-h": true });
      if (f(h) !== "svelte-vmdasx") h.textContent = ne;
      E = a(e);
      o($.$$.fragment, e);
      G = a(e);
      T = p(e, "P", { "data-svelte-h": true });
      if (f(T) !== "svelte-10u78cn") T.innerHTML = ae;
      X = a(e);
      o(b.$$.fragment, e);
      x = a(e);
      z = p(e, "P", { "data-svelte-h": true });
      if (f(z) !== "svelte-tdf1x7") z.innerHTML = re;
      R = a(e);
      o(U.$$.fragment, e);
      S = a(e);
      Z = p(e, "P", { "data-svelte-h": true });
      if (f(Z) !== "svelte-16yucd6") Z.textContent = pe;
      q = a(e);
      o(w.$$.fragment, e);
      H = a(e);
      J = p(e, "P", { "data-svelte-h": true });
      if (f(J) !== "svelte-126md19") J.innerHTML = ie;
      L = a(e);
      o(V.$$.fragment, e);
      A = a(e);
      _ = p(e, "P", { "data-svelte-h": true });
      if (f(_) !== "svelte-tdf1x7") _.innerHTML = me;
      Y = a(e);
      o(B.$$.fragment, e);
      O = a(e);
      C = p(e, "P", {});
      oe(C).forEach(s);
      this.h();
    },

    // Hydrate attributes: fill in the <meta> tag carrying the page metadata.
    h() {
      fe(i, "name", "hf:doc:metadata");
      fe(i, "content", $e);
    },

    // Mount: the meta tag goes into document.head; everything else is placed
    // into target `e` at anchor `t`, in document order.
    m(e, t) {
      ue(document.head, i);
      l(e, F, t);
      l(e, v, t);
      l(e, N, t);
      c(k, e, t);
      l(e, I, t);
      l(e, y, t);
      l(e, P, t);
      l(e, u, t);
      l(e, Q, t);
      c(j, e, t);
      l(e, W, t);
      l(e, h, t);
      l(e, E, t);
      c($, e, t);
      l(e, G, t);
      l(e, T, t);
      l(e, X, t);
      c(b, e, t);
      l(e, x, t);
      l(e, z, t);
      l(e, R, t);
      c(U, e, t);
      l(e, S, t);
      l(e, Z, t);
      l(e, q, t);
      c(w, e, t);
      l(e, H, t);
      l(e, J, t);
      l(e, L, t);
      c(V, e, t);
      l(e, A, t);
      l(e, _, t);
      l(e, Y, t);
      c(B, e, t);
      l(e, O, t);
      l(e, C, t);
      K = true;
    },

    // Update: nothing in this page depends on changing state.
    p: Me,

    // Intro: run once per mount; K guards against repeated calls.
    i(e) {
      if (K) return;
      for (const child of [k, j, $, b, U, w, V, B]) {
        M(child.$$.fragment, e);
      }
      K = true;
    },

    // Outro: mirror of i(); clears the guard so a later intro runs again.
    o(e) {
      for (const child of [k, j, $, b, U, w, V, B]) {
        d(child.$$.fragment, e);
      }
      K = false;
    },

    // Destroy: body nodes are only detached when `e` is truthy; the meta tag
    // and the child components are always torn down.
    d(e) {
      if (e) {
        for (const node of [
          F, v, N, I, y, P, u, Q, W, h, E, G, T, X, x, z, R, S, Z, q, H, J, L, A, _, Y, O, C,
        ]) {
          s(node);
        }
      }
      s(i);
      for (const child of [k, j, $, b, U, w, V, B]) {
        g(child, e);
      }
    },
  };
}

// JSON-encoded page outline, written into the "hf:doc:metadata" <meta> tag.
const $e =
  '{"title":"🤗 Tokenizers 라이브러리의 토크나이저 사용하기","local":"use-tokenizers-from-tokenizers","sections":[{"title":"토크나이저 객체로부터 직접 불러오기","local":"loading-directly-from-the-tokenizer-object","sections":[],"depth":2},{"title":"JSON 파일에서 불러오기","local":"loading-from-a-JSON-file","sections":[],"depth":2}],"depth":1}';

/**
 * Instance initialiser for the component.
 *
 * Registers a callback through `de` that reads the "fw" query parameter from
 * the current URL and discards the value, then returns an empty array.
 *
 * @param {*} te - Unused.
 * @returns {Array} Always an empty array.
 */
function Te(te) {
  de(() => {
    new URLSearchParams(window.location.search).get("fw");
  });
  return [];
}

/** Page component; wires the initialiser and fragment factory into the base class. */
class we extends ge {
  constructor(i) {
    super();
    ke(this, i, Te, he, ce, {});
  }
}

export { we as component };
Xet Storage Details
- Size:
- 9.13 kB
- Xet hash:
- eb79c2dd903c6293346ce479ba75661f358b39acbf2122854ec80dfff5302cac
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.