Buckets:
| import{s as Zt,o as Xt,n as bt}from"../chunks/scheduler.25b97de1.js";import{S as Nt,i as Wt,g as o,s as n,r as g,A as Gt,h as i,f as a,c as s,j as z,u as k,x as d,k as L,y as t,a as p,v as b,d as v,t as $,w as T}from"../chunks/index.d9030fc9.js";import{T as St}from"../chunks/Tip.baa67368.js";import{D as E}from"../chunks/Docstring.ffac8efa.js";import{C as vt}from"../chunks/CodeBlock.e6cd0d95.js";import{E as Ft}from"../chunks/ExampleCodeBlock.22dfe688.js";import{H as Le,E as Yt}from"../chunks/EditOnGithub.91d95064.js";function Ot(q){let c,x=`Herbert implementation is the same as <code>BERT</code> except for the tokenization method. Refer to <a href="bert">BERT documentation</a> | |
| for API reference and examples.`;return{c(){c=o("p"),c.innerHTML=x},l(u){c=i(u,"P",{"data-svelte-h":!0}),d(c)!=="svelte-71d2k4"&&(c.innerHTML=x)},m(u,f){p(u,c,f)},p:bt,d(u){u&&a(c)}}}function Qt(q){let c,x="pair mask has the following format:",u,f,w;return f=new vt({props:{code:"MCUyMDAlMjAwJTIwMCUyMDAlMjAwJTIwMCUyMDAlMjAwJTIwMCUyMDAlMjAxJTIwMSUyMDElMjAxJTIwMSUyMDElMjAxJTIwMSUyMDElMEElN0MlMjBmaXJzdCUyMHNlcXVlbmNlJTIwJTIwJTIwJTIwJTdDJTIwc2Vjb25kJTIwc2VxdWVuY2UlMjAlN0M=",highlighted:`0<span class="hljs-number"> 0 </span>0<span class="hljs-number"> 0 </span>0<span class="hljs-number"> 0 </span>0<span class="hljs-number"> 0 </span>0<span class="hljs-number"> 0 </span>0<span class="hljs-number"> 1 </span>1<span class="hljs-number"> 1 </span>1<span class="hljs-number"> 1 </span>1<span class="hljs-number"> 1 </span>1 1 | |
| | first sequence | second sequence |`,wrap:!1}}),{c(){c=o("p"),c.textContent=x,u=n(),g(f.$$.fragment)},l(l){c=i(l,"P",{"data-svelte-h":!0}),d(c)!=="svelte-qjgeij"&&(c.textContent=x),u=s(l),k(f.$$.fragment,l)},m(l,y){p(l,c,y),p(l,u,y),b(f,l,y),w=!0},p:bt,i(l){w||(v(f.$$.fragment,l),w=!0)},o(l){$(f.$$.fragment,l),w=!1},d(l){l&&(a(c),a(u)),T(f,l)}}}function Kt(q){let c,x="BERT sequence pair mask has the following format:",u,f,w;return f=new vt({props:{code:"MCUyMDAlMjAwJTIwMCUyMDAlMjAwJTIwMCUyMDAlMjAwJTIwMCUyMDAlMjAxJTIwMSUyMDElMjAxJTIwMSUyMDElMjAxJTIwMSUyMDElMEElN0MlMjBmaXJzdCUyMHNlcXVlbmNlJTIwJTIwJTIwJTIwJTdDJTIwc2Vjb25kJTIwc2VxdWVuY2UlMjAlN0M=",highlighted:`0<span class="hljs-number"> 0 </span>0<span class="hljs-number"> 0 </span>0<span class="hljs-number"> 0 </span>0<span class="hljs-number"> 0 </span>0<span class="hljs-number"> 0 </span>0<span class="hljs-number"> 1 </span>1<span class="hljs-number"> 1 </span>1<span class="hljs-number"> 1 </span>1<span class="hljs-number"> 1 </span>1 1 | |
| | first sequence | second sequence |`,wrap:!1}}),{c(){c=o("p"),c.textContent=x,u=n(),g(f.$$.fragment)},l(l){c=i(l,"P",{"data-svelte-h":!0}),d(c)!=="svelte-thc81r"&&(c.textContent=x),u=s(l),k(f.$$.fragment,l)},m(l,y){p(l,c,y),p(l,u,y),b(f,l,y),w=!0},p:bt,i(l){w||(v(f.$$.fragment,l),w=!0)},o(l){$(f.$$.fragment,l),w=!1},d(l){l&&(a(c),a(u)),T(f,l)}}}function en(q){let c,x,u,f,w,l,y,He,F,$t=`The HerBERT model was proposed in <a href="https://www.aclweb.org/anthology/2020.acl-main.111.pdf" rel="nofollow">KLEJ: Comprehensive Benchmark for Polish Language Understanding</a> by Piotr Rybak, Robert Mroczkowski, Janusz Tracz, and | |
| Ireneusz Gawlik. It is a BERT-based Language Model trained on Polish Corpora using only MLM objective with dynamic | |
| masking of whole words.`,Ce,Z,Tt="The abstract from the paper is the following:",je,X,wt=`<em>In recent years, a series of Transformer-based models unlocked major improvements in general natural language | |
| understanding (NLU) tasks. Such a fast pace of research would not be possible without general NLU benchmarks, which | |
| allow for a fair comparison of the proposed methods. However, such benchmarks are available only for a handful of | |
| languages. To alleviate this issue, we introduce a comprehensive multi-task benchmark for the Polish language | |
| understanding, accompanied by an online leaderboard. It consists of a diverse set of tasks, adopted from existing | |
| datasets for named entity recognition, question-answering, textual entailment, and others. We also introduce a new | |
| sentiment analysis task for the e-commerce domain, named Allegro Reviews (AR). To ensure a common evaluation scheme and | |
| promote models that generalize to different NLU tasks, the benchmark includes datasets from varying domains and | |
| applications. Additionally, we release HerBERT, a Transformer-based model trained specifically for the Polish language, | |
| which has the best average performance and obtains the best results for three out of nine tasks. Finally, we provide an | |
| extensive evaluation, including several standard baselines and recently proposed, multilingual Transformer-based | |
| models.</em>`,Ie,N,Mt=`This model was contributed by <a href="https://huggingface.co/rmroczkowski" rel="nofollow">rmroczkowski</a>. The original code can be found | |
| <a href="https://github.com/allegro/HerBERT" rel="nofollow">here</a>.`,Ee,W,qe,G,Ue,U,De,S,Ve,m,Y,Ne,le,yt="Construct a BPE tokenizer for HerBERT.",We,ce,xt="Peculiarities:",Ge,pe,Jt=`<li><p>uses BERT’s pre-tokenizer: BaseTokenizer splits tokens on spaces, and also on punctuation. Each occurrence of a | |
| punctuation character will be treated separately.</p></li> <li><p>Such pretokenized input is BPE subtokenized</p></li>`,Se,de,zt=`This tokenizer inherits from <a href="/docs/transformers/pr_30690/en/model_doc/xlm#transformers.XLMTokenizer">XLMTokenizer</a> which contains most of the methods. Users should refer to the | |
| superclass for more information regarding methods.`,Ye,H,O,Oe,me,Lt=`Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and | |
| adding special tokens. An XLM sequence has the following format:`,Qe,fe,Ht="<li>single sequence: <code><s> X </s></code></li> <li>pair of sequences: <code><s> A </s> B </s></code></li>",Ke,D,Q,et,ue,Ct="Converts a sequence of tokens (string) in a single string.",tt,J,K,nt,_e,jt="Create a mask from the two sequences passed to be used in a sequence-pair classification task. An XLM sequence",st,V,rt,he,It="If <code>token_ids_1</code> is <code>None</code>, this method only returns the first portion of the mask (0s).",at,R,ee,ot,ge,Et=`Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding | |
| special tokens using the tokenizer <code>prepare_for_model</code> method.`,Re,te,Be,_,ne,it,ke,qt="Construct a “Fast” BPE tokenizer for HerBERT (backed by HuggingFace’s <em>tokenizers</em> library).",lt,be,Ut="Peculiarities:",ct,ve,Dt=`<li>uses BERT’s pre-tokenizer: BertPreTokenizer splits tokens on spaces, and also on punctuation. Each occurrence of | |
| a punctuation character will be treated separately.</li>`,pt,$e,Vt=`This tokenizer inherits from <a href="/docs/transformers/pr_30690/en/main_classes/tokenizer#transformers.PreTrainedTokenizer">PreTrainedTokenizer</a> which contains most of the methods. Users should refer to the | |
| superclass for more information regarding methods.`,dt,C,se,mt,Te,Rt=`Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and | |
| adding special tokens. An HerBERT, like BERT sequence has the following format:`,ft,we,Bt="<li>single sequence: <code><s> X </s></code></li> <li>pair of sequences: <code><s> A </s> B </s></code></li>",ut,j,re,_t,Me,Pt="Create a mask from the two sequences passed to be used in a sequence-pair classification task. HerBERT, like",ht,B,gt,P,ae,kt,ye,At=`Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding | |
| special tokens using the tokenizer <code>prepare_for_model</code> method.`,Pe,oe,Ae,ze,Fe;return w=new Le({props:{title:"HerBERT",local:"herbert",headingTag:"h1"}}),y=new Le({props:{title:"Overview",local:"overview",headingTag:"h2"}}),W=new Le({props:{title:"Usage example",local:"usage-example",headingTag:"h2"}}),G=new vt({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEhlcmJlcnRUb2tlbml6ZXIlMkMlMjBSb2JlcnRhTW9kZWwlMEElMEF0b2tlbml6ZXIlMjAlM0QlMjBIZXJiZXJ0VG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCglMjJhbGxlZ3JvJTJGaGVyYmVydC1rbGVqLWNhc2VkLXRva2VuaXplci12MSUyMiklMEFtb2RlbCUyMCUzRCUyMFJvYmVydGFNb2RlbC5mcm9tX3ByZXRyYWluZWQoJTIyYWxsZWdybyUyRmhlcmJlcnQta2xlai1jYXNlZC12MSUyMiklMEElMEFlbmNvZGVkX2lucHV0JTIwJTNEJTIwdG9rZW5pemVyLmVuY29kZSglMjJLdG8lMjBtYSUyMGxlcHN6JUM0JTg1JTIwc3p0dWslQzQlOTklMkMlMjBtYSUyMGxlcHN6eSUyMHJ6JUM0JTg1ZCUyMCVFMiU4MCU5MyUyMHRvJTIwamFzbmUuJTIyJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJwdCUyMiklMEFvdXRwdXRzJTIwJTNEJTIwbW9kZWwoZW5jb2RlZF9pbnB1dCklMEElMEElMjMlMjBIZXJCRVJUJTIwY2FuJTIwYWxzbyUyMGJlJTIwbG9hZGVkJTIwdXNpbmclMjBBdXRvVG9rZW5pemVyJTIwYW5kJTIwQXV0b01vZGVsJTNBJTBBaW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b01vZGVsJTJDJTIwQXV0b1Rva2VuaXplciUwQSUwQXRva2VuaXplciUyMCUzRCUyMEF1dG9Ub2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKCUyMmFsbGVncm8lMkZoZXJiZXJ0LWtsZWotY2FzZWQtdG9rZW5pemVyLXYxJTIyKSUwQW1vZGVsJTIwJTNEJTIwQXV0b01vZGVsLmZyb21fcHJldHJhaW5lZCglMjJhbGxlZ3JvJTJGaGVyYmVydC1rbGVqLWNhc2VkLXYxJTIyKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> HerbertTokenizer, RobertaModel | |
| <span class="hljs-meta">>>> </span>tokenizer = HerbertTokenizer.from_pretrained(<span class="hljs-string">"allegro/herbert-klej-cased-tokenizer-v1"</span>) | |
| <span class="hljs-meta">>>> </span>model = RobertaModel.from_pretrained(<span class="hljs-string">"allegro/herbert-klej-cased-v1"</span>) | |
| <span class="hljs-meta">>>> </span>encoded_input = tokenizer.encode(<span class="hljs-string">"Kto ma lepszą sztukę, ma lepszy rząd – to jasne."</span>, return_tensors=<span class="hljs-string">"pt"</span>) | |
| <span class="hljs-meta">>>> </span>outputs = model(encoded_input) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># HerBERT can also be loaded using AutoTokenizer and AutoModel:</span> | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModel, AutoTokenizer | |
| <span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"allegro/herbert-klej-cased-tokenizer-v1"</span>) | |
| <span class="hljs-meta">>>> </span>model = AutoModel.from_pretrained(<span class="hljs-string">"allegro/herbert-klej-cased-v1"</span>)`,wrap:!1}}),U=new St({props:{$$slots:{default:[Ot]},$$scope:{ctx:q}}}),S=new Le({props:{title:"HerbertTokenizer",local:"transformers.HerbertTokenizer",headingTag:"h2"}}),Y=new E({props:{name:"class transformers.HerbertTokenizer",anchor:"transformers.HerbertTokenizer",parameters:[{name:"vocab_file",val:""},{name:"merges_file",val:""},{name:"tokenizer_file",val:" = None"},{name:"cls_token",val:" = '<s>'"},{name:"unk_token",val:" = '<unk>'"},{name:"pad_token",val:" = '<pad>'"},{name:"mask_token",val:" = '<mask>'"},{name:"sep_token",val:" = '</s>'"},{name:"bos_token",val:" = '<s>'"},{name:"do_lowercase_and_remove_accent",val:" = False"},{name:"additional_special_tokens",val:" = ['<special0>', '<special1>', '<special2>', '<special3>', '<special4>', '<special5>', '<special6>', '<special7>', '<special8>', '<special9>']"},{name:"lang2id",val:" = None"},{name:"id2lang",val:" = None"},{name:"**kwargs",val:""}],source:"https://github.com/huggingface/transformers/blob/vr_30690/src/transformers/models/herbert/tokenization_herbert.py#L277"}}),O=new E({props:{name:"build_inputs_with_special_tokens",anchor:"transformers.HerbertTokenizer.build_inputs_with_special_tokens",parameters:[{name:"token_ids_0",val:": List"},{name:"token_ids_1",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.HerbertTokenizer.build_inputs_with_special_tokens.token_ids_0",description:`<strong>token_ids_0</strong> (<code>List[int]</code>) — | |
| List of IDs to which the special tokens will be added.`,name:"token_ids_0"},{anchor:"transformers.HerbertTokenizer.build_inputs_with_special_tokens.token_ids_1",description:`<strong>token_ids_1</strong> (<code>List[int]</code>, <em>optional</em>) — | |
| Optional second list of IDs for sequence pairs.`,name:"token_ids_1"}],source:"https://github.com/huggingface/transformers/blob/vr_30690/src/transformers/models/herbert/tokenization_herbert.py#L511",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>List of <a href="../glossary#input-ids">input IDs</a> with the appropriate special tokens.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>List[int]</code></p> | |
| `}}),Q=new E({props:{name:"convert_tokens_to_string",anchor:"transformers.HerbertTokenizer.convert_tokens_to_string",parameters:[{name:"tokens",val:""}],source:"https://github.com/huggingface/transformers/blob/vr_30690/src/transformers/models/herbert/tokenization_herbert.py#L505"}}),K=new E({props:{name:"create_token_type_ids_from_sequences",anchor:"transformers.HerbertTokenizer.create_token_type_ids_from_sequences",parameters:[{name:"token_ids_0",val:": List"},{name:"token_ids_1",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.HerbertTokenizer.create_token_type_ids_from_sequences.token_ids_0",description:`<strong>token_ids_0</strong> (<code>List[int]</code>) — | |
| List of IDs.`,name:"token_ids_0"},{anchor:"transformers.HerbertTokenizer.create_token_type_ids_from_sequences.token_ids_1",description:`<strong>token_ids_1</strong> (<code>List[int]</code>, <em>optional</em>) — | |
| Optional second list of IDs for sequence pairs.`,name:"token_ids_1"}],source:"https://github.com/huggingface/transformers/blob/vr_30690/src/transformers/models/herbert/tokenization_herbert.py#L568",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>List of <a href="../glossary#token-type-ids">token type IDs</a> according to the given sequence(s).</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>List[int]</code></p> | |
| `}}),V=new Ft({props:{anchor:"transformers.HerbertTokenizer.create_token_type_ids_from_sequences.example",$$slots:{default:[Qt]},$$scope:{ctx:q}}}),ee=new E({props:{name:"get_special_tokens_mask",anchor:"transformers.HerbertTokenizer.get_special_tokens_mask",parameters:[{name:"token_ids_0",val:": List"},{name:"token_ids_1",val:": Optional = None"},{name:"already_has_special_tokens",val:": bool = False"}],parametersDescription:[{anchor:"transformers.HerbertTokenizer.get_special_tokens_mask.token_ids_0",description:`<strong>token_ids_0</strong> (<code>List[int]</code>) — | |
| List of IDs.`,name:"token_ids_0"},{anchor:"transformers.HerbertTokenizer.get_special_tokens_mask.token_ids_1",description:`<strong>token_ids_1</strong> (<code>List[int]</code>, <em>optional</em>) — | |
| Optional second list of IDs for sequence pairs.`,name:"token_ids_1"},{anchor:"transformers.HerbertTokenizer.get_special_tokens_mask.already_has_special_tokens",description:`<strong>already_has_special_tokens</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether or not the token list is already formatted with special tokens for the model.`,name:"already_has_special_tokens"}],source:"https://github.com/huggingface/transformers/blob/vr_30690/src/transformers/models/herbert/tokenization_herbert.py#L539",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>List[int]</code></p> | |
| `}}),te=new Le({props:{title:"HerbertTokenizerFast",local:"transformers.HerbertTokenizerFast",headingTag:"h2"}}),ne=new E({props:{name:"class transformers.HerbertTokenizerFast",anchor:"transformers.HerbertTokenizerFast",parameters:[{name:"vocab_file",val:" = None"},{name:"merges_file",val:" = None"},{name:"tokenizer_file",val:" = None"},{name:"cls_token",val:" = '<s>'"},{name:"unk_token",val:" = '<unk>'"},{name:"pad_token",val:" = '<pad>'"},{name:"mask_token",val:" = '<mask>'"},{name:"sep_token",val:" = '</s>'"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.HerbertTokenizerFast.vocab_file",description:`<strong>vocab_file</strong> (<code>str</code>) — | |
| Path to the vocabulary file.`,name:"vocab_file"},{anchor:"transformers.HerbertTokenizerFast.merges_file",description:`<strong>merges_file</strong> (<code>str</code>) — | |
| Path to the merges file.`,name:"merges_file"}],source:"https://github.com/huggingface/transformers/blob/vr_30690/src/transformers/models/herbert/tokenization_herbert_fast.py#L28"}}),se=new E({props:{name:"build_inputs_with_special_tokens",anchor:"transformers.HerbertTokenizerFast.build_inputs_with_special_tokens",parameters:[{name:"token_ids_0",val:": List"},{name:"token_ids_1",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.HerbertTokenizerFast.build_inputs_with_special_tokens.token_ids_0",description:`<strong>token_ids_0</strong> (<code>List[int]</code>) — | |
| List of IDs to which the special tokens will be added.`,name:"token_ids_0"},{anchor:"transformers.HerbertTokenizerFast.build_inputs_with_special_tokens.token_ids_1",description:`<strong>token_ids_1</strong> (<code>List[int]</code>, <em>optional</em>) — | |
| Optional second list of IDs for sequence pairs.`,name:"token_ids_1"}],source:"https://github.com/huggingface/transformers/blob/vr_30690/src/transformers/models/herbert/tokenization_herbert_fast.py#L74",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>List of <a href="../glossary#input-ids">input IDs</a> with the appropriate special tokens.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>List[int]</code></p> | |
| `}}),re=new E({props:{name:"create_token_type_ids_from_sequences",anchor:"transformers.HerbertTokenizerFast.create_token_type_ids_from_sequences",parameters:[{name:"token_ids_0",val:": List"},{name:"token_ids_1",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.HerbertTokenizerFast.create_token_type_ids_from_sequences.token_ids_0",description:`<strong>token_ids_0</strong> (<code>List[int]</code>) — | |
| List of IDs.`,name:"token_ids_0"},{anchor:"transformers.HerbertTokenizerFast.create_token_type_ids_from_sequences.token_ids_1",description:`<strong>token_ids_1</strong> (<code>List[int]</code>, <em>optional</em>) — | |
| Optional second list of IDs for sequence pairs.`,name:"token_ids_1"}],source:"https://github.com/huggingface/transformers/blob/vr_30690/src/transformers/models/herbert/tokenization_herbert_fast.py#L128",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>List of <a href="../glossary#token-type-ids">token type IDs</a> according to the given sequence(s).</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>List[int]</code></p> | |
| `}}),B=new Ft({props:{anchor:"transformers.HerbertTokenizerFast.create_token_type_ids_from_sequences.example",$$slots:{default:[Kt]},$$scope:{ctx:q}}}),ae=new E({props:{name:"get_special_tokens_mask",anchor:"transformers.HerbertTokenizerFast.get_special_tokens_mask",parameters:[{name:"token_ids_0",val:": List"},{name:"token_ids_1",val:": Optional = None"},{name:"already_has_special_tokens",val:": bool = False"}],parametersDescription:[{anchor:"transformers.HerbertTokenizerFast.get_special_tokens_mask.token_ids_0",description:`<strong>token_ids_0</strong> (<code>List[int]</code>) — | |
| List of IDs.`,name:"token_ids_0"},{anchor:"transformers.HerbertTokenizerFast.get_special_tokens_mask.token_ids_1",description:`<strong>token_ids_1</strong> (<code>List[int]</code>, <em>optional</em>) — | |
| Optional second list of IDs for sequence pairs.`,name:"token_ids_1"},{anchor:"transformers.HerbertTokenizerFast.get_special_tokens_mask.already_has_special_tokens",description:`<strong>already_has_special_tokens</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether or not the token list is already formatted with special tokens for the model.`,name:"already_has_special_tokens"}],source:"https://github.com/huggingface/transformers/blob/vr_30690/src/transformers/models/herbert/tokenization_herbert_fast.py#L101",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>List[int]</code></p> | |
| `}}),oe=new Yt({props:{source:"https://github.com/huggingface/transformers/blob/main/docs/source/en/model_doc/herbert.md"}}),{c(){c=o("meta"),x=n(),u=o("p"),f=n(),g(w.$$.fragment),l=n(),g(y.$$.fragment),He=n(),F=o("p"),F.innerHTML=$t,Ce=n(),Z=o("p"),Z.textContent=Tt,je=n(),X=o("p"),X.innerHTML=wt,Ie=n(),N=o("p"),N.innerHTML=Mt,Ee=n(),g(W.$$.fragment),qe=n(),g(G.$$.fragment),Ue=n(),g(U.$$.fragment),De=n(),g(S.$$.fragment),Ve=n(),m=o("div"),g(Y.$$.fragment),Ne=n(),le=o("p"),le.textContent=yt,We=n(),ce=o("p"),ce.textContent=xt,Ge=n(),pe=o("ul"),pe.innerHTML=Jt,Se=n(),de=o("p"),de.innerHTML=zt,Ye=n(),H=o("div"),g(O.$$.fragment),Oe=n(),me=o("p"),me.textContent=Lt,Qe=n(),fe=o("ul"),fe.innerHTML=Ht,Ke=n(),D=o("div"),g(Q.$$.fragment),et=n(),ue=o("p"),ue.textContent=Ct,tt=n(),J=o("div"),g(K.$$.fragment),nt=n(),_e=o("p"),_e.textContent=jt,st=n(),g(V.$$.fragment),rt=n(),he=o("p"),he.innerHTML=It,at=n(),R=o("div"),g(ee.$$.fragment),ot=n(),ge=o("p"),ge.innerHTML=Et,Re=n(),g(te.$$.fragment),Be=n(),_=o("div"),g(ne.$$.fragment),it=n(),ke=o("p"),ke.innerHTML=qt,lt=n(),be=o("p"),be.textContent=Ut,ct=n(),ve=o("ul"),ve.innerHTML=Dt,pt=n(),$e=o("p"),$e.innerHTML=Vt,dt=n(),C=o("div"),g(se.$$.fragment),mt=n(),Te=o("p"),Te.textContent=Rt,ft=n(),we=o("ul"),we.innerHTML=Bt,ut=n(),j=o("div"),g(re.$$.fragment),_t=n(),Me=o("p"),Me.textContent=Pt,ht=n(),g(B.$$.fragment),gt=n(),P=o("div"),g(ae.$$.fragment),kt=n(),ye=o("p"),ye.innerHTML=At,Pe=n(),g(oe.$$.fragment),Ae=n(),ze=o("p"),this.h()},l(e){const r=Gt("svelte-u9bgzb",document.head);c=i(r,"META",{name:!0,content:!0}),r.forEach(a),x=s(e),u=i(e,"P",{}),z(u).forEach(a),f=s(e),k(w.$$.fragment,e),l=s(e),k(y.$$.fragment,e),He=s(e),F=i(e,"P",{"data-svelte-h":!0}),d(F)!=="svelte-innugr"&&(F.innerHTML=$t),Ce=s(e),Z=i(e,"P",{"data-svelte-h":!0}),d(Z)!=="svelte-vfdo9a"&&(Z.textContent=Tt),je=s(e),X=i(e,"P",{"data-svelte-h":!0}),d(X)!=="svelte-1frp4qn"&&(X.innerHTML=wt),Ie=s(e),N=i(e,"P",{"data-svelte-h":!0}),d(N)!=="svelte-1jq061t"&&(N.innerHTML=Mt),Ee=s(e),k(W.$$.fragment,e),qe=s(e),k(G.$$.fragment,e),Ue=s(e),k(U.$$.fragment,e),De=s(e),k(S.$$.fragment,e),Ve=s(e),m=i(e,"DIV",{class:!0});var h=z(m);k(Y.$$.fragment,h),Ne=s(h),le=i(h,"P",{"data-svelte-h":!0}),d(le)!=="svelte-irtuqb"&&(le.textContent=yt),We=s(h),ce=i(h,"P",{"data-svelte-h":!0}),d(ce)!=="svelte-r7777v"&&(ce.textContent=xt),Ge=s(h),pe=i(h,"UL",{"data-svelte-h":!0}),d(pe)!=="svelte-15v80xk"&&(pe.innerHTML=Jt),Se=s(h),de=i(h,"P",{"data-svelte-h":!0}),d(de)!=="svelte-1ssnkfj"&&(de.innerHTML=zt),Ye=s(h),H=i(h,"DIV",{class:!0});var I=z(H);k(O.$$.fragment,I),Oe=s(I),me=i(I,"P",{"data-svelte-h":!0}),d(me)!=="svelte-1xo6smc"&&(me.textContent=Lt),Qe=s(I),fe=i(I,"UL",{"data-svelte-h":!0}),d(fe)!=="svelte-1w73b42"&&(fe.innerHTML=Ht),I.forEach(a),Ke=s(h),D=i(h,"DIV",{class:!0});var ie=z(D);k(Q.$$.fragment,ie),et=s(ie),ue=i(ie,"P",{"data-svelte-h":!0}),d(ue)!=="svelte-b3k2yi"&&(ue.textContent=Ct),ie.forEach(a),tt=s(h),J=i(h,"DIV",{class:!0});var A=z(J);k(K.$$.fragment,A),nt=s(A),_e=i(A,"P",{"data-svelte-h":!0}),d(_e)!=="svelte-17m549d"&&(_e.textContent=jt),st=s(A),k(V.$$.fragment,A),rt=s(A),he=i(A,"P",{"data-svelte-h":!0}),d(he)!=="svelte-owoxgn"&&(he.innerHTML=It),A.forEach(a),at=s(h),R=i(h,"DIV",{class:!0});var Ze=z(R);k(ee.$$.fragment,Ze),ot=s(Ze),ge=i(Ze,"P",{"data-svelte-h":!0}),d(ge)!=="svelte-1f4f5kp"&&(ge.innerHTML=Et),Ze.forEach(a),h.forEach(a),Re=s(e),k(te.$$.fragment,e),Be=s(e),_=i(e,"DIV",{class:!0});var M=z(_);k(ne.$$.fragment,M),it=s(M),ke=i(M,"P",{"data-svelte-h":!0}),d(ke)!=="svelte-1n4shqg"&&(ke.innerHTML=qt),lt=s(M),be=i(M,"P",{"data-svelte-h":!0}),d(be)!=="svelte-r7777v"&&(be.textContent=Ut),ct=s(M),ve=i(M,"UL",{"data-svelte-h":!0}),d(ve)!=="svelte-lbio7x"&&(ve.innerHTML=Dt),pt=s(M),$e=i(M,"P",{"data-svelte-h":!0}),d($e)!=="svelte-how1oh"&&($e.innerHTML=Vt),dt=s(M),C=i(M,"DIV",{class:!0});var xe=z(C);k(se.$$.fragment,xe),mt=s(xe),Te=i(xe,"P",{"data-svelte-h":!0}),d(Te)!=="svelte-11s9gqx"&&(Te.textContent=Rt),ft=s(xe),we=i(xe,"UL",{"data-svelte-h":!0}),d(we)!=="svelte-1w73b42"&&(we.innerHTML=Bt),xe.forEach(a),ut=s(M),j=i(M,"DIV",{class:!0});var Je=z(j);k(re.$$.fragment,Je),_t=s(Je),Me=i(Je,"P",{"data-svelte-h":!0}),d(Me)!=="svelte-xgjhc7"&&(Me.textContent=Pt),ht=s(Je),k(B.$$.fragment,Je),Je.forEach(a),gt=s(M),P=i(M,"DIV",{class:!0});var Xe=z(P);k(ae.$$.fragment,Xe),kt=s(Xe),ye=i(Xe,"P",{"data-svelte-h":!0}),d(ye)!=="svelte-1f4f5kp"&&(ye.innerHTML=At),Xe.forEach(a),M.forEach(a),Pe=s(e),k(oe.$$.fragment,e),Ae=s(e),ze=i(e,"P",{}),z(ze).forEach(a),this.h()},h(){L(c,"name","hf:doc:metadata"),L(c,"content",tn),L(H,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),L(D,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),L(J,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),L(R,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),L(m,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),L(C,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),L(j,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),L(P,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),L(_,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,r){t(document.head,c),p(e,x,r),p(e,u,r),p(e,f,r),b(w,e,r),p(e,l,r),b(y,e,r),p(e,He,r),p(e,F,r),p(e,Ce,r),p(e,Z,r),p(e,je,r),p(e,X,r),p(e,Ie,r),p(e,N,r),p(e,Ee,r),b(W,e,r),p(e,qe,r),b(G,e,r),p(e,Ue,r),b(U,e,r),p(e,De,r),b(S,e,r),p(e,Ve,r),p(e,m,r),b(Y,m,null),t(m,Ne),t(m,le),t(m,We),t(m,ce),t(m,Ge),t(m,pe),t(m,Se),t(m,de),t(m,Ye),t(m,H),b(O,H,null),t(H,Oe),t(H,me),t(H,Qe),t(H,fe),t(m,Ke),t(m,D),b(Q,D,null),t(D,et),t(D,ue),t(m,tt),t(m,J),b(K,J,null),t(J,nt),t(J,_e),t(J,st),b(V,J,null),t(J,rt),t(J,he),t(m,at),t(m,R),b(ee,R,null),t(R,ot),t(R,ge),p(e,Re,r),b(te,e,r),p(e,Be,r),p(e,_,r),b(ne,_,null),t(_,it),t(_,ke),t(_,lt),t(_,be),t(_,ct),t(_,ve),t(_,pt),t(_,$e),t(_,dt),t(_,C),b(se,C,null),t(C,mt),t(C,Te),t(C,ft),t(C,we),t(_,ut),t(_,j),b(re,j,null),t(j,_t),t(j,Me),t(j,ht),b(B,j,null),t(_,gt),t(_,P),b(ae,P,null),t(P,kt),t(P,ye),p(e,Pe,r),b(oe,e,r),p(e,Ae,r),p(e,ze,r),Fe=!0},p(e,[r]){const h={};r&2&&(h.$$scope={dirty:r,ctx:e}),U.$set(h);const I={};r&2&&(I.$$scope={dirty:r,ctx:e}),V.$set(I);const ie={};r&2&&(ie.$$scope={dirty:r,ctx:e}),B.$set(ie)},i(e){Fe||(v(w.$$.fragment,e),v(y.$$.fragment,e),v(W.$$.fragment,e),v(G.$$.fragment,e),v(U.$$.fragment,e),v(S.$$.fragment,e),v(Y.$$.fragment,e),v(O.$$.fragment,e),v(Q.$$.fragment,e),v(K.$$.fragment,e),v(V.$$.fragment,e),v(ee.$$.fragment,e),v(te.$$.fragment,e),v(ne.$$.fragment,e),v(se.$$.fragment,e),v(re.$$.fragment,e),v(B.$$.fragment,e),v(ae.$$.fragment,e),v(oe.$$.fragment,e),Fe=!0)},o(e){$(w.$$.fragment,e),$(y.$$.fragment,e),$(W.$$.fragment,e),$(G.$$.fragment,e),$(U.$$.fragment,e),$(S.$$.fragment,e),$(Y.$$.fragment,e),$(O.$$.fragment,e),$(Q.$$.fragment,e),$(K.$$.fragment,e),$(V.$$.fragment,e),$(ee.$$.fragment,e),$(te.$$.fragment,e),$(ne.$$.fragment,e),$(se.$$.fragment,e),$(re.$$.fragment,e),$(B.$$.fragment,e),$(ae.$$.fragment,e),$(oe.$$.fragment,e),Fe=!1},d(e){e&&(a(x),a(u),a(f),a(l),a(He),a(F),a(Ce),a(Z),a(je),a(X),a(Ie),a(N),a(Ee),a(qe),a(Ue),a(De),a(Ve),a(m),a(Re),a(Be),a(_),a(Pe),a(Ae),a(ze)),a(c),T(w,e),T(y,e),T(W,e),T(G,e),T(U,e),T(S,e),T(Y),T(O),T(Q),T(K),T(V),T(ee),T(te,e),T(ne),T(se),T(re),T(B),T(ae),T(oe,e)}}}const tn='{"title":"HerBERT","local":"herbert","sections":[{"title":"Overview","local":"overview","sections":[],"depth":2},{"title":"Usage example","local":"usage-example","sections":[],"depth":2},{"title":"HerbertTokenizer","local":"transformers.HerbertTokenizer","sections":[],"depth":2},{"title":"HerbertTokenizerFast","local":"transformers.HerbertTokenizerFast","sections":[],"depth":2}],"depth":1}';function nn(q){return Xt(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class dn extends Nt{constructor(c){super(),Wt(this,c,nn,en,Zt,{})}}export{dn as component}; | |
Xet Storage Details
- Size:
- 29.1 kB
- Xet hash:
- 0c757e23d58a2f8fdaf2d56d2f06fbc846ce0de70fadbde535a199362c24c600
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.