Buckets:
| import{s as uo,o as ho,n as X}from"../chunks/scheduler.25b97de1.js";import{S as fo,i as go,g as c,s as a,r as h,A as _o,h as p,f as l,c as r,j as U,u as f,x as u,k as z,y as s,a as m,v as g,d as _,t as b,w as M}from"../chunks/index.d9030fc9.js";import{T as gt}from"../chunks/Tip.baa67368.js";import{D as V}from"../chunks/Docstring.e257edda.js";import{C as Ne}from"../chunks/CodeBlock.e6cd0d95.js";import{E as Xe}from"../chunks/ExampleCodeBlock.20db4b6e.js";import{H as K,E as bo}from"../chunks/EditOnGithub.91d95064.js";function Mo(k){let t,y=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code> | |
| instance afterwards instead of this since the former takes care of running the pre and post processing steps while | |
| the latter silently ignores them.`;return{c(){t=c("p"),t.innerHTML=y},l(i){t=p(i,"P",{"data-svelte-h":!0}),u(t)!=="svelte-fincs2"&&(t.innerHTML=y)},m(i,d){m(i,t,d)},p:X,d(i){i&&l(t)}}}function yo(k){let t,y="Example:",i,d,T;return d=new Ne({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMkMlMjBJQmVydE1vZGVsJTBBaW1wb3J0JTIwdG9yY2glMEElMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCglMjJrc3N0ZXZlbiUyRmliZXJ0LXJvYmVydGEtYmFzZSUyMiklMEFtb2RlbCUyMCUzRCUyMElCZXJ0TW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUyMmtzc3RldmVuJTJGaWJlcnQtcm9iZXJ0YS1iYXNlJTIyKSUwQSUwQWlucHV0cyUyMCUzRCUyMHRva2VuaXplciglMjJIZWxsbyUyQyUyMG15JTIwZG9nJTIwaXMlMjBjdXRlJTIyJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJwdCUyMiklMEFvdXRwdXRzJTIwJTNEJTIwbW9kZWwoKippbnB1dHMpJTBBJTBBbGFzdF9oaWRkZW5fc3RhdGVzJTIwJTNEJTIwb3V0cHV0cy5sYXN0X2hpZGRlbl9zdGF0ZQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, IBertModel | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"kssteven/ibert-roberta-base"</span>) | |
| <span class="hljs-meta">>>> </span>model = IBertModel.from_pretrained(<span class="hljs-string">"kssteven/ibert-roberta-base"</span>) | |
| <span class="hljs-meta">>>> </span>inputs = tokenizer(<span class="hljs-string">"Hello, my dog is cute"</span>, return_tensors=<span class="hljs-string">"pt"</span>) | |
| <span class="hljs-meta">>>> </span>outputs = model(**inputs) | |
| <span class="hljs-meta">>>> </span>last_hidden_states = outputs.last_hidden_state`,wrap:!1}}),{c(){t=c("p"),t.textContent=y,i=a(),h(d.$$.fragment)},l(n){t=p(n,"P",{"data-svelte-h":!0}),u(t)!=="svelte-11lpom8"&&(t.textContent=y),i=r(n),f(d.$$.fragment,n)},m(n,w){m(n,t,w),m(n,i,w),g(d,n,w),T=!0},p:X,i(n){T||(_(d.$$.fragment,n),T=!0)},o(n){b(d.$$.fragment,n),T=!1},d(n){n&&(l(t),l(i)),M(d,n)}}}function To(k){let t,y=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code> | |
| instance afterwards instead of this since the former takes care of running the pre and post processing steps while | |
| the latter silently ignores them.`;return{c(){t=c("p"),t.innerHTML=y},l(i){t=p(i,"P",{"data-svelte-h":!0}),u(t)!=="svelte-fincs2"&&(t.innerHTML=y)},m(i,d){m(i,t,d)},p:X,d(i){i&&l(t)}}}function wo(k){let t,y="Example:",i,d,T;return d=new Ne({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMkMlMjBJQmVydEZvck1hc2tlZExNJTBBaW1wb3J0JTIwdG9yY2glMEElMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCglMjJrc3N0ZXZlbiUyRmliZXJ0LXJvYmVydGEtYmFzZSUyMiklMEFtb2RlbCUyMCUzRCUyMElCZXJ0Rm9yTWFza2VkTE0uZnJvbV9wcmV0cmFpbmVkKCUyMmtzc3RldmVuJTJGaWJlcnQtcm9iZXJ0YS1iYXNlJTIyKSUwQSUwQWlucHV0cyUyMCUzRCUyMHRva2VuaXplciglMjJUaGUlMjBjYXBpdGFsJTIwb2YlMjBGcmFuY2UlMjBpcyUyMCUzQ21hc2slM0UuJTIyJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJwdCUyMiklMEElMEF3aXRoJTIwdG9yY2gubm9fZ3JhZCgpJTNBJTBBJTIwJTIwJTIwJTIwbG9naXRzJTIwJTNEJTIwbW9kZWwoKippbnB1dHMpLmxvZ2l0cyUwQSUwQSUyMyUyMHJldHJpZXZlJTIwaW5kZXglMjBvZiUyMCUzQ21hc2slM0UlMEFtYXNrX3Rva2VuX2luZGV4JTIwJTNEJTIwKGlucHV0cy5pbnB1dF9pZHMlMjAlM0QlM0QlMjB0b2tlbml6ZXIubWFza190b2tlbl9pZCklNUIwJTVELm5vbnplcm8oYXNfdHVwbGUlM0RUcnVlKSU1QjAlNUQlMEElMEFwcmVkaWN0ZWRfdG9rZW5faWQlMjAlM0QlMjBsb2dpdHMlNUIwJTJDJTIwbWFza190b2tlbl9pbmRleCU1RC5hcmdtYXgoYXhpcyUzRC0xKSUwQSUwQWxhYmVscyUyMCUzRCUyMHRva2VuaXplciglMjJUaGUlMjBjYXBpdGFsJTIwb2YlMjBGcmFuY2UlMjBpcyUyMFBhcmlzLiUyMiUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIpJTVCJTIyaW5wdXRfaWRzJTIyJTVEJTBBJTIzJTIwbWFzayUyMGxhYmVscyUyMG9mJTIwbm9uLSUzQ21hc2slM0UlMjB0b2tlbnMlMEFsYWJlbHMlMjAlM0QlMjB0b3JjaC53aGVyZShpbnB1dHMuaW5wdXRfaWRzJTIwJTNEJTNEJTIwdG9rZW5pemVyLm1hc2tfdG9rZW5faWQlMkMlMjBsYWJlbHMlMkMlMjAtMTAwKSUwQSUwQW91dHB1dHMlMjAlM0QlMjBtb2RlbCgqKmlucHV0cyUyQyUyMGxhYmVscyUzRGxhYmVscyk=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, IBertForMaskedLM | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"kssteven/ibert-roberta-base"</span>) | |
| <span class="hljs-meta">>>> </span>model = IBertForMaskedLM.from_pretrained(<span class="hljs-string">"kssteven/ibert-roberta-base"</span>) | |
| <span class="hljs-meta">>>> </span>inputs = tokenizer(<span class="hljs-string">"The capital of France is <mask>."</span>, return_tensors=<span class="hljs-string">"pt"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">with</span> torch.no_grad(): | |
| <span class="hljs-meta">... </span> logits = model(**inputs).logits | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># retrieve index of <mask></span> | |
| <span class="hljs-meta">>>> </span>mask_token_index = (inputs.input_ids == tokenizer.mask_token_id)[<span class="hljs-number">0</span>].nonzero(as_tuple=<span class="hljs-literal">True</span>)[<span class="hljs-number">0</span>] | |
| <span class="hljs-meta">>>> </span>predicted_token_id = logits[<span class="hljs-number">0</span>, mask_token_index].argmax(axis=-<span class="hljs-number">1</span>) | |
| <span class="hljs-meta">>>> </span>labels = tokenizer(<span class="hljs-string">"The capital of France is Paris."</span>, return_tensors=<span class="hljs-string">"pt"</span>)[<span class="hljs-string">"input_ids"</span>] | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># mask labels of non-<mask> tokens</span> | |
| <span class="hljs-meta">>>> </span>labels = torch.where(inputs.input_ids == tokenizer.mask_token_id, labels, -<span class="hljs-number">100</span>) | |
| <span class="hljs-meta">>>> </span>outputs = model(**inputs, labels=labels)`,wrap:!1}}),{c(){t=c("p"),t.textContent=y,i=a(),h(d.$$.fragment)},l(n){t=p(n,"P",{"data-svelte-h":!0}),u(t)!=="svelte-11lpom8"&&(t.textContent=y),i=r(n),f(d.$$.fragment,n)},m(n,w){m(n,t,w),m(n,i,w),g(d,n,w),T=!0},p:X,i(n){T||(_(d.$$.fragment,n),T=!0)},o(n){b(d.$$.fragment,n),T=!1},d(n){n&&(l(t),l(i)),M(d,n)}}}function ko(k){let t,y=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code> | |
| instance afterwards instead of this since the former takes care of running the pre and post processing steps while | |
| the latter silently ignores them.`;return{c(){t=c("p"),t.innerHTML=y},l(i){t=p(i,"P",{"data-svelte-h":!0}),u(t)!=="svelte-fincs2"&&(t.innerHTML=y)},m(i,d){m(i,t,d)},p:X,d(i){i&&l(t)}}}function vo(k){let t,y="Example of single-label classification:",i,d,T;return d=new Ne({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b1Rva2VuaXplciUyQyUyMElCZXJ0Rm9yU2VxdWVuY2VDbGFzc2lmaWNhdGlvbiUwQSUwQXRva2VuaXplciUyMCUzRCUyMEF1dG9Ub2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKCUyMmtzc3RldmVuJTJGaWJlcnQtcm9iZXJ0YS1iYXNlJTIyKSUwQW1vZGVsJTIwJTNEJTIwSUJlcnRGb3JTZXF1ZW5jZUNsYXNzaWZpY2F0aW9uLmZyb21fcHJldHJhaW5lZCglMjJrc3N0ZXZlbiUyRmliZXJ0LXJvYmVydGEtYmFzZSUyMiklMEElMEFpbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoJTIySGVsbG8lMkMlMjBteSUyMGRvZyUyMGlzJTIwY3V0ZSUyMiUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIpJTBBJTBBd2l0aCUyMHRvcmNoLm5vX2dyYWQoKSUzQSUwQSUyMCUyMCUyMCUyMGxvZ2l0cyUyMCUzRCUyMG1vZGVsKCoqaW5wdXRzKS5sb2dpdHMlMEElMEFwcmVkaWN0ZWRfY2xhc3NfaWQlMjAlM0QlMjBsb2dpdHMuYXJnbWF4KCkuaXRlbSgpJTBBJTBBJTIzJTIwVG8lMjB0cmFpbiUyMGElMjBtb2RlbCUyMG9uJTIwJTYwbnVtX2xhYmVscyU2MCUyMGNsYXNzZXMlMkMlMjB5b3UlMjBjYW4lMjBwYXNzJTIwJTYwbnVtX2xhYmVscyUzRG51bV9sYWJlbHMlNjAlMjB0byUyMCU2MC5mcm9tX3ByZXRyYWluZWQoLi4uKSU2MCUwQW51bV9sYWJlbHMlMjAlM0QlMjBsZW4obW9kZWwuY29uZmlnLmlkMmxhYmVsKSUwQW1vZGVsJTIwJTNEJTIwSUJlcnRGb3JTZXF1ZW5jZUNsYXNzaWZpY2F0aW9uLmZyb21fcHJldHJhaW5lZCglMjJrc3N0ZXZlbiUyRmliZXJ0LXJvYmVydGEtYmFzZSUyMiUyQyUyMG51bV9sYWJlbHMlM0RudW1fbGFiZWxzKSUwQSUwQWxhYmVscyUyMCUzRCUyMHRvcmNoLnRlbnNvciglNUIxJTVEKSUwQWxvc3MlMjAlM0QlMjBtb2RlbCgqKmlucHV0cyUyQyUyMGxhYmVscyUzRGxhYmVscykubG9zcw==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, IBertForSequenceClassification | |
| <span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"kssteven/ibert-roberta-base"</span>) | |
| <span class="hljs-meta">>>> </span>model = IBertForSequenceClassification.from_pretrained(<span class="hljs-string">"kssteven/ibert-roberta-base"</span>) | |
| <span class="hljs-meta">>>> </span>inputs = tokenizer(<span class="hljs-string">"Hello, my dog is cute"</span>, return_tensors=<span class="hljs-string">"pt"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">with</span> torch.no_grad(): | |
| <span class="hljs-meta">... </span> logits = model(**inputs).logits | |
| <span class="hljs-meta">>>> </span>predicted_class_id = logits.argmax().item() | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># To train a model on \`num_labels\` classes, you can pass \`num_labels=num_labels\` to \`.from_pretrained(...)\`</span> | |
| <span class="hljs-meta">>>> </span>num_labels = <span class="hljs-built_in">len</span>(model.config.id2label) | |
| <span class="hljs-meta">>>> </span>model = IBertForSequenceClassification.from_pretrained(<span class="hljs-string">"kssteven/ibert-roberta-base"</span>, num_labels=num_labels) | |
| <span class="hljs-meta">>>> </span>labels = torch.tensor([<span class="hljs-number">1</span>]) | |
| <span class="hljs-meta">>>> </span>loss = model(**inputs, labels=labels).loss`,wrap:!1}}),{c(){t=c("p"),t.textContent=y,i=a(),h(d.$$.fragment)},l(n){t=p(n,"P",{"data-svelte-h":!0}),u(t)!=="svelte-ykxpe4"&&(t.textContent=y),i=r(n),f(d.$$.fragment,n)},m(n,w){m(n,t,w),m(n,i,w),g(d,n,w),T=!0},p:X,i(n){T||(_(d.$$.fragment,n),T=!0)},o(n){b(d.$$.fragment,n),T=!1},d(n){n&&(l(t),l(i)),M(d,n)}}}function Io(k){let t,y="Example of multi-label classification:",i,d,T;return d=new Ne({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b1Rva2VuaXplciUyQyUyMElCZXJ0Rm9yU2VxdWVuY2VDbGFzc2lmaWNhdGlvbiUwQSUwQXRva2VuaXplciUyMCUzRCUyMEF1dG9Ub2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKCUyMmtzc3RldmVuJTJGaWJlcnQtcm9iZXJ0YS1iYXNlJTIyKSUwQW1vZGVsJTIwJTNEJTIwSUJlcnRGb3JTZXF1ZW5jZUNsYXNzaWZpY2F0aW9uLmZyb21fcHJldHJhaW5lZCglMjJrc3N0ZXZlbiUyRmliZXJ0LXJvYmVydGEtYmFzZSUyMiUyQyUyMHByb2JsZW1fdHlwZSUzRCUyMm11bHRpX2xhYmVsX2NsYXNzaWZpY2F0aW9uJTIyKSUwQSUwQWlucHV0cyUyMCUzRCUyMHRva2VuaXplciglMjJIZWxsbyUyQyUyMG15JTIwZG9nJTIwaXMlMjBjdXRlJTIyJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJwdCUyMiklMEElMEF3aXRoJTIwdG9yY2gubm9fZ3JhZCgpJTNBJTBBJTIwJTIwJTIwJTIwbG9naXRzJTIwJTNEJTIwbW9kZWwoKippbnB1dHMpLmxvZ2l0cyUwQSUwQXByZWRpY3RlZF9jbGFzc19pZHMlMjAlM0QlMjB0b3JjaC5hcmFuZ2UoMCUyQyUyMGxvZ2l0cy5zaGFwZSU1Qi0xJTVEKSU1QnRvcmNoLnNpZ21vaWQobG9naXRzKS5zcXVlZXplKGRpbSUzRDApJTIwJTNFJTIwMC41JTVEJTBBJTBBJTIzJTIwVG8lMjB0cmFpbiUyMGElMjBtb2RlbCUyMG9uJTIwJTYwbnVtX2xhYmVscyU2MCUyMGNsYXNzZXMlMkMlMjB5b3UlMjBjYW4lMjBwYXNzJTIwJTYwbnVtX2xhYmVscyUzRG51bV9sYWJlbHMlNjAlMjB0byUyMCU2MC5mcm9tX3ByZXRyYWluZWQoLi4uKSU2MCUwQW51bV9sYWJlbHMlMjAlM0QlMjBsZW4obW9kZWwuY29uZmlnLmlkMmxhYmVsKSUwQW1vZGVsJTIwJTNEJTIwSUJlcnRGb3JTZXF1ZW5jZUNsYXNzaWZpY2F0aW9uLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJrc3N0ZXZlbiUyRmliZXJ0LXJvYmVydGEtYmFzZSUyMiUyQyUyMG51bV9sYWJlbHMlM0RudW1fbGFiZWxzJTJDJTIwcHJvYmxlbV90eXBlJTNEJTIybXVsdGlfbGFiZWxfY2xhc3NpZmljYXRpb24lMjIlMEEpJTBBJTBBbGFiZWxzJTIwJTNEJTIwdG9yY2guc3VtKCUwQSUyMCUyMCUyMCUyMHRvcmNoLm5uLmZ1bmN0aW9uYWwub25lX2hvdC
hwcmVkaWN0ZWRfY2xhc3NfaWRzJTVCTm9uZSUyQyUyMCUzQSU1RC5jbG9uZSgpJTJDJTIwbnVtX2NsYXNzZXMlM0RudW1fbGFiZWxzKSUyQyUyMGRpbSUzRDElMEEpLnRvKHRvcmNoLmZsb2F0KSUwQWxvc3MlMjAlM0QlMjBtb2RlbCgqKmlucHV0cyUyQyUyMGxhYmVscyUzRGxhYmVscykubG9zcw==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, IBertForSequenceClassification | |
| <span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"kssteven/ibert-roberta-base"</span>) | |
| <span class="hljs-meta">>>> </span>model = IBertForSequenceClassification.from_pretrained(<span class="hljs-string">"kssteven/ibert-roberta-base"</span>, problem_type=<span class="hljs-string">"multi_label_classification"</span>) | |
| <span class="hljs-meta">>>> </span>inputs = tokenizer(<span class="hljs-string">"Hello, my dog is cute"</span>, return_tensors=<span class="hljs-string">"pt"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">with</span> torch.no_grad(): | |
| <span class="hljs-meta">... </span> logits = model(**inputs).logits | |
| <span class="hljs-meta">>>> </span>predicted_class_ids = torch.arange(<span class="hljs-number">0</span>, logits.shape[-<span class="hljs-number">1</span>])[torch.sigmoid(logits).squeeze(dim=<span class="hljs-number">0</span>) > <span class="hljs-number">0.5</span>] | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># To train a model on \`num_labels\` classes, you can pass \`num_labels=num_labels\` to \`.from_pretrained(...)\`</span> | |
| <span class="hljs-meta">>>> </span>num_labels = <span class="hljs-built_in">len</span>(model.config.id2label) | |
| <span class="hljs-meta">>>> </span>model = IBertForSequenceClassification.from_pretrained( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"kssteven/ibert-roberta-base"</span>, num_labels=num_labels, problem_type=<span class="hljs-string">"multi_label_classification"</span> | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>labels = torch.<span class="hljs-built_in">sum</span>( | |
| <span class="hljs-meta">... </span> torch.nn.functional.one_hot(predicted_class_ids[<span class="hljs-literal">None</span>, :].clone(), num_classes=num_labels), dim=<span class="hljs-number">1</span> | |
| <span class="hljs-meta">... </span>).to(torch.<span class="hljs-built_in">float</span>) | |
| <span class="hljs-meta">>>> </span>loss = model(**inputs, labels=labels).loss`,wrap:!1}}),{c(){t=c("p"),t.textContent=y,i=a(),h(d.$$.fragment)},l(n){t=p(n,"P",{"data-svelte-h":!0}),u(t)!=="svelte-1l8e32d"&&(t.textContent=y),i=r(n),f(d.$$.fragment,n)},m(n,w){m(n,t,w),m(n,i,w),g(d,n,w),T=!0},p:X,i(n){T||(_(d.$$.fragment,n),T=!0)},o(n){b(d.$$.fragment,n),T=!1},d(n){n&&(l(t),l(i)),M(d,n)}}}function Jo(k){let t,y=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code> | |
| instance afterwards instead of this since the former takes care of running the pre and post processing steps while | |
| the latter silently ignores them.`;return{c(){t=c("p"),t.innerHTML=y},l(i){t=p(i,"P",{"data-svelte-h":!0}),u(t)!=="svelte-fincs2"&&(t.innerHTML=y)},m(i,d){m(i,t,d)},p:X,d(i){i&&l(t)}}}function $o(k){let t,y="Example:",i,d,T;return d=new Ne({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMkMlMjBJQmVydEZvck11bHRpcGxlQ2hvaWNlJTBBaW1wb3J0JTIwdG9yY2glMEElMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCglMjJrc3N0ZXZlbiUyRmliZXJ0LXJvYmVydGEtYmFzZSUyMiklMEFtb2RlbCUyMCUzRCUyMElCZXJ0Rm9yTXVsdGlwbGVDaG9pY2UuZnJvbV9wcmV0cmFpbmVkKCUyMmtzc3RldmVuJTJGaWJlcnQtcm9iZXJ0YS1iYXNlJTIyKSUwQSUwQXByb21wdCUyMCUzRCUyMCUyMkluJTIwSXRhbHklMkMlMjBwaXp6YSUyMHNlcnZlZCUyMGluJTIwZm9ybWFsJTIwc2V0dGluZ3MlMkMlMjBzdWNoJTIwYXMlMjBhdCUyMGElMjByZXN0YXVyYW50JTJDJTIwaXMlMjBwcmVzZW50ZWQlMjB1bnNsaWNlZC4lMjIlMEFjaG9pY2UwJTIwJTNEJTIwJTIySXQlMjBpcyUyMGVhdGVuJTIwd2l0aCUyMGElMjBmb3JrJTIwYW5kJTIwYSUyMGtuaWZlLiUyMiUwQWNob2ljZTElMjAlM0QlMjAlMjJJdCUyMGlzJTIwZWF0ZW4lMjB3aGlsZSUyMGhlbGQlMjBpbiUyMHRoZSUyMGhhbmQuJTIyJTBBbGFiZWxzJTIwJTNEJTIwdG9yY2gudGVuc29yKDApLnVuc3F1ZWV6ZSgwKSUyMCUyMCUyMyUyMGNob2ljZTAlMjBpcyUyMGNvcnJlY3QlMjAoYWNjb3JkaW5nJTIwdG8lMjBXaWtpcGVkaWElMjAlM0IpKSUyQyUyMGJhdGNoJTIwc2l6ZSUyMDElMEElMEFlbmNvZGluZyUyMCUzRCUyMHRva2VuaXplciglNUJwcm9tcHQlMkMlMjBwcm9tcHQlNUQlMkMlMjAlNUJjaG9pY2UwJTJDJTIwY2hvaWNlMSU1RCUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIlMkMlMjBwYWRkaW5nJTNEVHJ1ZSklMEFvdXRwdXRzJTIwJTNEJTIwbW9kZWwoKiolN0JrJTNBJTIwdi51bnNxdWVlemUoMCklMjBmb3IlMjBrJTJDJTIwdiUyMGluJTIwZW5jb2RpbmcuaXRlbXMoKSU3RCUyQyUyMGxhYmVscyUzRGxhYmVscyklMjAlMjAlMjMlMjBiYXRjaCUyMHNpemUlMjBpcyUyMDElMEElMEElMjMlMjB0aGUlMjBsaW5lYXIlMjBjbGFzc2lmaWVyJTIwc3RpbGwlMjBuZWVkcyUyMHRvJTIwYmUlMjB0cmFpbmVkJTBBbG9zcyUyMCUzRCUyMG91dHB1dHMubG9zcyUwQWxvZ2l0cyUyMCUzRCUyMG91dHB1dHMubG9naXRz",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, IBertForMultipleChoice | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"kssteven/ibert-roberta-base"</span>) | |
| <span class="hljs-meta">>>> </span>model = IBertForMultipleChoice.from_pretrained(<span class="hljs-string">"kssteven/ibert-roberta-base"</span>) | |
| <span class="hljs-meta">>>> </span>prompt = <span class="hljs-string">"In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."</span> | |
| <span class="hljs-meta">>>> </span>choice0 = <span class="hljs-string">"It is eaten with a fork and a knife."</span> | |
| <span class="hljs-meta">>>> </span>choice1 = <span class="hljs-string">"It is eaten while held in the hand."</span> | |
| <span class="hljs-meta">>>> </span>labels = torch.tensor(<span class="hljs-number">0</span>).unsqueeze(<span class="hljs-number">0</span>) <span class="hljs-comment"># choice0 is correct (according to Wikipedia ;)), batch size 1</span> | |
| <span class="hljs-meta">>>> </span>encoding = tokenizer([prompt, prompt], [choice0, choice1], return_tensors=<span class="hljs-string">"pt"</span>, padding=<span class="hljs-literal">True</span>) | |
| <span class="hljs-meta">>>> </span>outputs = model(**{k: v.unsqueeze(<span class="hljs-number">0</span>) <span class="hljs-keyword">for</span> k, v <span class="hljs-keyword">in</span> encoding.items()}, labels=labels) <span class="hljs-comment"># batch size is 1</span> | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># the linear classifier still needs to be trained</span> | |
| <span class="hljs-meta">>>> </span>loss = outputs.loss | |
| <span class="hljs-meta">>>> </span>logits = outputs.logits`,wrap:!1}}),{c(){t=c("p"),t.textContent=y,i=a(),h(d.$$.fragment)},l(n){t=p(n,"P",{"data-svelte-h":!0}),u(t)!=="svelte-11lpom8"&&(t.textContent=y),i=r(n),f(d.$$.fragment,n)},m(n,w){m(n,t,w),m(n,i,w),g(d,n,w),T=!0},p:X,i(n){T||(_(d.$$.fragment,n),T=!0)},o(n){b(d.$$.fragment,n),T=!1},d(n){n&&(l(t),l(i)),M(d,n)}}}function Bo(k){let t,y=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code> | |
| instance afterwards instead of this since the former takes care of running the pre and post processing steps while | |
| the latter silently ignores them.`;return{c(){t=c("p"),t.innerHTML=y},l(i){t=p(i,"P",{"data-svelte-h":!0}),u(t)!=="svelte-fincs2"&&(t.innerHTML=y)},m(i,d){m(i,t,d)},p:X,d(i){i&&l(t)}}}function jo(k){let t,y="Example:",i,d,T;return d=new Ne({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMkMlMjBJQmVydEZvclRva2VuQ2xhc3NpZmljYXRpb24lMEFpbXBvcnQlMjB0b3JjaCUwQSUwQXRva2VuaXplciUyMCUzRCUyMEF1dG9Ub2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKCUyMmtzc3RldmVuJTJGaWJlcnQtcm9iZXJ0YS1iYXNlJTIyKSUwQW1vZGVsJTIwJTNEJTIwSUJlcnRGb3JUb2tlbkNsYXNzaWZpY2F0aW9uLmZyb21fcHJldHJhaW5lZCglMjJrc3N0ZXZlbiUyRmliZXJ0LXJvYmVydGEtYmFzZSUyMiklMEElMEFpbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoJTBBJTIwJTIwJTIwJTIwJTIySHVnZ2luZ0ZhY2UlMjBpcyUyMGElMjBjb21wYW55JTIwYmFzZWQlMjBpbiUyMFBhcmlzJTIwYW5kJTIwTmV3JTIwWW9yayUyMiUyQyUyMGFkZF9zcGVjaWFsX3Rva2VucyUzREZhbHNlJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJwdCUyMiUwQSklMEElMEF3aXRoJTIwdG9yY2gubm9fZ3JhZCgpJTNBJTBBJTIwJTIwJTIwJTIwbG9naXRzJTIwJTNEJTIwbW9kZWwoKippbnB1dHMpLmxvZ2l0cyUwQSUwQXByZWRpY3RlZF90b2tlbl9jbGFzc19pZHMlMjAlM0QlMjBsb2dpdHMuYXJnbWF4KC0xKSUwQSUwQSUyMyUyME5vdGUlMjB0aGF0JTIwdG9rZW5zJTIwYXJlJTIwY2xhc3NpZmllZCUyMHJhdGhlciUyMHRoZW4lMjBpbnB1dCUyMHdvcmRzJTIwd2hpY2glMjBtZWFucyUyMHRoYXQlMEElMjMlMjB0aGVyZSUyMG1pZ2h0JTIwYmUlMjBtb3JlJTIwcHJlZGljdGVkJTIwdG9rZW4lMjBjbGFzc2VzJTIwdGhhbiUyMHdvcmRzLiUwQSUyMyUyME11bHRpcGxlJTIwdG9rZW4lMjBjbGFzc2VzJTIwbWlnaHQlMjBhY2NvdW50JTIwZm9yJTIwdGhlJTIwc2FtZSUyMHdvcmQlMEFwcmVkaWN0ZWRfdG9rZW5zX2NsYXNzZXMlMjAlM0QlMjAlNUJtb2RlbC5jb25maWcuaWQybGFiZWwlNUJ0Lml0ZW0oKSU1RCUyMGZvciUyMHQlMjBpbiUyMHByZWRpY3RlZF90b2tlbl9jbGFzc19pZHMlNUIwJTVEJTVEJTBBJTBBbGFiZWxzJTIwJTNEJTIwcHJlZGljdGVkX3Rva2VuX2NsYXNzX2lkcyUwQWxvc3MlMjAlM0QlMjBtb2RlbCgqKmlucHV0cyUyQyUyMGxhYmVscyUzRGxhYmVscykubG9zcw==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, IBertForTokenClassification | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"kssteven/ibert-roberta-base"</span>) | |
| <span class="hljs-meta">>>> </span>model = IBertForTokenClassification.from_pretrained(<span class="hljs-string">"kssteven/ibert-roberta-base"</span>) | |
| <span class="hljs-meta">>>> </span>inputs = tokenizer( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"HuggingFace is a company based in Paris and New York"</span>, add_special_tokens=<span class="hljs-literal">False</span>, return_tensors=<span class="hljs-string">"pt"</span> | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">with</span> torch.no_grad(): | |
| <span class="hljs-meta">... </span> logits = model(**inputs).logits | |
| <span class="hljs-meta">>>> </span>predicted_token_class_ids = logits.argmax(-<span class="hljs-number">1</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Note that tokens are classified rather then input words which means that</span> | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># there might be more predicted token classes than words.</span> | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Multiple token classes might account for the same word</span> | |
| <span class="hljs-meta">>>> </span>predicted_tokens_classes = [model.config.id2label[t.item()] <span class="hljs-keyword">for</span> t <span class="hljs-keyword">in</span> predicted_token_class_ids[<span class="hljs-number">0</span>]] | |
| <span class="hljs-meta">>>> </span>labels = predicted_token_class_ids | |
| <span class="hljs-meta">>>> </span>loss = model(**inputs, labels=labels).loss`,wrap:!1}}),{c(){t=c("p"),t.textContent=y,i=a(),h(d.$$.fragment)},l(n){t=p(n,"P",{"data-svelte-h":!0}),u(t)!=="svelte-11lpom8"&&(t.textContent=y),i=r(n),f(d.$$.fragment,n)},m(n,w){m(n,t,w),m(n,i,w),g(d,n,w),T=!0},p:X,i(n){T||(_(d.$$.fragment,n),T=!0)},o(n){b(d.$$.fragment,n),T=!1},d(n){n&&(l(t),l(i)),M(d,n)}}}function Co(k){let t,y=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code> | |
| instance afterwards instead of this since the former takes care of running the pre and post processing steps while | |
| the latter silently ignores them.`;return{c(){t=c("p"),t.innerHTML=y},l(i){t=p(i,"P",{"data-svelte-h":!0}),u(t)!=="svelte-fincs2"&&(t.innerHTML=y)},m(i,d){m(i,t,d)},p:X,d(i){i&&l(t)}}}function Uo(k){let t,y="Example:",i,d,T;return d=new Ne({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMkMlMjBJQmVydEZvclF1ZXN0aW9uQW5zd2VyaW5nJTBBaW1wb3J0JTIwdG9yY2glMEElMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCglMjJrc3N0ZXZlbiUyRmliZXJ0LXJvYmVydGEtYmFzZSUyMiklMEFtb2RlbCUyMCUzRCUyMElCZXJ0Rm9yUXVlc3Rpb25BbnN3ZXJpbmcuZnJvbV9wcmV0cmFpbmVkKCUyMmtzc3RldmVuJTJGaWJlcnQtcm9iZXJ0YS1iYXNlJTIyKSUwQSUwQXF1ZXN0aW9uJTJDJTIwdGV4dCUyMCUzRCUyMCUyMldobyUyMHdhcyUyMEppbSUyMEhlbnNvbiUzRiUyMiUyQyUyMCUyMkppbSUyMEhlbnNvbiUyMHdhcyUyMGElMjBuaWNlJTIwcHVwcGV0JTIyJTBBJTBBaW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyKHF1ZXN0aW9uJTJDJTIwdGV4dCUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIpJTBBd2l0aCUyMHRvcmNoLm5vX2dyYWQoKSUzQSUwQSUyMCUyMCUyMCUyMG91dHB1dHMlMjAlM0QlMjBtb2RlbCgqKmlucHV0cyklMEElMEFhbnN3ZXJfc3RhcnRfaW5kZXglMjAlM0QlMjBvdXRwdXRzLnN0YXJ0X2xvZ2l0cy5hcmdtYXgoKSUwQWFuc3dlcl9lbmRfaW5kZXglMjAlM0QlMjBvdXRwdXRzLmVuZF9sb2dpdHMuYXJnbWF4KCklMEElMEFwcmVkaWN0X2Fuc3dlcl90b2tlbnMlMjAlM0QlMjBpbnB1dHMuaW5wdXRfaWRzJTVCMCUyQyUyMGFuc3dlcl9zdGFydF9pbmRleCUyMCUzQSUyMGFuc3dlcl9lbmRfaW5kZXglMjAlMkIlMjAxJTVEJTBBJTBBJTIzJTIwdGFyZ2V0JTIwaXMlMjAlMjJuaWNlJTIwcHVwcGV0JTIyJTBBdGFyZ2V0X3N0YXJ0X2luZGV4JTIwJTNEJTIwdG9yY2gudGVuc29yKCU1QjE0JTVEKSUwQXRhcmdldF9lbmRfaW5kZXglMjAlM0QlMjB0b3JjaC50ZW5zb3IoJTVCMTUlNUQpJTBBJTBBb3V0cHV0cyUyMCUzRCUyMG1vZGVsKCoqaW5wdXRzJTJDJTIwc3RhcnRfcG9zaXRpb25zJTNEdGFyZ2V0X3N0YXJ0X2luZGV4JTJDJTIwZW5kX3Bvc2l0aW9ucyUzRHRhcmdldF9lbmRfaW5kZXgpJTBBbG9zcyUyMCUzRCUyMG91dHB1dHMubG9zcw==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, IBertForQuestionAnswering | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"kssteven/ibert-roberta-base"</span>) | |
| <span class="hljs-meta">>>> </span>model = IBertForQuestionAnswering.from_pretrained(<span class="hljs-string">"kssteven/ibert-roberta-base"</span>) | |
| <span class="hljs-meta">>>> </span>question, text = <span class="hljs-string">"Who was Jim Henson?"</span>, <span class="hljs-string">"Jim Henson was a nice puppet"</span> | |
| <span class="hljs-meta">>>> </span>inputs = tokenizer(question, text, return_tensors=<span class="hljs-string">"pt"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">with</span> torch.no_grad(): | |
| <span class="hljs-meta">... </span> outputs = model(**inputs) | |
| <span class="hljs-meta">>>> </span>answer_start_index = outputs.start_logits.argmax() | |
| <span class="hljs-meta">>>> </span>answer_end_index = outputs.end_logits.argmax() | |
| <span class="hljs-meta">>>> </span>predict_answer_tokens = inputs.input_ids[<span class="hljs-number">0</span>, answer_start_index : answer_end_index + <span class="hljs-number">1</span>] | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># target is "nice puppet"</span> | |
| <span class="hljs-meta">>>> </span>target_start_index = torch.tensor([<span class="hljs-number">14</span>]) | |
| <span class="hljs-meta">>>> </span>target_end_index = torch.tensor([<span class="hljs-number">15</span>]) | |
| <span class="hljs-meta">>>> </span>outputs = model(**inputs, start_positions=target_start_index, end_positions=target_end_index) | |
| <span class="hljs-meta">>>> </span>loss = outputs.loss`,wrap:!1}}),{c(){t=c("p"),t.textContent=y,i=a(),h(d.$$.fragment)},l(n){t=p(n,"P",{"data-svelte-h":!0}),u(t)!=="svelte-11lpom8"&&(t.textContent=y),i=r(n),f(d.$$.fragment,n)},m(n,w){m(n,t,w),m(n,i,w),g(d,n,w),T=!0},p:X,i(n){T||(_(d.$$.fragment,n),T=!0)},o(n){b(d.$$.fragment,n),T=!1},d(n){n&&(l(t),l(i)),M(d,n)}}}function zo(k){let t,y,i,d,T,n,w,bt,he,Fn=`The I-BERT model was proposed in <a href="https://arxiv.org/abs/2101.01321" rel="nofollow">I-BERT: Integer-only BERT Quantization</a> by | |
| Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney and Kurt Keutzer. It’s a quantized version of RoBERTa running | |
| inference up to four times faster.`,Mt,fe,Zn="The abstract from the paper is the following:",yt,ge,qn=`<em>Transformer based models, like BERT and RoBERTa, have achieved state-of-the-art results in many Natural Language | |
| Processing tasks. However, their memory footprint, inference latency, and power consumption are prohibitive for | |
| efficient inference at the edge, and even at the data center. While quantization can be a viable solution for this, | |
| previous work on quantizing Transformer based models use floating-point arithmetic during inference, which cannot | |
| efficiently utilize integer-only logical units such as the recent Turing Tensor Cores, or traditional integer-only ARM | |
| processors. In this work, we propose I-BERT, a novel quantization scheme for Transformer based models that quantizes | |
| the entire inference with integer-only arithmetic. Based on lightweight integer-only approximation methods for | |
| nonlinear operations, e.g., GELU, Softmax, and Layer Normalization, I-BERT performs an end-to-end integer-only BERT | |
| inference without any floating point calculation. We evaluate our approach on GLUE downstream tasks using | |
| RoBERTa-Base/Large. We show that for both cases, I-BERT achieves similar (and slightly higher) accuracy as compared to | |
| the full-precision baseline. Furthermore, our preliminary implementation of I-BERT shows a speedup of 2.4 - 4.0x for | |
| INT8 inference on a T4 GPU system as compared to FP32 inference. The framework has been developed in PyTorch and has | |
| been open-sourced.</em>`,Tt,_e,Wn='This model was contributed by <a href="https://huggingface.co/kssteven" rel="nofollow">kssteven</a>. The original code can be found <a href="https://github.com/kssteven418/I-BERT" rel="nofollow">here</a>.',wt,be,kt,Me,Rn='<li><a href="../tasks/sequence_classification">Text classification task guide</a></li> <li><a href="../tasks/token_classification">Token classification task guide</a></li> <li><a href="../tasks/question_answering">Question answering task guide</a></li> <li><a href="../tasks/masked_language_modeling">Masked language modeling task guide</a></li> <li><a href="../tasks/masked_language_modeling">Multiple choice task guide</a></li>',vt,ye,It,Q,Te,Xt,Ee,Gn=`This is the configuration class to store the configuration of a <a href="/docs/transformers/pr_33111/en/model_doc/ibert#transformers.IBertModel">IBertModel</a>. It is used to instantiate a I-BERT | |
| model according to the specified arguments, defining the model architecture. Instantiating a configuration with the | |
| defaults will yield a similar configuration to that of the IBERT | |
| <a href="https://huggingface.co/kssteven/ibert-roberta-base" rel="nofollow">kssteven/ibert-roberta-base</a> architecture.`,Nt,He,Vn=`Configuration objects inherit from <a href="/docs/transformers/pr_33111/en/main_classes/configuration#transformers.PretrainedConfig">PretrainedConfig</a> and can be used to control the model outputs. Read the | |
| documentation from <a href="/docs/transformers/pr_33111/en/main_classes/configuration#transformers.PretrainedConfig">PretrainedConfig</a> for more information.`,Jt,we,$t,v,ke,Et,Le,Xn="The bare I-BERT Model transformer outputting raw hidden-states without any specific head on top.",Ht,Se,Nn=`This model inherits from <a href="/docs/transformers/pr_33111/en/main_classes/model#transformers.PreTrainedModel">PreTrainedModel</a>. Check the superclass documentation for the generic methods the | |
| library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads | |
| etc.)`,Lt,Qe,En=`This model is also a PyTorch <a href="https://pytorch.org/docs/stable/nn.html#torch.nn.Module" rel="nofollow">torch.nn.Module</a> subclass. | |
| Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matters related to general usage | |
| and behavior.`,St,Ye,Hn=`The model can behave as an encoder (with only self-attention) as well as a decoder, in which case a layer of | |
| cross-attention is added between the self-attention layers, following the architecture described in <a href="https://arxiv.org/abs/1706.03762" rel="nofollow">Attention is | |
| all you need</a> by Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, | |
| Llion Jones, Aidan N. Gomez, Lukasz Kaiser and Illia Polosukhin.`,Qt,N,ve,Yt,Ae,Ln='The <a href="/docs/transformers/pr_33111/en/model_doc/ibert#transformers.IBertModel">IBertModel</a> forward method, overrides the <code>__call__</code> special method.',At,te,Pt,ne,Bt,Ie,jt,I,Je,Ot,Pe,Sn="I-BERT Model with a <code>language modeling</code> head on top.",Dt,Oe,Qn=`This model inherits from <a href="/docs/transformers/pr_33111/en/main_classes/model#transformers.PreTrainedModel">PreTrainedModel</a>. Check the superclass documentation for the generic methods the | |
| library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads | |
| etc.)`,Kt,De,Yn=`This model is also a PyTorch <a href="https://pytorch.org/docs/stable/nn.html#torch.nn.Module" rel="nofollow">torch.nn.Module</a> subclass. | |
| Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matters related to general usage | |
| and behavior.`,en,E,$e,tn,Ke,An='The <a href="/docs/transformers/pr_33111/en/model_doc/ibert#transformers.IBertForMaskedLM">IBertForMaskedLM</a> forward method, overrides the <code>__call__</code> special method.',nn,oe,on,se,Ct,Be,Ut,J,je,sn,et,Pn=`I-BERT Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled | |
| output) e.g. for GLUE tasks.`,an,tt,On=`This model inherits from <a href="/docs/transformers/pr_33111/en/main_classes/model#transformers.PreTrainedModel">PreTrainedModel</a>. Check the superclass documentation for the generic methods the | |
| library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads | |
| etc.)`,rn,nt,Dn=`This model is also a PyTorch <a href="https://pytorch.org/docs/stable/nn.html#torch.nn.Module" rel="nofollow">torch.nn.Module</a> subclass. | |
| Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matters related to general usage | |
| and behavior.`,ln,x,Ce,dn,ot,Kn='The <a href="/docs/transformers/pr_33111/en/model_doc/ibert#transformers.IBertForSequenceClassification">IBertForSequenceClassification</a> forward method, overrides the <code>__call__</code> special method.',cn,ae,pn,re,mn,ie,zt,Ue,xt,$,ze,un,st,eo=`I-BERT Model with a multiple choice classification head on top (a linear layer on top of the pooled output and a | |
| softmax) e.g. for RocStories/SWAG tasks.`,hn,at,to=`This model inherits from <a href="/docs/transformers/pr_33111/en/main_classes/model#transformers.PreTrainedModel">PreTrainedModel</a>. Check the superclass documentation for the generic methods the | |
| library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads | |
| etc.)`,fn,rt,no=`This model is also a PyTorch <a href="https://pytorch.org/docs/stable/nn.html#torch.nn.Module" rel="nofollow">torch.nn.Module</a> subclass. | |
| Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matters related to general usage | |
| and behavior.`,gn,H,xe,_n,it,oo='The <a href="/docs/transformers/pr_33111/en/model_doc/ibert#transformers.IBertForMultipleChoice">IBertForMultipleChoice</a> forward method, overrides the <code>__call__</code> special method.',bn,le,Mn,de,Ft,Fe,Zt,B,Ze,yn,lt,so=`I-BERT Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for | |
| Named-Entity-Recognition (NER) tasks.`,Tn,dt,ao=`This model inherits from <a href="/docs/transformers/pr_33111/en/main_classes/model#transformers.PreTrainedModel">PreTrainedModel</a>. Check the superclass documentation for the generic methods the | |
| library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads | |
| etc.)`,wn,ct,ro=`This model is also a PyTorch <a href="https://pytorch.org/docs/stable/nn.html#torch.nn.Module" rel="nofollow">torch.nn.Module</a> subclass. | |
| Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matters related to general usage | |
| and behavior.`,kn,L,qe,vn,pt,io='The <a href="/docs/transformers/pr_33111/en/model_doc/ibert#transformers.IBertForTokenClassification">IBertForTokenClassification</a> forward method, overrides the <code>__call__</code> special method.',In,ce,Jn,pe,qt,We,Wt,j,Re,$n,mt,lo=`I-BERT Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear | |
| layer on top of the hidden-states output to compute <code>span start logits</code> and <code>span end logits</code>).`,Bn,ut,co=`This model inherits from <a href="/docs/transformers/pr_33111/en/main_classes/model#transformers.PreTrainedModel">PreTrainedModel</a>. Check the superclass documentation for the generic methods the | |
| library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads | |
| etc.)`,jn,ht,po=`This model is also a PyTorch <a href="https://pytorch.org/docs/stable/nn.html#torch.nn.Module" rel="nofollow">torch.nn.Module</a> subclass. | |
| Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matters related to general usage | |
| and behavior.`,Cn,S,Ge,Un,ft,mo='The <a href="/docs/transformers/pr_33111/en/model_doc/ibert#transformers.IBertForQuestionAnswering">IBertForQuestionAnswering</a> forward method, overrides the <code>__call__</code> special method.',zn,me,xn,ue,Rt,Ve,Gt,_t,Vt;return T=new K({props:{title:"I-BERT",local:"i-bert",headingTag:"h1"}}),w=new K({props:{title:"Overview",local:"overview",headingTag:"h2"}}),be=new K({props:{title:"Resources",local:"resources",headingTag:"h2"}}),ye=new K({props:{title:"IBertConfig",local:"transformers.IBertConfig",headingTag:"h2"}}),Te=new V({props:{name:"class transformers.IBertConfig",anchor:"transformers.IBertConfig",parameters:[{name:"vocab_size",val:" = 30522"},{name:"hidden_size",val:" = 768"},{name:"num_hidden_layers",val:" = 12"},{name:"num_attention_heads",val:" = 12"},{name:"intermediate_size",val:" = 3072"},{name:"hidden_act",val:" = 'gelu'"},{name:"hidden_dropout_prob",val:" = 0.1"},{name:"attention_probs_dropout_prob",val:" = 0.1"},{name:"max_position_embeddings",val:" = 512"},{name:"type_vocab_size",val:" = 2"},{name:"initializer_range",val:" = 0.02"},{name:"layer_norm_eps",val:" = 1e-12"},{name:"pad_token_id",val:" = 1"},{name:"bos_token_id",val:" = 0"},{name:"eos_token_id",val:" = 2"},{name:"position_embedding_type",val:" = 'absolute'"},{name:"quant_mode",val:" = False"},{name:"force_dequant",val:" = 'none'"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.IBertConfig.vocab_size",description:`<strong>vocab_size</strong> (<code>int</code>, <em>optional</em>, defaults to 30522) — | |
| Vocabulary size of the I-BERT model. Defines the number of different tokens that can be represented by the | |
| <code>input_ids</code> passed when calling <a href="/docs/transformers/pr_33111/en/model_doc/ibert#transformers.IBertModel">IBertModel</a>`,name:"vocab_size"},{anchor:"transformers.IBertConfig.hidden_size",description:`<strong>hidden_size</strong> (<code>int</code>, <em>optional</em>, defaults to 768) — | |
| Dimensionality of the encoder layers and the pooler layer.`,name:"hidden_size"},{anchor:"transformers.IBertConfig.num_hidden_layers",description:`<strong>num_hidden_layers</strong> (<code>int</code>, <em>optional</em>, defaults to 12) — | |
| Number of hidden layers in the Transformer encoder.`,name:"num_hidden_layers"},{anchor:"transformers.IBertConfig.num_attention_heads",description:`<strong>num_attention_heads</strong> (<code>int</code>, <em>optional</em>, defaults to 12) — | |
| Number of attention heads for each attention layer in the Transformer encoder.`,name:"num_attention_heads"},{anchor:"transformers.IBertConfig.intermediate_size",description:`<strong>intermediate_size</strong> (<code>int</code>, <em>optional</em>, defaults to 3072) — | |
| Dimensionality of the “intermediate” (often named feed-forward) layer in the Transformer encoder.`,name:"intermediate_size"},{anchor:"transformers.IBertConfig.hidden_act",description:`<strong>hidden_act</strong> (<code>str</code> or <code>Callable</code>, <em>optional</em>, defaults to <code>"gelu"</code>) — | |
| The non-linear activation function (function or string) in the encoder and pooler. If string, <code>"gelu"</code>, | |
| <code>"relu"</code>, <code>"silu"</code> and <code>"gelu_new"</code> are supported.`,name:"hidden_act"},{anchor:"transformers.IBertConfig.hidden_dropout_prob",description:`<strong>hidden_dropout_prob</strong> (<code>float</code>, <em>optional</em>, defaults to 0.1) — | |
| The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.`,name:"hidden_dropout_prob"},{anchor:"transformers.IBertConfig.attention_probs_dropout_prob",description:`<strong>attention_probs_dropout_prob</strong> (<code>float</code>, <em>optional</em>, defaults to 0.1) — | |
| The dropout ratio for the attention probabilities.`,name:"attention_probs_dropout_prob"},{anchor:"transformers.IBertConfig.max_position_embeddings",description:`<strong>max_position_embeddings</strong> (<code>int</code>, <em>optional</em>, defaults to 512) — | |
| The maximum sequence length that this model might ever be used with. Typically set this to something large | |
| just in case (e.g., 512 or 1024 or 2048).`,name:"max_position_embeddings"},{anchor:"transformers.IBertConfig.type_vocab_size",description:`<strong>type_vocab_size</strong> (<code>int</code>, <em>optional</em>, defaults to 2) — | |
| The vocabulary size of the <code>token_type_ids</code> passed when calling <a href="/docs/transformers/pr_33111/en/model_doc/ibert#transformers.IBertModel">IBertModel</a>`,name:"type_vocab_size"},{anchor:"transformers.IBertConfig.initializer_range",description:`<strong>initializer_range</strong> (<code>float</code>, <em>optional</em>, defaults to 0.02) — | |
| The standard deviation of the truncated_normal_initializer for initializing all weight matrices.`,name:"initializer_range"},{anchor:"transformers.IBertConfig.layer_norm_eps",description:`<strong>layer_norm_eps</strong> (<code>float</code>, <em>optional</em>, defaults to 1e-12) — | |
| The epsilon used by the layer normalization layers.`,name:"layer_norm_eps"},{anchor:"transformers.IBertConfig.position_embedding_type",description:`<strong>position_embedding_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"absolute"</code>) — | |
| Type of position embedding. Choose one of <code>"absolute"</code>, <code>"relative_key"</code>, <code>"relative_key_query"</code>. For | |
| positional embeddings use <code>"absolute"</code>. For more information on <code>"relative_key"</code>, please refer to | |
| <a href="https://arxiv.org/abs/1803.02155" rel="nofollow">Self-Attention with Relative Position Representations (Shaw et al.)</a>. | |
| For more information on <code>"relative_key_query"</code>, please refer to <em>Method 4</em> in <a href="https://arxiv.org/abs/2009.13658" rel="nofollow">Improve Transformer Models | |
| with Better Relative Position Embeddings (Huang et al.)</a>.`,name:"position_embedding_type"},{anchor:"transformers.IBertConfig.quant_mode",description:`<strong>quant_mode</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether to quantize the model or not.`,name:"quant_mode"},{anchor:"transformers.IBertConfig.force_dequant",description:`<strong>force_dequant</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"none"</code>) — | |
| Force dequantize specific nonlinear layer. Dequantized layers are then executed with full precision. | |
| <code>"none"</code>, <code>"gelu"</code>, <code>"softmax"</code>, <code>"layernorm"</code> and <code>"nonlinear"</code> are supported. By default, it is set to | |
| <code>"none"</code>, which does not dequantize any layers. Please specify <code>"gelu"</code>, <code>"softmax"</code>, or <code>"layernorm"</code> to | |
| dequantize GELU, Softmax, or LayerNorm, respectively. <code>"nonlinear"</code> will dequantize all nonlinear layers, | |
| i.e., GELU, Softmax, and LayerNorm.`,name:"force_dequant"}],source:"https://github.com/huggingface/transformers/blob/vr_33111/src/transformers/models/ibert/configuration_ibert.py#L30"}}),we=new K({props:{title:"IBertModel",local:"transformers.IBertModel",headingTag:"h2"}}),ke=new V({props:{name:"class transformers.IBertModel",anchor:"transformers.IBertModel",parameters:[{name:"config",val:""},{name:"add_pooling_layer",val:" = True"}],parametersDescription:[{anchor:"transformers.IBertModel.config",description:`<strong>config</strong> (<a href="/docs/transformers/pr_33111/en/model_doc/ibert#transformers.IBertConfig">IBertConfig</a>) — Model configuration class with all the parameters of the | |
| model. Initializing with a config file does not load the weights associated with the model, only the | |
| configuration. Check out the <a href="/docs/transformers/pr_33111/en/main_classes/model#transformers.PreTrainedModel.from_pretrained">from_pretrained()</a> method to load the model weights.`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/vr_33111/src/transformers/models/ibert/modeling_ibert.py#L725"}}),ve=new V({props:{name:"forward",anchor:"transformers.IBertModel.forward",parameters:[{name:"input_ids",val:": Optional = None"},{name:"attention_mask",val:": Optional = None"},{name:"token_type_ids",val:": Optional = None"},{name:"position_ids",val:": Optional = None"},{name:"head_mask",val:": Optional = None"},{name:"inputs_embeds",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.IBertModel.forward.input_ids",description:`<strong>input_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>) — | |
| Indices of input sequence tokens in the vocabulary.</p> | |
| <p>Indices can be obtained using <a href="/docs/transformers/pr_33111/en/model_doc/auto#transformers.AutoTokenizer">AutoTokenizer</a>. See <a href="/docs/transformers/pr_33111/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.encode">PreTrainedTokenizer.encode()</a> and | |
| <a href="/docs/transformers/pr_33111/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.__call__">PreTrainedTokenizer.<strong>call</strong>()</a> for details.</p> | |
| <p><a href="../glossary#input-ids">What are input IDs?</a>`,name:"input_ids"},{anchor:"transformers.IBertModel.forward.attention_mask",description:`<strong>attention_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Mask to avoid performing attention on padding token indices. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 for tokens that are <strong>not masked</strong>,</li> | |
| <li>0 for tokens that are <strong>masked</strong>.</li> | |
| </ul> | |
| <p><a href="../glossary#attention-mask">What are attention masks?</a>`,name:"attention_mask"},{anchor:"transformers.IBertModel.forward.token_type_ids",description:`<strong>token_type_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Segment token indices to indicate first and second portions of the inputs. Indices are selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>0 corresponds to a <em>sentence A</em> token,</li> | |
| <li>1 corresponds to a <em>sentence B</em> token.</li> | |
| </ul> | |
| <p><a href="../glossary#token-type-ids">What are token type IDs?</a>`,name:"token_type_ids"},{anchor:"transformers.IBertModel.forward.position_ids",description:`<strong>position_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Indices of positions of each input sequence tokens in the position embeddings. Selected in the range <code>[0, config.max_position_embeddings - 1]</code>.</p> | |
| <p><a href="../glossary#position-ids">What are position IDs?</a>`,name:"position_ids"},{anchor:"transformers.IBertModel.forward.head_mask",description:`<strong>head_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(num_heads,)</code> or <code>(num_layers, num_heads)</code>, <em>optional</em>) — | |
| Mask to nullify selected heads of the self-attention modules. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 indicates the head is <strong>not masked</strong>,</li> | |
| <li>0 indicates the head is <strong>masked</strong>.</li> | |
| </ul>`,name:"head_mask"},{anchor:"transformers.IBertModel.forward.inputs_embeds",description:`<strong>inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) — | |
| Optionally, instead of passing <code>input_ids</code> you can choose to directly pass an embedded representation. This | |
| is useful if you want more control over how to convert <code>input_ids</code> indices into associated vectors than the | |
| model’s internal embedding lookup matrix.`,name:"inputs_embeds"},{anchor:"transformers.IBertModel.forward.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned | |
| tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.IBertModel.forward.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for | |
| more detail.`,name:"output_hidden_states"},{anchor:"transformers.IBertModel.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return a <a href="/docs/transformers/pr_33111/en/main_classes/output#transformers.utils.ModelOutput">ModelOutput</a> instead of a plain tuple.`,name:"return_dict"}],source:"https://github.com/huggingface/transformers/blob/vr_33111/src/transformers/models/ibert/modeling_ibert.py#L766",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A <a | |
| href="/docs/transformers/pr_33111/en/main_classes/output#transformers.modeling_outputs.BaseModelOutputWithPoolingAndCrossAttentions" | |
| >transformers.modeling_outputs.BaseModelOutputWithPoolingAndCrossAttentions</a> or a tuple of | |
| <code>torch.FloatTensor</code> (if <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various | |
| elements depending on the configuration (<a | |
| href="/docs/transformers/pr_33111/en/model_doc/ibert#transformers.IBertConfig" | |
| >IBertConfig</a>) and inputs.</p> | |
| <ul> | |
| <li> | |
| <p><strong>last_hidden_state</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>) — Sequence of hidden-states at the output of the last layer of the model.</p> | |
| </li> | |
| <li> | |
| <p><strong>pooler_output</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, hidden_size)</code>) — Last layer hidden-state of the first token of the sequence (classification token) after further processing | |
| through the layers used for the auxiliary pretraining task. E.g. for BERT-family of models, this returns | |
| the classification token after processing through a linear layer and a tanh activation function. The linear | |
| layer weights are trained from the next sentence prediction (classification) objective during pretraining.</p> | |
| </li> | |
| <li> | |
| <p><strong>hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings, if the model has an embedding layer, + | |
| one for the output of each layer) of shape <code>(batch_size, sequence_length, hidden_size)</code>.</p> | |
| <p>Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.</p> | |
| </li> | |
| <li> | |
| <p><strong>attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attentions weights after the attention softmax, used to compute the weighted average in the self-attention | |
| heads.</p> | |
| </li> | |
| <li> | |
| <p><strong>cross_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> and <code>config.add_cross_attention=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attentions weights of the decoder’s cross-attention layer, after the attention softmax, used to compute the | |
| weighted average in the cross-attention heads.</p> | |
| </li> | |
| <li> | |
| <p><strong>past_key_values</strong> (<code>tuple(tuple(torch.FloatTensor))</code>, <em>optional</em>, returned when <code>use_cache=True</code> is passed or when <code>config.use_cache=True</code>) — Tuple of <code>tuple(torch.FloatTensor)</code> of length <code>config.n_layers</code>, with each tuple having 2 tensors of shape | |
| <code>(batch_size, num_heads, sequence_length, embed_size_per_head)</code>) and optionally if | |
| <code>config.is_encoder_decoder=True</code> 2 additional tensors of shape <code>(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)</code>.</p> | |
| <p>Contains pre-computed hidden-states (key and values in the self-attention blocks and optionally if | |
| <code>config.is_encoder_decoder=True</code> in the cross-attention blocks) that can be used (see <code>past_key_values</code> | |
| input) to speed up sequential decoding.</p> | |
| </li> | |
| </ul> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/transformers/pr_33111/en/main_classes/output#transformers.modeling_outputs.BaseModelOutputWithPoolingAndCrossAttentions" | |
| >transformers.modeling_outputs.BaseModelOutputWithPoolingAndCrossAttentions</a> or <code>tuple(torch.FloatTensor)</code></p> | |
| `}}),te=new gt({props:{$$slots:{default:[Mo]},$$scope:{ctx:k}}}),ne=new Xe({props:{anchor:"transformers.IBertModel.forward.example",$$slots:{default:[yo]},$$scope:{ctx:k}}}),Ie=new K({props:{title:"IBertForMaskedLM",local:"transformers.IBertForMaskedLM",headingTag:"h2"}}),Je=new V({props:{name:"class transformers.IBertForMaskedLM",anchor:"transformers.IBertForMaskedLM",parameters:[{name:"config",val:""}],parametersDescription:[{anchor:"transformers.IBertForMaskedLM.config",description:`<strong>config</strong> (<a href="/docs/transformers/pr_33111/en/model_doc/ibert#transformers.IBertConfig">IBertConfig</a>) — Model configuration class with all the parameters of the | |
| model. Initializing with a config file does not load the weights associated with the model, only the | |
| configuration. Check out the <a href="/docs/transformers/pr_33111/en/main_classes/model#transformers.PreTrainedModel.from_pretrained">from_pretrained()</a> method to load the model weights.`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/vr_33111/src/transformers/models/ibert/modeling_ibert.py#L850"}}),$e=new V({props:{name:"forward",anchor:"transformers.IBertForMaskedLM.forward",parameters:[{name:"input_ids",val:": Optional = None"},{name:"attention_mask",val:": Optional = None"},{name:"token_type_ids",val:": Optional = None"},{name:"position_ids",val:": Optional = None"},{name:"head_mask",val:": Optional = None"},{name:"inputs_embeds",val:": Optional = None"},{name:"labels",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.IBertForMaskedLM.forward.input_ids",description:`<strong>input_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>) — | |
| Indices of input sequence tokens in the vocabulary.</p> | |
| <p>Indices can be obtained using <a href="/docs/transformers/pr_33111/en/model_doc/auto#transformers.AutoTokenizer">AutoTokenizer</a>. See <a href="/docs/transformers/pr_33111/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.encode">PreTrainedTokenizer.encode()</a> and | |
| <a href="/docs/transformers/pr_33111/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.__call__">PreTrainedTokenizer.<strong>call</strong>()</a> for details.</p> | |
| <p><a href="../glossary#input-ids">What are input IDs?</a>`,name:"input_ids"},{anchor:"transformers.IBertForMaskedLM.forward.attention_mask",description:`<strong>attention_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Mask to avoid performing attention on padding token indices. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 for tokens that are <strong>not masked</strong>,</li> | |
| <li>0 for tokens that are <strong>masked</strong>.</li> | |
| </ul> | |
| <p><a href="../glossary#attention-mask">What are attention masks?</a>`,name:"attention_mask"},{anchor:"transformers.IBertForMaskedLM.forward.token_type_ids",description:`<strong>token_type_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Segment token indices to indicate first and second portions of the inputs. Indices are selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>0 corresponds to a <em>sentence A</em> token,</li> | |
| <li>1 corresponds to a <em>sentence B</em> token.</li> | |
| </ul> | |
| <p><a href="../glossary#token-type-ids">What are token type IDs?</a>`,name:"token_type_ids"},{anchor:"transformers.IBertForMaskedLM.forward.position_ids",description:`<strong>position_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Indices of positions of each input sequence tokens in the position embeddings. Selected in the range <code>[0, config.max_position_embeddings - 1]</code>.</p> | |
| <p><a href="../glossary#position-ids">What are position IDs?</a>`,name:"position_ids"},{anchor:"transformers.IBertForMaskedLM.forward.head_mask",description:`<strong>head_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(num_heads,)</code> or <code>(num_layers, num_heads)</code>, <em>optional</em>) — | |
| Mask to nullify selected heads of the self-attention modules. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 indicates the head is <strong>not masked</strong>,</li> | |
| <li>0 indicates the head is <strong>masked</strong>.</li> | |
| </ul>`,name:"head_mask"},{anchor:"transformers.IBertForMaskedLM.forward.inputs_embeds",description:`<strong>inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) — | |
| Optionally, instead of passing <code>input_ids</code> you can choose to directly pass an embedded representation. This | |
| is useful if you want more control over how to convert <code>input_ids</code> indices into associated vectors than the | |
| model’s internal embedding lookup matrix.`,name:"inputs_embeds"},{anchor:"transformers.IBertForMaskedLM.forward.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned | |
| tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.IBertForMaskedLM.forward.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for | |
| more detail.`,name:"output_hidden_states"},{anchor:"transformers.IBertForMaskedLM.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return a <a href="/docs/transformers/pr_33111/en/main_classes/output#transformers.utils.ModelOutput">ModelOutput</a> instead of a plain tuple.`,name:"return_dict"},{anchor:"transformers.IBertForMaskedLM.forward.labels",description:`<strong>labels</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Labels for computing the masked language modeling loss. Indices should be in <code>[-100, 0, ..., config.vocab_size]</code> (see <code>input_ids</code> docstring) Tokens with indices set to <code>-100</code> are ignored (masked), the | |
| loss is only computed for the tokens with labels in <code>[0, ..., config.vocab_size]</code>`,name:"labels"},{anchor:"transformers.IBertForMaskedLM.forward.kwargs",description:`<strong>kwargs</strong> (<code>Dict[str, any]</code>, <em>optional</em>, defaults to <code>{}</code>) — | |
| Used to hide legacy arguments that have been deprecated.`,name:"kwargs"}],source:"https://github.com/huggingface/transformers/blob/vr_33111/src/transformers/models/ibert/modeling_ibert.py#L870",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A <a | |
| href="/docs/transformers/pr_33111/en/main_classes/output#transformers.modeling_outputs.MaskedLMOutput" | |
| >transformers.modeling_outputs.MaskedLMOutput</a> or a tuple of | |
| <code>torch.FloatTensor</code> (if <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various | |
| elements depending on the configuration (<a | |
| href="/docs/transformers/pr_33111/en/model_doc/ibert#transformers.IBertConfig" | |
| >IBertConfig</a>) and inputs.</p> | |
| <ul> | |
| <li> | |
| <p><strong>loss</strong> (<code>torch.FloatTensor</code> of shape <code>(1,)</code>, <em>optional</em>, returned when <code>labels</code> is provided) — Masked language modeling (MLM) loss.</p> | |
| </li> | |
| <li> | |
| <p><strong>logits</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, config.vocab_size)</code>) — Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).</p> | |
| </li> | |
| <li> | |
| <p><strong>hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings, if the model has an embedding layer, + | |
| one for the output of each layer) of shape <code>(batch_size, sequence_length, hidden_size)</code>.</p> | |
| <p>Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.</p> | |
| </li> | |
| <li> | |
| <p><strong>attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attentions weights after the attention softmax, used to compute the weighted average in the self-attention | |
| heads.</p> | |
| </li> | |
| </ul> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/transformers/pr_33111/en/main_classes/output#transformers.modeling_outputs.MaskedLMOutput" | |
| >transformers.modeling_outputs.MaskedLMOutput</a> or <code>tuple(torch.FloatTensor)</code></p> | |
| `}}),oe=new gt({props:{$$slots:{default:[To]},$$scope:{ctx:k}}}),se=new Xe({props:{anchor:"transformers.IBertForMaskedLM.forward.example",$$slots:{default:[wo]},$$scope:{ctx:k}}}),Be=new K({props:{title:"IBertForSequenceClassification",local:"transformers.IBertForSequenceClassification",headingTag:"h2"}}),je=new V({props:{name:"class transformers.IBertForSequenceClassification",anchor:"transformers.IBertForSequenceClassification",parameters:[{name:"config",val:""}],parametersDescription:[{anchor:"transformers.IBertForSequenceClassification.config",description:`<strong>config</strong> (<a href="/docs/transformers/pr_33111/en/model_doc/ibert#transformers.IBertConfig">IBertConfig</a>) — Model configuration class with all the parameters of the | |
| model. Initializing with a config file does not load the weights associated with the model, only the | |
| configuration. Check out the <a href="/docs/transformers/pr_33111/en/main_classes/model#transformers.PreTrainedModel.from_pretrained">from_pretrained()</a> method to load the model weights.`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/vr_33111/src/transformers/models/ibert/modeling_ibert.py#L962"}}),Ce=new V({props:{name:"forward",anchor:"transformers.IBertForSequenceClassification.forward",parameters:[{name:"input_ids",val:": Optional = None"},{name:"attention_mask",val:": Optional = None"},{name:"token_type_ids",val:": Optional = None"},{name:"position_ids",val:": Optional = None"},{name:"head_mask",val:": Optional = None"},{name:"inputs_embeds",val:": Optional = None"},{name:"labels",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.IBertForSequenceClassification.forward.input_ids",description:`<strong>input_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>) — | |
| Indices of input sequence tokens in the vocabulary.</p> | |
| <p>Indices can be obtained using <a href="/docs/transformers/pr_33111/en/model_doc/auto#transformers.AutoTokenizer">AutoTokenizer</a>. See <a href="/docs/transformers/pr_33111/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.encode">PreTrainedTokenizer.encode()</a> and | |
| <a href="/docs/transformers/pr_33111/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.__call__">PreTrainedTokenizer.<strong>call</strong>()</a> for details.</p> | |
| <p><a href="../glossary#input-ids">What are input IDs?</a>`,name:"input_ids"},{anchor:"transformers.IBertForSequenceClassification.forward.attention_mask",description:`<strong>attention_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Mask to avoid performing attention on padding token indices. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 for tokens that are <strong>not masked</strong>,</li> | |
| <li>0 for tokens that are <strong>masked</strong>.</li> | |
| </ul> | |
| <p><a href="../glossary#attention-mask">What are attention masks?</a>`,name:"attention_mask"},{anchor:"transformers.IBertForSequenceClassification.forward.token_type_ids",description:`<strong>token_type_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Segment token indices to indicate first and second portions of the inputs. Indices are selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>0 corresponds to a <em>sentence A</em> token,</li> | |
| <li>1 corresponds to a <em>sentence B</em> token.</li> | |
| </ul> | |
| <p><a href="../glossary#token-type-ids">What are token type IDs?</a>`,name:"token_type_ids"},{anchor:"transformers.IBertForSequenceClassification.forward.position_ids",description:`<strong>position_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Indices of positions of each input sequence tokens in the position embeddings. Selected in the range <code>[0, config.max_position_embeddings - 1]</code>.</p> | |
| <p><a href="../glossary#position-ids">What are position IDs?</a>`,name:"position_ids"},{anchor:"transformers.IBertForSequenceClassification.forward.head_mask",description:`<strong>head_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(num_heads,)</code> or <code>(num_layers, num_heads)</code>, <em>optional</em>) — | |
| Mask to nullify selected heads of the self-attention modules. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 indicates the head is <strong>not masked</strong>,</li> | |
| <li>0 indicates the head is <strong>masked</strong>.</li> | |
| </ul>`,name:"head_mask"},{anchor:"transformers.IBertForSequenceClassification.forward.inputs_embeds",description:`<strong>inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) — | |
| Optionally, instead of passing <code>input_ids</code> you can choose to directly pass an embedded representation. This | |
| is useful if you want more control over how to convert <code>input_ids</code> indices into associated vectors than the | |
| model’s internal embedding lookup matrix.`,name:"inputs_embeds"},{anchor:"transformers.IBertForSequenceClassification.forward.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned | |
| tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.IBertForSequenceClassification.forward.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for | |
| more detail.`,name:"output_hidden_states"},{anchor:"transformers.IBertForSequenceClassification.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return a <a href="/docs/transformers/pr_33111/en/main_classes/output#transformers.utils.ModelOutput">ModelOutput</a> instead of a plain tuple.`,name:"return_dict"},{anchor:"transformers.IBertForSequenceClassification.forward.labels",description:`<strong>labels</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size,)</code>, <em>optional</em>) — | |
| Labels for computing the sequence classification/regression loss. Indices should be in <code>[0, ..., config.num_labels - 1]</code>. If <code>config.num_labels == 1</code> a regression loss is computed (Mean-Square loss), If | |
| <code>config.num_labels > 1</code> a classification loss is computed (Cross-Entropy).`,name:"labels"}],source:"https://github.com/huggingface/transformers/blob/vr_33111/src/transformers/models/ibert/modeling_ibert.py#L980",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A <a | |
| href="/docs/transformers/pr_33111/en/main_classes/output#transformers.modeling_outputs.SequenceClassifierOutput" | |
| >transformers.modeling_outputs.SequenceClassifierOutput</a> or a tuple of | |
| <code>torch.FloatTensor</code> (if <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various | |
| elements depending on the configuration (<a | |
| href="/docs/transformers/pr_33111/en/model_doc/ibert#transformers.IBertConfig" | |
| >IBertConfig</a>) and inputs.</p> | |
| <ul> | |
| <li> | |
| <p><strong>loss</strong> (<code>torch.FloatTensor</code> of shape <code>(1,)</code>, <em>optional</em>, returned when <code>labels</code> is provided) — Classification (or regression if config.num_labels==1) loss.</p> | |
| </li> | |
| <li> | |
| <p><strong>logits</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, config.num_labels)</code>) — Classification (or regression if config.num_labels==1) scores (before SoftMax).</p> | |
| </li> | |
| <li> | |
| <p><strong>hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings, if the model has an embedding layer, + | |
| one for the output of each layer) of shape <code>(batch_size, sequence_length, hidden_size)</code>.</p> | |
| <p>Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.</p> | |
| </li> | |
| <li> | |
| <p><strong>attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attentions weights after the attention softmax, used to compute the weighted average in the self-attention | |
| heads.</p> | |
| </li> | |
| </ul> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/transformers/pr_33111/en/main_classes/output#transformers.modeling_outputs.SequenceClassifierOutput" | |
| >transformers.modeling_outputs.SequenceClassifierOutput</a> or <code>tuple(torch.FloatTensor)</code></p> | |
| `}}),ae=new gt({props:{$$slots:{default:[ko]},$$scope:{ctx:k}}}),re=new Xe({props:{anchor:"transformers.IBertForSequenceClassification.forward.example",$$slots:{default:[vo]},$$scope:{ctx:k}}}),ie=new Xe({props:{anchor:"transformers.IBertForSequenceClassification.forward.example-2",$$slots:{default:[Io]},$$scope:{ctx:k}}}),Ue=new K({props:{title:"IBertForMultipleChoice",local:"transformers.IBertForMultipleChoice",headingTag:"h2"}}),ze=new V({props:{name:"class transformers.IBertForMultipleChoice",anchor:"transformers.IBertForMultipleChoice",parameters:[{name:"config",val:""}],parametersDescription:[{anchor:"transformers.IBertForMultipleChoice.config",description:`<strong>config</strong> (<a href="/docs/transformers/pr_33111/en/model_doc/ibert#transformers.IBertConfig">IBertConfig</a>) — Model configuration class with all the parameters of the | |
| model. Initializing with a config file does not load the weights associated with the model, only the | |
| configuration. Check out the <a href="/docs/transformers/pr_33111/en/main_classes/model#transformers.PreTrainedModel.from_pretrained">from_pretrained()</a> method to load the model weights.`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/vr_33111/src/transformers/models/ibert/modeling_ibert.py#L1055"}}),xe=new V({props:{name:"forward",anchor:"transformers.IBertForMultipleChoice.forward",parameters:[{name:"input_ids",val:": Optional = None"},{name:"token_type_ids",val:": Optional = None"},{name:"attention_mask",val:": Optional = None"},{name:"labels",val:": Optional = None"},{name:"position_ids",val:": Optional = None"},{name:"head_mask",val:": Optional = None"},{name:"inputs_embeds",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.IBertForMultipleChoice.forward.input_ids",description:`<strong>input_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, num_choices, sequence_length)</code>) — | |
| Indices of input sequence tokens in the vocabulary.</p> | |
| <p>Indices can be obtained using <a href="/docs/transformers/pr_33111/en/model_doc/auto#transformers.AutoTokenizer">AutoTokenizer</a>. See <a href="/docs/transformers/pr_33111/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.encode">PreTrainedTokenizer.encode()</a> and | |
| <a href="/docs/transformers/pr_33111/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.__call__">PreTrainedTokenizer.<strong>call</strong>()</a> for details.</p> | |
| <p><a href="../glossary#input-ids">What are input IDs?</a>`,name:"input_ids"},{anchor:"transformers.IBertForMultipleChoice.forward.attention_mask",description:`<strong>attention_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_choices, sequence_length)</code>, <em>optional</em>) — | |
| Mask to avoid performing attention on padding token indices. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 for tokens that are <strong>not masked</strong>,</li> | |
| <li>0 for tokens that are <strong>masked</strong>.</li> | |
| </ul> | |
| <p><a href="../glossary#attention-mask">What are attention masks?</a>`,name:"attention_mask"},{anchor:"transformers.IBertForMultipleChoice.forward.token_type_ids",description:`<strong>token_type_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, num_choices, sequence_length)</code>, <em>optional</em>) — | |
| Segment token indices to indicate first and second portions of the inputs. Indices are selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>0 corresponds to a <em>sentence A</em> token,</li> | |
| <li>1 corresponds to a <em>sentence B</em> token.</li> | |
| </ul> | |
| <p><a href="../glossary#token-type-ids">What are token type IDs?</a>`,name:"token_type_ids"},{anchor:"transformers.IBertForMultipleChoice.forward.position_ids",description:`<strong>position_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, num_choices, sequence_length)</code>, <em>optional</em>) — | |
| Indices of positions of each input sequence tokens in the position embeddings. Selected in the range <code>[0, config.max_position_embeddings - 1]</code>.</p> | |
| <p><a href="../glossary#position-ids">What are position IDs?</a>`,name:"position_ids"},{anchor:"transformers.IBertForMultipleChoice.forward.head_mask",description:`<strong>head_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(num_heads,)</code> or <code>(num_layers, num_heads)</code>, <em>optional</em>) — | |
| Mask to nullify selected heads of the self-attention modules. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 indicates the head is <strong>not masked</strong>,</li> | |
| <li>0 indicates the head is <strong>masked</strong>.</li> | |
| </ul>`,name:"head_mask"},{anchor:"transformers.IBertForMultipleChoice.forward.inputs_embeds",description:`<strong>inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_choices, sequence_length, hidden_size)</code>, <em>optional</em>) — | |
| Optionally, instead of passing <code>input_ids</code> you can choose to directly pass an embedded representation. This | |
| is useful if you want more control over how to convert <code>input_ids</code> indices into associated vectors than the | |
| model’s internal embedding lookup matrix.`,name:"inputs_embeds"},{anchor:"transformers.IBertForMultipleChoice.forward.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned | |
| tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.IBertForMultipleChoice.forward.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for | |
| more detail.`,name:"output_hidden_states"},{anchor:"transformers.IBertForMultipleChoice.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return a <a href="/docs/transformers/pr_33111/en/main_classes/output#transformers.utils.ModelOutput">ModelOutput</a> instead of a plain tuple.`,name:"return_dict"},{anchor:"transformers.IBertForMultipleChoice.forward.labels",description:`<strong>labels</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size,)</code>, <em>optional</em>) — | |
| Labels for computing the multiple choice classification loss. Indices should be in <code>[0, ..., num_choices-1]</code> where <code>num_choices</code> is the size of the second dimension of the input tensors. (See | |
| <code>input_ids</code> above)`,name:"labels"}],source:"https://github.com/huggingface/transformers/blob/vr_33111/src/transformers/models/ibert/modeling_ibert.py#L1073",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A <a | |
| href="/docs/transformers/pr_33111/en/main_classes/output#transformers.modeling_outputs.MultipleChoiceModelOutput" | |
| >transformers.modeling_outputs.MultipleChoiceModelOutput</a> or a tuple of | |
| <code>torch.FloatTensor</code> (if <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various | |
| elements depending on the configuration (<a | |
| href="/docs/transformers/pr_33111/en/model_doc/ibert#transformers.IBertConfig" | |
| >IBertConfig</a>) and inputs.</p> | |
| <ul> | |
| <li> | |
| <p><strong>loss</strong> (<code>torch.FloatTensor</code> of shape <em>(1,)</em>, <em>optional</em>, returned when <code>labels</code> is provided) — Classification loss.</p> | |
| </li> | |
| <li> | |
| <p><strong>logits</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_choices)</code>) — <em>num_choices</em> is the second dimension of the input tensors. (see <em>input_ids</em> above).</p> | |
| <p>Classification scores (before SoftMax).</p> | |
| </li> | |
| <li> | |
| <p><strong>hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings, if the model has an embedding layer, + | |
| one for the output of each layer) of shape <code>(batch_size, sequence_length, hidden_size)</code>.</p> | |
| <p>Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.</p> | |
| </li> | |
| <li> | |
| <p><strong>attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attentions weights after the attention softmax, used to compute the weighted average in the self-attention | |
| heads.</p> | |
| </li> | |
| </ul> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/transformers/pr_33111/en/main_classes/output#transformers.modeling_outputs.MultipleChoiceModelOutput" | |
| >transformers.modeling_outputs.MultipleChoiceModelOutput</a> or <code>tuple(torch.FloatTensor)</code></p> | |
| `}}),le=new gt({props:{$$slots:{default:[Jo]},$$scope:{ctx:k}}}),de=new Xe({props:{anchor:"transformers.IBertForMultipleChoice.forward.example",$$slots:{default:[$o]},$$scope:{ctx:k}}}),Fe=new K({props:{title:"IBertForTokenClassification",local:"transformers.IBertForTokenClassification",headingTag:"h2"}}),Ze=new V({props:{name:"class transformers.IBertForTokenClassification",anchor:"transformers.IBertForTokenClassification",parameters:[{name:"config",val:""}],parametersDescription:[{anchor:"transformers.IBertForTokenClassification.config",description:`<strong>config</strong> (<a href="/docs/transformers/pr_33111/en/model_doc/ibert#transformers.IBertConfig">IBertConfig</a>) — Model configuration class with all the parameters of the | |
| model. Initializing with a config file does not load the weights associated with the model, only the | |
| configuration. Check out the <a href="/docs/transformers/pr_33111/en/main_classes/model#transformers.PreTrainedModel.from_pretrained">from_pretrained()</a> method to load the model weights.`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/vr_33111/src/transformers/models/ibert/modeling_ibert.py#L1145"}}),qe=new V({props:{name:"forward",anchor:"transformers.IBertForTokenClassification.forward",parameters:[{name:"input_ids",val:": Optional = None"},{name:"attention_mask",val:": Optional = None"},{name:"token_type_ids",val:": Optional = None"},{name:"position_ids",val:": Optional = None"},{name:"head_mask",val:": Optional = None"},{name:"inputs_embeds",val:": Optional = None"},{name:"labels",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.IBertForTokenClassification.forward.input_ids",description:`<strong>input_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>) — | |
| Indices of input sequence tokens in the vocabulary.</p> | |
| <p>Indices can be obtained using <a href="/docs/transformers/pr_33111/en/model_doc/auto#transformers.AutoTokenizer">AutoTokenizer</a>. See <a href="/docs/transformers/pr_33111/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.encode">PreTrainedTokenizer.encode()</a> and | |
| <a href="/docs/transformers/pr_33111/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.__call__">PreTrainedTokenizer.<strong>call</strong>()</a> for details.</p> | |
| <p><a href="../glossary#input-ids">What are input IDs?</a>`,name:"input_ids"},{anchor:"transformers.IBertForTokenClassification.forward.attention_mask",description:`<strong>attention_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Mask to avoid performing attention on padding token indices. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 for tokens that are <strong>not masked</strong>,</li> | |
| <li>0 for tokens that are <strong>masked</strong>.</li> | |
| </ul> | |
| <p><a href="../glossary#attention-mask">What are attention masks?</a>`,name:"attention_mask"},{anchor:"transformers.IBertForTokenClassification.forward.token_type_ids",description:`<strong>token_type_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Segment token indices to indicate first and second portions of the inputs. Indices are selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>0 corresponds to a <em>sentence A</em> token,</li> | |
| <li>1 corresponds to a <em>sentence B</em> token.</li> | |
| </ul> | |
| <p><a href="../glossary#token-type-ids">What are token type IDs?</a>`,name:"token_type_ids"},{anchor:"transformers.IBertForTokenClassification.forward.position_ids",description:`<strong>position_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Indices of positions of each input sequence tokens in the position embeddings. Selected in the range <code>[0, config.max_position_embeddings - 1]</code>.</p> | |
| <p><a href="../glossary#position-ids">What are position IDs?</a>`,name:"position_ids"},{anchor:"transformers.IBertForTokenClassification.forward.head_mask",description:`<strong>head_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(num_heads,)</code> or <code>(num_layers, num_heads)</code>, <em>optional</em>) — | |
| Mask to nullify selected heads of the self-attention modules. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 indicates the head is <strong>not masked</strong>,</li> | |
| <li>0 indicates the head is <strong>masked</strong>.</li> | |
| </ul>`,name:"head_mask"},{anchor:"transformers.IBertForTokenClassification.forward.inputs_embeds",description:`<strong>inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) — | |
| Optionally, instead of passing <code>input_ids</code> you can choose to directly pass an embedded representation. This | |
| is useful if you want more control over how to convert <code>input_ids</code> indices into associated vectors than the | |
| model’s internal embedding lookup matrix.`,name:"inputs_embeds"},{anchor:"transformers.IBertForTokenClassification.forward.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned | |
| tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.IBertForTokenClassification.forward.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for | |
| more detail.`,name:"output_hidden_states"},{anchor:"transformers.IBertForTokenClassification.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return a <a href="/docs/transformers/pr_33111/en/main_classes/output#transformers.utils.ModelOutput">ModelOutput</a> instead of a plain tuple.`,name:"return_dict"},{anchor:"transformers.IBertForTokenClassification.forward.labels",description:`<strong>labels</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Labels for computing the token classification loss. Indices should be in <code>[0, ..., config.num_labels - 1]</code>.`,name:"labels"}],source:"https://github.com/huggingface/transformers/blob/vr_33111/src/transformers/models/ibert/modeling_ibert.py#L1164",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A <a | |
| href="/docs/transformers/pr_33111/en/main_classes/output#transformers.modeling_outputs.TokenClassifierOutput" | |
| >transformers.modeling_outputs.TokenClassifierOutput</a> or a tuple of | |
| <code>torch.FloatTensor</code> (if <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various | |
| elements depending on the configuration (<a | |
| href="/docs/transformers/pr_33111/en/model_doc/ibert#transformers.IBertConfig" | |
| >IBertConfig</a>) and inputs.</p> | |
| <ul> | |
| <li> | |
| <p><strong>loss</strong> (<code>torch.FloatTensor</code> of shape <code>(1,)</code>, <em>optional</em>, returned when <code>labels</code> is provided) — Classification loss.</p> | |
| </li> | |
| <li> | |
| <p><strong>logits</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, config.num_labels)</code>) — Classification scores (before SoftMax).</p> | |
| </li> | |
| <li> | |
| <p><strong>hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings, if the model has an embedding layer, + | |
| one for the output of each layer) of shape <code>(batch_size, sequence_length, hidden_size)</code>.</p> | |
| <p>Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.</p> | |
| </li> | |
| <li> | |
| <p><strong>attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attentions weights after the attention softmax, used to compute the weighted average in the self-attention | |
| heads.</p> | |
| </li> | |
| </ul> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/transformers/pr_33111/en/main_classes/output#transformers.modeling_outputs.TokenClassifierOutput" | |
| >transformers.modeling_outputs.TokenClassifierOutput</a> or <code>tuple(torch.FloatTensor)</code></p> | |
| `}}),ce=new gt({props:{$$slots:{default:[Bo]},$$scope:{ctx:k}}}),pe=new Xe({props:{anchor:"transformers.IBertForTokenClassification.forward.example",$$slots:{default:[jo]},$$scope:{ctx:k}}}),We=new K({props:{title:"IBertForQuestionAnswering",local:"transformers.IBertForQuestionAnswering",headingTag:"h2"}}),Re=new V({props:{name:"class transformers.IBertForQuestionAnswering",anchor:"transformers.IBertForQuestionAnswering",parameters:[{name:"config",val:""}],parametersDescription:[{anchor:"transformers.IBertForQuestionAnswering.config",description:`<strong>config</strong> (<a href="/docs/transformers/pr_33111/en/model_doc/ibert#transformers.IBertConfig">IBertConfig</a>) — Model configuration class with all the parameters of the | |
| model. Initializing with a config file does not load the weights associated with the model, only the | |
| configuration. Check out the <a href="/docs/transformers/pr_33111/en/main_classes/model#transformers.PreTrainedModel.from_pretrained">from_pretrained()</a> method to load the model weights.`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/vr_33111/src/transformers/models/ibert/modeling_ibert.py#L1242"}}),Ge=new V({props:{name:"forward",anchor:"transformers.IBertForQuestionAnswering.forward",parameters:[{name:"input_ids",val:": Optional = None"},{name:"attention_mask",val:": Optional = None"},{name:"token_type_ids",val:": Optional = None"},{name:"position_ids",val:": Optional = None"},{name:"head_mask",val:": Optional = None"},{name:"inputs_embeds",val:": Optional = None"},{name:"start_positions",val:": Optional = None"},{name:"end_positions",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.IBertForQuestionAnswering.forward.input_ids",description:`<strong>input_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>) — | |
| Indices of input sequence tokens in the vocabulary.</p> | |
| <p>Indices can be obtained using <a href="/docs/transformers/pr_33111/en/model_doc/auto#transformers.AutoTokenizer">AutoTokenizer</a>. See <a href="/docs/transformers/pr_33111/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.encode">PreTrainedTokenizer.encode()</a> and | |
| <a href="/docs/transformers/pr_33111/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.__call__">PreTrainedTokenizer.<strong>call</strong>()</a> for details.</p> | |
| <p><a href="../glossary#input-ids">What are input IDs?</a>`,name:"input_ids"},{anchor:"transformers.IBertForQuestionAnswering.forward.attention_mask",description:`<strong>attention_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Mask to avoid performing attention on padding token indices. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 for tokens that are <strong>not masked</strong>,</li> | |
| <li>0 for tokens that are <strong>masked</strong>.</li> | |
| </ul> | |
| <p><a href="../glossary#attention-mask">What are attention masks?</a>`,name:"attention_mask"},{anchor:"transformers.IBertForQuestionAnswering.forward.token_type_ids",description:`<strong>token_type_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Segment token indices to indicate first and second portions of the inputs. Indices are selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>0 corresponds to a <em>sentence A</em> token,</li> | |
| <li>1 corresponds to a <em>sentence B</em> token.</li> | |
| </ul> | |
| <p><a href="../glossary#token-type-ids">What are token type IDs?</a>`,name:"token_type_ids"},{anchor:"transformers.IBertForQuestionAnswering.forward.position_ids",description:`<strong>position_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) — | |
| Indices of positions of each input sequence tokens in the position embeddings. Selected in the range <code>[0, config.max_position_embeddings - 1]</code>.</p> | |
| <p><a href="../glossary#position-ids">What are position IDs?</a>`,name:"position_ids"},{anchor:"transformers.IBertForQuestionAnswering.forward.head_mask",description:`<strong>head_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(num_heads,)</code> or <code>(num_layers, num_heads)</code>, <em>optional</em>) — | |
| Mask to nullify selected heads of the self-attention modules. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 indicates the head is <strong>not masked</strong>,</li> | |
| <li>0 indicates the head is <strong>masked</strong>.</li> | |
| </ul>`,name:"head_mask"},{anchor:"transformers.IBertForQuestionAnswering.forward.inputs_embeds",description:`<strong>inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) — | |
| Optionally, instead of passing <code>input_ids</code> you can choose to directly pass an embedded representation. This | |
| is useful if you want more control over how to convert <code>input_ids</code> indices into associated vectors than the | |
| model’s internal embedding lookup matrix.`,name:"inputs_embeds"},{anchor:"transformers.IBertForQuestionAnswering.forward.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned | |
| tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.IBertForQuestionAnswering.forward.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for | |
| more detail.`,name:"output_hidden_states"},{anchor:"transformers.IBertForQuestionAnswering.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return a <a href="/docs/transformers/pr_33111/en/main_classes/output#transformers.utils.ModelOutput">ModelOutput</a> instead of a plain tuple.`,name:"return_dict"},{anchor:"transformers.IBertForQuestionAnswering.forward.start_positions",description:`<strong>start_positions</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size,)</code>, <em>optional</em>) — | |
| Labels for position (index) of the start of the labelled span for computing the token classification loss. | |
| Positions are clamped to the length of the sequence (<code>sequence_length</code>). Position outside of the sequence | |
| are not taken into account for computing the loss.`,name:"start_positions"},{anchor:"transformers.IBertForQuestionAnswering.forward.end_positions",description:`<strong>end_positions</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size,)</code>, <em>optional</em>) — | |
| Labels for position (index) of the end of the labelled span for computing the token classification loss. | |
| Positions are clamped to the length of the sequence (<code>sequence_length</code>). Position outside of the sequence | |
| are not taken into account for computing the loss.`,name:"end_positions"}],source:"https://github.com/huggingface/transformers/blob/vr_33111/src/transformers/models/ibert/modeling_ibert.py#L1260",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A <a | |
| href="/docs/transformers/pr_33111/en/main_classes/output#transformers.modeling_outputs.QuestionAnsweringModelOutput" | |
| >transformers.modeling_outputs.QuestionAnsweringModelOutput</a> or a tuple of | |
| <code>torch.FloatTensor</code> (if <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various | |
| elements depending on the configuration (<a | |
| href="/docs/transformers/pr_33111/en/model_doc/ibert#transformers.IBertConfig" | |
| >IBertConfig</a>) and inputs.</p> | |
| <ul> | |
| <li> | |
| <p><strong>loss</strong> (<code>torch.FloatTensor</code> of shape <code>(1,)</code>, <em>optional</em>, returned when <code>labels</code> is provided) — Total span extraction loss is the sum of a Cross-Entropy for the start and end positions.</p> | |
| </li> | |
| <li> | |
| <p><strong>start_logits</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length)</code>) — Span-start scores (before SoftMax).</p> | |
| </li> | |
| <li> | |
| <p><strong>end_logits</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length)</code>) — Span-end scores (before SoftMax).</p> | |
| </li> | |
| <li> | |
| <p><strong>hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings, if the model has an embedding layer, + | |
| one for the output of each layer) of shape <code>(batch_size, sequence_length, hidden_size)</code>.</p> | |
| <p>Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.</p> | |
| </li> | |
| <li> | |
| <p><strong>attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attentions weights after the attention softmax, used to compute the weighted average in the self-attention | |
| heads.</p> | |
| </li> | |
| </ul> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/transformers/pr_33111/en/main_classes/output#transformers.modeling_outputs.QuestionAnsweringModelOutput" | |
| >transformers.modeling_outputs.QuestionAnsweringModelOutput</a> or <code>tuple(torch.FloatTensor)</code></p> | |
| `}}),me=new gt({props:{$$slots:{default:[Co]},$$scope:{ctx:k}}}),ue=new Xe({props:{anchor:"transformers.IBertForQuestionAnswering.forward.example",$$slots:{default:[Uo]},$$scope:{ctx:k}}}),Ve=new bo({props:{source:"https://github.com/huggingface/transformers/blob/main/docs/source/en/model_doc/ibert.md"}}),{c(){t=c("meta"),y=a(),i=c("p"),d=a(),h(T.$$.fragment),n=a(),h(w.$$.fragment),bt=a(),he=c("p"),he.innerHTML=Fn,Mt=a(),fe=c("p"),fe.textContent=Zn,yt=a(),ge=c("p"),ge.innerHTML=qn,Tt=a(),_e=c("p"),_e.innerHTML=Wn,wt=a(),h(be.$$.fragment),kt=a(),Me=c("ul"),Me.innerHTML=Rn,vt=a(),h(ye.$$.fragment),It=a(),Q=c("div"),h(Te.$$.fragment),Xt=a(),Ee=c("p"),Ee.innerHTML=Gn,Nt=a(),He=c("p"),He.innerHTML=Vn,Jt=a(),h(we.$$.fragment),$t=a(),v=c("div"),h(ke.$$.fragment),Et=a(),Le=c("p"),Le.textContent=Xn,Ht=a(),Se=c("p"),Se.innerHTML=Nn,Lt=a(),Qe=c("p"),Qe.innerHTML=En,St=a(),Ye=c("p"),Ye.innerHTML=Hn,Qt=a(),N=c("div"),h(ve.$$.fragment),Yt=a(),Ae=c("p"),Ae.innerHTML=Ln,At=a(),h(te.$$.fragment),Pt=a(),h(ne.$$.fragment),Bt=a(),h(Ie.$$.fragment),jt=a(),I=c("div"),h(Je.$$.fragment),Ot=a(),Pe=c("p"),Pe.innerHTML=Sn,Dt=a(),Oe=c("p"),Oe.innerHTML=Qn,Kt=a(),De=c("p"),De.innerHTML=Yn,en=a(),E=c("div"),h($e.$$.fragment),tn=a(),Ke=c("p"),Ke.innerHTML=An,nn=a(),h(oe.$$.fragment),on=a(),h(se.$$.fragment),Ct=a(),h(Be.$$.fragment),Ut=a(),J=c("div"),h(je.$$.fragment),sn=a(),et=c("p"),et.textContent=Pn,an=a(),tt=c("p"),tt.innerHTML=On,rn=a(),nt=c("p"),nt.innerHTML=Dn,ln=a(),x=c("div"),h(Ce.$$.fragment),dn=a(),ot=c("p"),ot.innerHTML=Kn,cn=a(),h(ae.$$.fragment),pn=a(),h(re.$$.fragment),mn=a(),h(ie.$$.fragment),zt=a(),h(Ue.$$.fragment),xt=a(),$=c("div"),h(ze.$$.fragment),un=a(),st=c("p"),st.textContent=eo,hn=a(),at=c("p"),at.innerHTML=to,fn=a(),rt=c("p"),rt.innerHTML=no,gn=a(),H=c("div"),h(xe.$$.fragment),_n=a(),it=c("p"),it.innerHTML=oo,bn=a(),h(le.$$.fragment),Mn=a(),h(de.$$.fragment),Ft=a(),h(Fe.$$.fragment),Zt=a(),B=c("div"),h(Ze.$$.fragment),yn=a(),lt=c("p"),lt.textContent=so,Tn=a(),dt=c("p"),
dt.innerHTML=ao,wn=a(),ct=c("p"),ct.innerHTML=ro,kn=a(),L=c("div"),h(qe.$$.fragment),vn=a(),pt=c("p"),pt.innerHTML=io,In=a(),h(ce.$$.fragment),Jn=a(),h(pe.$$.fragment),qt=a(),h(We.$$.fragment),Wt=a(),j=c("div"),h(Re.$$.fragment),$n=a(),mt=c("p"),mt.innerHTML=lo,Bn=a(),ut=c("p"),ut.innerHTML=co,jn=a(),ht=c("p"),ht.innerHTML=po,Cn=a(),S=c("div"),h(Ge.$$.fragment),Un=a(),ft=c("p"),ft.innerHTML=mo,zn=a(),h(me.$$.fragment),xn=a(),h(ue.$$.fragment),Rt=a(),h(Ve.$$.fragment),Gt=a(),_t=c("p"),this.h()},l(e){const o=_o("svelte-u9bgzb",document.head);t=p(o,"META",{name:!0,content:!0}),o.forEach(l),y=r(e),i=p(e,"P",{}),U(i).forEach(l),d=r(e),f(T.$$.fragment,e),n=r(e),f(w.$$.fragment,e),bt=r(e),he=p(e,"P",{"data-svelte-h":!0}),u(he)!=="svelte-ordmxb"&&(he.innerHTML=Fn),Mt=r(e),fe=p(e,"P",{"data-svelte-h":!0}),u(fe)!=="svelte-vfdo9a"&&(fe.textContent=Zn),yt=r(e),ge=p(e,"P",{"data-svelte-h":!0}),u(ge)!=="svelte-ykcqol"&&(ge.innerHTML=qn),Tt=r(e),_e=p(e,"P",{"data-svelte-h":!0}),u(_e)!=="svelte-1k4zbws"&&(_e.innerHTML=Wn),wt=r(e),f(be.$$.fragment,e),kt=r(e),Me=p(e,"UL",{"data-svelte-h":!0}),u(Me)!=="svelte-4aqgrt"&&(Me.innerHTML=Rn),vt=r(e),f(ye.$$.fragment,e),It=r(e),Q=p(e,"DIV",{class:!0});var ee=U(Q);f(Te.$$.fragment,ee),Xt=r(ee),Ee=p(ee,"P",{"data-svelte-h":!0}),u(Ee)!=="svelte-6jymeb"&&(Ee.innerHTML=Gn),Nt=r(ee),He=p(ee,"P",{"data-svelte-h":!0}),u(He)!=="svelte-5bofbr"&&(He.innerHTML=Vn),ee.forEach(l),Jt=r(e),f(we.$$.fragment,e),$t=r(e),v=p(e,"DIV",{class:!0});var C=U(v);f(ke.$$.fragment,C),Et=r(C),Le=p(C,"P",{"data-svelte-h":!0}),u(Le)!=="svelte-168s1r6"&&(Le.textContent=Xn),Ht=r(C),Se=p(C,"P",{"data-svelte-h":!0}),u(Se)!=="svelte-vl14cn"&&(Se.innerHTML=Nn),Lt=r(C),Qe=p(C,"P",{"data-svelte-h":!0}),u(Qe)!=="svelte-hswkmf"&&(Qe.innerHTML=En),St=r(C),Ye=p(C,"P",{"data-svelte-h":!0}),u(Ye)!=="svelte-1du13oj"&&(Ye.innerHTML=Hn),Qt=r(C),N=p(C,"DIV",{class:!0});var 
Y=U(N);f(ve.$$.fragment,Y),Yt=r(Y),Ae=p(Y,"P",{"data-svelte-h":!0}),u(Ae)!=="svelte-rybln8"&&(Ae.innerHTML=Ln),At=r(Y),f(te.$$.fragment,Y),Pt=r(Y),f(ne.$$.fragment,Y),Y.forEach(l),C.forEach(l),Bt=r(e),f(Ie.$$.fragment,e),jt=r(e),I=p(e,"DIV",{class:!0});var F=U(I);f(Je.$$.fragment,F),Ot=r(F),Pe=p(F,"P",{"data-svelte-h":!0}),u(Pe)!=="svelte-mirnql"&&(Pe.innerHTML=Sn),Dt=r(F),Oe=p(F,"P",{"data-svelte-h":!0}),u(Oe)!=="svelte-vl14cn"&&(Oe.innerHTML=Qn),Kt=r(F),De=p(F,"P",{"data-svelte-h":!0}),u(De)!=="svelte-hswkmf"&&(De.innerHTML=Yn),en=r(F),E=p(F,"DIV",{class:!0});var A=U(E);f($e.$$.fragment,A),tn=r(A),Ke=p(A,"P",{"data-svelte-h":!0}),u(Ke)!=="svelte-14us3zk"&&(Ke.innerHTML=An),nn=r(A),f(oe.$$.fragment,A),on=r(A),f(se.$$.fragment,A),A.forEach(l),F.forEach(l),Ct=r(e),f(Be.$$.fragment,e),Ut=r(e),J=p(e,"DIV",{class:!0});var Z=U(J);f(je.$$.fragment,Z),sn=r(Z),et=p(Z,"P",{"data-svelte-h":!0}),u(et)!=="svelte-d4a33x"&&(et.textContent=Pn),an=r(Z),tt=p(Z,"P",{"data-svelte-h":!0}),u(tt)!=="svelte-vl14cn"&&(tt.innerHTML=On),rn=r(Z),nt=p(Z,"P",{"data-svelte-h":!0}),u(nt)!=="svelte-hswkmf"&&(nt.innerHTML=Dn),ln=r(Z),x=p(Z,"DIV",{class:!0});var q=U(x);f(Ce.$$.fragment,q),dn=r(q),ot=p(q,"P",{"data-svelte-h":!0}),u(ot)!=="svelte-17sx4dq"&&(ot.innerHTML=Kn),cn=r(q),f(ae.$$.fragment,q),pn=r(q),f(re.$$.fragment,q),mn=r(q),f(ie.$$.fragment,q),q.forEach(l),Z.forEach(l),zt=r(e),f(Ue.$$.fragment,e),xt=r(e),$=p(e,"DIV",{class:!0});var W=U($);f(ze.$$.fragment,W),un=r(W),st=p(W,"P",{"data-svelte-h":!0}),u(st)!=="svelte-zjuftl"&&(st.textContent=eo),hn=r(W),at=p(W,"P",{"data-svelte-h":!0}),u(at)!=="svelte-vl14cn"&&(at.innerHTML=to),fn=r(W),rt=p(W,"P",{"data-svelte-h":!0}),u(rt)!=="svelte-hswkmf"&&(rt.innerHTML=no),gn=r(W),H=p(W,"DIV",{class:!0});var 
P=U(H);f(xe.$$.fragment,P),_n=r(P),it=p(P,"P",{"data-svelte-h":!0}),u(it)!=="svelte-16nmxvy"&&(it.innerHTML=oo),bn=r(P),f(le.$$.fragment,P),Mn=r(P),f(de.$$.fragment,P),P.forEach(l),W.forEach(l),Ft=r(e),f(Fe.$$.fragment,e),Zt=r(e),B=p(e,"DIV",{class:!0});var R=U(B);f(Ze.$$.fragment,R),yn=r(R),lt=p(R,"P",{"data-svelte-h":!0}),u(lt)!=="svelte-qboyo6"&&(lt.textContent=so),Tn=r(R),dt=p(R,"P",{"data-svelte-h":!0}),u(dt)!=="svelte-vl14cn"&&(dt.innerHTML=ao),wn=r(R),ct=p(R,"P",{"data-svelte-h":!0}),u(ct)!=="svelte-hswkmf"&&(ct.innerHTML=ro),kn=r(R),L=p(R,"DIV",{class:!0});var O=U(L);f(qe.$$.fragment,O),vn=r(O),pt=p(O,"P",{"data-svelte-h":!0}),u(pt)!=="svelte-11nrmwa"&&(pt.innerHTML=io),In=r(O),f(ce.$$.fragment,O),Jn=r(O),f(pe.$$.fragment,O),O.forEach(l),R.forEach(l),qt=r(e),f(We.$$.fragment,e),Wt=r(e),j=p(e,"DIV",{class:!0});var G=U(j);f(Re.$$.fragment,G),$n=r(G),mt=p(G,"P",{"data-svelte-h":!0}),u(mt)!=="svelte-nr1xe0"&&(mt.innerHTML=lo),Bn=r(G),ut=p(G,"P",{"data-svelte-h":!0}),u(ut)!=="svelte-vl14cn"&&(ut.innerHTML=co),jn=r(G),ht=p(G,"P",{"data-svelte-h":!0}),u(ht)!=="svelte-hswkmf"&&(ht.innerHTML=po),Cn=r(G),S=p(G,"DIV",{class:!0});var D=U(S);f(Ge.$$.fragment,D),Un=r(D),ft=p(D,"P",{"data-svelte-h":!0}),u(ft)!=="svelte-17jtula"&&(ft.innerHTML=mo),zn=r(D),f(me.$$.fragment,D),xn=r(D),f(ue.$$.fragment,D),D.forEach(l),G.forEach(l),Rt=r(e),f(Ve.$$.fragment,e),Gt=r(e),_t=p(e,"P",{}),U(_t).forEach(l),this.h()},h(){z(t,"name","hf:doc:metadata"),z(t,"content",xo),z(Q,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),z(N,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),z(v,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),z(E,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),z(I,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),z(x,"class","docstring 
border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),z(J,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),z(H,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),z($,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),z(L,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),z(B,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),z(S,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),z(j,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,o){s(document.head,t),m(e,y,o),m(e,i,o),m(e,d,o),g(T,e,o),m(e,n,o),g(w,e,o),m(e,bt,o),m(e,he,o),m(e,Mt,o),m(e,fe,o),m(e,yt,o),m(e,ge,o),m(e,Tt,o),m(e,_e,o),m(e,wt,o),g(be,e,o),m(e,kt,o),m(e,Me,o),m(e,vt,o),g(ye,e,o),m(e,It,o),m(e,Q,o),g(Te,Q,null),s(Q,Xt),s(Q,Ee),s(Q,Nt),s(Q,He),m(e,Jt,o),g(we,e,o),m(e,$t,o),m(e,v,o),g(ke,v,null),s(v,Et),s(v,Le),s(v,Ht),s(v,Se),s(v,Lt),s(v,Qe),s(v,St),s(v,Ye),s(v,Qt),s(v,N),g(ve,N,null),s(N,Yt),s(N,Ae),s(N,At),g(te,N,null),s(N,Pt),g(ne,N,null),m(e,Bt,o),g(Ie,e,o),m(e,jt,o),m(e,I,o),g(Je,I,null),s(I,Ot),s(I,Pe),s(I,Dt),s(I,Oe),s(I,Kt),s(I,De),s(I,en),s(I,E),g($e,E,null),s(E,tn),s(E,Ke),s(E,nn),g(oe,E,null),s(E,on),g(se,E,null),m(e,Ct,o),g(Be,e,o),m(e,Ut,o),m(e,J,o),g(je,J,null),s(J,sn),s(J,et),s(J,an),s(J,tt),s(J,rn),s(J,nt),s(J,ln),s(J,x),g(Ce,x,null),s(x,dn),s(x,ot),s(x,cn),g(ae,x,null),s(x,pn),g(re,x,null),s(x,mn),g(ie,x,null),m(e,zt,o),g(Ue,e,o),m(e,xt,o),m(e,$,o),g(ze,$,null),s($,un),s($,st),s($,hn),s($,at),s($,fn),s($,rt),s($,gn),s($,H),g(xe,H,null),s(H,_n),s(H,it),s(H,bn),g(le,H,null),s(H,Mn),g(de,H,null),m(e,Ft,o),g(Fe,e,o),m(e,Zt,o),m(e,B,o),g(Ze,B,null),s(B,yn),s(B,lt),s(B,Tn),s(B,dt),s(B,wn),s(B,ct),s(B,kn),s(B,L),g(qe,L,null),s(L,vn),s(L,pt),s(L,In),g(ce,L,null),
s(L,Jn),g(pe,L,null),m(e,qt,o),g(We,e,o),m(e,Wt,o),m(e,j,o),g(Re,j,null),s(j,$n),s(j,mt),s(j,Bn),s(j,ut),s(j,jn),s(j,ht),s(j,Cn),s(j,S),g(Ge,S,null),s(S,Un),s(S,ft),s(S,zn),g(me,S,null),s(S,xn),g(ue,S,null),m(e,Rt,o),g(Ve,e,o),m(e,Gt,o),m(e,_t,o),Vt=!0},p(e,[o]){const ee={};o&2&&(ee.$$scope={dirty:o,ctx:e}),te.$set(ee);const C={};o&2&&(C.$$scope={dirty:o,ctx:e}),ne.$set(C);const Y={};o&2&&(Y.$$scope={dirty:o,ctx:e}),oe.$set(Y);const F={};o&2&&(F.$$scope={dirty:o,ctx:e}),se.$set(F);const A={};o&2&&(A.$$scope={dirty:o,ctx:e}),ae.$set(A);const Z={};o&2&&(Z.$$scope={dirty:o,ctx:e}),re.$set(Z);const q={};o&2&&(q.$$scope={dirty:o,ctx:e}),ie.$set(q);const W={};o&2&&(W.$$scope={dirty:o,ctx:e}),le.$set(W);const P={};o&2&&(P.$$scope={dirty:o,ctx:e}),de.$set(P);const R={};o&2&&(R.$$scope={dirty:o,ctx:e}),ce.$set(R);const O={};o&2&&(O.$$scope={dirty:o,ctx:e}),pe.$set(O);const G={};o&2&&(G.$$scope={dirty:o,ctx:e}),me.$set(G);const D={};o&2&&(D.$$scope={dirty:o,ctx:e}),ue.$set(D)},i(e){Vt||(_(T.$$.fragment,e),_(w.$$.fragment,e),_(be.$$.fragment,e),_(ye.$$.fragment,e),_(Te.$$.fragment,e),_(we.$$.fragment,e),_(ke.$$.fragment,e),_(ve.$$.fragment,e),_(te.$$.fragment,e),_(ne.$$.fragment,e),_(Ie.$$.fragment,e),_(Je.$$.fragment,e),_($e.$$.fragment,e),_(oe.$$.fragment,e),_(se.$$.fragment,e),_(Be.$$.fragment,e),_(je.$$.fragment,e),_(Ce.$$.fragment,e),_(ae.$$.fragment,e),_(re.$$.fragment,e),_(ie.$$.fragment,e),_(Ue.$$.fragment,e),_(ze.$$.fragment,e),_(xe.$$.fragment,e),_(le.$$.fragment,e),_(de.$$.fragment,e),_(Fe.$$.fragment,e),_(Ze.$$.fragment,e),_(qe.$$.fragment,e),_(ce.$$.fragment,e),_(pe.$$.fragment,e),_(We.$$.fragment,e),_(Re.$$.fragment,e),_(Ge.$$.fragment,e),_(me.$$.fragment,e),_(ue.$$.fragment,e),_(Ve.$$.fragment,e),Vt=!0)},o(e){b(T.$$.fragment,e),b(w.$$.fragment,e),b(be.$$.fragment,e),b(ye.$$.fragment,e),b(Te.$$.fragment,e),b(we.$$.fragment,e),b(ke.$$.fragment,e),b(ve.$$.fragment,e),b(te.$$.fragment,e),b(ne.$$.fragment,e),b(Ie.$$.fragment,e),b(Je.$$.fragment,e),b($e.$$.fragment,e)
,b(oe.$$.fragment,e),b(se.$$.fragment,e),b(Be.$$.fragment,e),b(je.$$.fragment,e),b(Ce.$$.fragment,e),b(ae.$$.fragment,e),b(re.$$.fragment,e),b(ie.$$.fragment,e),b(Ue.$$.fragment,e),b(ze.$$.fragment,e),b(xe.$$.fragment,e),b(le.$$.fragment,e),b(de.$$.fragment,e),b(Fe.$$.fragment,e),b(Ze.$$.fragment,e),b(qe.$$.fragment,e),b(ce.$$.fragment,e),b(pe.$$.fragment,e),b(We.$$.fragment,e),b(Re.$$.fragment,e),b(Ge.$$.fragment,e),b(me.$$.fragment,e),b(ue.$$.fragment,e),b(Ve.$$.fragment,e),Vt=!1},d(e){e&&(l(y),l(i),l(d),l(n),l(bt),l(he),l(Mt),l(fe),l(yt),l(ge),l(Tt),l(_e),l(wt),l(kt),l(Me),l(vt),l(It),l(Q),l(Jt),l($t),l(v),l(Bt),l(jt),l(I),l(Ct),l(Ut),l(J),l(zt),l(xt),l($),l(Ft),l(Zt),l(B),l(qt),l(Wt),l(j),l(Rt),l(Gt),l(_t)),l(t),M(T,e),M(w,e),M(be,e),M(ye,e),M(Te),M(we,e),M(ke),M(ve),M(te),M(ne),M(Ie,e),M(Je),M($e),M(oe),M(se),M(Be,e),M(je),M(Ce),M(ae),M(re),M(ie),M(Ue,e),M(ze),M(xe),M(le),M(de),M(Fe,e),M(Ze),M(qe),M(ce),M(pe),M(We,e),M(Re),M(Ge),M(me),M(ue),M(Ve,e)}}}const xo='{"title":"I-BERT","local":"i-bert","sections":[{"title":"Overview","local":"overview","sections":[],"depth":2},{"title":"Resources","local":"resources","sections":[],"depth":2},{"title":"IBertConfig","local":"transformers.IBertConfig","sections":[],"depth":2},{"title":"IBertModel","local":"transformers.IBertModel","sections":[],"depth":2},{"title":"IBertForMaskedLM","local":"transformers.IBertForMaskedLM","sections":[],"depth":2},{"title":"IBertForSequenceClassification","local":"transformers.IBertForSequenceClassification","sections":[],"depth":2},{"title":"IBertForMultipleChoice","local":"transformers.IBertForMultipleChoice","sections":[],"depth":2},{"title":"IBertForTokenClassification","local":"transformers.IBertForTokenClassification","sections":[],"depth":2},{"title":"IBertForQuestionAnswering","local":"transformers.IBertForQuestionAnswering","sections":[],"depth":2}],"depth":1}';function Fo(k){return ho(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class No extends 
fo{constructor(t){super(),go(this,t,Fo,zo,uo,{})}}export{No as component}; | |
Xet Storage Details
- Size:
- 117 kB
- Xet hash:
- c8d10a2d579168bf84f4b7c6c8955ff815b8db7ba8b7f788e8546288e57c6bf2
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.