Buckets:

hf-doc-build
/

doc-dev

hf-doc-build/doc-dev / transformers /pr_33913 /en /_app /immutable /nodes /136.b4dfd864.js

rtrm's picture

about 2 months ago

159 kB

	import{s as Ys,o as Ks,n as F}from"../chunks/scheduler.25b97de1.js";import{S as er,i as tr,g as l,s,r as u,A as nr,h as c,f as i,c as r,j as v,u as f,x as h,k as $,y as n,a as p,v as g,d as _,t as b,w as y}from"../chunks/index.d9030fc9.js";import{T as me}from"../chunks/Tip.baa67368.js";import{D as E}from"../chunks/Docstring.ffac8efa.js";import{C as Ce}from"../chunks/CodeBlock.e6cd0d95.js";import{E as yt}from"../chunks/ExampleCodeBlock.22dfe688.js";import{H as C,E as or}from"../chunks/EditOnGithub.91d95064.js";function sr(w){let t,k="Examples:",d,m,T;return m=new Ce({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEVybmllQ29uZmlnJTJDJTIwRXJuaWVNb2RlbCUwQSUwQSUyMyUyMEluaXRpYWxpemluZyUyMGElMjBFUk5JRSUyMG5naHV5b25nJTJGZXJuaWUtMy4wLWJhc2UtemglMjBzdHlsZSUyMGNvbmZpZ3VyYXRpb24lMEFjb25maWd1cmF0aW9uJTIwJTNEJTIwRXJuaWVDb25maWcoKSUwQSUwQSUyMyUyMEluaXRpYWxpemluZyUyMGElMjBtb2RlbCUyMCh3aXRoJTIwcmFuZG9tJTIwd2VpZ2h0cyklMjBmcm9tJTIwdGhlJTIwbmdodXlvbmclMkZlcm5pZS0zLjAtYmFzZS16aCUyMHN0eWxlJTIwY29uZmlndXJhdGlvbiUwQW1vZGVsJTIwJTNEJTIwRXJuaWVNb2RlbChjb25maWd1cmF0aW9uKSUwQSUwQSUyMyUyMEFjY2Vzc2luZyUyMHRoZSUyMG1vZGVsJTIwY29uZmlndXJhdGlvbiUwQWNvbmZpZ3VyYXRpb24lMjAlM0QlMjBtb2RlbC5jb25maWc=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> ErnieConfig, ErnieModel

	<span class="hljs-meta">>>> </span><span class="hljs-comment"># Initializing a ERNIE nghuyong/ernie-3.0-base-zh style configuration</span>
	<span class="hljs-meta">>>> </span>configuration = ErnieConfig()

	<span class="hljs-meta">>>> </span><span class="hljs-comment"># Initializing a model (with random weights) from the nghuyong/ernie-3.0-base-zh style configuration</span>
	<span class="hljs-meta">>>> </span>model = ErnieModel(configuration)

	<span class="hljs-meta">>>> </span><span class="hljs-comment"># Accessing the model configuration</span>
	<span class="hljs-meta">>>> </span>configuration = model.config`,wrap:!1}}),{c(){t=l("p"),t.textContent=k,d=s(),u(m.$$.fragment)},l(a){t=c(a,"P",{"data-svelte-h":!0}),h(t)!=="svelte-kvfsh7"&&(t.textContent=k),d=r(a),f(m.$$.fragment,a)},m(a,M){p(a,t,M),p(a,d,M),g(m,a,M),T=!0},p:F,i(a){T||(_(m.$$.fragment,a),T=!0)},o(a){b(m.$$.fragment,a),T=!1},d(a){a&&(i(t),i(d)),y(m,a)}}}function rr(w){let t,k=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code>
	instance afterwards instead of this since the former takes care of running the pre and post processing steps while
	the latter silently ignores them.`;return{c(){t=l("p"),t.innerHTML=k},l(d){t=c(d,"P",{"data-svelte-h":!0}),h(t)!=="svelte-fincs2"&&(t.innerHTML=k)},m(d,m){p(d,t,m)},p:F,d(d){d&&i(t)}}}function ar(w){let t,k="Example:",d,m,T;return m=new Ce({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMkMlMjBFcm5pZU1vZGVsJTBBaW1wb3J0JTIwdG9yY2glMEElMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCglMjJuZ2h1eW9uZyUyRmVybmllLTEuMC1iYXNlLXpoJTIyKSUwQW1vZGVsJTIwJTNEJTIwRXJuaWVNb2RlbC5mcm9tX3ByZXRyYWluZWQoJTIybmdodXlvbmclMkZlcm5pZS0xLjAtYmFzZS16aCUyMiklMEElMEFpbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoJTIySGVsbG8lMkMlMjBteSUyMGRvZyUyMGlzJTIwY3V0ZSUyMiUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIpJTBBb3V0cHV0cyUyMCUzRCUyMG1vZGVsKCoqaW5wdXRzKSUwQSUwQWxhc3RfaGlkZGVuX3N0YXRlcyUyMCUzRCUyMG91dHB1dHMubGFzdF9oaWRkZW5fc3RhdGU=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, ErnieModel
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch

	<span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"nghuyong/ernie-1.0-base-zh"</span>)
	<span class="hljs-meta">>>> </span>model = ErnieModel.from_pretrained(<span class="hljs-string">"nghuyong/ernie-1.0-base-zh"</span>)

	<span class="hljs-meta">>>> </span>inputs = tokenizer(<span class="hljs-string">"Hello, my dog is cute"</span>, return_tensors=<span class="hljs-string">"pt"</span>)
	<span class="hljs-meta">>>> </span>outputs = model(**inputs)

	<span class="hljs-meta">>>> </span>last_hidden_states = outputs.last_hidden_state`,wrap:!1}}),{c(){t=l("p"),t.textContent=k,d=s(),u(m.$$.fragment)},l(a){t=c(a,"P",{"data-svelte-h":!0}),h(t)!=="svelte-11lpom8"&&(t.textContent=k),d=r(a),f(m.$$.fragment,a)},m(a,M){p(a,t,M),p(a,d,M),g(m,a,M),T=!0},p:F,i(a){T||(_(m.$$.fragment,a),T=!0)},o(a){b(m.$$.fragment,a),T=!1},d(a){a&&(i(t),i(d)),y(m,a)}}}function ir(w){let t,k=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code>
	instance afterwards instead of this since the former takes care of running the pre and post processing steps while
	the latter silently ignores them.`;return{c(){t=l("p"),t.innerHTML=k},l(d){t=c(d,"P",{"data-svelte-h":!0}),h(t)!=="svelte-fincs2"&&(t.innerHTML=k)},m(d,m){p(d,t,m)},p:F,d(d){d&&i(t)}}}function dr(w){let t,k="Example:",d,m,T;return m=new Ce({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMkMlMjBFcm5pZUZvclByZVRyYWluaW5nJTBBaW1wb3J0JTIwdG9yY2glMEElMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCglMjJuZ2h1eW9uZyUyRmVybmllLTEuMC1iYXNlLXpoJTIyKSUwQW1vZGVsJTIwJTNEJTIwRXJuaWVGb3JQcmVUcmFpbmluZy5mcm9tX3ByZXRyYWluZWQoJTIybmdodXlvbmclMkZlcm5pZS0xLjAtYmFzZS16aCUyMiklMEElMEFpbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoJTIySGVsbG8lMkMlMjBteSUyMGRvZyUyMGlzJTIwY3V0ZSUyMiUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIpJTBBb3V0cHV0cyUyMCUzRCUyMG1vZGVsKCoqaW5wdXRzKSUwQSUwQXByZWRpY3Rpb25fbG9naXRzJTIwJTNEJTIwb3V0cHV0cy5wcmVkaWN0aW9uX2xvZ2l0cyUwQXNlcV9yZWxhdGlvbnNoaXBfbG9naXRzJTIwJTNEJTIwb3V0cHV0cy5zZXFfcmVsYXRpb25zaGlwX2xvZ2l0cw==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, ErnieForPreTraining
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch

	<span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"nghuyong/ernie-1.0-base-zh"</span>)
	<span class="hljs-meta">>>> </span>model = ErnieForPreTraining.from_pretrained(<span class="hljs-string">"nghuyong/ernie-1.0-base-zh"</span>)

	<span class="hljs-meta">>>> </span>inputs = tokenizer(<span class="hljs-string">"Hello, my dog is cute"</span>, return_tensors=<span class="hljs-string">"pt"</span>)
	<span class="hljs-meta">>>> </span>outputs = model(**inputs)

	<span class="hljs-meta">>>> </span>prediction_logits = outputs.prediction_logits
	<span class="hljs-meta">>>> </span>seq_relationship_logits = outputs.seq_relationship_logits`,wrap:!1}}),{c(){t=l("p"),t.textContent=k,d=s(),u(m.$$.fragment)},l(a){t=c(a,"P",{"data-svelte-h":!0}),h(t)!=="svelte-11lpom8"&&(t.textContent=k),d=r(a),f(m.$$.fragment,a)},m(a,M){p(a,t,M),p(a,d,M),g(m,a,M),T=!0},p:F,i(a){T||(_(m.$$.fragment,a),T=!0)},o(a){b(m.$$.fragment,a),T=!1},d(a){a&&(i(t),i(d)),y(m,a)}}}function lr(w){let t,k=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code>
	instance afterwards instead of this since the former takes care of running the pre and post processing steps while
	the latter silently ignores them.`;return{c(){t=l("p"),t.innerHTML=k},l(d){t=c(d,"P",{"data-svelte-h":!0}),h(t)!=="svelte-fincs2"&&(t.innerHTML=k)},m(d,m){p(d,t,m)},p:F,d(d){d&&i(t)}}}function cr(w){let t,k="Example:",d,m,T;return m=new Ce({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b1Rva2VuaXplciUyQyUyMEVybmllRm9yQ2F1c2FsTE0lMEElMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCglMjJuZ2h1eW9uZyUyRmVybmllLTEuMC1iYXNlLXpoJTIyKSUwQW1vZGVsJTIwJTNEJTIwRXJuaWVGb3JDYXVzYWxMTS5mcm9tX3ByZXRyYWluZWQoJTIybmdodXlvbmclMkZlcm5pZS0xLjAtYmFzZS16aCUyMiklMEElMEFpbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoJTIySGVsbG8lMkMlMjBteSUyMGRvZyUyMGlzJTIwY3V0ZSUyMiUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIpJTBBb3V0cHV0cyUyMCUzRCUyMG1vZGVsKCoqaW5wdXRzJTJDJTIwbGFiZWxzJTNEaW5wdXRzJTVCJTIyaW5wdXRfaWRzJTIyJTVEKSUwQWxvc3MlMjAlM0QlMjBvdXRwdXRzLmxvc3MlMEFsb2dpdHMlMjAlM0QlMjBvdXRwdXRzLmxvZ2l0cw==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, ErnieForCausalLM

	<span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"nghuyong/ernie-1.0-base-zh"</span>)
	<span class="hljs-meta">>>> </span>model = ErnieForCausalLM.from_pretrained(<span class="hljs-string">"nghuyong/ernie-1.0-base-zh"</span>)

	<span class="hljs-meta">>>> </span>inputs = tokenizer(<span class="hljs-string">"Hello, my dog is cute"</span>, return_tensors=<span class="hljs-string">"pt"</span>)
	<span class="hljs-meta">>>> </span>outputs = model(**inputs, labels=inputs[<span class="hljs-string">"input_ids"</span>])
	<span class="hljs-meta">>>> </span>loss = outputs.loss
	<span class="hljs-meta">>>> </span>logits = outputs.logits`,wrap:!1}}),{c(){t=l("p"),t.textContent=k,d=s(),u(m.$$.fragment)},l(a){t=c(a,"P",{"data-svelte-h":!0}),h(t)!=="svelte-11lpom8"&&(t.textContent=k),d=r(a),f(m.$$.fragment,a)},m(a,M){p(a,t,M),p(a,d,M),g(m,a,M),T=!0},p:F,i(a){T||(_(m.$$.fragment,a),T=!0)},o(a){b(m.$$.fragment,a),T=!1},d(a){a&&(i(t),i(d)),y(m,a)}}}function pr(w){let t,k=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code>
	instance afterwards instead of this since the former takes care of running the pre and post processing steps while
	the latter silently ignores them.`;return{c(){t=l("p"),t.innerHTML=k},l(d){t=c(d,"P",{"data-svelte-h":!0}),h(t)!=="svelte-fincs2"&&(t.innerHTML=k)},m(d,m){p(d,t,m)},p:F,d(d){d&&i(t)}}}function mr(w){let t,k="Example:",d,m,T;return m=new Ce({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMkMlMjBFcm5pZUZvck1hc2tlZExNJTBBaW1wb3J0JTIwdG9yY2glMEElMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCglMjJuZ2h1eW9uZyUyRmVybmllLTEuMC1iYXNlLXpoJTIyKSUwQW1vZGVsJTIwJTNEJTIwRXJuaWVGb3JNYXNrZWRMTS5mcm9tX3ByZXRyYWluZWQoJTIybmdodXlvbmclMkZlcm5pZS0xLjAtYmFzZS16aCUyMiklMEElMEFpbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoJTIyVGhlJTIwY2FwaXRhbCUyMG9mJTIwRnJhbmNlJTIwaXMlMjAlNUJNQVNLJTVELiUyMiUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIpJTBBJTBBd2l0aCUyMHRvcmNoLm5vX2dyYWQoKSUzQSUwQSUyMCUyMCUyMCUyMGxvZ2l0cyUyMCUzRCUyMG1vZGVsKCoqaW5wdXRzKS5sb2dpdHMlMEElMEElMjMlMjByZXRyaWV2ZSUyMGluZGV4JTIwb2YlMjAlNUJNQVNLJTVEJTBBbWFza190b2tlbl9pbmRleCUyMCUzRCUyMChpbnB1dHMuaW5wdXRfaWRzJTIwJTNEJTNEJTIwdG9rZW5pemVyLm1hc2tfdG9rZW5faWQpJTVCMCU1RC5ub256ZXJvKGFzX3R1cGxlJTNEVHJ1ZSklNUIwJTVEJTBBJTBBcHJlZGljdGVkX3Rva2VuX2lkJTIwJTNEJTIwbG9naXRzJTVCMCUyQyUyMG1hc2tfdG9rZW5faW5kZXglNUQuYXJnbWF4KGF4aXMlM0QtMSklMEF0b2tlbml6ZXIuZGVjb2RlKHByZWRpY3RlZF90b2tlbl9pZCklMEElMEFsYWJlbHMlMjAlM0QlMjB0b2tlbml6ZXIoJTIyVGhlJTIwY2FwaXRhbCUyMG9mJTIwRnJhbmNlJTIwaXMlMjBQYXJpcy4lMjIlMkMlMjByZXR1cm5fdGVuc29ycyUzRCUyMnB0JTIyKSU1QiUyMmlucHV0X2lkcyUyMiU1RCUwQSUyMyUyMG1hc2slMjBsYWJlbHMlMjBvZiUyMG5vbi0lNUJNQVNLJTVEJTIwdG9rZW5zJTBBbGFiZWxzJTIwJTNEJTIwdG9yY2gud2hlcmUoaW5wdXRzLmlucHV0X2lkcyUyMCUzRCUzRCUyMHRva2VuaXplci5tYXNrX3Rva2VuX2lkJTJDJTIwbGFiZWxzJTJDJTIwLTEwMCklMEElMEFvdXRwdXRzJTIwJTNEJTIwbW9kZWwoKippbnB1dHMlMkMlMjBsYWJlbHMlM0RsYWJlbHMpJTBBcm91bmQob3V0cHV0cy5sb3NzLml0ZW0oKSUyQyUyMDIp",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, ErnieForMaskedLM
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch

	<span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"nghuyong/ernie-1.0-base-zh"</span>)
	<span class="hljs-meta">>>> </span>model = ErnieForMaskedLM.from_pretrained(<span class="hljs-string">"nghuyong/ernie-1.0-base-zh"</span>)

	<span class="hljs-meta">>>> </span>inputs = tokenizer(<span class="hljs-string">"The capital of France is [MASK]."</span>, return_tensors=<span class="hljs-string">"pt"</span>)

	<span class="hljs-meta">>>> </span><span class="hljs-keyword">with</span> torch.no_grad():
	<span class="hljs-meta">... </span> logits = model(**inputs).logits

	<span class="hljs-meta">>>> </span><span class="hljs-comment"># retrieve index of [MASK]</span>
	<span class="hljs-meta">>>> </span>mask_token_index = (inputs.input_ids == tokenizer.mask_token_id)[<span class="hljs-number">0</span>].nonzero(as_tuple=<span class="hljs-literal">True</span>)[<span class="hljs-number">0</span>]

	<span class="hljs-meta">>>> </span>predicted_token_id = logits[<span class="hljs-number">0</span>, mask_token_index].argmax(axis=-<span class="hljs-number">1</span>)
	<span class="hljs-meta">>>> </span>tokenizer.decode(predicted_token_id)
	<span class="hljs-string">'paris'</span>

	<span class="hljs-meta">>>> </span>labels = tokenizer(<span class="hljs-string">"The capital of France is Paris."</span>, return_tensors=<span class="hljs-string">"pt"</span>)[<span class="hljs-string">"input_ids"</span>]
	<span class="hljs-meta">>>> </span><span class="hljs-comment"># mask labels of non-[MASK] tokens</span>
	<span class="hljs-meta">>>> </span>labels = torch.where(inputs.input_ids == tokenizer.mask_token_id, labels, -<span class="hljs-number">100</span>)

	<span class="hljs-meta">>>> </span>outputs = model(**inputs, labels=labels)
	<span class="hljs-meta">>>> </span><span class="hljs-built_in">round</span>(outputs.loss.item(), <span class="hljs-number">2</span>)
	<span class="hljs-number">0.88</span>`,wrap:!1}}),{c(){t=l("p"),t.textContent=k,d=s(),u(m.$$.fragment)},l(a){t=c(a,"P",{"data-svelte-h":!0}),h(t)!=="svelte-11lpom8"&&(t.textContent=k),d=r(a),f(m.$$.fragment,a)},m(a,M){p(a,t,M),p(a,d,M),g(m,a,M),T=!0},p:F,i(a){T||(_(m.$$.fragment,a),T=!0)},o(a){b(m.$$.fragment,a),T=!1},d(a){a&&(i(t),i(d)),y(m,a)}}}function hr(w){let t,k=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code>
	instance afterwards instead of this since the former takes care of running the pre and post processing steps while
	the latter silently ignores them.`;return{c(){t=l("p"),t.innerHTML=k},l(d){t=c(d,"P",{"data-svelte-h":!0}),h(t)!=="svelte-fincs2"&&(t.innerHTML=k)},m(d,m){p(d,t,m)},p:F,d(d){d&&i(t)}}}function ur(w){let t,k="Example:",d,m,T;return m=new Ce({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMkMlMjBFcm5pZUZvck5leHRTZW50ZW5jZVByZWRpY3Rpb24lMEFpbXBvcnQlMjB0b3JjaCUwQSUwQXRva2VuaXplciUyMCUzRCUyMEF1dG9Ub2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKCUyMm5naHV5b25nJTJGZXJuaWUtMS4wLWJhc2UtemglMjIpJTBBbW9kZWwlMjAlM0QlMjBFcm5pZUZvck5leHRTZW50ZW5jZVByZWRpY3Rpb24uZnJvbV9wcmV0cmFpbmVkKCUyMm5naHV5b25nJTJGZXJuaWUtMS4wLWJhc2UtemglMjIpJTBBJTBBcHJvbXB0JTIwJTNEJTIwJTIySW4lMjBJdGFseSUyQyUyMHBpenphJTIwc2VydmVkJTIwaW4lMjBmb3JtYWwlMjBzZXR0aW5ncyUyQyUyMHN1Y2glMjBhcyUyMGF0JTIwYSUyMHJlc3RhdXJhbnQlMkMlMjBpcyUyMHByZXNlbnRlZCUyMHVuc2xpY2VkLiUyMiUwQW5leHRfc2VudGVuY2UlMjAlM0QlMjAlMjJUaGUlMjBza3klMjBpcyUyMGJsdWUlMjBkdWUlMjB0byUyMHRoZSUyMHNob3J0ZXIlMjB3YXZlbGVuZ3RoJTIwb2YlMjBibHVlJTIwbGlnaHQuJTIyJTBBZW5jb2RpbmclMjAlM0QlMjB0b2tlbml6ZXIocHJvbXB0JTJDJTIwbmV4dF9zZW50ZW5jZSUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIpJTBBJTBBb3V0cHV0cyUyMCUzRCUyMG1vZGVsKCoqZW5jb2RpbmclMkMlMjBsYWJlbHMlM0R0b3JjaC5Mb25nVGVuc29yKCU1QjElNUQpKSUwQWxvZ2l0cyUyMCUzRCUyMG91dHB1dHMubG9naXRzJTBBYXNzZXJ0JTIwbG9naXRzJTVCMCUyQyUyMDAlNUQlMjAlM0MlMjBsb2dpdHMlNUIwJTJDJTIwMSU1RCUyMCUyMCUyMyUyMG5leHQlMjBzZW50ZW5jZSUyMHdhcyUyMHJhbmRvbQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, ErnieForNextSentencePrediction
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch

	<span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"nghuyong/ernie-1.0-base-zh"</span>)
	<span class="hljs-meta">>>> </span>model = ErnieForNextSentencePrediction.from_pretrained(<span class="hljs-string">"nghuyong/ernie-1.0-base-zh"</span>)

	<span class="hljs-meta">>>> </span>prompt = <span class="hljs-string">"In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."</span>
	<span class="hljs-meta">>>> </span>next_sentence = <span class="hljs-string">"The sky is blue due to the shorter wavelength of blue light."</span>
	<span class="hljs-meta">>>> </span>encoding = tokenizer(prompt, next_sentence, return_tensors=<span class="hljs-string">"pt"</span>)

	<span class="hljs-meta">>>> </span>outputs = model(**encoding, labels=torch.LongTensor([<span class="hljs-number">1</span>]))
	<span class="hljs-meta">>>> </span>logits = outputs.logits
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">assert</span> logits[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>] < logits[<span class="hljs-number">0</span>, <span class="hljs-number">1</span>] <span class="hljs-comment"># next sentence was random</span>`,wrap:!1}}),{c(){t=l("p"),t.textContent=k,d=s(),u(m.$$.fragment)},l(a){t=c(a,"P",{"data-svelte-h":!0}),h(t)!=="svelte-11lpom8"&&(t.textContent=k),d=r(a),f(m.$$.fragment,a)},m(a,M){p(a,t,M),p(a,d,M),g(m,a,M),T=!0},p:F,i(a){T||(_(m.$$.fragment,a),T=!0)},o(a){b(m.$$.fragment,a),T=!1},d(a){a&&(i(t),i(d)),y(m,a)}}}function fr(w){let t,k=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code>
	instance afterwards instead of this since the former takes care of running the pre and post processing steps while
	the latter silently ignores them.`;return{c(){t=l("p"),t.innerHTML=k},l(d){t=c(d,"P",{"data-svelte-h":!0}),h(t)!=="svelte-fincs2"&&(t.innerHTML=k)},m(d,m){p(d,t,m)},p:F,d(d){d&&i(t)}}}function gr(w){let t,k=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code>
	instance afterwards instead of this since the former takes care of running the pre and post processing steps while
	the latter silently ignores them.`;return{c(){t=l("p"),t.innerHTML=k},l(d){t=c(d,"P",{"data-svelte-h":!0}),h(t)!=="svelte-fincs2"&&(t.innerHTML=k)},m(d,m){p(d,t,m)},p:F,d(d){d&&i(t)}}}function _r(w){let t,k="Example:",d,m,T;return m=new Ce({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMkMlMjBFcm5pZUZvck11bHRpcGxlQ2hvaWNlJTBBaW1wb3J0JTIwdG9yY2glMEElMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCglMjJuZ2h1eW9uZyUyRmVybmllLTEuMC1iYXNlLXpoJTIyKSUwQW1vZGVsJTIwJTNEJTIwRXJuaWVGb3JNdWx0aXBsZUNob2ljZS5mcm9tX3ByZXRyYWluZWQoJTIybmdodXlvbmclMkZlcm5pZS0xLjAtYmFzZS16aCUyMiklMEElMEFwcm9tcHQlMjAlM0QlMjAlMjJJbiUyMEl0YWx5JTJDJTIwcGl6emElMjBzZXJ2ZWQlMjBpbiUyMGZvcm1hbCUyMHNldHRpbmdzJTJDJTIwc3VjaCUyMGFzJTIwYXQlMjBhJTIwcmVzdGF1cmFudCUyQyUyMGlzJTIwcHJlc2VudGVkJTIwdW5zbGljZWQuJTIyJTBBY2hvaWNlMCUyMCUzRCUyMCUyMkl0JTIwaXMlMjBlYXRlbiUyMHdpdGglMjBhJTIwZm9yayUyMGFuZCUyMGElMjBrbmlmZS4lMjIlMEFjaG9pY2UxJTIwJTNEJTIwJTIySXQlMjBpcyUyMGVhdGVuJTIwd2hpbGUlMjBoZWxkJTIwaW4lMjB0aGUlMjBoYW5kLiUyMiUwQWxhYmVscyUyMCUzRCUyMHRvcmNoLnRlbnNvcigwKS51bnNxdWVlemUoMCklMjAlMjAlMjMlMjBjaG9pY2UwJTIwaXMlMjBjb3JyZWN0JTIwKGFjY29yZGluZyUyMHRvJTIwV2lraXBlZGlhJTIwJTNCKSklMkMlMjBiYXRjaCUyMHNpemUlMjAxJTBBJTBBZW5jb2RpbmclMjAlM0QlMjB0b2tlbml6ZXIoJTVCcHJvbXB0JTJDJTIwcHJvbXB0JTVEJTJDJTIwJTVCY2hvaWNlMCUyQyUyMGNob2ljZTElNUQlMkMlMjByZXR1cm5fdGVuc29ycyUzRCUyMnB0JTIyJTJDJTIwcGFkZGluZyUzRFRydWUpJTBBb3V0cHV0cyUyMCUzRCUyMG1vZGVsKCoqJTdCayUzQSUyMHYudW5zcXVlZXplKDApJTIwZm9yJTIwayUyQyUyMHYlMjBpbiUyMGVuY29kaW5nLml0ZW1zKCklN0QlMkMlMjBsYWJlbHMlM0RsYWJlbHMpJTIwJTIwJTIzJTIwYmF0Y2glMjBzaXplJTIwaXMlMjAxJTBBJTBBJTIzJTIwdGhlJTIwbGluZWFyJTIwY2xhc3NpZmllciUyMHN0aWxsJTIwbmVlZHMlMjB0byUyMGJlJTIwdHJhaW5lZCUwQWxvc3MlMjAlM0QlMjBvdXRwdXRzLmxvc3MlMEFsb2dpdHMlMjAlM0QlMjBvdXRwdXRzLmxvZ2l0cw==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, ErnieForMultipleChoice
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch

	<span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"nghuyong/ernie-1.0-base-zh"</span>)
	<span class="hljs-meta">>>> </span>model = ErnieForMultipleChoice.from_pretrained(<span class="hljs-string">"nghuyong/ernie-1.0-base-zh"</span>)

	<span class="hljs-meta">>>> </span>prompt = <span class="hljs-string">"In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."</span>
	<span class="hljs-meta">>>> </span>choice0 = <span class="hljs-string">"It is eaten with a fork and a knife."</span>
	<span class="hljs-meta">>>> </span>choice1 = <span class="hljs-string">"It is eaten while held in the hand."</span>
	<span class="hljs-meta">>>> </span>labels = torch.tensor(<span class="hljs-number">0</span>).unsqueeze(<span class="hljs-number">0</span>) <span class="hljs-comment"># choice0 is correct (according to Wikipedia ;)), batch size 1</span>

	<span class="hljs-meta">>>> </span>encoding = tokenizer([prompt, prompt], [choice0, choice1], return_tensors=<span class="hljs-string">"pt"</span>, padding=<span class="hljs-literal">True</span>)
	<span class="hljs-meta">>>> </span>outputs = model(**{k: v.unsqueeze(<span class="hljs-number">0</span>) <span class="hljs-keyword">for</span> k, v <span class="hljs-keyword">in</span> encoding.items()}, labels=labels) <span class="hljs-comment"># batch size is 1</span>

	<span class="hljs-meta">>>> </span><span class="hljs-comment"># the linear classifier still needs to be trained</span>
	<span class="hljs-meta">>>> </span>loss = outputs.loss
	<span class="hljs-meta">>>> </span>logits = outputs.logits`,wrap:!1}}),{c(){t=l("p"),t.textContent=k,d=s(),u(m.$$.fragment)},l(a){t=c(a,"P",{"data-svelte-h":!0}),h(t)!=="svelte-11lpom8"&&(t.textContent=k),d=r(a),f(m.$$.fragment,a)},m(a,M){p(a,t,M),p(a,d,M),g(m,a,M),T=!0},p:F,i(a){T||(_(m.$$.fragment,a),T=!0)},o(a){b(m.$$.fragment,a),T=!1},d(a){a&&(i(t),i(d)),y(m,a)}}}function br(w){let t,k=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code>
	instance afterwards instead of this since the former takes care of running the pre and post processing steps while
	the latter silently ignores them.`;return{c(){t=l("p"),t.innerHTML=k},l(d){t=c(d,"P",{"data-svelte-h":!0}),h(t)!=="svelte-fincs2"&&(t.innerHTML=k)},m(d,m){p(d,t,m)},p:F,d(d){d&&i(t)}}}function yr(w){let t,k=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code>
	instance afterwards instead of this since the former takes care of running the pre and post processing steps while
	the latter silently ignores them.`;return{c(){t=l("p"),t.innerHTML=k},l(d){t=c(d,"P",{"data-svelte-h":!0}),h(t)!=="svelte-fincs2"&&(t.innerHTML=k)},m(d,m){p(d,t,m)},p:F,d(d){d&&i(t)}}}function kr(w){let t,k,d,m,T,a,M,pn,Fe,rs=`ERNIE is a series of powerful models proposed by baidu, especially in Chinese tasks,
	including <a href="https://arxiv.org/abs/1904.09223" rel="nofollow">ERNIE1.0</a>, <a href="https://ojs.aaai.org/index.php/AAAI/article/view/6428" rel="nofollow">ERNIE2.0</a>,
	<a href="https://arxiv.org/abs/2107.02137" rel="nofollow">ERNIE3.0</a>, <a href="https://arxiv.org/abs/2010.12148" rel="nofollow">ERNIE-Gram</a>, <a href="https://arxiv.org/abs/2110.07244" rel="nofollow">ERNIE-health</a>, etc.`,mn,Je,as='These models are contributed by <a href="https://huggingface.co/nghuyong" rel="nofollow">nghuyong</a> and the official code can be found in <a href="https://github.com/PaddlePaddle/PaddleNLP" rel="nofollow">PaddleNLP</a> (in PaddlePaddle).',hn,je,un,Le,is="Take <code>ernie-1.0-base-zh</code> as an example:",fn,Ie,gn,qe,_n,Ue,ds='<thead><tr><th align="center">Model Name</th> <th align="center">Language</th> <th align="center">Description</th></tr></thead> <tbody><tr><td align="center">ernie-1.0-base-zh</td> <td align="center">Chinese</td> <td align="center">Layer:12, Heads:12, Hidden:768</td></tr> <tr><td align="center">ernie-2.0-base-en</td> <td align="center">English</td> <td align="center">Layer:12, Heads:12, Hidden:768</td></tr> <tr><td align="center">ernie-2.0-large-en</td> <td align="center">English</td> <td align="center">Layer:24, Heads:16, Hidden:1024</td></tr> <tr><td align="center">ernie-3.0-base-zh</td> <td align="center">Chinese</td> <td align="center">Layer:12, Heads:12, Hidden:768</td></tr> <tr><td align="center">ernie-3.0-medium-zh</td> <td align="center">Chinese</td> <td align="center">Layer:6, Heads:12, Hidden:768</td></tr> <tr><td align="center">ernie-3.0-mini-zh</td> <td align="center">Chinese</td> <td align="center">Layer:6, Heads:12, Hidden:384</td></tr> <tr><td align="center">ernie-3.0-micro-zh</td> <td align="center">Chinese</td> <td align="center">Layer:4, Heads:12, Hidden:384</td></tr> <tr><td align="center">ernie-3.0-nano-zh</td> <td align="center">Chinese</td> <td align="center">Layer:4, Heads:12, Hidden:312</td></tr> <tr><td align="center">ernie-health-zh</td> <td align="center">Chinese</td> <td align="center">Layer:12, Heads:12, Hidden:768</td></tr> <tr><td align="center">ernie-gram-zh</td> <td align="center">Chinese</td> <td align="center">Layer:12, Heads:12, Hidden:768</td></tr></tbody>',bn,We,ls=`You can find all the supported models from huggingface’s model hub: <a href="https://huggingface.co/nghuyong" rel="nofollow">huggingface.co/nghuyong</a>, and model details from paddle’s official
	repo: <a href="https://paddlenlp.readthedocs.io/zh/latest/model_zoo/transformers/ERNIE/contents.html" rel="nofollow">PaddleNLP</a>
	and <a href="https://github.com/PaddlePaddle/ERNIE/blob/repro" rel="nofollow">ERNIE</a>.`,yn,Pe,kn,Ne,cs='<li><a href="../tasks/sequence_classification">Text classification task guide</a></li> <li><a href="../tasks/token_classification">Token classification task guide</a></li> <li><a href="../tasks/question_answering">Question answering task guide</a></li> <li><a href="../tasks/language_modeling">Causal language modeling task guide</a></li> <li><a href="../tasks/masked_language_modeling">Masked language modeling task guide</a></li> <li><a href="../tasks/multiple_choice">Multiple choice task guide</a></li>',Tn,He,Mn,N,Ze,Rn,kt,ps=`This is the configuration class to store the configuration of a <a href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieModel">ErnieModel</a> or a <code>TFErnieModel</code>. It is used to
	instantiate a ERNIE model according to the specified arguments, defining the model architecture. Instantiating a
	configuration with the defaults will yield a similar configuration to that of the ERNIE
	<a href="https://huggingface.co/nghuyong/ernie-3.0-base-zh" rel="nofollow">nghuyong/ernie-3.0-base-zh</a> architecture.`,Xn,Tt,ms=`Configuration objects inherit from <a href="/docs/transformers/pr_33913/en/main_classes/configuration#transformers.PretrainedConfig">PretrainedConfig</a> and can be used to control the model outputs. Read the
	documentation from <a href="/docs/transformers/pr_33913/en/main_classes/configuration#transformers.PretrainedConfig">PretrainedConfig</a> for more information.`,Gn,he,wn,Be,vn,ce,Se,An,Mt,hs='Output type of <a href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieForPreTraining">ErnieForPreTraining</a>.',$n,Ve,En,x,Oe,Qn,wt,us="The bare Ernie Model transformer outputting raw hidden-states without any specific head on top.",Dn,vt,fs=`This model inherits from <a href="/docs/transformers/pr_33913/en/main_classes/model#transformers.PreTrainedModel">PreTrainedModel</a>. Check the superclass documentation for the generic methods the
	library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
	etc.)`,Yn,$t,gs=`This model is also a PyTorch <a href="https://pytorch.org/docs/stable/nn.html#torch.nn.Module" rel="nofollow">torch.nn.Module</a> subclass.
	Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
	and behavior.`,Kn,Et,_s=`The model can behave as an encoder (with only self-attention) as well as a decoder, in which case a layer of
	cross-attention is added between the self-attention layers, following the architecture described in <a href="https://arxiv.org/abs/1706.03762" rel="nofollow">Attention is
	all you need</a> by Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit,
	Llion Jones, Aidan N. Gomez, Lukasz Kaiser and Illia Polosukhin.`,eo,xt,bs=`To behave as an decoder the model needs to be initialized with the <code>is_decoder</code> argument of the configuration set
	to <code>True</code>. To be used in a Seq2Seq model, the model needs to initialized with both <code>is_decoder</code> argument and
	<code>add_cross_attention</code> set to <code>True</code>; an <code>encoder_hidden_states</code> is then expected as an input to the forward pass.`,to,R,Re,no,zt,ys='The <a href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieModel">ErnieModel</a> forward method, overrides the <code>__call__</code> special method.',oo,ue,so,fe,xn,Xe,zn,J,Ge,ro,Ct,ks="Ernie Model with two heads on top as done during the pretraining: a <code>masked language modeling</code> head and a <code>next sentence prediction (classification)</code> head.",ao,Ft,Ts=`This model inherits from <a href="/docs/transformers/pr_33913/en/main_classes/model#transformers.PreTrainedModel">PreTrainedModel</a>. Check the superclass documentation for the generic methods the
	library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
	etc.)`,io,Jt,Ms=`This model is also a PyTorch <a href="https://pytorch.org/docs/stable/nn.html#torch.nn.Module" rel="nofollow">torch.nn.Module</a> subclass.
	Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
	and behavior.`,lo,X,Ae,co,jt,ws='The <a href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieForPreTraining">ErnieForPreTraining</a> forward method, overrides the <code>__call__</code> special method.',po,ge,mo,_e,Cn,Qe,Fn,j,De,ho,Lt,vs="Ernie Model with a <code>language modeling</code> head on top for CLM fine-tuning.",uo,It,$s=`This model inherits from <a href="/docs/transformers/pr_33913/en/main_classes/model#transformers.PreTrainedModel">PreTrainedModel</a>. Check the superclass documentation for the generic methods the
	library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
	etc.)`,fo,qt,Es=`This model is also a PyTorch <a href="https://pytorch.org/docs/stable/nn.html#torch.nn.Module" rel="nofollow">torch.nn.Module</a> subclass.
	Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
	and behavior.`,go,G,Ye,_o,Ut,xs='The <a href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieForCausalLM">ErnieForCausalLM</a> forward method, overrides the <code>__call__</code> special method.',bo,be,yo,ye,Jn,Ke,jn,L,et,ko,Wt,zs="Ernie Model with a <code>language modeling</code> head on top.",To,Pt,Cs=`This model inherits from <a href="/docs/transformers/pr_33913/en/main_classes/model#transformers.PreTrainedModel">PreTrainedModel</a>. Check the superclass documentation for the generic methods the
	library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
	etc.)`,Mo,Nt,Fs=`This model is also a PyTorch <a href="https://pytorch.org/docs/stable/nn.html#torch.nn.Module" rel="nofollow">torch.nn.Module</a> subclass.
	Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
	and behavior.`,wo,A,tt,vo,Ht,Js='The <a href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieForMaskedLM">ErnieForMaskedLM</a> forward method, overrides the <code>__call__</code> special method.',$o,ke,Eo,Te,Ln,nt,In,I,ot,xo,Zt,js="Ernie Model with a <code>next sentence prediction (classification)</code> head on top.",zo,Bt,Ls=`This model inherits from <a href="/docs/transformers/pr_33913/en/main_classes/model#transformers.PreTrainedModel">PreTrainedModel</a>. Check the superclass documentation for the generic methods the
	library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
	etc.)`,Co,St,Is=`This model is also a PyTorch <a href="https://pytorch.org/docs/stable/nn.html#torch.nn.Module" rel="nofollow">torch.nn.Module</a> subclass.
	Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
	and behavior.`,Fo,Q,st,Jo,Vt,qs='The <a href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieForNextSentencePrediction">ErnieForNextSentencePrediction</a> forward method, overrides the <code>__call__</code> special method.',jo,Me,Lo,we,qn,rt,Un,q,at,Io,Ot,Us=`Ernie Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled
	output) e.g. for GLUE tasks.`,qo,Rt,Ws=`This model inherits from <a href="/docs/transformers/pr_33913/en/main_classes/model#transformers.PreTrainedModel">PreTrainedModel</a>. Check the superclass documentation for the generic methods the
	library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
	etc.)`,Uo,Xt,Ps=`This model is also a PyTorch <a href="https://pytorch.org/docs/stable/nn.html#torch.nn.Module" rel="nofollow">torch.nn.Module</a> subclass.
	Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
	and behavior.`,Wo,re,it,Po,Gt,Ns='The <a href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieForSequenceClassification">ErnieForSequenceClassification</a> forward method, overrides the <code>__call__</code> special method.',No,ve,Wn,dt,Pn,U,lt,Ho,At,Hs=`Ernie Model with a multiple choice classification head on top (a linear layer on top of the pooled output and a
	softmax) e.g. for RocStories/SWAG tasks.`,Zo,Qt,Zs=`This model inherits from <a href="/docs/transformers/pr_33913/en/main_classes/model#transformers.PreTrainedModel">PreTrainedModel</a>. Check the superclass documentation for the generic methods the
	library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
	etc.)`,Bo,Dt,Bs=`This model is also a PyTorch <a href="https://pytorch.org/docs/stable/nn.html#torch.nn.Module" rel="nofollow">torch.nn.Module</a> subclass.
	Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
	and behavior.`,So,D,ct,Vo,Yt,Ss='The <a href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieForMultipleChoice">ErnieForMultipleChoice</a> forward method, overrides the <code>__call__</code> special method.',Oo,$e,Ro,Ee,Nn,pt,Hn,W,mt,Xo,Kt,Vs=`Ernie Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for
	Named-Entity-Recognition (NER) tasks.`,Go,en,Os=`This model inherits from <a href="/docs/transformers/pr_33913/en/main_classes/model#transformers.PreTrainedModel">PreTrainedModel</a>. Check the superclass documentation for the generic methods the
	library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
	etc.)`,Ao,tn,Rs=`This model is also a PyTorch <a href="https://pytorch.org/docs/stable/nn.html#torch.nn.Module" rel="nofollow">torch.nn.Module</a> subclass.
	Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
	and behavior.`,Qo,ae,ht,Do,nn,Xs='The <a href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieForTokenClassification">ErnieForTokenClassification</a> forward method, overrides the <code>__call__</code> special method.',Yo,xe,Zn,ut,Bn,P,ft,Ko,on,Gs=`Ernie Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear
	layers on top of the hidden-states output to compute <code>span start logits</code> and <code>span end logits</code>).`,es,sn,As=`This model inherits from <a href="/docs/transformers/pr_33913/en/main_classes/model#transformers.PreTrainedModel">PreTrainedModel</a>. Check the superclass documentation for the generic methods the
	library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
	etc.)`,ts,rn,Qs=`This model is also a PyTorch <a href="https://pytorch.org/docs/stable/nn.html#torch.nn.Module" rel="nofollow">torch.nn.Module</a> subclass.
	Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
	and behavior.`,ns,ie,gt,os,an,Ds='The <a href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieForQuestionAnswering">ErnieForQuestionAnswering</a> forward method, overrides the <code>__call__</code> special method.',ss,ze,Sn,_t,Vn,cn,On;return T=new C({props:{title:"ERNIE",local:"ernie",headingTag:"h1"}}),M=new C({props:{title:"Overview",local:"overview",headingTag:"h2"}}),je=new C({props:{title:"Usage example",local:"usage-example",headingTag:"h3"}}),Ie=new Ce({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMkMlMjBBdXRvTW9kZWwlMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCglMjJuZ2h1eW9uZyUyRmVybmllLTEuMC1iYXNlLXpoJTIyKSUwQW1vZGVsJTIwJTNEJTIwQXV0b01vZGVsLmZyb21fcHJldHJhaW5lZCglMjJuZ2h1eW9uZyUyRmVybmllLTEuMC1iYXNlLXpoJTIyKQ==",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, AutoModel
	tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"nghuyong/ernie-1.0-base-zh"</span>)
	model = AutoModel.from_pretrained(<span class="hljs-string">"nghuyong/ernie-1.0-base-zh"</span>)`,wrap:!1}}),qe=new C({props:{title:"Model checkpoints",local:"model-checkpoints",headingTag:"h3"}}),Pe=new C({props:{title:"Resources",local:"resources",headingTag:"h2"}}),He=new C({props:{title:"ErnieConfig",local:"transformers.ErnieConfig",headingTag:"h2"}}),Ze=new E({props:{name:"class transformers.ErnieConfig",anchor:"transformers.ErnieConfig",parameters:[{name:"vocab_size",val:" = 30522"},{name:"hidden_size",val:" = 768"},{name:"num_hidden_layers",val:" = 12"},{name:"num_attention_heads",val:" = 12"},{name:"intermediate_size",val:" = 3072"},{name:"hidden_act",val:" = 'gelu'"},{name:"hidden_dropout_prob",val:" = 0.1"},{name:"attention_probs_dropout_prob",val:" = 0.1"},{name:"max_position_embeddings",val:" = 512"},{name:"type_vocab_size",val:" = 2"},{name:"task_type_vocab_size",val:" = 3"},{name:"use_task_id",val:" = False"},{name:"initializer_range",val:" = 0.02"},{name:"layer_norm_eps",val:" = 1e-12"},{name:"pad_token_id",val:" = 0"},{name:"position_embedding_type",val:" = 'absolute'"},{name:"use_cache",val:" = True"},{name:"classifier_dropout",val:" = None"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.ErnieConfig.vocab_size",description:`<strong>vocab_size</strong> (<code>int</code>, <em>optional</em>, defaults to 30522) —
	Vocabulary size of the ERNIE model. Defines the number of different tokens that can be represented by the
	<code>inputs_ids</code> passed when calling <a href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieModel">ErnieModel</a> or <code>TFErnieModel</code>.`,name:"vocab_size"},{anchor:"transformers.ErnieConfig.hidden_size",description:`<strong>hidden_size</strong> (<code>int</code>, <em>optional</em>, defaults to 768) —
	Dimensionality of the encoder layers and the pooler layer.`,name:"hidden_size"},{anchor:"transformers.ErnieConfig.num_hidden_layers",description:`<strong>num_hidden_layers</strong> (<code>int</code>, <em>optional</em>, defaults to 12) —
	Number of hidden layers in the Transformer encoder.`,name:"num_hidden_layers"},{anchor:"transformers.ErnieConfig.num_attention_heads",description:`<strong>num_attention_heads</strong> (<code>int</code>, <em>optional</em>, defaults to 12) —
	Number of attention heads for each attention layer in the Transformer encoder.`,name:"num_attention_heads"},{anchor:"transformers.ErnieConfig.intermediate_size",description:`<strong>intermediate_size</strong> (<code>int</code>, <em>optional</em>, defaults to 3072) —
	Dimensionality of the “intermediate” (often named feed-forward) layer in the Transformer encoder.`,name:"intermediate_size"},{anchor:"transformers.ErnieConfig.hidden_act",description:`<strong>hidden_act</strong> (<code>str</code> or <code>Callable</code>, <em>optional</em>, defaults to <code>"gelu"</code>) —
	The non-linear activation function (function or string) in the encoder and pooler. If string, <code>"gelu"</code>,
	<code>"relu"</code>, <code>"silu"</code> and <code>"gelu_new"</code> are supported.`,name:"hidden_act"},{anchor:"transformers.ErnieConfig.hidden_dropout_prob",description:`<strong>hidden_dropout_prob</strong> (<code>float</code>, <em>optional</em>, defaults to 0.1) —
	The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.`,name:"hidden_dropout_prob"},{anchor:"transformers.ErnieConfig.attention_probs_dropout_prob",description:`<strong>attention_probs_dropout_prob</strong> (<code>float</code>, <em>optional</em>, defaults to 0.1) —
	The dropout ratio for the attention probabilities.`,name:"attention_probs_dropout_prob"},{anchor:"transformers.ErnieConfig.max_position_embeddings",description:`<strong>max_position_embeddings</strong> (<code>int</code>, <em>optional</em>, defaults to 512) —
	The maximum sequence length that this model might ever be used with. Typically set this to something large
	just in case (e.g., 512 or 1024 or 2048).`,name:"max_position_embeddings"},{anchor:"transformers.ErnieConfig.type_vocab_size",description:`<strong>type_vocab_size</strong> (<code>int</code>, <em>optional</em>, defaults to 2) —
	The vocabulary size of the <code>token_type_ids</code> passed when calling <a href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieModel">ErnieModel</a> or <code>TFErnieModel</code>.`,name:"type_vocab_size"},{anchor:"transformers.ErnieConfig.task_type_vocab_size",description:`<strong>task_type_vocab_size</strong> (<code>int</code>, <em>optional</em>, defaults to 3) —
	The vocabulary size of the <code>task_type_ids</code> for ERNIE2.0/ERNIE3.0 model`,name:"task_type_vocab_size"},{anchor:"transformers.ErnieConfig.use_task_id",description:`<strong>use_task_id</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) —
	Whether or not the model support <code>task_type_ids</code>`,name:"use_task_id"},{anchor:"transformers.ErnieConfig.initializer_range",description:`<strong>initializer_range</strong> (<code>float</code>, <em>optional</em>, defaults to 0.02) —
	The standard deviation of the truncated_normal_initializer for initializing all weight matrices.`,name:"initializer_range"},{anchor:"transformers.ErnieConfig.layer_norm_eps",description:`<strong>layer_norm_eps</strong> (<code>float</code>, <em>optional</em>, defaults to 1e-12) —
	The epsilon used by the layer normalization layers.`,name:"layer_norm_eps"},{anchor:"transformers.ErnieConfig.pad_token_id",description:`<strong>pad_token_id</strong> (<code>int</code>, <em>optional</em>, defaults to 0) —
	Padding token id.`,name:"pad_token_id"},{anchor:"transformers.ErnieConfig.position_embedding_type",description:`<strong>position_embedding_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"absolute"</code>) —
	Type of position embedding. Choose one of <code>"absolute"</code>, <code>"relative_key"</code>, <code>"relative_key_query"</code>. For
	positional embeddings use <code>"absolute"</code>. For more information on <code>"relative_key"</code>, please refer to
	<a href="https://arxiv.org/abs/1803.02155" rel="nofollow">Self-Attention with Relative Position Representations (Shaw et al.)</a>.
	For more information on <code>"relative_key_query"</code>, please refer to <em>Method 4</em> in <a href="https://arxiv.org/abs/2009.13658" rel="nofollow">Improve Transformer Models
	with Better Relative Position Embeddings (Huang et al.)</a>.`,name:"position_embedding_type"},{anchor:"transformers.ErnieConfig.use_cache",description:`<strong>use_cache</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) —
	Whether or not the model should return the last key/values attentions (not used by all models). Only
	relevant if <code>config.is_decoder=True</code>.`,name:"use_cache"},{anchor:"transformers.ErnieConfig.classifier_dropout",description:`<strong>classifier_dropout</strong> (<code>float</code>, <em>optional</em>) —
	The dropout ratio for the classification head.`,name:"classifier_dropout"}],source:"https://github.com/huggingface/transformers/blob/vr_33913/src/transformers/models/ernie/configuration_ernie.py#L29"}}),he=new yt({props:{anchor:"transformers.ErnieConfig.example",$$slots:{default:[sr]},$$scope:{ctx:w}}}),Be=new C({props:{title:"Ernie specific outputs",local:"transformers.models.ernie.modeling_ernie.ErnieForPreTrainingOutput",headingTag:"h2"}}),Se=new E({props:{name:"class transformers.models.ernie.modeling_ernie.ErnieForPreTrainingOutput",anchor:"transformers.models.ernie.modeling_ernie.ErnieForPreTrainingOutput",parameters:[{name:"loss",val:": Optional = None"},{name:"prediction_logits",val:": FloatTensor = None"},{name:"seq_relationship_logits",val:": FloatTensor = None"},{name:"hidden_states",val:": Optional = None"},{name:"attentions",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.models.ernie.modeling_ernie.ErnieForPreTrainingOutput.loss",description:`<strong>loss</strong> (<em>optional</em>, returned when <code>labels</code> is provided, <code>torch.FloatTensor</code> of shape <code>(1,)</code>) —
	Total loss as the sum of the masked language modeling loss and the next sequence prediction
	(classification) loss.`,name:"loss"},{anchor:"transformers.models.ernie.modeling_ernie.ErnieForPreTrainingOutput.prediction_logits",description:`<strong>prediction_logits</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, config.vocab_size)</code>) —
	Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).`,name:"prediction_logits"},{anchor:"transformers.models.ernie.modeling_ernie.ErnieForPreTrainingOutput.seq_relationship_logits",description:`<strong>seq_relationship_logits</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, 2)</code>) —
	Prediction scores of the next sequence prediction (classification) head (scores of True/False continuation
	before SoftMax).`,name:"seq_relationship_logits"},{anchor:"transformers.models.ernie.modeling_ernie.ErnieForPreTrainingOutput.hidden_states",description:`<strong>hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) —
	Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings + one for the output of each layer) of
	shape <code>(batch_size, sequence_length, hidden_size)</code>.</p>
	<p>Hidden-states of the model at the output of each layer plus the initial embedding outputs.`,name:"hidden_states"},{anchor:"transformers.models.ernie.modeling_ernie.ErnieForPreTrainingOutput.attentions",description:`<strong>attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) —
	Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p>
	<p>Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
	heads.`,name:"attentions"}],source:"https://github.com/huggingface/transformers/blob/vr_33913/src/transformers/models/ernie/modeling_ernie.py#L674"}}),Ve=new C({props:{title:"ErnieModel",local:"transformers.ErnieModel",headingTag:"h2"}}),Oe=new E({props:{name:"class transformers.ErnieModel",anchor:"transformers.ErnieModel",parameters:[{name:"config",val:""},{name:"add_pooling_layer",val:" = True"}],parametersDescription:[{anchor:"transformers.ErnieModel.config",description:`<strong>config</strong> (<a href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieConfig">ErnieConfig</a>) — Model configuration class with all the parameters of the model.
	Initializing with a config file does not load the weights associated with the model, only the
	configuration. Check out the <a href="/docs/transformers/pr_33913/en/main_classes/model#transformers.PreTrainedModel.from_pretrained">from_pretrained()</a> method to load the model weights.`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/vr_33913/src/transformers/models/ernie/modeling_ernie.py#L780"}}),Re=new E({props:{name:"forward",anchor:"transformers.ErnieModel.forward",parameters:[{name:"input_ids",val:": Optional = None"},{name:"attention_mask",val:": Optional = None"},{name:"token_type_ids",val:": Optional = None"},{name:"task_type_ids",val:": Optional = None"},{name:"position_ids",val:": Optional = None"},{name:"head_mask",val:": Optional = None"},{name:"inputs_embeds",val:": Optional = None"},{name:"encoder_hidden_states",val:": Optional = None"},{name:"encoder_attention_mask",val:": Optional = None"},{name:"past_key_values",val:": Optional = None"},{name:"use_cache",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.ErnieModel.forward.input_ids",description:`<strong>input_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>) —
	Indices of input sequence tokens in the vocabulary.</p>
	<p>Indices can be obtained using <a href="/docs/transformers/pr_33913/en/model_doc/auto#transformers.AutoTokenizer">AutoTokenizer</a>. See <a href="/docs/transformers/pr_33913/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.encode">PreTrainedTokenizer.encode()</a> and
	<a href="/docs/transformers/pr_33913/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.__call__">PreTrainedTokenizer.<strong>call</strong>()</a> for details.</p>
	<p><a href="../glossary#input-ids">What are input IDs?</a>`,name:"input_ids"},{anchor:"transformers.ErnieModel.forward.attention_mask",description:`<strong>attention_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Mask to avoid performing attention on padding token indices. Mask values selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>1 for tokens that are <strong>not masked</strong>,</li>
	<li>0 for tokens that are <strong>masked</strong>.</li>
	</ul>
	<p><a href="../glossary#attention-mask">What are attention masks?</a>`,name:"attention_mask"},{anchor:"transformers.ErnieModel.forward.token_type_ids",description:`<strong>token_type_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Segment token indices to indicate first and second portions of the inputs. Indices are selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>0 corresponds to a <em>sentence A</em> token,</li>
	<li>1 corresponds to a <em>sentence B</em> token.</li>
	</ul>
	<p><a href="../glossary#token-type-ids">What are token type IDs?</a>`,name:"token_type_ids"},{anchor:"transformers.ErnieModel.forward.task_type_ids",description:`<strong>task_type_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Task type embedding is a special embedding to represent the characteristic of different tasks, such as
	word-aware pre-training task, structure-aware pre-training task and semantic-aware pre-training task. We
	assign a <code>task_type_id</code> to each task and the <code>task_type_id</code> is in the range \`[0,
	config.task_type_vocab_size-1]`,name:"task_type_ids"},{anchor:"transformers.ErnieModel.forward.position_ids",description:`<strong>position_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Indices of positions of each input sequence tokens in the position embeddings. Selected in the range <code>[0, config.max_position_embeddings - 1]</code>.</p>
	<p><a href="../glossary#position-ids">What are position IDs?</a>`,name:"position_ids"},{anchor:"transformers.ErnieModel.forward.head_mask",description:`<strong>head_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(num_heads,)</code> or <code>(num_layers, num_heads)</code>, <em>optional</em>) —
	Mask to nullify selected heads of the self-attention modules. Mask values selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>1 indicates the head is <strong>not masked</strong>,</li>
	<li>0 indicates the head is <strong>masked</strong>.</li>
	</ul>`,name:"head_mask"},{anchor:"transformers.ErnieModel.forward.inputs_embeds",description:`<strong>inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) —
	Optionally, instead of passing <code>input_ids</code> you can choose to directly pass an embedded representation. This
	is useful if you want more control over how to convert <code>input_ids</code> indices into associated vectors than the
	model’s internal embedding lookup matrix.`,name:"inputs_embeds"},{anchor:"transformers.ErnieModel.forward.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned
	tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.ErnieModel.forward.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for
	more detail.`,name:"output_hidden_states"},{anchor:"transformers.ErnieModel.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return a <a href="/docs/transformers/pr_33913/en/main_classes/output#transformers.utils.ModelOutput">ModelOutput</a> instead of a plain tuple.`,name:"return_dict"},{anchor:"transformers.ErnieModel.forward.encoder_hidden_states",description:`<strong>encoder_hidden_states</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) —
	Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
	the model is configured as a decoder.`,name:"encoder_hidden_states"},{anchor:"transformers.ErnieModel.forward.encoder_attention_mask",description:`<strong>encoder_attention_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used in
	the cross-attention if the model is configured as a decoder. Mask values selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>1 for tokens that are <strong>not masked</strong>,</li>
	<li>0 for tokens that are <strong>masked</strong>.</li>
	</ul>`,name:"encoder_attention_mask"},{anchor:"transformers.ErnieModel.forward.past_key_values",description:`<strong>past_key_values</strong> (<code>tuple(tuple(torch.FloatTensor))</code> of length <code>config.n_layers</code> with each tuple having 4 tensors of shape <code>(batch_size, num_heads, sequence_length - 1, embed_size_per_head)</code>) —
	Contains precomputed key and value hidden states of the attention blocks. Can be used to speed up decoding.</p>
	<p>If <code>past_key_values</code> are used, the user can optionally input only the last <code>decoder_input_ids</code> (those that
	don’t have their past key value states given to this model) of shape <code>(batch_size, 1)</code> instead of all
	<code>decoder_input_ids</code> of shape <code>(batch_size, sequence_length)</code>.`,name:"past_key_values"},{anchor:"transformers.ErnieModel.forward.use_cache",description:`<strong>use_cache</strong> (<code>bool</code>, <em>optional</em>) —
	If set to <code>True</code>, <code>past_key_values</code> key value states are returned and can be used to speed up decoding (see
	<code>past_key_values</code>).`,name:"use_cache"}],source:"https://github.com/huggingface/transformers/blob/vr_33913/src/transformers/models/ernie/modeling_ernie.py#L827",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>A <a
	href="/docs/transformers/pr_33913/en/main_classes/output#transformers.modeling_outputs.BaseModelOutputWithPoolingAndCrossAttentions"
	>transformers.modeling_outputs.BaseModelOutputWithPoolingAndCrossAttentions</a> or a tuple of
	<code>torch.FloatTensor</code> (if <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various
	elements depending on the configuration (<a
	href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieConfig"
	>ErnieConfig</a>) and inputs.</p>
	<ul>
	<li>
	<p><strong>last_hidden_state</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>) — Sequence of hidden-states at the output of the last layer of the model.</p>
	</li>
	<li>
	<p><strong>pooler_output</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, hidden_size)</code>) — Last layer hidden-state of the first token of the sequence (classification token) after further processing
	through the layers used for the auxiliary pretraining task. E.g. for BERT-family of models, this returns
	the classification token after processing through a linear layer and a tanh activation function. The linear
	layer weights are trained from the next sentence prediction (classification) objective during pretraining.</p>
	</li>
	<li>
	<p><strong>hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings, if the model has an embedding layer, +
	one for the output of each layer) of shape <code>(batch_size, sequence_length, hidden_size)</code>.</p>
	<p>Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.</p>
	</li>
	<li>
	<p><strong>attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p>
	<p>Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
	heads.</p>
	</li>
	<li>
	<p><strong>cross_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> and <code>config.add_cross_attention=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p>
	<p>Attentions weights of the decoder’s cross-attention layer, after the attention softmax, used to compute the
	weighted average in the cross-attention heads.</p>
	</li>
	<li>
	<p><strong>past_key_values</strong> (<code>tuple(tuple(torch.FloatTensor))</code>, <em>optional</em>, returned when <code>use_cache=True</code> is passed or when <code>config.use_cache=True</code>) — Tuple of <code>tuple(torch.FloatTensor)</code> of length <code>config.n_layers</code>, with each tuple having 2 tensors of shape
	<code>(batch_size, num_heads, sequence_length, embed_size_per_head)</code>) and optionally if
	<code>config.is_encoder_decoder=True</code> 2 additional tensors of shape <code>(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)</code>.</p>
	<p>Contains pre-computed hidden-states (key and values in the self-attention blocks and optionally if
	<code>config.is_encoder_decoder=True</code> in the cross-attention blocks) that can be used (see <code>past_key_values</code>
	input) to speed up sequential decoding.</p>
	</li>
	</ul>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p><a
	href="/docs/transformers/pr_33913/en/main_classes/output#transformers.modeling_outputs.BaseModelOutputWithPoolingAndCrossAttentions"
	>transformers.modeling_outputs.BaseModelOutputWithPoolingAndCrossAttentions</a> or <code>tuple(torch.FloatTensor)</code></p>
	`}}),ue=new me({props:{$$slots:{default:[rr]},$$scope:{ctx:w}}}),fe=new yt({props:{anchor:"transformers.ErnieModel.forward.example",$$slots:{default:[ar]},$$scope:{ctx:w}}}),Xe=new C({props:{title:"ErnieForPreTraining",local:"transformers.ErnieForPreTraining",headingTag:"h2"}}),Ge=new E({props:{name:"class transformers.ErnieForPreTraining",anchor:"transformers.ErnieForPreTraining",parameters:[{name:"config",val:""}],parametersDescription:[{anchor:"transformers.ErnieForPreTraining.config",description:`<strong>config</strong> (<a href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieConfig">ErnieConfig</a>) — Model configuration class with all the parameters of the model.
	Initializing with a config file does not load the weights associated with the model, only the
	configuration. Check out the <a href="/docs/transformers/pr_33913/en/main_classes/model#transformers.PreTrainedModel.from_pretrained">from_pretrained()</a> method to load the model weights.`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/vr_33913/src/transformers/models/ernie/modeling_ernie.py#L966"}}),Ae=new E({props:{name:"forward",anchor:"transformers.ErnieForPreTraining.forward",parameters:[{name:"input_ids",val:": Optional = None"},{name:"attention_mask",val:": Optional = None"},{name:"token_type_ids",val:": Optional = None"},{name:"task_type_ids",val:": Optional = None"},{name:"position_ids",val:": Optional = None"},{name:"head_mask",val:": Optional = None"},{name:"inputs_embeds",val:": Optional = None"},{name:"labels",val:": Optional = None"},{name:"next_sentence_label",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.ErnieForPreTraining.forward.input_ids",description:`<strong>input_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>) —
	Indices of input sequence tokens in the vocabulary.</p>
	<p>Indices can be obtained using <a href="/docs/transformers/pr_33913/en/model_doc/auto#transformers.AutoTokenizer">AutoTokenizer</a>. See <a href="/docs/transformers/pr_33913/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.encode">PreTrainedTokenizer.encode()</a> and
	<a href="/docs/transformers/pr_33913/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.__call__">PreTrainedTokenizer.<strong>call</strong>()</a> for details.</p>
	<p><a href="../glossary#input-ids">What are input IDs?</a>`,name:"input_ids"},{anchor:"transformers.ErnieForPreTraining.forward.attention_mask",description:`<strong>attention_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Mask to avoid performing attention on padding token indices. Mask values selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>1 for tokens that are <strong>not masked</strong>,</li>
	<li>0 for tokens that are <strong>masked</strong>.</li>
	</ul>
	<p><a href="../glossary#attention-mask">What are attention masks?</a>`,name:"attention_mask"},{anchor:"transformers.ErnieForPreTraining.forward.token_type_ids",description:`<strong>token_type_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Segment token indices to indicate first and second portions of the inputs. Indices are selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>0 corresponds to a <em>sentence A</em> token,</li>
	<li>1 corresponds to a <em>sentence B</em> token.</li>
	</ul>
	<p><a href="../glossary#token-type-ids">What are token type IDs?</a>`,name:"token_type_ids"},{anchor:"transformers.ErnieForPreTraining.forward.task_type_ids",description:`<strong>task_type_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Task type embedding is a special embedding to represent the characteristic of different tasks, such as
	word-aware pre-training task, structure-aware pre-training task and semantic-aware pre-training task. We
	assign a <code>task_type_id</code> to each task and the <code>task_type_id</code> is in the range <code>[0,
	config.task_type_vocab_size-1]</code>`,name:"task_type_ids"},{anchor:"transformers.ErnieForPreTraining.forward.position_ids",description:`<strong>position_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Indices of the position of each input sequence token in the position embeddings. Selected in the range <code>[0, config.max_position_embeddings - 1]</code>.</p>
	<p><a href="../glossary#position-ids">What are position IDs?</a>`,name:"position_ids"},{anchor:"transformers.ErnieForPreTraining.forward.head_mask",description:`<strong>head_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(num_heads,)</code> or <code>(num_layers, num_heads)</code>, <em>optional</em>) —
	Mask to nullify selected heads of the self-attention modules. Mask values selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>1 indicates the head is <strong>not masked</strong>,</li>
	<li>0 indicates the head is <strong>masked</strong>.</li>
	</ul>`,name:"head_mask"},{anchor:"transformers.ErnieForPreTraining.forward.inputs_embeds",description:`<strong>inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) —
	Optionally, instead of passing <code>input_ids</code> you can choose to directly pass an embedded representation. This
	is useful if you want more control over how to convert <code>input_ids</code> indices into associated vectors than the
	model’s internal embedding lookup matrix.`,name:"inputs_embeds"},{anchor:"transformers.ErnieForPreTraining.forward.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned
	tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.ErnieForPreTraining.forward.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for
	more detail.`,name:"output_hidden_states"},{anchor:"transformers.ErnieForPreTraining.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return a <a href="/docs/transformers/pr_33913/en/main_classes/output#transformers.utils.ModelOutput">ModelOutput</a> instead of a plain tuple.</p>
	<p><strong>labels</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Labels for computing the masked language modeling loss. Indices should be in <code>[-100, 0, ..., config.vocab_size]</code> (see <code>input_ids</code> docstring). Tokens with indices set to <code>-100</code> are ignored (masked);
	the loss is only computed for the tokens with labels in <code>[0, ..., config.vocab_size]</code>.</p>
	<p><strong>next_sentence_label</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size,)</code>, <em>optional</em>) —
	Labels for computing the next sequence prediction (classification) loss. Input should be a sequence
	pair (see <code>input_ids</code> docstring) Indices should be in <code>[0, 1]</code>:</p>
	<ul>
	<li>0 indicates sequence B is a continuation of sequence A,</li>
	<li>1 indicates sequence B is a random sequence.</li>
	</ul>
	<p><strong>kwargs</strong> (<code>Dict[str, any]</code>, <em>optional</em>, defaults to <code>{}</code>) —
	Used to hide legacy arguments that have been deprecated.`,name:"return_dict"}],source:"https://github.com/huggingface/transformers/blob/vr_33913/src/transformers/models/ernie/modeling_ernie.py#L995",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>A <a
	href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.models.ernie.modeling_ernie.ErnieForPreTrainingOutput"
	>transformers.models.ernie.modeling_ernie.ErnieForPreTrainingOutput</a> or a tuple of
	<code>torch.FloatTensor</code> (if <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various
	elements depending on the configuration (<a
	href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieConfig"
	>ErnieConfig</a>) and inputs.</p>
	<ul>
	<li>
	<p><strong>loss</strong> (<em>optional</em>, returned when <code>labels</code> is provided, <code>torch.FloatTensor</code> of shape <code>(1,)</code>) — Total loss as the sum of the masked language modeling loss and the next sequence prediction
	(classification) loss.</p>
	</li>
	<li>
	<p><strong>prediction_logits</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, config.vocab_size)</code>) — Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).</p>
	</li>
	<li>
	<p><strong>seq_relationship_logits</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, 2)</code>) — Prediction scores of the next sequence prediction (classification) head (scores of True/False continuation
	before SoftMax).</p>
	</li>
	<li>
	<p><strong>hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings + one for the output of each layer) of
	shape <code>(batch_size, sequence_length, hidden_size)</code>.</p>
	<p>Hidden-states of the model at the output of each layer plus the initial embedding outputs.</p>
	</li>
	<li>
	<p><strong>attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p>
	<p>Attention weights after the attention softmax, used to compute the weighted average in the self-attention
	heads.</p>
	</li>
	</ul>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p><a
	href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.models.ernie.modeling_ernie.ErnieForPreTrainingOutput"
	>transformers.models.ernie.modeling_ernie.ErnieForPreTrainingOutput</a> or <code>tuple(torch.FloatTensor)</code></p>
	`}}),ge=new me({props:{$$slots:{default:[ir]},$$scope:{ctx:w}}}),_e=new yt({props:{anchor:"transformers.ErnieForPreTraining.forward.example",$$slots:{default:[dr]},$$scope:{ctx:w}}}),Qe=new C({props:{title:"ErnieForCausalLM",local:"transformers.ErnieForCausalLM",headingTag:"h2"}}),De=new E({props:{name:"class transformers.ErnieForCausalLM",anchor:"transformers.ErnieForCausalLM",parameters:[{name:"config",val:""}],parametersDescription:[{anchor:"transformers.ErnieForCausalLM.config",description:`<strong>config</strong> (<a href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieConfig">ErnieConfig</a>) — Model configuration class with all the parameters of the model.
	Initializing with a config file does not load the weights associated with the model, only the
	configuration. Check out the <a href="/docs/transformers/pr_33913/en/main_classes/model#transformers.PreTrainedModel.from_pretrained">from_pretrained()</a> method to load the model weights.`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/vr_33913/src/transformers/models/ernie/modeling_ernie.py#L1082"}}),Ye=new E({props:{name:"forward",anchor:"transformers.ErnieForCausalLM.forward",parameters:[{name:"input_ids",val:": Optional = None"},{name:"attention_mask",val:": Optional = None"},{name:"token_type_ids",val:": Optional = None"},{name:"task_type_ids",val:": Optional = None"},{name:"position_ids",val:": Optional = None"},{name:"head_mask",val:": Optional = None"},{name:"inputs_embeds",val:": Optional = None"},{name:"encoder_hidden_states",val:": Optional = None"},{name:"encoder_attention_mask",val:": Optional = None"},{name:"labels",val:": Optional = None"},{name:"past_key_values",val:": Optional = None"},{name:"use_cache",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.ErnieForCausalLM.forward.input_ids",description:`<strong>input_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>) —
	Indices of input sequence tokens in the vocabulary.</p>
	<p>Indices can be obtained using <a href="/docs/transformers/pr_33913/en/model_doc/auto#transformers.AutoTokenizer">AutoTokenizer</a>. See <a href="/docs/transformers/pr_33913/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.encode">PreTrainedTokenizer.encode()</a> and
	<a href="/docs/transformers/pr_33913/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.__call__">PreTrainedTokenizer.<strong>call</strong>()</a> for details.</p>
	<p><a href="../glossary#input-ids">What are input IDs?</a>`,name:"input_ids"},{anchor:"transformers.ErnieForCausalLM.forward.attention_mask",description:`<strong>attention_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Mask to avoid performing attention on padding token indices. Mask values selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>1 for tokens that are <strong>not masked</strong>,</li>
	<li>0 for tokens that are <strong>masked</strong>.</li>
	</ul>
	<p><a href="../glossary#attention-mask">What are attention masks?</a>`,name:"attention_mask"},{anchor:"transformers.ErnieForCausalLM.forward.token_type_ids",description:`<strong>token_type_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Segment token indices to indicate first and second portions of the inputs. Indices are selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>0 corresponds to a <em>sentence A</em> token,</li>
	<li>1 corresponds to a <em>sentence B</em> token.</li>
	</ul>
	<p><a href="../glossary#token-type-ids">What are token type IDs?</a>`,name:"token_type_ids"},{anchor:"transformers.ErnieForCausalLM.forward.task_type_ids",description:`<strong>task_type_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Task type embedding is a special embedding to represent the characteristic of different tasks, such as
	word-aware pre-training task, structure-aware pre-training task and semantic-aware pre-training task. We
	assign a <code>task_type_id</code> to each task and the <code>task_type_id</code> is in the range <code>[0,
	config.task_type_vocab_size-1]</code>`,name:"task_type_ids"},{anchor:"transformers.ErnieForCausalLM.forward.position_ids",description:`<strong>position_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Indices of the position of each input sequence token in the position embeddings. Selected in the range <code>[0, config.max_position_embeddings - 1]</code>.</p>
	<p><a href="../glossary#position-ids">What are position IDs?</a>`,name:"position_ids"},{anchor:"transformers.ErnieForCausalLM.forward.head_mask",description:`<strong>head_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(num_heads,)</code> or <code>(num_layers, num_heads)</code>, <em>optional</em>) —
	Mask to nullify selected heads of the self-attention modules. Mask values selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>1 indicates the head is <strong>not masked</strong>,</li>
	<li>0 indicates the head is <strong>masked</strong>.</li>
	</ul>`,name:"head_mask"},{anchor:"transformers.ErnieForCausalLM.forward.inputs_embeds",description:`<strong>inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) —
	Optionally, instead of passing <code>input_ids</code> you can choose to directly pass an embedded representation. This
	is useful if you want more control over how to convert <code>input_ids</code> indices into associated vectors than the
	model’s internal embedding lookup matrix.`,name:"inputs_embeds"},{anchor:"transformers.ErnieForCausalLM.forward.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned
	tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.ErnieForCausalLM.forward.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for
	more detail.`,name:"output_hidden_states"},{anchor:"transformers.ErnieForCausalLM.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return a <a href="/docs/transformers/pr_33913/en/main_classes/output#transformers.utils.ModelOutput">ModelOutput</a> instead of a plain tuple.`,name:"return_dict"},{anchor:"transformers.ErnieForCausalLM.forward.encoder_hidden_states",description:`<strong>encoder_hidden_states</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) —
	Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
	the model is configured as a decoder.`,name:"encoder_hidden_states"},{anchor:"transformers.ErnieForCausalLM.forward.encoder_attention_mask",description:`<strong>encoder_attention_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used in
	the cross-attention if the model is configured as a decoder. Mask values selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>1 for tokens that are <strong>not masked</strong>,</li>
	<li>0 for tokens that are <strong>masked</strong>.</li>
	</ul>`,name:"encoder_attention_mask"},{anchor:"transformers.ErnieForCausalLM.forward.labels",description:`<strong>labels</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Labels for computing the left-to-right language modeling loss (next word prediction). Indices should be in
	<code>[-100, 0, ..., config.vocab_size]</code> (see <code>input_ids</code> docstring) Tokens with indices set to <code>-100</code> are
	ignored (masked), the loss is only computed for the tokens with labels in <code>[0, ..., config.vocab_size]</code>`,name:"labels"},{anchor:"transformers.ErnieForCausalLM.forward.past_key_values",description:`<strong>past_key_values</strong> (<code>tuple(tuple(torch.FloatTensor))</code> of length <code>config.n_layers</code> with each tuple having 4 tensors of shape <code>(batch_size, num_heads, sequence_length - 1, embed_size_per_head)</code>) —
	Contains precomputed key and value hidden states of the attention blocks. Can be used to speed up decoding.</p>
	<p>If <code>past_key_values</code> are used, the user can optionally input only the last <code>decoder_input_ids</code> (those that
	don’t have their past key value states given to this model) of shape <code>(batch_size, 1)</code> instead of all
	<code>decoder_input_ids</code> of shape <code>(batch_size, sequence_length)</code>.`,name:"past_key_values"},{anchor:"transformers.ErnieForCausalLM.forward.use_cache",description:`<strong>use_cache</strong> (<code>bool</code>, <em>optional</em>) —
	If set to <code>True</code>, <code>past_key_values</code> key value states are returned and can be used to speed up decoding (see
	<code>past_key_values</code>).`,name:"use_cache"}],source:"https://github.com/huggingface/transformers/blob/vr_33913/src/transformers/models/ernie/modeling_ernie.py#L1110",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>A <a
	href="/docs/transformers/pr_33913/en/main_classes/output#transformers.modeling_outputs.CausalLMOutputWithCrossAttentions"
	>transformers.modeling_outputs.CausalLMOutputWithCrossAttentions</a> or a tuple of
	<code>torch.FloatTensor</code> (if <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various
	elements depending on the configuration (<a
	href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieConfig"
	>ErnieConfig</a>) and inputs.</p>
	<ul>
	<li>
	<p><strong>loss</strong> (<code>torch.FloatTensor</code> of shape <code>(1,)</code>, <em>optional</em>, returned when <code>labels</code> is provided) — Language modeling loss (for next-token prediction).</p>
	</li>
	<li>
	<p><strong>logits</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, config.vocab_size)</code>) — Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).</p>
	</li>
	<li>
	<p><strong>hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings, if the model has an embedding layer, +
	one for the output of each layer) of shape <code>(batch_size, sequence_length, hidden_size)</code>.</p>
	<p>Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.</p>
	</li>
	<li>
	<p><strong>attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p>
	<p>Attention weights after the attention softmax, used to compute the weighted average in the self-attention
	heads.</p>
	</li>
	<li>
	<p><strong>cross_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p>
	<p>Cross-attention weights after the attention softmax, used to compute the weighted average in the
	cross-attention heads.</p>
	</li>
	<li>
	<p><strong>past_key_values</strong> (<code>tuple(tuple(torch.FloatTensor))</code>, <em>optional</em>, returned when <code>use_cache=True</code> is passed or when <code>config.use_cache=True</code>) — Tuple of <code>torch.FloatTensor</code> tuples of length <code>config.n_layers</code>, with each tuple containing the cached key,
	value states of the self-attention and the cross-attention layers if model is used in encoder-decoder
	setting. Only relevant if <code>config.is_decoder = True</code>.</p>
	<p>Contains pre-computed hidden-states (key and values in the attention blocks) that can be used (see
	<code>past_key_values</code> input) to speed up sequential decoding.</p>
	</li>
	</ul>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p><a
	href="/docs/transformers/pr_33913/en/main_classes/output#transformers.modeling_outputs.CausalLMOutputWithCrossAttentions"
	>transformers.modeling_outputs.CausalLMOutputWithCrossAttentions</a> or <code>tuple(torch.FloatTensor)</code></p>
	`}}),be=new me({props:{$$slots:{default:[lr]},$$scope:{ctx:w}}}),ye=new yt({props:{anchor:"transformers.ErnieForCausalLM.forward.example",$$slots:{default:[cr]},$$scope:{ctx:w}}}),Ke=new C({props:{title:"ErnieForMaskedLM",local:"transformers.ErnieForMaskedLM",headingTag:"h2"}}),et=new E({props:{name:"class transformers.ErnieForMaskedLM",anchor:"transformers.ErnieForMaskedLM",parameters:[{name:"config",val:""}],parametersDescription:[{anchor:"transformers.ErnieForMaskedLM.config",description:`<strong>config</strong> (<a href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieConfig">ErnieConfig</a>) — Model configuration class with all the parameters of the model.
	Initializing with a config file does not load the weights associated with the model, only the
	configuration. Check out the <a href="/docs/transformers/pr_33913/en/main_classes/model#transformers.PreTrainedModel.from_pretrained">from_pretrained()</a> method to load the model weights.`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/vr_33913/src/transformers/models/ernie/modeling_ernie.py#L1213"}}),tt=new E({props:{name:"forward",anchor:"transformers.ErnieForMaskedLM.forward",parameters:[{name:"input_ids",val:": Optional = None"},{name:"attention_mask",val:": Optional = None"},{name:"token_type_ids",val:": Optional = None"},{name:"task_type_ids",val:": Optional = None"},{name:"position_ids",val:": Optional = None"},{name:"head_mask",val:": Optional = None"},{name:"inputs_embeds",val:": Optional = None"},{name:"encoder_hidden_states",val:": Optional = None"},{name:"encoder_attention_mask",val:": Optional = None"},{name:"labels",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.ErnieForMaskedLM.forward.input_ids",description:`<strong>input_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>) —
	Indices of input sequence tokens in the vocabulary.</p>
	<p>Indices can be obtained using <a href="/docs/transformers/pr_33913/en/model_doc/auto#transformers.AutoTokenizer">AutoTokenizer</a>. See <a href="/docs/transformers/pr_33913/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.encode">PreTrainedTokenizer.encode()</a> and
	<a href="/docs/transformers/pr_33913/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.__call__">PreTrainedTokenizer.<strong>call</strong>()</a> for details.</p>
	<p><a href="../glossary#input-ids">What are input IDs?</a>`,name:"input_ids"},{anchor:"transformers.ErnieForMaskedLM.forward.attention_mask",description:`<strong>attention_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Mask to avoid performing attention on padding token indices. Mask values selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>1 for tokens that are <strong>not masked</strong>,</li>
	<li>0 for tokens that are <strong>masked</strong>.</li>
	</ul>
	<p><a href="../glossary#attention-mask">What are attention masks?</a>`,name:"attention_mask"},{anchor:"transformers.ErnieForMaskedLM.forward.token_type_ids",description:`<strong>token_type_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Segment token indices to indicate first and second portions of the inputs. Indices are selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>0 corresponds to a <em>sentence A</em> token,</li>
	<li>1 corresponds to a <em>sentence B</em> token.</li>
	</ul>
	<p><a href="../glossary#token-type-ids">What are token type IDs?</a>`,name:"token_type_ids"},{anchor:"transformers.ErnieForMaskedLM.forward.task_type_ids",description:`<strong>task_type_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Task type embedding is a special embedding to represent the characteristic of different tasks, such as
	word-aware pre-training task, structure-aware pre-training task and semantic-aware pre-training task. We
	assign a <code>task_type_id</code> to each task and the <code>task_type_id</code> is in the range <code>[0,
	config.task_type_vocab_size-1]</code>`,name:"task_type_ids"},{anchor:"transformers.ErnieForMaskedLM.forward.position_ids",description:`<strong>position_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Indices of the position of each input sequence token in the position embeddings. Selected in the range <code>[0, config.max_position_embeddings - 1]</code>.</p>
	<p><a href="../glossary#position-ids">What are position IDs?</a>`,name:"position_ids"},{anchor:"transformers.ErnieForMaskedLM.forward.head_mask",description:`<strong>head_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(num_heads,)</code> or <code>(num_layers, num_heads)</code>, <em>optional</em>) —
	Mask to nullify selected heads of the self-attention modules. Mask values selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>1 indicates the head is <strong>not masked</strong>,</li>
	<li>0 indicates the head is <strong>masked</strong>.</li>
	</ul>`,name:"head_mask"},{anchor:"transformers.ErnieForMaskedLM.forward.inputs_embeds",description:`<strong>inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) —
	Optionally, instead of passing <code>input_ids</code> you can choose to directly pass an embedded representation. This
	is useful if you want more control over how to convert <code>input_ids</code> indices into associated vectors than the
	model’s internal embedding lookup matrix.`,name:"inputs_embeds"},{anchor:"transformers.ErnieForMaskedLM.forward.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned
	tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.ErnieForMaskedLM.forward.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for
	more detail.`,name:"output_hidden_states"},{anchor:"transformers.ErnieForMaskedLM.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return a <a href="/docs/transformers/pr_33913/en/main_classes/output#transformers.utils.ModelOutput">ModelOutput</a> instead of a plain tuple.`,name:"return_dict"},{anchor:"transformers.ErnieForMaskedLM.forward.labels",description:`<strong>labels</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Labels for computing the masked language modeling loss. Indices should be in <code>[-100, 0, ..., config.vocab_size]</code> (see <code>input_ids</code> docstring) Tokens with indices set to <code>-100</code> are ignored (masked), the
	loss is only computed for the tokens with labels in <code>[0, ..., config.vocab_size]</code>`,name:"labels"}],source:"https://github.com/huggingface/transformers/blob/vr_33913/src/transformers/models/ernie/modeling_ernie.py#L1242",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>A <a
	href="/docs/transformers/pr_33913/en/main_classes/output#transformers.modeling_outputs.MaskedLMOutput"
	>transformers.modeling_outputs.MaskedLMOutput</a> or a tuple of
	<code>torch.FloatTensor</code> (if <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various
	elements depending on the configuration (<a
	href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieConfig"
	>ErnieConfig</a>) and inputs.</p>
	<ul>
	<li>
	<p><strong>loss</strong> (<code>torch.FloatTensor</code> of shape <code>(1,)</code>, <em>optional</em>, returned when <code>labels</code> is provided) — Masked language modeling (MLM) loss.</p>
	</li>
	<li>
	<p><strong>logits</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, config.vocab_size)</code>) — Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).</p>
	</li>
	<li>
	<p><strong>hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings, if the model has an embedding layer, +
	one for the output of each layer) of shape <code>(batch_size, sequence_length, hidden_size)</code>.</p>
	<p>Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.</p>
	</li>
	<li>
	<p><strong>attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p>
	<p>Attention weights after the attention softmax, used to compute the weighted average in the self-attention
	heads.</p>
	</li>
	</ul>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p><a
	href="/docs/transformers/pr_33913/en/main_classes/output#transformers.modeling_outputs.MaskedLMOutput"
	>transformers.modeling_outputs.MaskedLMOutput</a> or <code>tuple(torch.FloatTensor)</code></p>
	`}}),ke=new me({props:{$$slots:{default:[pr]},$$scope:{ctx:w}}}),Te=new yt({props:{anchor:"transformers.ErnieForMaskedLM.forward.example",$$slots:{default:[mr]},$$scope:{ctx:w}}}),nt=new C({props:{title:"ErnieForNextSentencePrediction",local:"transformers.ErnieForNextSentencePrediction",headingTag:"h2"}}),ot=new E({props:{name:"class transformers.ErnieForNextSentencePrediction",anchor:"transformers.ErnieForNextSentencePrediction",parameters:[{name:"config",val:""}],parametersDescription:[{anchor:"transformers.ErnieForNextSentencePrediction.config",description:`<strong>config</strong> (<a href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieConfig">ErnieConfig</a>) — Model configuration class with all the parameters of the model.
	Initializing with a config file does not load the weights associated with the model, only the
	configuration. Check out the <a href="/docs/transformers/pr_33913/en/main_classes/model#transformers.PreTrainedModel.from_pretrained">from_pretrained()</a> method to load the model weights.`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/vr_33913/src/transformers/models/ernie/modeling_ernie.py#L1327"}}),st=new E({props:{name:"forward",anchor:"transformers.ErnieForNextSentencePrediction.forward",parameters:[{name:"input_ids",val:": Optional = None"},{name:"attention_mask",val:": Optional = None"},{name:"token_type_ids",val:": Optional = None"},{name:"task_type_ids",val:": Optional = None"},{name:"position_ids",val:": Optional = None"},{name:"head_mask",val:": Optional = None"},{name:"inputs_embeds",val:": Optional = None"},{name:"labels",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.ErnieForNextSentencePrediction.forward.input_ids",description:`<strong>input_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>) —
	Indices of input sequence tokens in the vocabulary.</p>
	<p>Indices can be obtained using <a href="/docs/transformers/pr_33913/en/model_doc/auto#transformers.AutoTokenizer">AutoTokenizer</a>. See <a href="/docs/transformers/pr_33913/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.encode">PreTrainedTokenizer.encode()</a> and
	<a href="/docs/transformers/pr_33913/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.__call__">PreTrainedTokenizer.<strong>call</strong>()</a> for details.</p>
	<p><a href="../glossary#input-ids">What are input IDs?</a>`,name:"input_ids"},{anchor:"transformers.ErnieForNextSentencePrediction.forward.attention_mask",description:`<strong>attention_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Mask to avoid performing attention on padding token indices. Mask values selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>1 for tokens that are <strong>not masked</strong>,</li>
	<li>0 for tokens that are <strong>masked</strong>.</li>
	</ul>
	<p><a href="../glossary#attention-mask">What are attention masks?</a>`,name:"attention_mask"},{anchor:"transformers.ErnieForNextSentencePrediction.forward.token_type_ids",description:`<strong>token_type_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Segment token indices to indicate first and second portions of the inputs. Indices are selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>0 corresponds to a <em>sentence A</em> token,</li>
	<li>1 corresponds to a <em>sentence B</em> token.</li>
	</ul>
	<p><a href="../glossary#token-type-ids">What are token type IDs?</a>`,name:"token_type_ids"},{anchor:"transformers.ErnieForNextSentencePrediction.forward.task_type_ids",description:`<strong>task_type_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Task type embedding is a special embedding to represent the characteristic of different tasks, such as
	word-aware pre-training task, structure-aware pre-training task and semantic-aware pre-training task. We
	assign a <code>task_type_id</code> to each task and the <code>task_type_id</code> is in the range \`[0,
	config.task_type_vocab_size-1]`,name:"task_type_ids"},{anchor:"transformers.ErnieForNextSentencePrediction.forward.position_ids",description:`<strong>position_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Indices of positions of each input sequence tokens in the position embeddings. Selected in the range <code>[0, config.max_position_embeddings - 1]</code>.</p>
	<p><a href="../glossary#position-ids">What are position IDs?</a>`,name:"position_ids"},{anchor:"transformers.ErnieForNextSentencePrediction.forward.head_mask",description:`<strong>head_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(num_heads,)</code> or <code>(num_layers, num_heads)</code>, <em>optional</em>) —
	Mask to nullify selected heads of the self-attention modules. Mask values selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>1 indicates the head is <strong>not masked</strong>,</li>
	<li>0 indicates the head is <strong>masked</strong>.</li>
	</ul>`,name:"head_mask"},{anchor:"transformers.ErnieForNextSentencePrediction.forward.inputs_embeds",description:`<strong>inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) —
	Optionally, instead of passing <code>input_ids</code> you can choose to directly pass an embedded representation. This
	is useful if you want more control over how to convert <code>input_ids</code> indices into associated vectors than the
	model’s internal embedding lookup matrix.`,name:"inputs_embeds"},{anchor:"transformers.ErnieForNextSentencePrediction.forward.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned
	tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.ErnieForNextSentencePrediction.forward.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for
	more detail.`,name:"output_hidden_states"},{anchor:"transformers.ErnieForNextSentencePrediction.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return a <a href="/docs/transformers/pr_33913/en/main_classes/output#transformers.utils.ModelOutput">ModelOutput</a> instead of a plain tuple.`,name:"return_dict"},{anchor:"transformers.ErnieForNextSentencePrediction.forward.labels",description:`<strong>labels</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size,)</code>, <em>optional</em>) —
	Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
	(see <code>input_ids</code> docstring). Indices should be in <code>[0, 1]</code>:</p>
	<ul>
	<li>0 indicates sequence B is a continuation of sequence A,</li>
	<li>1 indicates sequence B is a random sequence.</li>
	</ul>`,name:"labels"}],source:"https://github.com/huggingface/transformers/blob/vr_33913/src/transformers/models/ernie/modeling_ernie.py#L1342",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>A <a
	href="/docs/transformers/pr_33913/en/main_classes/output#transformers.modeling_outputs.NextSentencePredictorOutput"
	>transformers.modeling_outputs.NextSentencePredictorOutput</a> or a tuple of
	<code>torch.FloatTensor</code> (if <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various
	elements depending on the configuration (<a
	href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieConfig"
	>ErnieConfig</a>) and inputs.</p>
	<ul>
	<li>
	<p><strong>loss</strong> (<code>torch.FloatTensor</code> of shape <code>(1,)</code>, <em>optional</em>, returned when <code>next_sentence_label</code> is provided) — Next sequence prediction (classification) loss.</p>
	</li>
	<li>
	<p><strong>logits</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, 2)</code>) — Prediction scores of the next sequence prediction (classification) head (scores of True/False continuation
	before SoftMax).</p>
	</li>
	<li>
	<p><strong>hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings, if the model has an embedding layer, +
	one for the output of each layer) of shape <code>(batch_size, sequence_length, hidden_size)</code>.</p>
	<p>Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.</p>
	</li>
	<li>
	<p><strong>attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p>
	<p>Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
	heads.</p>
	</li>
	</ul>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p><a
	href="/docs/transformers/pr_33913/en/main_classes/output#transformers.modeling_outputs.NextSentencePredictorOutput"
	>transformers.modeling_outputs.NextSentencePredictorOutput</a> or <code>tuple(torch.FloatTensor)</code></p>
	`}}),Me=new me({props:{$$slots:{default:[hr]},$$scope:{ctx:w}}}),we=new yt({props:{anchor:"transformers.ErnieForNextSentencePrediction.forward.example",$$slots:{default:[ur]},$$scope:{ctx:w}}}),rt=new C({props:{title:"ErnieForSequenceClassification",local:"transformers.ErnieForSequenceClassification",headingTag:"h2"}}),at=new E({props:{name:"class transformers.ErnieForSequenceClassification",anchor:"transformers.ErnieForSequenceClassification",parameters:[{name:"config",val:""}],parametersDescription:[{anchor:"transformers.ErnieForSequenceClassification.config",description:`<strong>config</strong> (<a href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieConfig">ErnieConfig</a>) — Model configuration class with all the parameters of the model.
	Initializing with a config file does not load the weights associated with the model, only the
	configuration. Check out the <a href="/docs/transformers/pr_33913/en/main_classes/model#transformers.PreTrainedModel.from_pretrained">from_pretrained()</a> method to load the model weights.`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/vr_33913/src/transformers/models/ernie/modeling_ernie.py#L1432"}}),it=new E({props:{name:"forward",anchor:"transformers.ErnieForSequenceClassification.forward",parameters:[{name:"input_ids",val:": Optional = None"},{name:"attention_mask",val:": Optional = None"},{name:"token_type_ids",val:": Optional = None"},{name:"task_type_ids",val:": Optional = None"},{name:"position_ids",val:": Optional = None"},{name:"head_mask",val:": Optional = None"},{name:"inputs_embeds",val:": Optional = None"},{name:"labels",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.ErnieForSequenceClassification.forward.input_ids",description:`<strong>input_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>) —
	Indices of input sequence tokens in the vocabulary.</p>
	<p>Indices can be obtained using <a href="/docs/transformers/pr_33913/en/model_doc/auto#transformers.AutoTokenizer">AutoTokenizer</a>. See <a href="/docs/transformers/pr_33913/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.encode">PreTrainedTokenizer.encode()</a> and
	<a href="/docs/transformers/pr_33913/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.__call__">PreTrainedTokenizer.<strong>call</strong>()</a> for details.</p>
	<p><a href="../glossary#input-ids">What are input IDs?</a>`,name:"input_ids"},{anchor:"transformers.ErnieForSequenceClassification.forward.attention_mask",description:`<strong>attention_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Mask to avoid performing attention on padding token indices. Mask values selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>1 for tokens that are <strong>not masked</strong>,</li>
	<li>0 for tokens that are <strong>masked</strong>.</li>
	</ul>
	<p><a href="../glossary#attention-mask">What are attention masks?</a>`,name:"attention_mask"},{anchor:"transformers.ErnieForSequenceClassification.forward.token_type_ids",description:`<strong>token_type_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Segment token indices to indicate first and second portions of the inputs. Indices are selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>0 corresponds to a <em>sentence A</em> token,</li>
	<li>1 corresponds to a <em>sentence B</em> token.</li>
	</ul>
	<p><a href="../glossary#token-type-ids">What are token type IDs?</a>`,name:"token_type_ids"},{anchor:"transformers.ErnieForSequenceClassification.forward.task_type_ids",description:`<strong>task_type_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Task type embedding is a special embedding to represent the characteristic of different tasks, such as
	word-aware pre-training task, structure-aware pre-training task and semantic-aware pre-training task. We
	assign a <code>task_type_id</code> to each task and the <code>task_type_id</code> is in the range \`[0,
	config.task_type_vocab_size-1]`,name:"task_type_ids"},{anchor:"transformers.ErnieForSequenceClassification.forward.position_ids",description:`<strong>position_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Indices of positions of each input sequence tokens in the position embeddings. Selected in the range <code>[0, config.max_position_embeddings - 1]</code>.</p>
	<p><a href="../glossary#position-ids">What are position IDs?</a>`,name:"position_ids"},{anchor:"transformers.ErnieForSequenceClassification.forward.head_mask",description:`<strong>head_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(num_heads,)</code> or <code>(num_layers, num_heads)</code>, <em>optional</em>) —
	Mask to nullify selected heads of the self-attention modules. Mask values selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>1 indicates the head is <strong>not masked</strong>,</li>
	<li>0 indicates the head is <strong>masked</strong>.</li>
	</ul>`,name:"head_mask"},{anchor:"transformers.ErnieForSequenceClassification.forward.inputs_embeds",description:`<strong>inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) —
	Optionally, instead of passing <code>input_ids</code> you can choose to directly pass an embedded representation. This
	is useful if you want more control over how to convert <code>input_ids</code> indices into associated vectors than the
	model’s internal embedding lookup matrix.`,name:"inputs_embeds"},{anchor:"transformers.ErnieForSequenceClassification.forward.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned
	tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.ErnieForSequenceClassification.forward.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for
	more detail.`,name:"output_hidden_states"},{anchor:"transformers.ErnieForSequenceClassification.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return a <a href="/docs/transformers/pr_33913/en/main_classes/output#transformers.utils.ModelOutput">ModelOutput</a> instead of a plain tuple.`,name:"return_dict"},{anchor:"transformers.ErnieForSequenceClassification.forward.labels",description:`<strong>labels</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size,)</code>, <em>optional</em>) —
	Labels for computing the sequence classification/regression loss. Indices should be in <code>[0, ..., config.num_labels - 1]</code>. If <code>config.num_labels == 1</code> a regression loss is computed (Mean-Square loss), If
	<code>config.num_labels > 1</code> a classification loss is computed (Cross-Entropy).`,name:"labels"}],source:"https://github.com/huggingface/transformers/blob/vr_33913/src/transformers/models/ernie/modeling_ernie.py#L1456"}}),ve=new me({props:{$$slots:{default:[fr]},$$scope:{ctx:w}}}),dt=new C({props:{title:"ErnieForMultipleChoice",local:"transformers.ErnieForMultipleChoice",headingTag:"h2"}}),lt=new E({props:{name:"class transformers.ErnieForMultipleChoice",anchor:"transformers.ErnieForMultipleChoice",parameters:[{name:"config",val:""}],parametersDescription:[{anchor:"transformers.ErnieForMultipleChoice.config",description:`<strong>config</strong> (<a href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieConfig">ErnieConfig</a>) — Model configuration class with all the parameters of the model.
	Initializing with a config file does not load the weights associated with the model, only the
	configuration. Check out the <a href="/docs/transformers/pr_33913/en/main_classes/model#transformers.PreTrainedModel.from_pretrained">from_pretrained()</a> method to load the model weights.`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/vr_33913/src/transformers/models/ernie/modeling_ernie.py#L1531"}}),ct=new E({props:{name:"forward",anchor:"transformers.ErnieForMultipleChoice.forward",parameters:[{name:"input_ids",val:": Optional = None"},{name:"attention_mask",val:": Optional = None"},{name:"token_type_ids",val:": Optional = None"},{name:"task_type_ids",val:": Optional = None"},{name:"position_ids",val:": Optional = None"},{name:"head_mask",val:": Optional = None"},{name:"inputs_embeds",val:": Optional = None"},{name:"labels",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.ErnieForMultipleChoice.forward.input_ids",description:`<strong>input_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, num_choices, sequence_length)</code>) —
	Indices of input sequence tokens in the vocabulary.</p>
	<p>Indices can be obtained using <a href="/docs/transformers/pr_33913/en/model_doc/auto#transformers.AutoTokenizer">AutoTokenizer</a>. See <a href="/docs/transformers/pr_33913/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.encode">PreTrainedTokenizer.encode()</a> and
	<a href="/docs/transformers/pr_33913/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.__call__">PreTrainedTokenizer.<strong>call</strong>()</a> for details.</p>
	<p><a href="../glossary#input-ids">What are input IDs?</a>`,name:"input_ids"},{anchor:"transformers.ErnieForMultipleChoice.forward.attention_mask",description:`<strong>attention_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_choices, sequence_length)</code>, <em>optional</em>) —
	Mask to avoid performing attention on padding token indices. Mask values selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>1 for tokens that are <strong>not masked</strong>,</li>
	<li>0 for tokens that are <strong>masked</strong>.</li>
	</ul>
	<p><a href="../glossary#attention-mask">What are attention masks?</a>`,name:"attention_mask"},{anchor:"transformers.ErnieForMultipleChoice.forward.token_type_ids",description:`<strong>token_type_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, num_choices, sequence_length)</code>, <em>optional</em>) —
	Segment token indices to indicate first and second portions of the inputs. Indices are selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>0 corresponds to a <em>sentence A</em> token,</li>
	<li>1 corresponds to a <em>sentence B</em> token.</li>
	</ul>
	<p><a href="../glossary#token-type-ids">What are token type IDs?</a>`,name:"token_type_ids"},{anchor:"transformers.ErnieForMultipleChoice.forward.task_type_ids",description:`<strong>task_type_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, num_choices, sequence_length)</code>, <em>optional</em>) —
	Task type embedding is a special embedding to represent the characteristic of different tasks, such as
	word-aware pre-training task, structure-aware pre-training task and semantic-aware pre-training task. We
	assign a <code>task_type_id</code> to each task and the <code>task_type_id</code> is in the range \`[0,
	config.task_type_vocab_size-1]`,name:"task_type_ids"},{anchor:"transformers.ErnieForMultipleChoice.forward.position_ids",description:`<strong>position_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, num_choices, sequence_length)</code>, <em>optional</em>) —
	Indices of positions of each input sequence tokens in the position embeddings. Selected in the range <code>[0, config.max_position_embeddings - 1]</code>.</p>
	<p><a href="../glossary#position-ids">What are position IDs?</a>`,name:"position_ids"},{anchor:"transformers.ErnieForMultipleChoice.forward.head_mask",description:`<strong>head_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(num_heads,)</code> or <code>(num_layers, num_heads)</code>, <em>optional</em>) —
	Mask to nullify selected heads of the self-attention modules. Mask values selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>1 indicates the head is <strong>not masked</strong>,</li>
	<li>0 indicates the head is <strong>masked</strong>.</li>
	</ul>`,name:"head_mask"},{anchor:"transformers.ErnieForMultipleChoice.forward.inputs_embeds",description:`<strong>inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_choices, sequence_length, hidden_size)</code>, <em>optional</em>) —
	Optionally, instead of passing <code>input_ids</code> you can choose to directly pass an embedded representation. This
	is useful if you want more control over how to convert <code>input_ids</code> indices into associated vectors than the
	model’s internal embedding lookup matrix.`,name:"inputs_embeds"},{anchor:"transformers.ErnieForMultipleChoice.forward.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned
	tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.ErnieForMultipleChoice.forward.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for
	more detail.`,name:"output_hidden_states"},{anchor:"transformers.ErnieForMultipleChoice.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return a <a href="/docs/transformers/pr_33913/en/main_classes/output#transformers.utils.ModelOutput">ModelOutput</a> instead of a plain tuple.`,name:"return_dict"},{anchor:"transformers.ErnieForMultipleChoice.forward.labels",description:`<strong>labels</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size,)</code>, <em>optional</em>) —
	Labels for computing the multiple choice classification loss. Indices should be in <code>[0, ..., num_choices-1]</code> where <code>num_choices</code> is the size of the second dimension of the input tensors. (See
	<code>input_ids</code> above)`,name:"labels"}],source:"https://github.com/huggingface/transformers/blob/vr_33913/src/transformers/models/ernie/modeling_ernie.py#L1553",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>A <a
	href="/docs/transformers/pr_33913/en/main_classes/output#transformers.modeling_outputs.MultipleChoiceModelOutput"
	>transformers.modeling_outputs.MultipleChoiceModelOutput</a> or a tuple of
	<code>torch.FloatTensor</code> (if <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various
	elements depending on the configuration (<a
	href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieConfig"
	>ErnieConfig</a>) and inputs.</p>
	<ul>
	<li>
	<p><strong>loss</strong> (<code>torch.FloatTensor</code> of shape <em>(1,)</em>, <em>optional</em>, returned when <code>labels</code> is provided) — Classification loss.</p>
	</li>
	<li>
	<p><strong>logits</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_choices)</code>) — <em>num_choices</em> is the second dimension of the input tensors. (see <em>input_ids</em> above).</p>
	<p>Classification scores (before SoftMax).</p>
	</li>
	<li>
	<p><strong>hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings, if the model has an embedding layer, +
	one for the output of each layer) of shape <code>(batch_size, sequence_length, hidden_size)</code>.</p>
	<p>Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.</p>
	</li>
	<li>
	<p><strong>attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p>
	<p>Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
	heads.</p>
	</li>
	</ul>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p><a
	href="/docs/transformers/pr_33913/en/main_classes/output#transformers.modeling_outputs.MultipleChoiceModelOutput"
	>transformers.modeling_outputs.MultipleChoiceModelOutput</a> or <code>tuple(torch.FloatTensor)</code></p>
	`}}),$e=new me({props:{$$slots:{default:[gr]},$$scope:{ctx:w}}}),Ee=new yt({props:{anchor:"transformers.ErnieForMultipleChoice.forward.example",$$slots:{default:[_r]},$$scope:{ctx:w}}}),pt=new C({props:{title:"ErnieForTokenClassification",local:"transformers.ErnieForTokenClassification",headingTag:"h2"}}),mt=new E({props:{name:"class transformers.ErnieForTokenClassification",anchor:"transformers.ErnieForTokenClassification",parameters:[{name:"config",val:""}],parametersDescription:[{anchor:"transformers.ErnieForTokenClassification.config",description:`<strong>config</strong> (<a href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieConfig">ErnieConfig</a>) — Model configuration class with all the parameters of the model.
	Initializing with a config file does not load the weights associated with the model, only the
	configuration. Check out the <a href="/docs/transformers/pr_33913/en/main_classes/model#transformers.PreTrainedModel.from_pretrained">from_pretrained()</a> method to load the model weights.`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/vr_33913/src/transformers/models/ernie/modeling_ernie.py#L1628"}}),ht=new E({props:{name:"forward",anchor:"transformers.ErnieForTokenClassification.forward",parameters:[{name:"input_ids",val:": Optional = None"},{name:"attention_mask",val:": Optional = None"},{name:"token_type_ids",val:": Optional = None"},{name:"task_type_ids",val:": Optional = None"},{name:"position_ids",val:": Optional = None"},{name:"head_mask",val:": Optional = None"},{name:"inputs_embeds",val:": Optional = None"},{name:"labels",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.ErnieForTokenClassification.forward.input_ids",description:`<strong>input_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>) —
	Indices of input sequence tokens in the vocabulary.</p>
	<p>Indices can be obtained using <a href="/docs/transformers/pr_33913/en/model_doc/auto#transformers.AutoTokenizer">AutoTokenizer</a>. See <a href="/docs/transformers/pr_33913/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.encode">PreTrainedTokenizer.encode()</a> and
	<a href="/docs/transformers/pr_33913/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.__call__">PreTrainedTokenizer.<strong>call</strong>()</a> for details.</p>
	<p><a href="../glossary#input-ids">What are input IDs?</a>`,name:"input_ids"},{anchor:"transformers.ErnieForTokenClassification.forward.attention_mask",description:`<strong>attention_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Mask to avoid performing attention on padding token indices. Mask values selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>1 for tokens that are <strong>not masked</strong>,</li>
	<li>0 for tokens that are <strong>masked</strong>.</li>
	</ul>
	<p><a href="../glossary#attention-mask">What are attention masks?</a>`,name:"attention_mask"},{anchor:"transformers.ErnieForTokenClassification.forward.token_type_ids",description:`<strong>token_type_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Segment token indices to indicate first and second portions of the inputs. Indices are selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>0 corresponds to a <em>sentence A</em> token,</li>
	<li>1 corresponds to a <em>sentence B</em> token.</li>
	</ul>
	<p><a href="../glossary#token-type-ids">What are token type IDs?</a>`,name:"token_type_ids"},{anchor:"transformers.ErnieForTokenClassification.forward.task_type_ids",description:`<strong>task_type_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Task type embedding is a special embedding to represent the characteristic of different tasks, such as
	word-aware pre-training task, structure-aware pre-training task and semantic-aware pre-training task. We
	assign a <code>task_type_id</code> to each task and the <code>task_type_id</code> is in the range \`[0,
	config.task_type_vocab_size-1]`,name:"task_type_ids"},{anchor:"transformers.ErnieForTokenClassification.forward.position_ids",description:`<strong>position_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Indices of positions of each input sequence tokens in the position embeddings. Selected in the range <code>[0, config.max_position_embeddings - 1]</code>.</p>
	<p><a href="../glossary#position-ids">What are position IDs?</a>`,name:"position_ids"},{anchor:"transformers.ErnieForTokenClassification.forward.head_mask",description:`<strong>head_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(num_heads,)</code> or <code>(num_layers, num_heads)</code>, <em>optional</em>) —
	Mask to nullify selected heads of the self-attention modules. Mask values selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>1 indicates the head is <strong>not masked</strong>,</li>
	<li>0 indicates the head is <strong>masked</strong>.</li>
	</ul>`,name:"head_mask"},{anchor:"transformers.ErnieForTokenClassification.forward.inputs_embeds",description:`<strong>inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) —
	Optionally, instead of passing <code>input_ids</code> you can choose to directly pass an embedded representation. This
	is useful if you want more control over how to convert <code>input_ids</code> indices into associated vectors than the
	model’s internal embedding lookup matrix.`,name:"inputs_embeds"},{anchor:"transformers.ErnieForTokenClassification.forward.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned
	tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.ErnieForTokenClassification.forward.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for
	more detail.`,name:"output_hidden_states"},{anchor:"transformers.ErnieForTokenClassification.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return a <a href="/docs/transformers/pr_33913/en/main_classes/output#transformers.utils.ModelOutput">ModelOutput</a> instead of a plain tuple.`,name:"return_dict"},{anchor:"transformers.ErnieForTokenClassification.forward.labels",description:`<strong>labels</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Labels for computing the token classification loss. Indices should be in <code>[0, ..., config.num_labels - 1]</code>.`,name:"labels"}],source:"https://github.com/huggingface/transformers/blob/vr_33913/src/transformers/models/ernie/modeling_ernie.py#L1651"}}),xe=new me({props:{$$slots:{default:[br]},$$scope:{ctx:w}}}),ut=new C({props:{title:"ErnieForQuestionAnswering",local:"transformers.ErnieForQuestionAnswering",headingTag:"h2"}}),ft=new E({props:{name:"class transformers.ErnieForQuestionAnswering",anchor:"transformers.ErnieForQuestionAnswering",parameters:[{name:"config",val:""}],parametersDescription:[{anchor:"transformers.ErnieForQuestionAnswering.config",description:`<strong>config</strong> (<a href="/docs/transformers/pr_33913/en/model_doc/ernie#transformers.ErnieConfig">ErnieConfig</a>) — Model configuration class with all the parameters of the model.
	Initializing with a config file does not load the weights associated with the model, only the
	configuration. Check out the <a href="/docs/transformers/pr_33913/en/main_classes/model#transformers.PreTrainedModel.from_pretrained">from_pretrained()</a> method to load the model weights.`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/vr_33913/src/transformers/models/ernie/modeling_ernie.py#L1707"}}),gt=new E({props:{name:"forward",anchor:"transformers.ErnieForQuestionAnswering.forward",parameters:[{name:"input_ids",val:": Optional = None"},{name:"attention_mask",val:": Optional = None"},{name:"token_type_ids",val:": Optional = None"},{name:"task_type_ids",val:": Optional = None"},{name:"position_ids",val:": Optional = None"},{name:"head_mask",val:": Optional = None"},{name:"inputs_embeds",val:": Optional = None"},{name:"start_positions",val:": Optional = None"},{name:"end_positions",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.ErnieForQuestionAnswering.forward.input_ids",description:`<strong>input_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>) —
	Indices of input sequence tokens in the vocabulary.</p>
	<p>Indices can be obtained using <a href="/docs/transformers/pr_33913/en/model_doc/auto#transformers.AutoTokenizer">AutoTokenizer</a>. See <a href="/docs/transformers/pr_33913/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.encode">PreTrainedTokenizer.encode()</a> and
	<a href="/docs/transformers/pr_33913/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.__call__">PreTrainedTokenizer.<strong>call</strong>()</a> for details.</p>
	<p><a href="../glossary#input-ids">What are input IDs?</a>`,name:"input_ids"},{anchor:"transformers.ErnieForQuestionAnswering.forward.attention_mask",description:`<strong>attention_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Mask to avoid performing attention on padding token indices. Mask values selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>1 for tokens that are <strong>not masked</strong>,</li>
	<li>0 for tokens that are <strong>masked</strong>.</li>
	</ul>
	<p><a href="../glossary#attention-mask">What are attention masks?</a>`,name:"attention_mask"},{anchor:"transformers.ErnieForQuestionAnswering.forward.token_type_ids",description:`<strong>token_type_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Segment token indices to indicate first and second portions of the inputs. Indices are selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>0 corresponds to a <em>sentence A</em> token,</li>
	<li>1 corresponds to a <em>sentence B</em> token.</li>
	</ul>
	<p><a href="../glossary#token-type-ids">What are token type IDs?</a>`,name:"token_type_ids"},{anchor:"transformers.ErnieForQuestionAnswering.forward.task_type_ids",description:`<strong>task_type_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Task type embedding is a special embedding to represent the characteristic of different tasks, such as
	word-aware pre-training task, structure-aware pre-training task and semantic-aware pre-training task. We
	assign a <code>task_type_id</code> to each task and the <code>task_type_id</code> is in the range \`[0,
	config.task_type_vocab_size-1]`,name:"task_type_ids"},{anchor:"transformers.ErnieForQuestionAnswering.forward.position_ids",description:`<strong>position_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Indices of positions of each input sequence tokens in the position embeddings. Selected in the range <code>[0, config.max_position_embeddings - 1]</code>.</p>
	<p><a href="../glossary#position-ids">What are position IDs?</a>`,name:"position_ids"},{anchor:"transformers.ErnieForQuestionAnswering.forward.head_mask",description:`<strong>head_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(num_heads,)</code> or <code>(num_layers, num_heads)</code>, <em>optional</em>) —
	Mask to nullify selected heads of the self-attention modules. Mask values selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>1 indicates the head is <strong>not masked</strong>,</li>
	<li>0 indicates the head is <strong>masked</strong>.</li>
	</ul>`,name:"head_mask"},{anchor:"transformers.ErnieForQuestionAnswering.forward.inputs_embeds",description:`<strong>inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) —
	Optionally, instead of passing <code>input_ids</code> you can choose to directly pass an embedded representation. This
	is useful if you want more control over how to convert <code>input_ids</code> indices into associated vectors than the
	model’s internal embedding lookup matrix.`,name:"inputs_embeds"},{anchor:"transformers.ErnieForQuestionAnswering.forward.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned
	tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.ErnieForQuestionAnswering.forward.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for
	more detail.`,name:"output_hidden_states"},{anchor:"transformers.ErnieForQuestionAnswering.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return a <a href="/docs/transformers/pr_33913/en/main_classes/output#transformers.utils.ModelOutput">ModelOutput</a> instead of a plain tuple.`,name:"return_dict"},{anchor:"transformers.ErnieForQuestionAnswering.forward.start_positions",description:`<strong>start_positions</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size,)</code>, <em>optional</em>) —
	Labels for position (index) of the start of the labelled span for computing the token classification loss.
	Positions are clamped to the length of the sequence (<code>sequence_length</code>). Position outside of the sequence
	are not taken into account for computing the loss.`,name:"start_positions"},{anchor:"transformers.ErnieForQuestionAnswering.forward.end_positions",description:`<strong>end_positions</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size,)</code>, <em>optional</em>) —
	Labels for position (index) of the end of the labelled span for computing the token classification loss.
	Positions are clamped to the length of the sequence (<code>sequence_length</code>). Position outside of the sequence
	are not taken into account for computing the loss.`,name:"end_positions"}],source:"https://github.com/huggingface/transformers/blob/vr_33913/src/transformers/models/ernie/modeling_ernie.py#L1726"}}),ze=new me({props:{$$slots:{default:[yr]},$$scope:{ctx:w}}}),_t=new or({props:{source:"https://github.com/huggingface/transformers/blob/main/docs/source/en/model_doc/ernie.md"}}),{c(){t=l("meta"),k=s(),d=l("p"),m=s(),u(T.$$.fragment),a=s(),u(M.$$.fragment),pn=s(),Fe=l("p"),Fe.innerHTML=rs,mn=s(),Je=l("p"),Je.innerHTML=as,hn=s(),u(je.$$.fragment),un=s(),Le=l("p"),Le.innerHTML=is,fn=s(),u(Ie.$$.fragment),gn=s(),u(qe.$$.fragment),_n=s(),Ue=l("table"),Ue.innerHTML=ds,bn=s(),We=l("p"),We.innerHTML=ls,yn=s(),u(Pe.$$.fragment),kn=s(),Ne=l("ul"),Ne.innerHTML=cs,Tn=s(),u(He.$$.fragment),Mn=s(),N=l("div"),u(Ze.$$.fragment),Rn=s(),kt=l("p"),kt.innerHTML=ps,Xn=s(),Tt=l("p"),Tt.innerHTML=ms,Gn=s(),u(he.$$.fragment),wn=s(),u(Be.$$.fragment),vn=s(),ce=l("div"),u(Se.$$.fragment),An=s(),Mt=l("p"),Mt.innerHTML=hs,$n=s(),u(Ve.$$.fragment),En=s(),x=l("div"),u(Oe.$$.fragment),Qn=s(),wt=l("p"),wt.textContent=us,Dn=s(),vt=l("p"),vt.innerHTML=fs,Yn=s(),$t=l("p"),$t.innerHTML=gs,Kn=s(),Et=l("p"),Et.innerHTML=_s,eo=s(),xt=l("p"),xt.innerHTML=bs,to=s(),R=l("div"),u(Re.$$.fragment),no=s(),zt=l("p"),zt.innerHTML=ys,oo=s(),u(ue.$$.fragment),so=s(),u(fe.$$.fragment),xn=s(),u(Xe.$$.fragment),zn=s(),J=l("div"),u(Ge.$$.fragment),ro=s(),Ct=l("p"),Ct.innerHTML=ks,ao=s(),Ft=l("p"),Ft.innerHTML=Ts,io=s(),Jt=l("p"),Jt.innerHTML=Ms,lo=s(),X=l("div"),u(Ae.$$.fragment),co=s(),jt=l("p"),jt.innerHTML=ws,po=s(),u(ge.$$.fragment),mo=s(),u(_e.$$.fragment),Cn=s(),u(Qe.$$.fragment),Fn=s(),j=l("div"),u(De.$$.fragment),ho=s(),Lt=l("p"),Lt.innerHTML=vs,uo=s(),It=l("p"),It.innerHTML=$s,fo=s(),qt=l("p"),qt.innerHTML=Es,go=s(),G=l("div"),u(Ye.$$.fragment),_o=s(),Ut=l("p"),Ut.innerHTML=xs,bo=s(),u(be.$$.fragment),yo=s(),u(ye.$$.fragment),Jn=s(),u(Ke.$$.fragment),jn=s(),L=l("div"),u(et.$$.fragment),ko=s(),Wt=l("p"),Wt.innerHTM
L=zs,To=s(),Pt=l("p"),Pt.innerHTML=Cs,Mo=s(),Nt=l("p"),Nt.innerHTML=Fs,wo=s(),A=l("div"),u(tt.$$.fragment),vo=s(),Ht=l("p"),Ht.innerHTML=Js,$o=s(),u(ke.$$.fragment),Eo=s(),u(Te.$$.fragment),Ln=s(),u(nt.$$.fragment),In=s(),I=l("div"),u(ot.$$.fragment),xo=s(),Zt=l("p"),Zt.innerHTML=js,zo=s(),Bt=l("p"),Bt.innerHTML=Ls,Co=s(),St=l("p"),St.innerHTML=Is,Fo=s(),Q=l("div"),u(st.$$.fragment),Jo=s(),Vt=l("p"),Vt.innerHTML=qs,jo=s(),u(Me.$$.fragment),Lo=s(),u(we.$$.fragment),qn=s(),u(rt.$$.fragment),Un=s(),q=l("div"),u(at.$$.fragment),Io=s(),Ot=l("p"),Ot.textContent=Us,qo=s(),Rt=l("p"),Rt.innerHTML=Ws,Uo=s(),Xt=l("p"),Xt.innerHTML=Ps,Wo=s(),re=l("div"),u(it.$$.fragment),Po=s(),Gt=l("p"),Gt.innerHTML=Ns,No=s(),u(ve.$$.fragment),Wn=s(),u(dt.$$.fragment),Pn=s(),U=l("div"),u(lt.$$.fragment),Ho=s(),At=l("p"),At.textContent=Hs,Zo=s(),Qt=l("p"),Qt.innerHTML=Zs,Bo=s(),Dt=l("p"),Dt.innerHTML=Bs,So=s(),D=l("div"),u(ct.$$.fragment),Vo=s(),Yt=l("p"),Yt.innerHTML=Ss,Oo=s(),u($e.$$.fragment),Ro=s(),u(Ee.$$.fragment),Nn=s(),u(pt.$$.fragment),Hn=s(),W=l("div"),u(mt.$$.fragment),Xo=s(),Kt=l("p"),Kt.textContent=Vs,Go=s(),en=l("p"),en.innerHTML=Os,Ao=s(),tn=l("p"),tn.innerHTML=Rs,Qo=s(),ae=l("div"),u(ht.$$.fragment),Do=s(),nn=l("p"),nn.innerHTML=Xs,Yo=s(),u(xe.$$.fragment),Zn=s(),u(ut.$$.fragment),Bn=s(),P=l("div"),u(ft.$$.fragment),Ko=s(),on=l("p"),on.innerHTML=Gs,es=s(),sn=l("p"),sn.innerHTML=As,ts=s(),rn=l("p"),rn.innerHTML=Qs,ns=s(),ie=l("div"),u(gt.$$.fragment),os=s(),an=l("p"),an.innerHTML=Ds,ss=s(),u(ze.$$.fragment),Sn=s(),u(_t.$$.fragment),Vn=s(),cn=l("p"),this.h()},l(e){const 
o=nr("svelte-u9bgzb",document.head);t=c(o,"META",{name:!0,content:!0}),o.forEach(i),k=r(e),d=c(e,"P",{}),v(d).forEach(i),m=r(e),f(T.$$.fragment,e),a=r(e),f(M.$$.fragment,e),pn=r(e),Fe=c(e,"P",{"data-svelte-h":!0}),h(Fe)!=="svelte-1yp61t5"&&(Fe.innerHTML=rs),mn=r(e),Je=c(e,"P",{"data-svelte-h":!0}),h(Je)!=="svelte-1fthuba"&&(Je.innerHTML=as),hn=r(e),f(je.$$.fragment,e),un=r(e),Le=c(e,"P",{"data-svelte-h":!0}),h(Le)!=="svelte-r1g5av"&&(Le.innerHTML=is),fn=r(e),f(Ie.$$.fragment,e),gn=r(e),f(qe.$$.fragment,e),_n=r(e),Ue=c(e,"TABLE",{"data-svelte-h":!0}),h(Ue)!=="svelte-nh20hi"&&(Ue.innerHTML=ds),bn=r(e),We=c(e,"P",{"data-svelte-h":!0}),h(We)!=="svelte-10dq6fe"&&(We.innerHTML=ls),yn=r(e),f(Pe.$$.fragment,e),kn=r(e),Ne=c(e,"UL",{"data-svelte-h":!0}),h(Ne)!=="svelte-p1b16m"&&(Ne.innerHTML=cs),Tn=r(e),f(He.$$.fragment,e),Mn=r(e),N=c(e,"DIV",{class:!0});var Y=v(N);f(Ze.$$.fragment,Y),Rn=r(Y),kt=c(Y,"P",{"data-svelte-h":!0}),h(kt)!=="svelte-4m63tc"&&(kt.innerHTML=ps),Xn=r(Y),Tt=c(Y,"P",{"data-svelte-h":!0}),h(Tt)!=="svelte-fxep4b"&&(Tt.innerHTML=ms),Gn=r(Y),f(he.$$.fragment,Y),Y.forEach(i),wn=r(e),f(Be.$$.fragment,e),vn=r(e),ce=c(e,"DIV",{class:!0});var bt=v(ce);f(Se.$$.fragment,bt),An=r(bt),Mt=c(bt,"P",{"data-svelte-h":!0}),h(Mt)!=="svelte-1a6lceb"&&(Mt.innerHTML=hs),bt.forEach(i),$n=r(e),f(Ve.$$.fragment,e),En=r(e),x=c(e,"DIV",{class:!0});var z=v(x);f(Oe.$$.fragment,z),Qn=r(z),wt=c(z,"P",{"data-svelte-h":!0}),h(wt)!=="svelte-1t6amlg"&&(wt.textContent=us),Dn=r(z),vt=c(z,"P",{"data-svelte-h":!0}),h(vt)!=="svelte-159uoa1"&&(vt.innerHTML=fs),Yn=r(z),$t=c(z,"P",{"data-svelte-h":!0}),h($t)!=="svelte-hswkmf"&&($t.innerHTML=gs),Kn=r(z),Et=c(z,"P",{"data-svelte-h":!0}),h(Et)!=="svelte-1du13oj"&&(Et.innerHTML=_s),eo=r(z),xt=c(z,"P",{"data-svelte-h":!0}),h(xt)!=="svelte-174erte"&&(xt.innerHTML=bs),to=r(z),R=c(z,"DIV",{class:!0});var 
K=v(R);f(Re.$$.fragment,K),no=r(K),zt=c(K,"P",{"data-svelte-h":!0}),h(zt)!=="svelte-18zvy9"&&(zt.innerHTML=ys),oo=r(K),f(ue.$$.fragment,K),so=r(K),f(fe.$$.fragment,K),K.forEach(i),z.forEach(i),xn=r(e),f(Xe.$$.fragment,e),zn=r(e),J=c(e,"DIV",{class:!0});var H=v(J);f(Ge.$$.fragment,H),ro=r(H),Ct=c(H,"P",{"data-svelte-h":!0}),h(Ct)!=="svelte-ltv6uf"&&(Ct.innerHTML=ks),ao=r(H),Ft=c(H,"P",{"data-svelte-h":!0}),h(Ft)!=="svelte-159uoa1"&&(Ft.innerHTML=Ts),io=r(H),Jt=c(H,"P",{"data-svelte-h":!0}),h(Jt)!=="svelte-hswkmf"&&(Jt.innerHTML=Ms),lo=r(H),X=c(H,"DIV",{class:!0});var ee=v(X);f(Ae.$$.fragment,ee),co=r(ee),jt=c(ee,"P",{"data-svelte-h":!0}),h(jt)!=="svelte-1vgt75l"&&(jt.innerHTML=ws),po=r(ee),f(ge.$$.fragment,ee),mo=r(ee),f(_e.$$.fragment,ee),ee.forEach(i),H.forEach(i),Cn=r(e),f(Qe.$$.fragment,e),Fn=r(e),j=c(e,"DIV",{class:!0});var Z=v(j);f(De.$$.fragment,Z),ho=r(Z),Lt=c(Z,"P",{"data-svelte-h":!0}),h(Lt)!=="svelte-ts4qhc"&&(Lt.innerHTML=vs),uo=r(Z),It=c(Z,"P",{"data-svelte-h":!0}),h(It)!=="svelte-159uoa1"&&(It.innerHTML=$s),fo=r(Z),qt=c(Z,"P",{"data-svelte-h":!0}),h(qt)!=="svelte-hswkmf"&&(qt.innerHTML=Es),go=r(Z),G=c(Z,"DIV",{class:!0});var te=v(G);f(Ye.$$.fragment,te),_o=r(te),Ut=c(te,"P",{"data-svelte-h":!0}),h(Ut)!=="svelte-g4ozyx"&&(Ut.innerHTML=xs),bo=r(te),f(be.$$.fragment,te),yo=r(te),f(ye.$$.fragment,te),te.forEach(i),Z.forEach(i),Jn=r(e),f(Ke.$$.fragment,e),jn=r(e),L=c(e,"DIV",{class:!0});var B=v(L);f(et.$$.fragment,B),ko=r(B),Wt=c(B,"P",{"data-svelte-h":!0}),h(Wt)!=="svelte-1e378t3"&&(Wt.innerHTML=zs),To=r(B),Pt=c(B,"P",{"data-svelte-h":!0}),h(Pt)!=="svelte-159uoa1"&&(Pt.innerHTML=Cs),Mo=r(B),Nt=c(B,"P",{"data-svelte-h":!0}),h(Nt)!=="svelte-hswkmf"&&(Nt.innerHTML=Fs),wo=r(B),A=c(B,"DIV",{class:!0});var 
ne=v(A);f(tt.$$.fragment,ne),vo=r(ne),Ht=c(ne,"P",{"data-svelte-h":!0}),h(Ht)!=="svelte-17j9y3l"&&(Ht.innerHTML=Js),$o=r(ne),f(ke.$$.fragment,ne),Eo=r(ne),f(Te.$$.fragment,ne),ne.forEach(i),B.forEach(i),Ln=r(e),f(nt.$$.fragment,e),In=r(e),I=c(e,"DIV",{class:!0});var S=v(I);f(ot.$$.fragment,S),xo=r(S),Zt=c(S,"P",{"data-svelte-h":!0}),h(Zt)!=="svelte-139sre"&&(Zt.innerHTML=js),zo=r(S),Bt=c(S,"P",{"data-svelte-h":!0}),h(Bt)!=="svelte-159uoa1"&&(Bt.innerHTML=Ls),Co=r(S),St=c(S,"P",{"data-svelte-h":!0}),h(St)!=="svelte-hswkmf"&&(St.innerHTML=Is),Fo=r(S),Q=c(S,"DIV",{class:!0});var oe=v(Q);f(st.$$.fragment,oe),Jo=r(oe),Vt=c(oe,"P",{"data-svelte-h":!0}),h(Vt)!=="svelte-1377scv"&&(Vt.innerHTML=qs),jo=r(oe),f(Me.$$.fragment,oe),Lo=r(oe),f(we.$$.fragment,oe),oe.forEach(i),S.forEach(i),qn=r(e),f(rt.$$.fragment,e),Un=r(e),q=c(e,"DIV",{class:!0});var V=v(q);f(at.$$.fragment,V),Io=r(V),Ot=c(V,"P",{"data-svelte-h":!0}),h(Ot)!=="svelte-129pehz"&&(Ot.textContent=Us),qo=r(V),Rt=c(V,"P",{"data-svelte-h":!0}),h(Rt)!=="svelte-159uoa1"&&(Rt.innerHTML=Ws),Uo=r(V),Xt=c(V,"P",{"data-svelte-h":!0}),h(Xt)!=="svelte-hswkmf"&&(Xt.innerHTML=Ps),Wo=r(V),re=c(V,"DIV",{class:!0});var pe=v(re);f(it.$$.fragment,pe),Po=r(pe),Gt=c(pe,"P",{"data-svelte-h":!0}),h(Gt)!=="svelte-ntydqb"&&(Gt.innerHTML=Ns),No=r(pe),f(ve.$$.fragment,pe),pe.forEach(i),V.forEach(i),Wn=r(e),f(dt.$$.fragment,e),Pn=r(e),U=c(e,"DIV",{class:!0});var O=v(U);f(lt.$$.fragment,O),Ho=r(O),At=c(O,"P",{"data-svelte-h":!0}),h(At)!=="svelte-kvp7gr"&&(At.textContent=Hs),Zo=r(O),Qt=c(O,"P",{"data-svelte-h":!0}),h(Qt)!=="svelte-159uoa1"&&(Qt.innerHTML=Zs),Bo=r(O),Dt=c(O,"P",{"data-svelte-h":!0}),h(Dt)!=="svelte-hswkmf"&&(Dt.innerHTML=Bs),So=r(O),D=c(O,"DIV",{class:!0});var se=v(D);f(ct.$$.fragment,se),Vo=r(se),Yt=c(se,"P",{"data-svelte-h":!0}),h(Yt)!=="svelte-11cvn3z"&&(Yt.innerHTML=Ss),Oo=r(se),f($e.$$.fragment,se),Ro=r(se),f(Ee.$$.fragment,se),se.forEach(i),O.forEach(i),Nn=r(e),f(pt.$$.fragment,e),Hn=r(e),W=c(e,"DIV",{class:!0});var 
de=v(W);f(mt.$$.fragment,de),Xo=r(de),Kt=c(de,"P",{"data-svelte-h":!0}),h(Kt)!=="svelte-1wydhty"&&(Kt.textContent=Vs),Go=r(de),en=c(de,"P",{"data-svelte-h":!0}),h(en)!=="svelte-159uoa1"&&(en.innerHTML=Os),Ao=r(de),tn=c(de,"P",{"data-svelte-h":!0}),h(tn)!=="svelte-hswkmf"&&(tn.innerHTML=Rs),Qo=r(de),ae=c(de,"DIV",{class:!0});var dn=v(ae);f(ht.$$.fragment,dn),Do=r(dn),nn=c(dn,"P",{"data-svelte-h":!0}),h(nn)!=="svelte-5kz6gd"&&(nn.innerHTML=Xs),Yo=r(dn),f(xe.$$.fragment,dn),dn.forEach(i),de.forEach(i),Zn=r(e),f(ut.$$.fragment,e),Bn=r(e),P=c(e,"DIV",{class:!0});var le=v(P);f(ft.$$.fragment,le),Ko=r(le),on=c(le,"P",{"data-svelte-h":!0}),h(on)!=="svelte-1fvax6s"&&(on.innerHTML=Gs),es=r(le),sn=c(le,"P",{"data-svelte-h":!0}),h(sn)!=="svelte-159uoa1"&&(sn.innerHTML=As),ts=r(le),rn=c(le,"P",{"data-svelte-h":!0}),h(rn)!=="svelte-hswkmf"&&(rn.innerHTML=Qs),ns=r(le),ie=c(le,"DIV",{class:!0});var ln=v(ie);f(gt.$$.fragment,ln),os=r(ln),an=c(ln,"P",{"data-svelte-h":!0}),h(an)!=="svelte-1ek6bup"&&(an.innerHTML=Ds),ss=r(ln),f(ze.$$.fragment,ln),ln.forEach(i),le.forEach(i),Sn=r(e),f(_t.$$.fragment,e),Vn=r(e),cn=c(e,"P",{}),v(cn).forEach(i),this.h()},h(){$(t,"name","hf:doc:metadata"),$(t,"content",Tr),$(N,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(ce,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(R,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(x,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(X,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(J,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(G,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(j,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 
mt-8"),$(A,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(L,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(Q,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(I,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(re,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(q,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(D,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(U,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(ae,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(W,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(ie,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(P,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 
mt-8")},m(e,o){n(document.head,t),p(e,k,o),p(e,d,o),p(e,m,o),g(T,e,o),p(e,a,o),g(M,e,o),p(e,pn,o),p(e,Fe,o),p(e,mn,o),p(e,Je,o),p(e,hn,o),g(je,e,o),p(e,un,o),p(e,Le,o),p(e,fn,o),g(Ie,e,o),p(e,gn,o),g(qe,e,o),p(e,_n,o),p(e,Ue,o),p(e,bn,o),p(e,We,o),p(e,yn,o),g(Pe,e,o),p(e,kn,o),p(e,Ne,o),p(e,Tn,o),g(He,e,o),p(e,Mn,o),p(e,N,o),g(Ze,N,null),n(N,Rn),n(N,kt),n(N,Xn),n(N,Tt),n(N,Gn),g(he,N,null),p(e,wn,o),g(Be,e,o),p(e,vn,o),p(e,ce,o),g(Se,ce,null),n(ce,An),n(ce,Mt),p(e,$n,o),g(Ve,e,o),p(e,En,o),p(e,x,o),g(Oe,x,null),n(x,Qn),n(x,wt),n(x,Dn),n(x,vt),n(x,Yn),n(x,$t),n(x,Kn),n(x,Et),n(x,eo),n(x,xt),n(x,to),n(x,R),g(Re,R,null),n(R,no),n(R,zt),n(R,oo),g(ue,R,null),n(R,so),g(fe,R,null),p(e,xn,o),g(Xe,e,o),p(e,zn,o),p(e,J,o),g(Ge,J,null),n(J,ro),n(J,Ct),n(J,ao),n(J,Ft),n(J,io),n(J,Jt),n(J,lo),n(J,X),g(Ae,X,null),n(X,co),n(X,jt),n(X,po),g(ge,X,null),n(X,mo),g(_e,X,null),p(e,Cn,o),g(Qe,e,o),p(e,Fn,o),p(e,j,o),g(De,j,null),n(j,ho),n(j,Lt),n(j,uo),n(j,It),n(j,fo),n(j,qt),n(j,go),n(j,G),g(Ye,G,null),n(G,_o),n(G,Ut),n(G,bo),g(be,G,null),n(G,yo),g(ye,G,null),p(e,Jn,o),g(Ke,e,o),p(e,jn,o),p(e,L,o),g(et,L,null),n(L,ko),n(L,Wt),n(L,To),n(L,Pt),n(L,Mo),n(L,Nt),n(L,wo),n(L,A),g(tt,A,null),n(A,vo),n(A,Ht),n(A,$o),g(ke,A,null),n(A,Eo),g(Te,A,null),p(e,Ln,o),g(nt,e,o),p(e,In,o),p(e,I,o),g(ot,I,null),n(I,xo),n(I,Zt),n(I,zo),n(I,Bt),n(I,Co),n(I,St),n(I,Fo),n(I,Q),g(st,Q,null),n(Q,Jo),n(Q,Vt),n(Q,jo),g(Me,Q,null),n(Q,Lo),g(we,Q,null),p(e,qn,o),g(rt,e,o),p(e,Un,o),p(e,q,o),g(at,q,null),n(q,Io),n(q,Ot),n(q,qo),n(q,Rt),n(q,Uo),n(q,Xt),n(q,Wo),n(q,re),g(it,re,null),n(re,Po),n(re,Gt),n(re,No),g(ve,re,null),p(e,Wn,o),g(dt,e,o),p(e,Pn,o),p(e,U,o),g(lt,U,null),n(U,Ho),n(U,At),n(U,Zo),n(U,Qt),n(U,Bo),n(U,Dt),n(U,So),n(U,D),g(ct,D,null),n(D,Vo),n(D,Yt),n(D,Oo),g($e,D,null),n(D,Ro),g(Ee,D,null),p(e,Nn,o),g(pt,e,o),p(e,Hn,o),p(e,W,o),g(mt,W,null),n(W,Xo),n(W,Kt),n(W,Go),n(W,en),n(W,Ao),n(W,tn),n(W,Qo),n(W,ae),g(ht,ae,null),n(ae,Do),n(ae,nn),n(ae,Yo),g(xe,ae,null),p(e,Zn,o),g(ut,e,o),p(e,Bn,o),p(e,P,o),g(ft,P
,null),n(P,Ko),n(P,on),n(P,es),n(P,sn),n(P,ts),n(P,rn),n(P,ns),n(P,ie),g(gt,ie,null),n(ie,os),n(ie,an),n(ie,ss),g(ze,ie,null),p(e,Sn,o),g(_t,e,o),p(e,Vn,o),p(e,cn,o),On=!0},p(e,[o]){const Y={};o&2&&(Y.$$scope={dirty:o,ctx:e}),he.$set(Y);const bt={};o&2&&(bt.$$scope={dirty:o,ctx:e}),ue.$set(bt);const z={};o&2&&(z.$$scope={dirty:o,ctx:e}),fe.$set(z);const K={};o&2&&(K.$$scope={dirty:o,ctx:e}),ge.$set(K);const H={};o&2&&(H.$$scope={dirty:o,ctx:e}),_e.$set(H);const ee={};o&2&&(ee.$$scope={dirty:o,ctx:e}),be.$set(ee);const Z={};o&2&&(Z.$$scope={dirty:o,ctx:e}),ye.$set(Z);const te={};o&2&&(te.$$scope={dirty:o,ctx:e}),ke.$set(te);const B={};o&2&&(B.$$scope={dirty:o,ctx:e}),Te.$set(B);const ne={};o&2&&(ne.$$scope={dirty:o,ctx:e}),Me.$set(ne);const S={};o&2&&(S.$$scope={dirty:o,ctx:e}),we.$set(S);const oe={};o&2&&(oe.$$scope={dirty:o,ctx:e}),ve.$set(oe);const V={};o&2&&(V.$$scope={dirty:o,ctx:e}),$e.$set(V);const pe={};o&2&&(pe.$$scope={dirty:o,ctx:e}),Ee.$set(pe);const O={};o&2&&(O.$$scope={dirty:o,ctx:e}),xe.$set(O);const 
se={};o&2&&(se.$$scope={dirty:o,ctx:e}),ze.$set(se)},i(e){On\|\|(_(T.$$.fragment,e),_(M.$$.fragment,e),_(je.$$.fragment,e),_(Ie.$$.fragment,e),_(qe.$$.fragment,e),_(Pe.$$.fragment,e),_(He.$$.fragment,e),_(Ze.$$.fragment,e),_(he.$$.fragment,e),_(Be.$$.fragment,e),_(Se.$$.fragment,e),_(Ve.$$.fragment,e),_(Oe.$$.fragment,e),_(Re.$$.fragment,e),_(ue.$$.fragment,e),_(fe.$$.fragment,e),_(Xe.$$.fragment,e),_(Ge.$$.fragment,e),_(Ae.$$.fragment,e),_(ge.$$.fragment,e),_(_e.$$.fragment,e),_(Qe.$$.fragment,e),_(De.$$.fragment,e),_(Ye.$$.fragment,e),_(be.$$.fragment,e),_(ye.$$.fragment,e),_(Ke.$$.fragment,e),_(et.$$.fragment,e),_(tt.$$.fragment,e),_(ke.$$.fragment,e),_(Te.$$.fragment,e),_(nt.$$.fragment,e),_(ot.$$.fragment,e),_(st.$$.fragment,e),_(Me.$$.fragment,e),_(we.$$.fragment,e),_(rt.$$.fragment,e),_(at.$$.fragment,e),_(it.$$.fragment,e),_(ve.$$.fragment,e),_(dt.$$.fragment,e),_(lt.$$.fragment,e),_(ct.$$.fragment,e),_($e.$$.fragment,e),_(Ee.$$.fragment,e),_(pt.$$.fragment,e),_(mt.$$.fragment,e),_(ht.$$.fragment,e),_(xe.$$.fragment,e),_(ut.$$.fragment,e),_(ft.$$.fragment,e),_(gt.$$.fragment,e),_(ze.$$.fragment,e),_(_t.$$.fragment,e),On=!0)},o(e){b(T.$$.fragment,e),b(M.$$.fragment,e),b(je.$$.fragment,e),b(Ie.$$.fragment,e),b(qe.$$.fragment,e),b(Pe.$$.fragment,e),b(He.$$.fragment,e),b(Ze.$$.fragment,e),b(he.$$.fragment,e),b(Be.$$.fragment,e),b(Se.$$.fragment,e),b(Ve.$$.fragment,e),b(Oe.$$.fragment,e),b(Re.$$.fragment,e),b(ue.$$.fragment,e),b(fe.$$.fragment,e),b(Xe.$$.fragment,e),b(Ge.$$.fragment,e),b(Ae.$$.fragment,e),b(ge.$$.fragment,e),b(_e.$$.fragment,e),b(Qe.$$.fragment,e),b(De.$$.fragment,e),b(Ye.$$.fragment,e),b(be.$$.fragment,e),b(ye.$$.fragment,e),b(Ke.$$.fragment,e),b(et.$$.fragment,e),b(tt.$$.fragment,e),b(ke.$$.fragment,e),b(Te.$$.fragment,e),b(nt.$$.fragment,e),b(ot.$$.fragment,e),b(st.$$.fragment,e),b(Me.$$.fragment,e),b(we.$$.fragment,e),b(rt.$$.fragment,e),b(at.$$.fragment,e),b(it.$$.fragment,e),b(ve.$$.fragment,e),b(dt.$$.fragment,e),b(lt.$$.fragment,e),b(ct.$
$.fragment,e),b($e.$$.fragment,e),b(Ee.$$.fragment,e),b(pt.$$.fragment,e),b(mt.$$.fragment,e),b(ht.$$.fragment,e),b(xe.$$.fragment,e),b(ut.$$.fragment,e),b(ft.$$.fragment,e),b(gt.$$.fragment,e),b(ze.$$.fragment,e),b(_t.$$.fragment,e),On=!1},d(e){e&&(i(k),i(d),i(m),i(a),i(pn),i(Fe),i(mn),i(Je),i(hn),i(un),i(Le),i(fn),i(gn),i(_n),i(Ue),i(bn),i(We),i(yn),i(kn),i(Ne),i(Tn),i(Mn),i(N),i(wn),i(vn),i(ce),i($n),i(En),i(x),i(xn),i(zn),i(J),i(Cn),i(Fn),i(j),i(Jn),i(jn),i(L),i(Ln),i(In),i(I),i(qn),i(Un),i(q),i(Wn),i(Pn),i(U),i(Nn),i(Hn),i(W),i(Zn),i(Bn),i(P),i(Sn),i(Vn),i(cn)),i(t),y(T,e),y(M,e),y(je,e),y(Ie,e),y(qe,e),y(Pe,e),y(He,e),y(Ze),y(he),y(Be,e),y(Se),y(Ve,e),y(Oe),y(Re),y(ue),y(fe),y(Xe,e),y(Ge),y(Ae),y(ge),y(_e),y(Qe,e),y(De),y(Ye),y(be),y(ye),y(Ke,e),y(et),y(tt),y(ke),y(Te),y(nt,e),y(ot),y(st),y(Me),y(we),y(rt,e),y(at),y(it),y(ve),y(dt,e),y(lt),y(ct),y($e),y(Ee),y(pt,e),y(mt),y(ht),y(xe),y(ut,e),y(ft),y(gt),y(ze),y(_t,e)}}}const Tr='{"title":"ERNIE","local":"ernie","sections":[{"title":"Overview","local":"overview","sections":[{"title":"Usage example","local":"usage-example","sections":[],"depth":3},{"title":"Model checkpoints","local":"model-checkpoints","sections":[],"depth":3}],"depth":2},{"title":"Resources","local":"resources","sections":[],"depth":2},{"title":"ErnieConfig","local":"transformers.ErnieConfig","sections":[],"depth":2},{"title":"Ernie specific 
outputs","local":"transformers.models.ernie.modeling_ernie.ErnieForPreTrainingOutput","sections":[],"depth":2},{"title":"ErnieModel","local":"transformers.ErnieModel","sections":[],"depth":2},{"title":"ErnieForPreTraining","local":"transformers.ErnieForPreTraining","sections":[],"depth":2},{"title":"ErnieForCausalLM","local":"transformers.ErnieForCausalLM","sections":[],"depth":2},{"title":"ErnieForMaskedLM","local":"transformers.ErnieForMaskedLM","sections":[],"depth":2},{"title":"ErnieForNextSentencePrediction","local":"transformers.ErnieForNextSentencePrediction","sections":[],"depth":2},{"title":"ErnieForSequenceClassification","local":"transformers.ErnieForSequenceClassification","sections":[],"depth":2},{"title":"ErnieForMultipleChoice","local":"transformers.ErnieForMultipleChoice","sections":[],"depth":2},{"title":"ErnieForTokenClassification","local":"transformers.ErnieForTokenClassification","sections":[],"depth":2},{"title":"ErnieForQuestionAnswering","local":"transformers.ErnieForQuestionAnswering","sections":[],"depth":2}],"depth":1}';function Mr(w){return Ks(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Fr extends er{constructor(t){super(),tr(this,t,Mr,kr,Ys,{})}}export{Fr as component};

Xet Storage Details

Size: 159 kB
Xet hash: fb773d027d98c784bdf3ffa23d3b1eb6486ac064e20fd8f4b2c001a71655f7bc

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.