Buckets:
| import{s as Ve,o as Le,n as ke}from"../chunks/scheduler.25b97de1.js";import{S as Se,i as Xe,g as m,s as r,r as h,A as Ye,h as d,f as n,c as i,j as P,u as g,x as j,k as q,y as p,a,v as u,d as _,t as T,w as M}from"../chunks/index.d9030fc9.js";import{T as Pe}from"../chunks/Tip.baa67368.js";import{D as ae}from"../chunks/Docstring.e257edda.js";import{C as Ae}from"../chunks/CodeBlock.e6cd0d95.js";import{E as He}from"../chunks/ExampleCodeBlock.20db4b6e.js";import{H as re,E as qe}from"../chunks/EditOnGithub.91d95064.js";function Oe(C){let s,y="Example:",c,l,f;return l=new Ae({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMERlY2lzaW9uVHJhbnNmb3JtZXJDb25maWclMkMlMjBEZWNpc2lvblRyYW5zZm9ybWVyTW9kZWwlMEElMEElMjMlMjBJbml0aWFsaXppbmclMjBhJTIwRGVjaXNpb25UcmFuc2Zvcm1lciUyMGNvbmZpZ3VyYXRpb24lMEFjb25maWd1cmF0aW9uJTIwJTNEJTIwRGVjaXNpb25UcmFuc2Zvcm1lckNvbmZpZygpJTBBJTBBJTIzJTIwSW5pdGlhbGl6aW5nJTIwYSUyMG1vZGVsJTIwKHdpdGglMjByYW5kb20lMjB3ZWlnaHRzKSUyMGZyb20lMjB0aGUlMjBjb25maWd1cmF0aW9uJTBBbW9kZWwlMjAlM0QlMjBEZWNpc2lvblRyYW5zZm9ybWVyTW9kZWwoY29uZmlndXJhdGlvbiklMEElMEElMjMlMjBBY2Nlc3NpbmclMjB0aGUlMjBtb2RlbCUyMGNvbmZpZ3VyYXRpb24lMEFjb25maWd1cmF0aW9uJTIwJTNEJTIwbW9kZWwuY29uZmln",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> DecisionTransformerConfig, DecisionTransformerModel | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Initializing a DecisionTransformer configuration</span> | |
| <span class="hljs-meta">>>> </span>configuration = DecisionTransformerConfig() | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Initializing a model (with random weights) from the configuration</span> | |
| <span class="hljs-meta">>>> </span>model = DecisionTransformerModel(configuration) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Accessing the model configuration</span> | |
| <span class="hljs-meta">>>> </span>configuration = model.config`,wrap:!1}}),{c(){s=m("p"),s.textContent=y,c=r(),h(l.$$.fragment)},l(o){s=d(o,"P",{"data-svelte-h":!0}),j(s)!=="svelte-11lpom8"&&(s.textContent=y),c=i(o),g(l.$$.fragment,o)},m(o,b){a(o,s,b),a(o,c,b),u(l,o,b),f=!0},p:ke,i(o){f||(_(l.$$.fragment,o),f=!0)},o(o){T(l.$$.fragment,o),f=!1},d(o){o&&(n(s),n(c)),M(l,o)}}}function Qe(C){let s,y=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code> | |
| instance afterwards instead of this since the former takes care of running the pre and post processing steps while | |
| the latter silently ignores them.`;return{c(){s=m("p"),s.innerHTML=y},l(c){s=d(c,"P",{"data-svelte-h":!0}),j(s)!=="svelte-fincs2"&&(s.innerHTML=y)},m(c,l){a(c,s,l)},p:ke,d(c){c&&n(s)}}}function Ke(C){let s,y="Examples:",c,l,f;return l=new Ae({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMERlY2lzaW9uVHJhbnNmb3JtZXJNb2RlbCUwQWltcG9ydCUyMHRvcmNoJTBBJTBBbW9kZWwlMjAlM0QlMjBEZWNpc2lvblRyYW5zZm9ybWVyTW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUyMmVkYmVlY2hpbmclMkZkZWNpc2lvbi10cmFuc2Zvcm1lci1neW0taG9wcGVyLW1lZGl1bSUyMiklMEElMjMlMjBldmFsdWF0aW9uJTBBbW9kZWwlMjAlM0QlMjBtb2RlbC50byhkZXZpY2UpJTBBbW9kZWwuZXZhbCgpJTBBJTBBZW52JTIwJTNEJTIwZ3ltLm1ha2UoJTIySG9wcGVyLXYzJTIyKSUwQXN0YXRlX2RpbSUyMCUzRCUyMGVudi5vYnNlcnZhdGlvbl9zcGFjZS5zaGFwZSU1QjAlNUQlMEFhY3RfZGltJTIwJTNEJTIwZW52LmFjdGlvbl9zcGFjZS5zaGFwZSU1QjAlNUQlMEElMEFzdGF0ZSUyMCUzRCUyMGVudi5yZXNldCgpJTBBc3RhdGVzJTIwJTNEJTIwdG9yY2guZnJvbV9udW1weShzdGF0ZSkucmVzaGFwZSgxJTJDJTIwMSUyQyUyMHN0YXRlX2RpbSkudG8oZGV2aWNlJTNEZGV2aWNlJTJDJTIwZHR5cGUlM0R0b3JjaC5mbG9hdDMyKSUwQWFjdGlvbnMlMjAlM0QlMjB0b3JjaC56ZXJvcygoMSUyQyUyMDElMkMlMjBhY3RfZGltKSUyQyUyMGRldmljZSUzRGRldmljZSUyQyUyMGR0eXBlJTNEdG9yY2guZmxvYXQzMiklMEFyZXdhcmRzJTIwJTNEJTIwdG9yY2guemVyb3MoMSUyQyUyMDElMkMlMjBkZXZpY2UlM0RkZXZpY2UlMkMlMjBkdHlwZSUzRHRvcmNoLmZsb2F0MzIpJTBBdGFyZ2V0X3JldHVybiUyMCUzRCUyMHRvcmNoLnRlbnNvcihUQVJHRVRfUkVUVVJOJTJDJTIwZHR5cGUlM0R0b3JjaC5mbG9hdDMyKS5yZXNoYXBlKDElMkMlMjAxKSUwQXRpbWVzdGVwcyUyMCUzRCUyMHRvcmNoLnRlbnNvcigwJTJDJTIwZGV2aWNlJTNEZGV2aWNlJTJDJTIwZHR5cGUlM0R0b3JjaC5sb25nKS5yZXNoYXBlKDElMkMlMjAxKSUwQWF0dGVudGlvbl9tYXNrJTIwJTNEJTIwdG9yY2guemVyb3MoMSUyQyUyMDElMkMlMjBkZXZpY2UlM0RkZXZpY2UlMkMlMjBkdHlwZSUzRHRvcmNoLmZsb2F0MzIpJTBBJTBBJTIzJTIwZm9yd2FyZCUyMHBhc3MlMEF3aXRoJTIwdG9yY2gubm9fZ3JhZCgpJTNBJTBBJTIwJTIwJTIwJTIwc3RhdGVfcHJlZHMlMkMlMjBhY3Rpb25fcHJlZHMlMkMlMjByZXR1cm5fcHJlZHMlMjAlM0QlMjBtb2RlbCglMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBzdGF0ZXMlM0RzdGF0ZXMlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBhY3Rpb25zJTNEYWN0aW9ucyUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHJld2FyZHMlM0RyZXdhcmRzJT
JDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcmV0dXJuc190b19nbyUzRHRhcmdldF9yZXR1cm4lMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjB0aW1lc3RlcHMlM0R0aW1lc3RlcHMlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBhdHRlbnRpb25fbWFzayUzRGF0dGVudGlvbl9tYXNrJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcmV0dXJuX2RpY3QlM0RGYWxzZSUyQyUwQSUyMCUyMCUyMCUyMCk=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> DecisionTransformerModel | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span>model = DecisionTransformerModel.from_pretrained(<span class="hljs-string">"edbeeching/decision-transformer-gym-hopper-medium"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># evaluation</span> | |
| <span class="hljs-meta">>>> </span>model = model.to(device) | |
| <span class="hljs-meta">>>> </span>model.<span class="hljs-built_in">eval</span>() | |
| <span class="hljs-meta">>>> </span>env = gym.make(<span class="hljs-string">"Hopper-v3"</span>) | |
| <span class="hljs-meta">>>> </span>state_dim = env.observation_space.shape[<span class="hljs-number">0</span>] | |
| <span class="hljs-meta">>>> </span>act_dim = env.action_space.shape[<span class="hljs-number">0</span>] | |
| <span class="hljs-meta">>>> </span>state = env.reset() | |
| <span class="hljs-meta">>>> </span>states = torch.from_numpy(state).reshape(<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, state_dim).to(device=device, dtype=torch.float32) | |
| <span class="hljs-meta">>>> </span>actions = torch.zeros((<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, act_dim), device=device, dtype=torch.float32) | |
| <span class="hljs-meta">>>> </span>rewards = torch.zeros(<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, device=device, dtype=torch.float32) | |
| <span class="hljs-meta">>>> </span>target_return = torch.tensor(TARGET_RETURN, dtype=torch.float32).reshape(<span class="hljs-number">1</span>, <span class="hljs-number">1</span>) | |
| <span class="hljs-meta">>>> </span>timesteps = torch.tensor(<span class="hljs-number">0</span>, device=device, dtype=torch.long).reshape(<span class="hljs-number">1</span>, <span class="hljs-number">1</span>) | |
| <span class="hljs-meta">>>> </span>attention_mask = torch.zeros(<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, device=device, dtype=torch.float32) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># forward pass</span> | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">with</span> torch.no_grad(): | |
| <span class="hljs-meta">... </span> state_preds, action_preds, return_preds = model( | |
| <span class="hljs-meta">... </span> states=states, | |
| <span class="hljs-meta">... </span> actions=actions, | |
| <span class="hljs-meta">... </span> rewards=rewards, | |
| <span class="hljs-meta">... </span> returns_to_go=target_return, | |
| <span class="hljs-meta">... </span> timesteps=timesteps, | |
| <span class="hljs-meta">... </span> attention_mask=attention_mask, | |
| <span class="hljs-meta">... </span> return_dict=<span class="hljs-literal">False</span>, | |
| <span class="hljs-meta">... </span> )`,wrap:!1}}),{c(){s=m("p"),s.textContent=y,c=r(),h(l.$$.fragment)},l(o){s=d(o,"P",{"data-svelte-h":!0}),j(s)!=="svelte-kvfsh7"&&(s.textContent=y),c=i(o),g(l.$$.fragment,o)},m(o,b){a(o,s,b),a(o,c,b),u(l,o,b),f=!0},p:ke,i(o){f||(_(l.$$.fragment,o),f=!0)},o(o){T(l.$$.fragment,o),f=!1},d(o){o&&(n(s),n(c)),M(l,o)}}}function et(C){let s,y,c,l,f,o,b,ie,G,Ue=`The Decision Transformer model was proposed in <a href="https://arxiv.org/abs/2106.01345" rel="nofollow">Decision Transformer: Reinforcement Learning via Sequence Modeling</a><br/> | |
| by Lili Chen, Kevin Lu, Aravind Rajeswaran, Kimin Lee, Aditya Grover, Michael Laskin, Pieter Abbeel, Aravind Srinivas, Igor Mordatch.`,le,z,Re="The abstract from the paper is the following:",ce,N,Ge=`<em>We introduce a framework that abstracts Reinforcement Learning (RL) as a sequence modeling problem. | |
| This allows us to draw upon the simplicity and scalability of the Transformer architecture, and associated advances | |
| in language modeling such as GPT-x and BERT. In particular, we present Decision Transformer, an architecture that | |
| casts the problem of RL as conditional sequence modeling. Unlike prior approaches to RL that fit value functions or | |
| compute policy gradients, Decision Transformer simply outputs the optimal actions by leveraging a causally masked | |
| Transformer. By conditioning an autoregressive model on the desired return (reward), past states, and actions, our | |
| Decision Transformer model can generate future actions that achieve the desired return. Despite its simplicity, | |
| Decision Transformer matches or exceeds the performance of state-of-the-art model-free offline RL baselines on | |
| Atari, OpenAI Gym, and Key-to-Door tasks.</em>`,me,W,ze="This version of the model is for tasks where the state is a vector.",de,B,Ne='This model was contributed by <a href="https://huggingface.co/edbeeching" rel="nofollow">edbeeching</a>. The original code can be found <a href="https://github.com/kzl/decision-transformer" rel="nofollow">here</a>.',pe,E,fe,v,I,ye,O,We=`This is the configuration class to store the configuration of a <a href="/docs/transformers/main/en/model_doc/decision_transformer#transformers.DecisionTransformerModel">DecisionTransformerModel</a>. It is used to | |
| instantiate a Decision Transformer model according to the specified arguments, defining the model architecture. | |
| Instantiating a configuration with the defaults will yield a similar configuration to that of the standard | |
| DecisionTransformer architecture. Many of the config options are used to instantiate the GPT2 model that is used as | |
| part of the architecture.`,ve,Q,Be=`Configuration objects inherit from <a href="/docs/transformers/main/en/main_classes/configuration#transformers.PretrainedConfig">PretrainedConfig</a> and can be used to control the model outputs. Read the | |
| documentation from <a href="/docs/transformers/main/en/main_classes/configuration#transformers.PretrainedConfig">PretrainedConfig</a> for more information.`,we,Z,he,F,ge,D,H,je,K,A,ue,V,_e,w,L,$e,ee,Ee=`The Decision Transformer Model | |
| This model is a PyTorch <a href="https://pytorch.org/docs/stable/nn.html#torch.nn.Module" rel="nofollow">torch.nn.Module</a> sub-class. Use | |
| it as a regular PyTorch Module and refer to the PyTorch documentation for all matters related to general usage and | |
| behavior.`,Je,te,Ie=`The model builds upon the GPT2 architecture to perform autoregressive prediction of actions in an offline RL | |
| setting. Refer to the paper for more details: <a href="https://arxiv.org/abs/2106.01345" rel="nofollow">https://arxiv.org/abs/2106.01345</a>`,De,$,S,Ce,ne,Fe='The <a href="/docs/transformers/main/en/model_doc/decision_transformer#transformers.DecisionTransformerModel">DecisionTransformerModel</a> forward method, overrides the <code>__call__</code> special method.',Ze,x,xe,k,Te,X,Me,oe,be;return f=new re({props:{title:"Decision Transformer",local:"decision-transformer",headingTag:"h1"}}),b=new re({props:{title:"Overview",local:"overview",headingTag:"h2"}}),E=new re({props:{title:"DecisionTransformerConfig",local:"transformers.DecisionTransformerConfig",headingTag:"h2"}}),I=new ae({props:{name:"class transformers.DecisionTransformerConfig",anchor:"transformers.DecisionTransformerConfig",parameters:[{name:"state_dim",val:" = 17"},{name:"act_dim",val:" = 4"},{name:"hidden_size",val:" = 128"},{name:"max_ep_len",val:" = 4096"},{name:"action_tanh",val:" = True"},{name:"vocab_size",val:" = 1"},{name:"n_positions",val:" = 1024"},{name:"n_layer",val:" = 3"},{name:"n_head",val:" = 1"},{name:"n_inner",val:" = None"},{name:"activation_function",val:" = 'relu'"},{name:"resid_pdrop",val:" = 0.1"},{name:"embd_pdrop",val:" = 0.1"},{name:"attn_pdrop",val:" = 0.1"},{name:"layer_norm_epsilon",val:" = 1e-05"},{name:"initializer_range",val:" = 0.02"},{name:"scale_attn_weights",val:" = True"},{name:"use_cache",val:" = True"},{name:"bos_token_id",val:" = 50256"},{name:"eos_token_id",val:" = 50256"},{name:"scale_attn_by_inverse_layer_idx",val:" = False"},{name:"reorder_and_upcast_attn",val:" = False"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.DecisionTransformerConfig.state_dim",description:`<strong>state_dim</strong> (<code>int</code>, <em>optional</em>, defaults to 17) — | |
| The state size for the RL environment`,name:"state_dim"},{anchor:"transformers.DecisionTransformerConfig.act_dim",description:`<strong>act_dim</strong> (<code>int</code>, <em>optional</em>, defaults to 4) — | |
| The size of the output action space`,name:"act_dim"},{anchor:"transformers.DecisionTransformerConfig.hidden_size",description:`<strong>hidden_size</strong> (<code>int</code>, <em>optional</em>, defaults to 128) — | |
| The size of the hidden layers`,name:"hidden_size"},{anchor:"transformers.DecisionTransformerConfig.max_ep_len",description:`<strong>max_ep_len</strong> (<code>int</code>, <em>optional</em>, defaults to 4096) — | |
| The maximum length of an episode in the environment`,name:"max_ep_len"},{anchor:"transformers.DecisionTransformerConfig.action_tanh",description:`<strong>action_tanh</strong> (<code>bool</code>, <em>optional</em>, defaults to True) — | |
| Whether to use a tanh activation on action prediction`,name:"action_tanh"},{anchor:"transformers.DecisionTransformerConfig.vocab_size",description:`<strong>vocab_size</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| Vocabulary size of the GPT-2 model. Defines the number of different tokens that can be represented by the | |
| <code>input_ids</code> passed when calling <a href="/docs/transformers/main/en/model_doc/decision_transformer#transformers.DecisionTransformerModel">DecisionTransformerModel</a>.`,name:"vocab_size"},{anchor:"transformers.DecisionTransformerConfig.n_positions",description:`<strong>n_positions</strong> (<code>int</code>, <em>optional</em>, defaults to 1024) — | |
| The maximum sequence length that this model might ever be used with. Typically set this to something large | |
| just in case (e.g., 512 or 1024 or 2048).`,name:"n_positions"},{anchor:"transformers.DecisionTransformerConfig.n_layer",description:`<strong>n_layer</strong> (<code>int</code>, <em>optional</em>, defaults to 3) — | |
| Number of hidden layers in the Transformer encoder.`,name:"n_layer"},{anchor:"transformers.DecisionTransformerConfig.n_head",description:`<strong>n_head</strong> (<code>int</code>, <em>optional</em>, defaults to 1) — | |
| Number of attention heads for each attention layer in the Transformer encoder.`,name:"n_head"},{anchor:"transformers.DecisionTransformerConfig.n_inner",description:`<strong>n_inner</strong> (<code>int</code>, <em>optional</em>) — | |
| Dimensionality of the inner feed-forward layers. If unset, will default to 4 times <code>n_embd</code>.`,name:"n_inner"},{anchor:"transformers.DecisionTransformerConfig.activation_function",description:`<strong>activation_function</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"relu"</code>) — | |
| Activation function, to be selected in the list <code>["relu", "silu", "gelu", "tanh", "gelu_new"]</code>.`,name:"activation_function"},{anchor:"transformers.DecisionTransformerConfig.resid_pdrop",description:`<strong>resid_pdrop</strong> (<code>float</code>, <em>optional</em>, defaults to 0.1) — | |
| The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.`,name:"resid_pdrop"},{anchor:"transformers.DecisionTransformerConfig.embd_pdrop",description:`<strong>embd_pdrop</strong> (<code>float</code>, <em>optional</em>, defaults to 0.1) — | |
| The dropout ratio for the embeddings.`,name:"embd_pdrop"},{anchor:"transformers.DecisionTransformerConfig.attn_pdrop",description:`<strong>attn_pdrop</strong> (<code>float</code>, <em>optional</em>, defaults to 0.1) — | |
| The dropout ratio for the attention.`,name:"attn_pdrop"},{anchor:"transformers.DecisionTransformerConfig.layer_norm_epsilon",description:`<strong>layer_norm_epsilon</strong> (<code>float</code>, <em>optional</em>, defaults to 1e-5) — | |
| The epsilon to use in the layer normalization layers.`,name:"layer_norm_epsilon"},{anchor:"transformers.DecisionTransformerConfig.initializer_range",description:`<strong>initializer_range</strong> (<code>float</code>, <em>optional</em>, defaults to 0.02) — | |
| The standard deviation of the truncated_normal_initializer for initializing all weight matrices.`,name:"initializer_range"},{anchor:"transformers.DecisionTransformerConfig.scale_attn_weights",description:`<strong>scale_attn_weights</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Scale attention weights by dividing by sqrt(hidden_size).`,name:"scale_attn_weights"},{anchor:"transformers.DecisionTransformerConfig.use_cache",description:`<strong>use_cache</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not the model should return the last key/values attentions (not used by all models).`,name:"use_cache"},{anchor:"transformers.DecisionTransformerConfig.scale_attn_by_inverse_layer_idx",description:`<strong>scale_attn_by_inverse_layer_idx</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether to additionally scale attention weights by <code>1 / (layer_idx + 1)</code>.`,name:"scale_attn_by_inverse_layer_idx"},{anchor:"transformers.DecisionTransformerConfig.reorder_and_upcast_attn",description:`<strong>reorder_and_upcast_attn</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether to scale keys (K) prior to computing attention (dot-product) and upcast attention | |
| dot-product/softmax to float() when training with mixed precision.`,name:"reorder_and_upcast_attn"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/decision_transformer/configuration_decision_transformer.py#L24"}}),Z=new He({props:{anchor:"transformers.DecisionTransformerConfig.example",$$slots:{default:[Oe]},$$scope:{ctx:C}}}),F=new re({props:{title:"DecisionTransformerGPT2Model",local:"transformers.DecisionTransformerGPT2Model",headingTag:"h2"}}),H=new ae({props:{name:"class transformers.DecisionTransformerGPT2Model",anchor:"transformers.DecisionTransformerGPT2Model",parameters:[{name:"config",val:""}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/decision_transformer/modeling_decision_transformer.py#L469"}}),A=new ae({props:{name:"forward",anchor:"transformers.DecisionTransformerGPT2Model.forward",parameters:[{name:"input_ids",val:": Optional = None"},{name:"past_key_values",val:": Optional = None"},{name:"attention_mask",val:": Optional = None"},{name:"token_type_ids",val:": Optional = None"},{name:"position_ids",val:": Optional = None"},{name:"head_mask",val:": Optional = None"},{name:"inputs_embeds",val:": Optional = None"},{name:"encoder_hidden_states",val:": Optional = None"},{name:"encoder_attention_mask",val:": Optional = None"},{name:"use_cache",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/decision_transformer/modeling_decision_transformer.py#L498"}}),V=new re({props:{title:"DecisionTransformerModel",local:"transformers.DecisionTransformerModel",headingTag:"h2"}}),L=new ae({props:{name:"class 
transformers.DecisionTransformerModel",anchor:"transformers.DecisionTransformerModel",parameters:[{name:"config",val:""}],parametersDescription:[{anchor:"transformers.DecisionTransformerModel.config",description:`<strong>config</strong> (<a href="/docs/transformers/main/en/model_doc/decision_transformer#transformers.DecisionTransformerConfig">~DecisionTransformerConfig</a>) — Model configuration class with all the parameters of the model. | |
| Initializing with a config file does not load the weights associated with the model, only the | |
| configuration. Check out the <a href="/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel.from_pretrained">from_pretrained()</a> method to load the model weights.`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/decision_transformer/modeling_decision_transformer.py#L778"}}),S=new ae({props:{name:"forward",anchor:"transformers.DecisionTransformerModel.forward",parameters:[{name:"states",val:": Optional = None"},{name:"actions",val:": Optional = None"},{name:"rewards",val:": Optional = None"},{name:"returns_to_go",val:": Optional = None"},{name:"timesteps",val:": Optional = None"},{name:"attention_mask",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"return_dict",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.DecisionTransformerModel.forward.states",description:`<strong>states</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, episode_length, state_dim)</code>) — | |
| The states for each step in the trajectory`,name:"states"},{anchor:"transformers.DecisionTransformerModel.forward.actions",description:`<strong>actions</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, episode_length, act_dim)</code>) — | |
| The actions taken by the “expert” policy for the current state; these are masked for autoregressive | |
| prediction`,name:"actions"},{anchor:"transformers.DecisionTransformerModel.forward.rewards",description:`<strong>rewards</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, episode_length, 1)</code>) — | |
| The rewards for each state, action`,name:"rewards"},{anchor:"transformers.DecisionTransformerModel.forward.returns_to_go",description:`<strong>returns_to_go</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, episode_length, 1)</code>) — | |
| The returns for each state in the trajectory`,name:"returns_to_go"},{anchor:"transformers.DecisionTransformerModel.forward.timesteps",description:`<strong>timesteps</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, episode_length)</code>) — | |
| The timestep for each step in the trajectory`,name:"timesteps"},{anchor:"transformers.DecisionTransformerModel.forward.attention_mask",description:`<strong>attention_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, episode_length)</code>) — | |
| Masking, used to mask the actions when performing autoregressive prediction`,name:"attention_mask"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/decision_transformer/modeling_decision_transformer.py#L812",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A <code>transformers.models.decision_transformer.modeling_decision_transformer.DecisionTransformerOutput</code> or a tuple of | |
| <code>torch.FloatTensor</code> (if <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various | |
| elements depending on the configuration (<a | |
| href="/docs/transformers/main/en/model_doc/decision_transformer#transformers.DecisionTransformerConfig" | |
| >DecisionTransformerConfig</a>) and inputs.</p> | |
| <ul> | |
| <li> | |
| <p><strong>last_hidden_state</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>) — Sequence of hidden-states at the output of the last layer of the model.</p> | |
| </li> | |
| <li> | |
| <p><strong>state_preds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, state_dim)</code>) — Environment state predictions</p> | |
| </li> | |
| <li> | |
| <p><strong>action_preds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, act_dim)</code>) — Model action predictions</p> | |
| </li> | |
| <li> | |
| <p><strong>return_preds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, 1)</code>) — Predicted returns for each state</p> | |
| </li> | |
| <li> | |
| <p><strong>hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings + one for the output of each layer) of | |
| shape <code>(batch_size, sequence_length, hidden_size)</code>.</p> | |
| <p>Hidden-states of the model at the output of each layer plus the initial embedding outputs.</p> | |
| </li> | |
| <li> | |
| <p><strong>attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p> | |
| <p>Attention weights after the attention softmax, used to compute the weighted average in the self-attention | |
| heads.</p> | |
| </li> | |
| </ul> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>transformers.models.decision_transformer.modeling_decision_transformer.DecisionTransformerOutput</code> or <code>tuple(torch.FloatTensor)</code></p> | |
| `}}),x=new Pe({props:{$$slots:{default:[Qe]},$$scope:{ctx:C}}}),k=new He({props:{anchor:"transformers.DecisionTransformerModel.forward.example",$$slots:{default:[Ke]},$$scope:{ctx:C}}}),X=new qe({props:{source:"https://github.com/huggingface/transformers/blob/main/docs/source/en/model_doc/decision_transformer.md"}}),{c(){s=m("meta"),y=r(),c=m("p"),l=r(),h(f.$$.fragment),o=r(),h(b.$$.fragment),ie=r(),G=m("p"),G.innerHTML=Ue,le=r(),z=m("p"),z.textContent=Re,ce=r(),N=m("p"),N.innerHTML=Ge,me=r(),W=m("p"),W.textContent=ze,de=r(),B=m("p"),B.innerHTML=Ne,pe=r(),h(E.$$.fragment),fe=r(),v=m("div"),h(I.$$.fragment),ye=r(),O=m("p"),O.innerHTML=We,ve=r(),Q=m("p"),Q.innerHTML=Be,we=r(),h(Z.$$.fragment),he=r(),h(F.$$.fragment),ge=r(),D=m("div"),h(H.$$.fragment),je=r(),K=m("div"),h(A.$$.fragment),ue=r(),h(V.$$.fragment),_e=r(),w=m("div"),h(L.$$.fragment),$e=r(),ee=m("p"),ee.innerHTML=Ee,Je=r(),te=m("p"),te.innerHTML=Ie,De=r(),$=m("div"),h(S.$$.fragment),Ce=r(),ne=m("p"),ne.innerHTML=Fe,Ze=r(),h(x.$$.fragment),xe=r(),h(k.$$.fragment),Te=r(),h(X.$$.fragment),Me=r(),oe=m("p"),this.h()},l(e){const t=Ye("svelte-u9bgzb",document.head);s=d(t,"META",{name:!0,content:!0}),t.forEach(n),y=i(e),c=d(e,"P",{}),P(c).forEach(n),l=i(e),g(f.$$.fragment,e),o=i(e),g(b.$$.fragment,e),ie=i(e),G=d(e,"P",{"data-svelte-h":!0}),j(G)!=="svelte-xt2qk"&&(G.innerHTML=Ue),le=i(e),z=d(e,"P",{"data-svelte-h":!0}),j(z)!=="svelte-vfdo9a"&&(z.textContent=Re),ce=i(e),N=d(e,"P",{"data-svelte-h":!0}),j(N)!=="svelte-lyw843"&&(N.innerHTML=Ge),me=i(e),W=d(e,"P",{"data-svelte-h":!0}),j(W)!=="svelte-1b8os8n"&&(W.textContent=ze),de=i(e),B=d(e,"P",{"data-svelte-h":!0}),j(B)!=="svelte-1ferxi2"&&(B.innerHTML=Ne),pe=i(e),g(E.$$.fragment,e),fe=i(e),v=d(e,"DIV",{class:!0});var 
J=P(v);g(I.$$.fragment,J),ye=i(J),O=d(J,"P",{"data-svelte-h":!0}),j(O)!=="svelte-1ooiy4t"&&(O.innerHTML=We),ve=i(J),Q=d(J,"P",{"data-svelte-h":!0}),j(Q)!=="svelte-o55m63"&&(Q.innerHTML=Be),we=i(J),g(Z.$$.fragment,J),J.forEach(n),he=i(e),g(F.$$.fragment,e),ge=i(e),D=d(e,"DIV",{class:!0});var Y=P(D);g(H.$$.fragment,Y),je=i(Y),K=d(Y,"DIV",{class:!0});var se=P(K);g(A.$$.fragment,se),se.forEach(n),Y.forEach(n),ue=i(e),g(V.$$.fragment,e),_e=i(e),w=d(e,"DIV",{class:!0});var U=P(w);g(L.$$.fragment,U),$e=i(U),ee=d(U,"P",{"data-svelte-h":!0}),j(ee)!=="svelte-s64yr0"&&(ee.innerHTML=Ee),Je=i(U),te=d(U,"P",{"data-svelte-h":!0}),j(te)!=="svelte-31643h"&&(te.innerHTML=Ie),De=i(U),$=d(U,"DIV",{class:!0});var R=P($);g(S.$$.fragment,R),Ce=i(R),ne=d(R,"P",{"data-svelte-h":!0}),j(ne)!=="svelte-gyhnht"&&(ne.innerHTML=Fe),Ze=i(R),g(x.$$.fragment,R),xe=i(R),g(k.$$.fragment,R),R.forEach(n),U.forEach(n),Te=i(e),g(X.$$.fragment,e),Me=i(e),oe=d(e,"P",{}),P(oe).forEach(n),this.h()},h(){q(s,"name","hf:doc:metadata"),q(s,"content",tt),q(v,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),q(K,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),q(D,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),q($,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),q(w,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 
mt-8")},m(e,t){p(document.head,s),a(e,y,t),a(e,c,t),a(e,l,t),u(f,e,t),a(e,o,t),u(b,e,t),a(e,ie,t),a(e,G,t),a(e,le,t),a(e,z,t),a(e,ce,t),a(e,N,t),a(e,me,t),a(e,W,t),a(e,de,t),a(e,B,t),a(e,pe,t),u(E,e,t),a(e,fe,t),a(e,v,t),u(I,v,null),p(v,ye),p(v,O),p(v,ve),p(v,Q),p(v,we),u(Z,v,null),a(e,he,t),u(F,e,t),a(e,ge,t),a(e,D,t),u(H,D,null),p(D,je),p(D,K),u(A,K,null),a(e,ue,t),u(V,e,t),a(e,_e,t),a(e,w,t),u(L,w,null),p(w,$e),p(w,ee),p(w,Je),p(w,te),p(w,De),p(w,$),u(S,$,null),p($,Ce),p($,ne),p($,Ze),u(x,$,null),p($,xe),u(k,$,null),a(e,Te,t),u(X,e,t),a(e,Me,t),a(e,oe,t),be=!0},p(e,[t]){const J={};t&2&&(J.$$scope={dirty:t,ctx:e}),Z.$set(J);const Y={};t&2&&(Y.$$scope={dirty:t,ctx:e}),x.$set(Y);const se={};t&2&&(se.$$scope={dirty:t,ctx:e}),k.$set(se)},i(e){be||(_(f.$$.fragment,e),_(b.$$.fragment,e),_(E.$$.fragment,e),_(I.$$.fragment,e),_(Z.$$.fragment,e),_(F.$$.fragment,e),_(H.$$.fragment,e),_(A.$$.fragment,e),_(V.$$.fragment,e),_(L.$$.fragment,e),_(S.$$.fragment,e),_(x.$$.fragment,e),_(k.$$.fragment,e),_(X.$$.fragment,e),be=!0)},o(e){T(f.$$.fragment,e),T(b.$$.fragment,e),T(E.$$.fragment,e),T(I.$$.fragment,e),T(Z.$$.fragment,e),T(F.$$.fragment,e),T(H.$$.fragment,e),T(A.$$.fragment,e),T(V.$$.fragment,e),T(L.$$.fragment,e),T(S.$$.fragment,e),T(x.$$.fragment,e),T(k.$$.fragment,e),T(X.$$.fragment,e),be=!1},d(e){e&&(n(y),n(c),n(l),n(o),n(ie),n(G),n(le),n(z),n(ce),n(N),n(me),n(W),n(de),n(B),n(pe),n(fe),n(v),n(he),n(ge),n(D),n(ue),n(_e),n(w),n(Te),n(Me),n(oe)),n(s),M(f,e),M(b,e),M(E,e),M(I),M(Z),M(F,e),M(H),M(A),M(V,e),M(L),M(S),M(x),M(k),M(X,e)}}}const tt='{"title":"Decision 
Transformer","local":"decision-transformer","sections":[{"title":"Overview","local":"overview","sections":[],"depth":2},{"title":"DecisionTransformerConfig","local":"transformers.DecisionTransformerConfig","sections":[],"depth":2},{"title":"DecisionTransformerGPT2Model","local":"transformers.DecisionTransformerGPT2Model","sections":[],"depth":2},{"title":"DecisionTransformerModel","local":"transformers.DecisionTransformerModel","sections":[],"depth":2}],"depth":1}';function nt(C){return Le(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class mt extends Se{constructor(s){super(),Xe(this,s,nt,et,Ve,{})}}export{mt as component}; | |
Xet Storage Details
- Size:
- 31.8 kB
- Xet hash:
- 85c9ef547fc719f1b92885f147dfc22277a71f9446c3272559afa6332b849e36
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.