Buckets:

hf-doc-build/doc / diffusers /main /en /_app /pages /using-diffusers /rl.mdx-hf-doc-builder.js
rtrm's picture
download
raw
8.11 kB
import{S as Be,i as Fe,s as He,e as i,k as p,w as we,t as o,M as Ye,c as n,d as t,m as h,a,x as Pe,h as l,b as s,N as ze,G as r,g as u,y as De,L as Je,q as ye,o as Ee,B as $e,v as Ke}from"../../chunks/vendor-hf-doc-builder.js";import{D as Qe}from"../../chunks/Docstring-hf-doc-builder.js";import{I as Te}from"../../chunks/IconCopyLink-hf-doc-builder.js";function We(Me){let m,q,g,_,U,w,Z,G,ee,T,b,re,C,te,ie,B,v,x,O,P,ne,I,se,F,c,ae,D,j,oe,le,y,fe,ue,H,E,de,$,N,Ae,Y,d,M,ce,A,pe,R,he,me,ge,S,ve,L,_e,be,z;return w=new Te({}),P=new Te({}),M=new Qe({props:{name:"class diffusers.experimental.ValueGuidedRLPipeline",anchor:"diffusers.experimental.ValueGuidedRLPipeline",parameters:[{name:"value_function",val:": UNet1DModel"},{name:"unet",val:": UNet1DModel"},{name:"scheduler",val:": DDPMScheduler"},{name:"env",val:""}],parametersDescription:[{anchor:"diffusers.experimental.ValueGuidedRLPipeline.value_function",description:'<strong>value_function</strong> (<a href="/docs/diffusers/main/en/api/models/unet#diffusers.UNet1DModel">UNet1DModel</a>) &#x2014; A specialized UNet for fine-tuning trajectories base on reward.',name:"value_function"},{anchor:"diffusers.experimental.ValueGuidedRLPipeline.unet",description:'<strong>unet</strong> (<a href="/docs/diffusers/main/en/api/models/unet#diffusers.UNet1DModel">UNet1DModel</a>) &#x2014; U-Net architecture to denoise the encoded trajectories.',name:"unet"},{anchor:"diffusers.experimental.ValueGuidedRLPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/main/en/api/schedulers/overview#diffusers.SchedulerMixin">SchedulerMixin</a>) &#x2014;
A scheduler to be used in combination with <code>unet</code> to denoise the encoded trajectories. Default for this
application is <a href="/docs/diffusers/main/en/api/schedulers/ddpm#diffusers.DDPMScheduler">DDPMScheduler</a>.
env &#x2014; An environment following the OpenAI gym API to act in. For now only Hopper has pretrained models.`,name:"scheduler"}],source:"https://github.com/huggingface/diffusers/blob/main/src/diffusers/experimental/rl/value_guided_sampling.py#L25"}}),{c(){m=i("meta"),q=p(),g=i("h1"),_=i("a"),U=i("span"),we(w.$$.fragment),Z=p(),G=i("span"),ee=o("Using Diffusers for reinforcement learning"),T=p(),b=i("p"),re=o("Support for one RL model and related pipelines is included in the "),C=i("code"),te=o("experimental"),ie=o(` source of diffusers.
More models and examples coming soon!`),B=p(),v=i("h1"),x=i("a"),O=i("span"),we(P.$$.fragment),ne=p(),I=i("span"),se=o("Diffuser Value-guided Planning"),F=p(),c=i("p"),ae=o("You can run the model from "),D=i("a"),j=i("em"),oe=o("Planning with Diffusion for Flexible Behavior Synthesis"),le=o(` with Diffusers.
The script is located in the `),y=i("a"),fe=o("RL Examples"),ue=o(" folder."),H=p(),E=i("p"),de=o("Or, run this example in Colab "),$=i("a"),N=i("img"),Y=p(),d=i("div"),we(M.$$.fragment),ce=p(),A=i("p"),pe=o("This model inherits from "),R=i("a"),he=o("DiffusionPipeline"),me=o(`. Check the superclass documentation for the generic methods the
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
Pipeline for sampling actions from a diffusion model trained to predict sequences of states.`),ge=p(),S=i("p"),ve=o("Original implementation inspired by this repository: "),L=i("a"),_e=o("https://github.com/jannerm/diffuser"),be=o("."),this.h()},l(e){const f=Ye('[data-svelte="svelte-1phssyn"]',document.head);m=n(f,"META",{name:!0,content:!0}),f.forEach(t),q=h(e),g=n(e,"H1",{class:!0});var J=a(g);_=n(J,"A",{id:!0,class:!0,href:!0});var Se=a(_);U=n(Se,"SPAN",{});var Le=a(U);Pe(w.$$.fragment,Le),Le.forEach(t),Se.forEach(t),Z=h(J),G=n(J,"SPAN",{});var Ne=a(G);ee=l(Ne,"Using Diffusers for reinforcement learning"),Ne.forEach(t),J.forEach(t),T=h(e),b=n(e,"P",{});var K=a(b);re=l(K,"Support for one RL model and related pipelines is included in the "),C=n(K,"CODE",{});var Re=a(C);te=l(Re,"experimental"),Re.forEach(t),ie=l(K,` source of diffusers.
More models and examples coming soon!`),K.forEach(t),B=h(e),v=n(e,"H1",{class:!0});var Q=a(v);x=n(Q,"A",{id:!0,class:!0,href:!0});var Ve=a(x);O=n(Ve,"SPAN",{});var ke=a(O);Pe(P.$$.fragment,ke),ke.forEach(t),Ve.forEach(t),ne=h(Q),I=n(Q,"SPAN",{});var Ue=a(I);se=l(Ue,"Diffuser Value-guided Planning"),Ue.forEach(t),Q.forEach(t),F=h(e),c=n(e,"P",{});var V=a(c);ae=l(V,"You can run the model from "),D=n(V,"A",{href:!0,rel:!0});var Ge=a(D);j=n(Ge,"EM",{});var Ce=a(j);oe=l(Ce,"Planning with Diffusion for Flexible Behavior Synthesis"),Ce.forEach(t),Ge.forEach(t),le=l(V,` with Diffusers.
The script is located in the `),y=n(V,"A",{href:!0,rel:!0});var Oe=a(y);fe=l(Oe,"RL Examples"),Oe.forEach(t),ue=l(V," folder."),V.forEach(t),H=h(e),E=n(e,"P",{});var xe=a(E);de=l(xe,"Or, run this example in Colab "),$=n(xe,"A",{href:!0,rel:!0});var Ie=a($);N=n(Ie,"IMG",{src:!0,alt:!0}),Ie.forEach(t),xe.forEach(t),Y=h(e),d=n(e,"DIV",{class:!0});var k=a(d);Pe(M.$$.fragment,k),ce=h(k),A=n(k,"P",{});var W=a(A);pe=l(W,"This model inherits from "),R=n(W,"A",{href:!0});var je=a(R);he=l(je,"DiffusionPipeline"),je.forEach(t),me=l(W,`. Check the superclass documentation for the generic methods the
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
Pipeline for sampling actions from a diffusion model trained to predict sequences of states.`),W.forEach(t),ge=h(k),S=n(k,"P",{});var X=a(S);ve=l(X,"Original implementation inspired by this repository: "),L=n(X,"A",{href:!0,rel:!0});var qe=a(L);_e=l(qe,"https://github.com/jannerm/diffuser"),qe.forEach(t),be=l(X,"."),X.forEach(t),k.forEach(t),this.h()},h(){s(m,"name","hf:doc:metadata"),s(m,"content",JSON.stringify(Xe)),s(_,"id","using-diffusers-for-reinforcement-learning"),s(_,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),s(_,"href","#using-diffusers-for-reinforcement-learning"),s(g,"class","relative group"),s(x,"id","diffusers.experimental.ValueGuidedRLPipeline"),s(x,"class","header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full"),s(x,"href","#diffusers.experimental.ValueGuidedRLPipeline"),s(v,"class","relative group"),s(D,"href","https://arxiv.org/abs/2205.09991"),s(D,"rel","nofollow"),s(y,"href","https://github.com/huggingface/diffusers/tree/main/examples/rl"),s(y,"rel","nofollow"),ze(N.src,Ae="https://colab.research.google.com/assets/colab-badge.svg")||s(N,"src",Ae),s(N,"alt","Open In Colab"),s($,"href","https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/reinforcement_learning_with_diffusers.ipynb"),s($,"rel","nofollow"),s(R,"href","/docs/diffusers/main/en/api/pipelines/overview#diffusers.DiffusionPipeline"),s(L,"href","https://github.com/jannerm/diffuser"),s(L,"rel","nofollow"),s(d,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,f){r(document.head,m),u(e,q,f),u(e,g,f),r(g,_),r(_,U),De(w,U,null),r(g,Z),r(g,G),r(G,ee),u(e,T,f),u(e,b,f),r(b,re),r(b,C),r(C,te),r(b,ie),u(e,B,f),u(e,v,f),r(v,x),r(x,O),De(P,O,null),r(v,ne),r(v,I),r(I,se),u(e,F,f),u(e,c,f),r(c,ae),r(c,D),r(D,j),r(j,oe),r(c,le),r(c,y),r(y,fe),r(c,ue),u(e,H,f),u(e,E,f),r(E,de),r(E,$),r($,N),u(e,Y,f),u(e,d,f),De(M,d,null),r(d,ce),r(d,A),r(A,pe),r(A,R),r(R,he),r(A,me),r(d,ge),r(d,S),r(S,ve),r(S,L),r(L,_e),r(S,be),z=!0},p:Je,i(e){z||(ye(w.$$.fragment,e),ye(P.$$.fragment,e),ye(M.$$.fragment,e),z=!0)},o(e){Ee(w.$$.fragment,e),Ee(P.$$.fragment,e),Ee(M.$$.fragment,e),z=!1},d(e){t(m),e&&t(q),e&&t(g),$e(w),e&&t(T),e&&t(b),e&&t(B),e&&t(v),$e(P),e&&t(F),e&&t(c),e&&t(H),e&&t(E),e&&t(Y),e&&t(d),$e(M)}}}const Xe={local:"diffusers.experimental.ValueGuidedRLPipeline",title:"Diffuser Value-guided Planning"};function Ze(Me){return Ke(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class ir extends Be{constructor(m){super();Fe(this,m,Ze,We,He,{})}}export{ir as default,Xe as metadata};

Xet Storage Details

Size:
8.11 kB
·
Xet hash:
1a53d14cc9b1b4ed71cdbb002ca7e5dca50c373d4e6b645999ed34af61145756

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.