Buckets:

rtrm's picture
download
raw
52.1 kB
import{s as so,o as io,n as Ce}from"../chunks/scheduler.8c3d61f6.js";import{S as ro,i as ao,g as c,s as i,r as f,A as lo,h as m,f as s,c as r,j as U,u as h,x as w,k as $,y as a,a as d,v as _,d as b,t as T,w as y}from"../chunks/index.da70eac4.js";import{T as po}from"../chunks/Tip.1d9b8c37.js";import{D as ee}from"../chunks/Docstring.0b9cc58b.js";import{C as Fe}from"../chunks/CodeBlock.a9c4becf.js";import{E as He}from"../chunks/ExampleCodeBlock.ba0ba69d.js";import{H as Je,E as co}from"../chunks/index.a831177d.js";function mo(Z){let t,v='Make sure to check out the Schedulers <a href="../../using-diffusers/schedulers">guide</a> to learn how to explore the tradeoff between scheduler speed and quality, and see the <a href="../../using-diffusers/loading#reuse-a-pipeline">reuse components across pipelines</a> section to learn how to efficiently load the same components into multiple pipelines.';return{c(){t=c("p"),t.innerHTML=v},l(p){t=m(p,"P",{"data-svelte-h":!0}),w(t)!=="svelte-1qn15hi"&&(t.innerHTML=v)},m(p,l){d(p,t,l)},p:Ce,d(p){p&&s(t)}}}function uo(Z){let t,v="Examples:",p,l,u;return l=new 
Fe({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwQ29zbW9zVGV4dFRvV29ybGRQaXBlbGluZSUwQWZyb20lMjBkaWZmdXNlcnMudXRpbHMlMjBpbXBvcnQlMjBleHBvcnRfdG9fdmlkZW8lMEElMEFtb2RlbF9pZCUyMCUzRCUyMCUyMm52aWRpYSUyRkNvc21vcy0xLjAtRGlmZnVzaW9uLTdCLVRleHQyV29ybGQlMjIlMEFwaXBlJTIwJTNEJTIwQ29zbW9zVGV4dFRvV29ybGRQaXBlbGluZS5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmJmbG9hdDE2KSUwQXBpcGUudG8oJTIyY3VkYSUyMiklMEElMEFwcm9tcHQlMjAlM0QlMjAlMjJBJTIwc2xlZWslMkMlMjBodW1hbm9pZCUyMHJvYm90JTIwc3RhbmRzJTIwaW4lMjBhJTIwdmFzdCUyMHdhcmVob3VzZSUyMGZpbGxlZCUyMHdpdGglMjBuZWF0bHklMjBzdGFja2VkJTIwY2FyZGJvYXJkJTIwYm94ZXMlMjBvbiUyMGluZHVzdHJpYWwlMjBzaGVsdmVzLiUyMFRoZSUyMHJvYm90J3MlMjBtZXRhbGxpYyUyMGJvZHklMjBnbGVhbXMlMjB1bmRlciUyMHRoZSUyMGJyaWdodCUyQyUyMGV2ZW4lMjBsaWdodGluZyUyQyUyMGhpZ2hsaWdodGluZyUyMGl0cyUyMGZ1dHVyaXN0aWMlMjBkZXNpZ24lMjBhbmQlMjBpbnRyaWNhdGUlMjBqb2ludHMuJTIwQSUyMGdsb3dpbmclMjBibHVlJTIwbGlnaHQlMjBlbWFuYXRlcyUyMGZyb20lMjBpdHMlMjBjaGVzdCUyQyUyMGFkZGluZyUyMGElMjB0b3VjaCUyMG9mJTIwYWR2YW5jZWQlMjB0ZWNobm9sb2d5LiUyMFRoZSUyMGJhY2tncm91bmQlMjBpcyUyMGRvbWluYXRlZCUyMGJ5JTIwcm93cyUyMG9mJTIwYm94ZXMlMkMlMjBzdWdnZXN0aW5nJTIwYSUyMGhpZ2hseSUyMG9yZ2FuaXplZCUyMHN0b3JhZ2UlMjBzeXN0ZW0uJTIwVGhlJTIwZmxvb3IlMjBpcyUyMGxpbmVkJTIwd2l0aCUyMHdvb2RlbiUyMHBhbGxldHMlMkMlMjBlbmhhbmNpbmclMjB0aGUlMjBpbmR1c3RyaWFsJTIwc2V0dGluZy4lMjBUaGUlMjBjYW1lcmElMjByZW1haW5zJTIwc3RhdGljJTJDJTIwY2FwdHVyaW5nJTIwdGhlJTIwcm9ib3QncyUyMHBvaXNlZCUyMHN0YW5jZSUyMGFtaWRzdCUyMHRoZSUyMG9yZGVybHklMjBlbnZpcm9ubWVudCUyQyUyMHdpdGglMjBhJTIwc2hhbGxvdyUyMGRlcHRoJTIwb2YlMjBmaWVsZCUyMHRoYXQlMjBrZWVwcyUyMHRoZSUyMGZvY3VzJTIwb24lMjB0aGUlMjByb2JvdCUyMHdoaWxlJTIwc3VidGx5JTIwYmx1cnJpbmclMjB0aGUlMjBiYWNrZ3JvdW5kJTIwZm9yJTIwYSUyMGNpbmVtYXRpYyUyMGVmZmVjdC4lMjIlMEElMEFvdXRwdXQlMjAlM0QlMjBwaXBlKHByb21wdCUzRHByb21wdCkuZnJhbWVzJTVCMCU1RCUwQWV4cG9ydF90b192aWRlbyhvdXRwdXQlMkMlMjAlMjJvdXRwdXQubXA0JTIyJTJDJTIwZnBzJTNEMzAp",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> torch
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> CosmosTextToWorldPipeline
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video
<span class="hljs-meta">&gt;&gt;&gt; </span>model_id = <span class="hljs-string">&quot;nvidia/Cosmos-1.0-Diffusion-7B-Text2World&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>pipe = CosmosTextToWorldPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
<span class="hljs-meta">&gt;&gt;&gt; </span>pipe.to(<span class="hljs-string">&quot;cuda&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>prompt = <span class="hljs-string">&quot;A sleek, humanoid robot stands in a vast warehouse filled with neatly stacked cardboard boxes on industrial shelves. The robot&#x27;s metallic body gleams under the bright, even lighting, highlighting its futuristic design and intricate joints. A glowing blue light emanates from its chest, adding a touch of advanced technology. The background is dominated by rows of boxes, suggesting a highly organized storage system. The floor is lined with wooden pallets, enhancing the industrial setting. The camera remains static, capturing the robot&#x27;s poised stance amidst the orderly environment, with a shallow depth of field that keeps the focus on the robot while subtly blurring the background for a cinematic effect.&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>output = pipe(prompt=prompt).frames[<span class="hljs-number">0</span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>export_to_video(output, <span class="hljs-string">&quot;output.mp4&quot;</span>, fps=<span class="hljs-number">30</span>)`,wrap:!1}}),{c(){t=c("p"),t.textContent=v,p=i(),f(l.$$.fragment)},l(o){t=m(o,"P",{"data-svelte-h":!0}),w(t)!=="svelte-kvfsh7"&&(t.textContent=v),p=r(o),h(l.$$.fragment,o)},m(o,g){d(o,t,g),d(o,p,g),_(l,o,g),u=!0},p:Ce,i(o){u||(b(l.$$.fragment,o),u=!0)},o(o){T(l.$$.fragment,o),u=!1},d(o){o&&(s(t),s(p)),y(l,o)}}}function go(Z){let t,v="Image conditioning:",p,l,u;return l=new Fe({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwQ29zbW9zVmlkZW9Ub1dvcmxkUGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwZXhwb3J0X3RvX3ZpZGVvJTJDJTIwbG9hZF9pbWFnZSUwQSUwQW1vZGVsX2lkJTIwJTNEJTIwJTIybnZpZGlhJTJGQ29zbW9zLTEuMC1EaWZmdXNpb24tN0ItVmlkZW8yV29ybGQlMjIlMEFwaXBlJTIwJTNEJTIwQ29zbW9zVmlkZW9Ub1dvcmxkUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKG1vZGVsX2lkJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiklMEFwaXBlLnRvKCUyMmN1ZGElMjIpJTBBJTBBcHJvbXB0JTIwJTNEJTIwJTIyVGhlJTIwdmlkZW8lMjBkZXBpY3RzJTIwYSUyMGxvbmclMkMlMjBzdHJhaWdodCUyMGhpZ2h3YXklMjBzdHJldGNoaW5nJTIwaW50byUyMHRoZSUyMGRpc3RhbmNlJTJDJTIwZmxhbmtlZCUyMGJ5JTIwbWV0YWwlMjBndWFyZHJhaWxzLiUyMFRoZSUyMHJvYWQlMjBpcyUyMGRpdmlkZWQlMjBpbnRvJTIwbXVsdGlwbGUlMjBsYW5lcyUyQyUyMHdpdGglMjBhJTIwZmV3JTIwdmVoaWNsZXMlMjB2aXNpYmxlJTIwaW4lMjB0aGUlMjBmYXIlMjBkaXN0YW5jZS4lMjBUaGUlMjBzdXJyb3VuZGluZyUyMGxhbmRzY2FwZSUyMGZlYXR1cmVzJTIwZHJ5JTJDJTIwZ3Jhc3N5JTIwZmllbGRzJTIwb24lMjBvbmUlMjBzaWRlJTIwYW5kJTIwcm9sbGluZyUyMGhpbGxzJTIwb24lMjB0aGUlMjBvdGhlci4lMjBUaGUlMjBza3klMjBpcyUyMG1vc3RseSUyMGNsZWFyJTIwd2l0aCUyMGElMjBmZXclMjBzY2F0dGVyZWQlMjBjbG91ZHMlMkMlMjBzdWdnZXN0aW5nJTIwYSUyMGJyaWdodCUyQyUyMHN1bm55JTIwZGF5LiUyMiUwQWltYWdlJTIwJTNEJTIwbG9hZF9pbWFnZSglMEElMjAlMjAlMjAlMjAlMjJodHRwcyUzQSUyRiUyRmh1Z2dpbmdmYWNlLmNvJTJGZGF0YXNldHMlMkZodWdnaW5nZmFjZSUyRmRvY3VtZW50YXRpb24taW1hZ2VzJTJGcmVzb2x2ZSUyRm1haW4lMkZkaWZmdXNlcnMlMkZjb3Ntb3MlMkZjb3Ntb3MtdmlkZW8yd29ybGQtaW5wdXQuanBnJTIyJTBBKSUwQSUwQXZpZGVvJT
IwJTNEJTIwcGlwZShpbWFnZSUzRGltYWdlJTJDJTIwcHJvbXB0JTNEcHJvbXB0KS5mcmFtZXMlNUIwJTVEJTBBZXhwb3J0X3RvX3ZpZGVvKHZpZGVvJTJDJTIwJTIyb3V0cHV0Lm1wNCUyMiUyQyUyMGZwcyUzRDMwKQ==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> torch
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> CosmosVideoToWorldPipeline
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video, load_image
<span class="hljs-meta">&gt;&gt;&gt; </span>model_id = <span class="hljs-string">&quot;nvidia/Cosmos-1.0-Diffusion-7B-Video2World&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>pipe = CosmosVideoToWorldPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
<span class="hljs-meta">&gt;&gt;&gt; </span>pipe.to(<span class="hljs-string">&quot;cuda&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>prompt = <span class="hljs-string">&quot;The video depicts a long, straight highway stretching into the distance, flanked by metal guardrails. The road is divided into multiple lanes, with a few vehicles visible in the far distance. The surrounding landscape features dry, grassy fields on one side and rolling hills on the other. The sky is mostly clear with a few scattered clouds, suggesting a bright, sunny day.&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>image = load_image(
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cosmos/cosmos-video2world-input.jpg&quot;</span>
<span class="hljs-meta">... </span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>video = pipe(image=image, prompt=prompt).frames[<span class="hljs-number">0</span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>export_to_video(video, <span class="hljs-string">&quot;output.mp4&quot;</span>, fps=<span class="hljs-number">30</span>)`,wrap:!1}}),{c(){t=c("p"),t.textContent=v,p=i(),f(l.$$.fragment)},l(o){t=m(o,"P",{"data-svelte-h":!0}),w(t)!=="svelte-187s8ri"&&(t.textContent=v),p=r(o),h(l.$$.fragment,o)},m(o,g){d(o,t,g),d(o,p,g),_(l,o,g),u=!0},p:Ce,i(o){u||(b(l.$$.fragment,o),u=!0)},o(o){T(l.$$.fragment,o),u=!1},d(o){o&&(s(t),s(p)),y(l,o)}}}function fo(Z){let t,v="Video conditioning:",p,l,u;return l=new Fe({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwZGlmZnVzZXJzJTIwaW1wb3J0JTIwQ29zbW9zVmlkZW9Ub1dvcmxkUGlwZWxpbmUlMEFmcm9tJTIwZGlmZnVzZXJzLnV0aWxzJTIwaW1wb3J0JTIwZXhwb3J0X3RvX3ZpZGVvJTJDJTIwbG9hZF92aWRlbyUwQSUwQW1vZGVsX2lkJTIwJTNEJTIwJTIybnZpZGlhJTJGQ29zbW9zLTEuMC1EaWZmdXNpb24tN0ItVmlkZW8yV29ybGQlMjIlMEFwaXBlJTIwJTNEJTIwQ29zbW9zVmlkZW9Ub1dvcmxkUGlwZWxpbmUuZnJvbV9wcmV0cmFpbmVkKG1vZGVsX2lkJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiklMEFwaXBlLnRyYW5zZm9ybWVyJTIwJTNEJTIwdG9yY2guY29tcGlsZShwaXBlLnRyYW5zZm9ybWVyKSUwQXBpcGUudG8oJTIyY3VkYSUyMiklMEElMEFwcm9tcHQlMjAlM0QlMjAlMjJUaGUlMjB2aWRlbyUyMGRlcGljdHMlMjBhJTIwd2luZGluZyUyMG1vdW50YWluJTIwcm9hZCUyMGNvdmVyZWQlMjBpbiUyMHNub3clMkMlMjB3aXRoJTIwYSUyMHNpbmdsZSUyMHZlaGljbGUlMjB0cmF2ZWxpbmclMjBhbG9uZyUyMGl0LiUyMFRoZSUyMHJvYWQlMjBpcyUyMGZsYW5rZWQlMjBieSUyMHN0ZWVwJTJDJTIwcm9ja3klMjBjbGlmZnMlMjBhbmQlMjBzcGFyc2UlMjB2ZWdldGF0aW9uLiUyMFRoZSUyMGxhbmRzY2FwZSUyMGlzJTIwY2hhcmFjdGVyaXplZCUyMGJ5JTIwcnVnZ2VkJTIwdGVycmFpbiUyMGFuZCUyMGElMjByaXZlciUyMHZpc2libGUlMjBpbiUyMHRoZSUyMGRpc3RhbmNlLiUyMFRoZSUyMHNjZW5lJTIwY2FwdHVyZXMlMjB0aGUlMjBzb2xpdHVkZSUyMGFuZCUyMGJlYXV0eSUyMG9mJTIwYSUyMHdpbnRlciUyMGRyaXZlJTIwdGhyb3VnaCUyMGElMjBtb3VudGFpbm91cyUyMHJlZ2lvbi4lMjIlMEF2aWRlbyUyMCUzRCUyMGxvYWRfdmlkZW8oJTBBJTIwJTIwJTIwJTIwJTIyaHR0cHMlM0ElMkYlMkZodWdnaW5nZmFjZS5jbyUyRmRhdGFzZXRzJTJGaHVnZ2luZ2ZhY2UlMkZkb2N1bWVudGF0aW9uLWltYWdlcyUyRnJlc29sdmUlMkZtYWluJTJGZGlmZnVzZXJzJTJGY29zbW9zJTJGY29zbW9zLXZpZGVvMndvcmxkLWlucHV0LXZpZC5tcDQlMjIlME
EpJTVCJTBBJTIwJTIwJTIwJTIwJTNBMjElMEElNUQlMjAlMjAlMjMlMjBUaGlzJTIwZXhhbXBsZSUyMHVzZXMlMjBvbmx5JTIwdGhlJTIwZmlyc3QlMjAyMSUyMGZyYW1lcyUwQSUwQXZpZGVvJTIwJTNEJTIwcGlwZSh2aWRlbyUzRHZpZGVvJTJDJTIwcHJvbXB0JTNEcHJvbXB0KS5mcmFtZXMlNUIwJTVEJTBBZXhwb3J0X3RvX3ZpZGVvKHZpZGVvJTJDJTIwJTIyb3V0cHV0Lm1wNCUyMiUyQyUyMGZwcyUzRDMwKQ==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> torch
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> CosmosVideoToWorldPipeline
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> export_to_video, load_video
<span class="hljs-meta">&gt;&gt;&gt; </span>model_id = <span class="hljs-string">&quot;nvidia/Cosmos-1.0-Diffusion-7B-Video2World&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>pipe = CosmosVideoToWorldPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
<span class="hljs-meta">&gt;&gt;&gt; </span>pipe.transformer = torch.<span class="hljs-built_in">compile</span>(pipe.transformer)
<span class="hljs-meta">&gt;&gt;&gt; </span>pipe.to(<span class="hljs-string">&quot;cuda&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>prompt = <span class="hljs-string">&quot;The video depicts a winding mountain road covered in snow, with a single vehicle traveling along it. The road is flanked by steep, rocky cliffs and sparse vegetation. The landscape is characterized by rugged terrain and a river visible in the distance. The scene captures the solitude and beauty of a winter drive through a mountainous region.&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>video = load_video(
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cosmos/cosmos-video2world-input-vid.mp4&quot;</span>
<span class="hljs-meta">... </span>)[
<span class="hljs-meta">... </span> :<span class="hljs-number">21</span>
<span class="hljs-meta">... </span>] <span class="hljs-comment"># This example uses only the first 21 frames</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>video = pipe(video=video, prompt=prompt).frames[<span class="hljs-number">0</span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>export_to_video(video, <span class="hljs-string">&quot;output.mp4&quot;</span>, fps=<span class="hljs-number">30</span>)`,wrap:!1}}),{c(){t=c("p"),t.textContent=v,p=i(),f(l.$$.fragment)},l(o){t=m(o,"P",{"data-svelte-h":!0}),w(t)!=="svelte-8d7hws"&&(t.textContent=v),p=r(o),h(l.$$.fragment,o)},m(o,g){d(o,t,g),d(o,p,g),_(l,o,g),u=!0},p:Ce,i(o){u||(b(l.$$.fragment,o),u=!0)},o(o){T(l.$$.fragment,o),u=!1},d(o){o&&(s(t),s(p)),y(l,o)}}}function ho(Z){let t,v,p,l,u,o,g,Ye='<a href="https://huggingface.co/papers/2501.03575" rel="nofollow">Cosmos World Foundation Model Platform for Physical AI</a> by NVIDIA.',me,X,Qe='<em>Physical AI needs to be trained digitally first. It needs a digital twin of itself, the policy model, and a digital twin of the world, the world model. In this paper, we present the Cosmos World Foundation Model Platform to help developers build customized world models for their Physical AI setups. We position a world foundation model as a general-purpose world model that can be fine-tuned into customized world models for downstream applications. Our platform covers a video curation pipeline, pre-trained world foundation models, examples of post-training of pre-trained world foundation models, and video tokenizers. To help Physical AI builders solve the most critical problems of our society, we make our platform open-source and our models open-weight with permissive licenses available via <a href="https://github.com/NVIDIA/Cosmos" rel="nofollow">https://github.com/NVIDIA/Cosmos</a>.</em>',ue,V,ge,L,fe,M,E,We,oe,Se='Pipeline for text-to-video generation using <a href="https://github.com/NVIDIA/Cosmos" rel="nofollow">Cosmos</a>.',Ze,te,De=`This model inherits from <a href="/docs/diffusers/pr_11438/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a>. Check the superclass documentation for the generic methods
implemented for all pipelines (downloading, saving, running on a particular device, etc.).`,je,j,H,Ge,ne,qe="The call function to the pipeline for generation.",ke,P,Ie,B,F,Ue,se,Ae="Encodes the prompt into text encoder hidden states.",he,Y,_e,x,Q,$e,ie,Oe='Pipeline for image-to-video and video-to-video generation using <a href="https://github.com/NVIDIA/Cosmos" rel="nofollow">Cosmos</a>.',Ve,re,Ke=`This model inherits from <a href="/docs/diffusers/pr_11438/en/api/pipelines/overview#diffusers.DiffusionPipeline">DiffusionPipeline</a>. Check the superclass documentation for the generic methods
implemented for all pipelines (downloading, saving, running on a particular device, etc.).`,Pe,J,S,Be,ae,eo="The call function to the pipeline for generation.",ze,le,oo="Examples:",Ne,z,Re,N,Xe,R,D,Le,de,to="Encodes the prompt into text encoder hidden states.",be,q,Te,k,A,Ee,pe,no="Output class for Cosmos pipelines.",ye,O,ve,ce,we;return u=new Je({props:{title:"Cosmos",local:"cosmos",headingTag:"h1"}}),V=new po({props:{$$slots:{default:[mo]},$$scope:{ctx:Z}}}),L=new Je({props:{title:"CosmosTextToWorldPipeline",local:"diffusers.CosmosTextToWorldPipeline",headingTag:"h2"}}),E=new ee({props:{name:"class diffusers.CosmosTextToWorldPipeline",anchor:"diffusers.CosmosTextToWorldPipeline",parameters:[{name:"text_encoder",val:": T5EncoderModel"},{name:"tokenizer",val:": T5TokenizerFast"},{name:"transformer",val:": CosmosTransformer3DModel"},{name:"vae",val:": AutoencoderKLCosmos"},{name:"scheduler",val:": EDMEulerScheduler"},{name:"safety_checker",val:": CosmosSafetyChecker = None"}],parametersDescription:[{anchor:"diffusers.CosmosTextToWorldPipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>T5EncoderModel</code>) &#x2014;
Frozen text-encoder. Cosmos uses
<a href="https://huggingface.co/docs/transformers/model_doc/t5#transformers.T5EncoderModel" rel="nofollow">T5</a>; specifically the
<a href="https://huggingface.co/google-t5/t5-11b" rel="nofollow">t5-11b</a> variant.`,name:"text_encoder"},{anchor:"diffusers.CosmosTextToWorldPipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>T5TokenizerFast</code>) &#x2014;
Tokenizer of class
<a href="https://huggingface.co/docs/transformers/model_doc/t5#transformers.T5Tokenizer" rel="nofollow">T5Tokenizer</a>.`,name:"tokenizer"},{anchor:"diffusers.CosmosTextToWorldPipeline.transformer",description:`<strong>transformer</strong> (<a href="/docs/diffusers/pr_11438/en/api/models/cosmos_transformer3d#diffusers.CosmosTransformer3DModel">CosmosTransformer3DModel</a>) &#x2014;
Conditional Transformer to denoise the encoded image latents.`,name:"transformer"},{anchor:"diffusers.CosmosTextToWorldPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/pr_11438/en/api/schedulers/flow_match_euler_discrete#diffusers.FlowMatchEulerDiscreteScheduler">FlowMatchEulerDiscreteScheduler</a>) &#x2014;
A scheduler to be used in combination with <code>transformer</code> to denoise the encoded image latents.`,name:"scheduler"},{anchor:"diffusers.CosmosTextToWorldPipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/pr_11438/en/api/models/autoencoderkl_cosmos#diffusers.AutoencoderKLCosmos">AutoencoderKLCosmos</a>) &#x2014;
Variational Auto-Encoder (VAE) Model to encode and decode videos to and from latent representations.`,name:"vae"}],source:"https://github.com/huggingface/diffusers/blob/vr_11438/src/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py#L132"}}),H=new ee({props:{name:"__call__",anchor:"diffusers.CosmosTextToWorldPipeline.__call__",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"height",val:": int = 704"},{name:"width",val:": int = 1280"},{name:"num_frames",val:": int = 121"},{name:"num_inference_steps",val:": int = 36"},{name:"guidance_scale",val:": float = 7.0"},{name:"fps",val:": int = 30"},{name:"num_videos_per_prompt",val:": typing.Optional[int] = 1"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"latents",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"output_type",val:": typing.Optional[str] = 'pil'"},{name:"return_dict",val:": bool = True"},{name:"callback_on_step_end",val:": typing.Union[typing.Callable[[int, int, typing.Dict], NoneType], diffusers.callbacks.PipelineCallback, diffusers.callbacks.MultiPipelineCallbacks, NoneType] = None"},{name:"callback_on_step_end_tensor_inputs",val:": typing.List[str] = ['latents']"},{name:"max_sequence_length",val:": int = 512"}],parametersDescription:[{anchor:"diffusers.CosmosTextToWorldPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) &#x2014;
The prompt or prompts to guide the image generation. If not defined, one has to pass <code>prompt_embeds</code>.
instead.`,name:"prompt"},{anchor:"diffusers.CosmosTextToWorldPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, defaults to <code>720</code>) &#x2014;
The height in pixels of the generated image.`,name:"height"},{anchor:"diffusers.CosmosTextToWorldPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, defaults to <code>1280</code>) &#x2014;
The width in pixels of the generated image.`,name:"width"},{anchor:"diffusers.CosmosTextToWorldPipeline.__call__.num_frames",description:`<strong>num_frames</strong> (<code>int</code>, defaults to <code>129</code>) &#x2014;
The number of frames in the generated video.`,name:"num_frames"},{anchor:"diffusers.CosmosTextToWorldPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, defaults to <code>50</code>) &#x2014;
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.CosmosTextToWorldPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, defaults to <code>6.0</code>) &#x2014;
Guidance scale as defined in <a href="https://arxiv.org/abs/2207.12598" rel="nofollow">Classifier-Free Diffusion Guidance</a>.
<code>guidance_scale</code> is defined as <code>w</code> of equation 2. of <a href="https://arxiv.org/pdf/2205.11487.pdf" rel="nofollow">Imagen
Paper</a>. Guidance scale is enabled by setting <code>guidance_scale &gt; 1</code>.`,name:"guidance_scale"},{anchor:"diffusers.CosmosTextToWorldPipeline.__call__.fps",description:`<strong>fps</strong> (<code>int</code>, defaults to <code>30</code>) &#x2014;
The frames per second of the generated video.`,name:"fps"},{anchor:"diffusers.CosmosTextToWorldPipeline.__call__.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) &#x2014;
The number of images to generate per prompt.`,name:"num_videos_per_prompt"},{anchor:"diffusers.CosmosTextToWorldPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) &#x2014;
A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make
generation deterministic.`,name:"generator"},{anchor:"diffusers.CosmosTextToWorldPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) &#x2014;
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor is generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.CosmosTextToWorldPipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) &#x2014;
Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not
provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.CosmosTextToWorldPipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.FloatTensor</code>, <em>optional</em>) &#x2014;
Pre-generated negative text embeddings. For PixArt-Sigma this negative prompt should be &quot;&quot;. If not
provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.CosmosTextToWorldPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>&quot;pil&quot;</code>) &#x2014;
The output format of the generated image. Choose between <code>PIL.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.CosmosTextToWorldPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Whether or not to return a <code>CosmosPipelineOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.CosmosTextToWorldPipeline.__call__.clip_skip",description:`<strong>clip_skip</strong> (<code>int</code>, <em>optional</em>) &#x2014;
Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
the output of the pre-final layer will be used for computing the prompt embeddings.`,name:"clip_skip"},{anchor:"diffusers.CosmosTextToWorldPipeline.__call__.callback_on_step_end",description:`<strong>callback_on_step_end</strong> (<code>Callable</code>, <code>PipelineCallback</code>, <code>MultiPipelineCallbacks</code>, <em>optional</em>) &#x2014;
A function or a subclass of <code>PipelineCallback</code> or <code>MultiPipelineCallbacks</code> that is called at the end of
each denoising step during the inference. with the following arguments: <code>callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)</code>. <code>callback_kwargs</code> will include a
list of all tensors as specified by <code>callback_on_step_end_tensor_inputs</code>.`,name:"callback_on_step_end"},{anchor:"diffusers.CosmosTextToWorldPipeline.__call__.callback_on_step_end_tensor_inputs",description:`<strong>callback_on_step_end_tensor_inputs</strong> (<code>List</code>, <em>optional</em>) &#x2014;
The list of tensor inputs for the <code>callback_on_step_end</code> function. The tensors specified in the list
will be passed as <code>callback_kwargs</code> argument. You will only be able to include variables listed in the
<code>._callback_tensor_inputs</code> attribute of your pipeline class.`,name:"callback_on_step_end_tensor_inputs"}],source:"https://github.com/huggingface/diffusers/blob/vr_11438/src/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py#L393",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>If <code>return_dict</code> is <code>True</code>, <code>CosmosPipelineOutput</code> is returned, otherwise a <code>tuple</code> is returned where
the first element is a list with the generated images and the second element is a list of <code>bool</code>s
indicating whether the corresponding generated image contains “not-safe-for-work” (nsfw) content.</p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>~CosmosPipelineOutput</code> or <code>tuple</code></p>
`}}),P=new He({props:{anchor:"diffusers.CosmosTextToWorldPipeline.__call__.example",$$slots:{default:[uo]},$$scope:{ctx:Z}}}),F=new ee({props:{name:"encode_prompt",anchor:"diffusers.CosmosTextToWorldPipeline.encode_prompt",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]]"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"do_classifier_free_guidance",val:": bool = True"},{name:"num_videos_per_prompt",val:": int = 1"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"max_sequence_length",val:": int = 512"},{name:"device",val:": typing.Optional[torch.device] = None"},{name:"dtype",val:": typing.Optional[torch.dtype] = None"}],parametersDescription:[{anchor:"diffusers.CosmosTextToWorldPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) &#x2014;
prompt to be encoded`,name:"prompt"},{anchor:"diffusers.CosmosTextToWorldPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) &#x2014;
The prompt or prompts not to guide the image generation. If not defined, one has to pass
<code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is
less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.CosmosTextToWorldPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Whether to use classifier free guidance or not.`,name:"do_classifier_free_guidance"},{anchor:"diffusers.CosmosTextToWorldPipeline.encode_prompt.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) &#x2014;
Number of videos that should be generated per prompt. torch device to place the resulting embeddings on`,name:"num_videos_per_prompt"},{anchor:"diffusers.CosmosTextToWorldPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) &#x2014;
Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not
provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.CosmosTextToWorldPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) &#x2014;
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt
weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input
argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.CosmosTextToWorldPipeline.encode_prompt.device",description:`<strong>device</strong> &#x2014; (<code>torch.device</code>, <em>optional</em>):
torch device`,name:"device"},{anchor:"diffusers.CosmosTextToWorldPipeline.encode_prompt.dtype",description:`<strong>dtype</strong> &#x2014; (<code>torch.dtype</code>, <em>optional</em>):
torch dtype`,name:"dtype"}],source:"https://github.com/huggingface/diffusers/blob/vr_11438/src/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py#L231"}}),Y=new Je({props:{title:"CosmosVideoToWorldPipeline",local:"diffusers.CosmosVideoToWorldPipeline",headingTag:"h2"}}),Q=new ee({props:{name:"class diffusers.CosmosVideoToWorldPipeline",anchor:"diffusers.CosmosVideoToWorldPipeline",parameters:[{name:"text_encoder",val:": T5EncoderModel"},{name:"tokenizer",val:": T5TokenizerFast"},{name:"transformer",val:": CosmosTransformer3DModel"},{name:"vae",val:": AutoencoderKLCosmos"},{name:"scheduler",val:": EDMEulerScheduler"},{name:"safety_checker",val:": CosmosSafetyChecker = None"}],parametersDescription:[{anchor:"diffusers.CosmosVideoToWorldPipeline.text_encoder",description:`<strong>text_encoder</strong> (<code>T5EncoderModel</code>) &#x2014;
Frozen text-encoder. Cosmos uses
<a href="https://huggingface.co/docs/transformers/model_doc/t5#transformers.T5EncoderModel" rel="nofollow">T5</a>; specifically the
<a href="https://huggingface.co/google-t5/t5-11b" rel="nofollow">t5-11b</a> variant.`,name:"text_encoder"},{anchor:"diffusers.CosmosVideoToWorldPipeline.tokenizer",description:`<strong>tokenizer</strong> (<code>T5TokenizerFast</code>) &#x2014;
Tokenizer of class
<a href="https://huggingface.co/docs/transformers/model_doc/t5#transformers.T5Tokenizer" rel="nofollow">T5Tokenizer</a>.`,name:"tokenizer"},{anchor:"diffusers.CosmosVideoToWorldPipeline.transformer",description:`<strong>transformer</strong> (<a href="/docs/diffusers/pr_11438/en/api/models/cosmos_transformer3d#diffusers.CosmosTransformer3DModel">CosmosTransformer3DModel</a>) &#x2014;
Conditional Transformer to denoise the encoded image latents.`,name:"transformer"},{anchor:"diffusers.CosmosVideoToWorldPipeline.scheduler",description:`<strong>scheduler</strong> (<a href="/docs/diffusers/pr_11438/en/api/schedulers/flow_match_euler_discrete#diffusers.FlowMatchEulerDiscreteScheduler">FlowMatchEulerDiscreteScheduler</a>) &#x2014;
A scheduler to be used in combination with <code>transformer</code> to denoise the encoded image latents.`,name:"scheduler"},{anchor:"diffusers.CosmosVideoToWorldPipeline.vae",description:`<strong>vae</strong> (<a href="/docs/diffusers/pr_11438/en/api/models/autoencoderkl_cosmos#diffusers.AutoencoderKLCosmos">AutoencoderKLCosmos</a>) &#x2014;
Variational Auto-Encoder (VAE) Model to encode and decode videos to and from latent representations.`,name:"vae"}],source:"https://github.com/huggingface/diffusers/blob/vr_11438/src/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py#L175"}}),S=new ee({props:{name:"__call__",anchor:"diffusers.CosmosVideoToWorldPipeline.__call__",parameters:[{name:"image",val:": typing.Union[PIL.Image.Image, numpy.ndarray, torch.Tensor, typing.List[PIL.Image.Image], typing.List[numpy.ndarray], typing.List[torch.Tensor]] = None"},{name:"video",val:": typing.List[typing.Union[PIL.Image.Image, numpy.ndarray, torch.Tensor, typing.List[PIL.Image.Image], typing.List[numpy.ndarray], typing.List[torch.Tensor]]] = None"},{name:"prompt",val:": typing.Union[str, typing.List[str]] = None"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"height",val:": int = 704"},{name:"width",val:": int = 1280"},{name:"num_frames",val:": int = 121"},{name:"num_inference_steps",val:": int = 36"},{name:"guidance_scale",val:": float = 7.0"},{name:"input_frames_guidance",val:": bool = False"},{name:"augment_sigma",val:": float = 0.001"},{name:"fps",val:": int = 30"},{name:"num_videos_per_prompt",val:": typing.Optional[int] = 1"},{name:"generator",val:": typing.Union[torch._C.Generator, typing.List[torch._C.Generator], NoneType] = None"},{name:"latents",val:": typing.Optional[torch.Tensor] = None"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"output_type",val:": typing.Optional[str] = 'pil'"},{name:"return_dict",val:": bool = True"},{name:"callback_on_step_end",val:": typing.Union[typing.Callable[[int, int, typing.Dict], NoneType], diffusers.callbacks.PipelineCallback, diffusers.callbacks.MultiPipelineCallbacks, NoneType] = None"},{name:"callback_on_step_end_tensor_inputs",val:": typing.List[str] = ['latents']"},{name:"max_sequence_length",val:": int = 
512"}],parametersDescription:[{anchor:"diffusers.CosmosVideoToWorldPipeline.__call__.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) &#x2014;
The prompt or prompts to guide the image generation. If not defined, one has to pass <code>prompt_embeds</code>.
instead.`,name:"prompt"},{anchor:"diffusers.CosmosVideoToWorldPipeline.__call__.height",description:`<strong>height</strong> (<code>int</code>, defaults to <code>720</code>) &#x2014;
The height in pixels of the generated image.`,name:"height"},{anchor:"diffusers.CosmosVideoToWorldPipeline.__call__.width",description:`<strong>width</strong> (<code>int</code>, defaults to <code>1280</code>) &#x2014;
The width in pixels of the generated image.`,name:"width"},{anchor:"diffusers.CosmosVideoToWorldPipeline.__call__.num_frames",description:`<strong>num_frames</strong> (<code>int</code>, defaults to <code>129</code>) &#x2014;
The number of frames in the generated video.`,name:"num_frames"},{anchor:"diffusers.CosmosVideoToWorldPipeline.__call__.num_inference_steps",description:`<strong>num_inference_steps</strong> (<code>int</code>, defaults to <code>50</code>) &#x2014;
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
expense of slower inference.`,name:"num_inference_steps"},{anchor:"diffusers.CosmosVideoToWorldPipeline.__call__.guidance_scale",description:`<strong>guidance_scale</strong> (<code>float</code>, defaults to <code>6.0</code>) &#x2014;
Guidance scale as defined in <a href="https://arxiv.org/abs/2207.12598" rel="nofollow">Classifier-Free Diffusion Guidance</a>.
<code>guidance_scale</code> is defined as <code>w</code> of equation 2. of <a href="https://arxiv.org/pdf/2205.11487.pdf" rel="nofollow">Imagen
Paper</a>. Guidance scale is enabled by setting <code>guidance_scale &gt; 1</code>.`,name:"guidance_scale"},{anchor:"diffusers.CosmosVideoToWorldPipeline.__call__.fps",description:`<strong>fps</strong> (<code>int</code>, defaults to <code>30</code>) &#x2014;
The frames per second of the generated video.`,name:"fps"},{anchor:"diffusers.CosmosVideoToWorldPipeline.__call__.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) &#x2014;
The number of images to generate per prompt.`,name:"num_videos_per_prompt"},{anchor:"diffusers.CosmosVideoToWorldPipeline.__call__.generator",description:`<strong>generator</strong> (<code>torch.Generator</code> or <code>List[torch.Generator]</code>, <em>optional</em>) &#x2014;
A <a href="https://pytorch.org/docs/stable/generated/torch.Generator.html" rel="nofollow"><code>torch.Generator</code></a> to make
generation deterministic.`,name:"generator"},{anchor:"diffusers.CosmosVideoToWorldPipeline.__call__.latents",description:`<strong>latents</strong> (<code>torch.Tensor</code>, <em>optional</em>) &#x2014;
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor is generated by sampling using the supplied random <code>generator</code>.`,name:"latents"},{anchor:"diffusers.CosmosVideoToWorldPipeline.__call__.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) &#x2014;
Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not
provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.CosmosVideoToWorldPipeline.__call__.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.FloatTensor</code>, <em>optional</em>) &#x2014;
Pre-generated negative text embeddings. For PixArt-Sigma this negative prompt should be &quot;&quot;. If not
provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.CosmosVideoToWorldPipeline.__call__.output_type",description:`<strong>output_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>&quot;pil&quot;</code>) &#x2014;
The output format of the generated image. Choose between <code>PIL.Image</code> or <code>np.array</code>.`,name:"output_type"},{anchor:"diffusers.CosmosVideoToWorldPipeline.__call__.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Whether or not to return a <code>CosmosPipelineOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"diffusers.CosmosVideoToWorldPipeline.__call__.clip_skip",description:`<strong>clip_skip</strong> (<code>int</code>, <em>optional</em>) &#x2014;
Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
the output of the pre-final layer will be used for computing the prompt embeddings.`,name:"clip_skip"},{anchor:"diffusers.CosmosVideoToWorldPipeline.__call__.callback_on_step_end",description:`<strong>callback_on_step_end</strong> (<code>Callable</code>, <code>PipelineCallback</code>, <code>MultiPipelineCallbacks</code>, <em>optional</em>) &#x2014;
A function or a subclass of <code>PipelineCallback</code> or <code>MultiPipelineCallbacks</code> that is called at the end of
each denoising step during the inference. with the following arguments: <code>callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict)</code>. <code>callback_kwargs</code> will include a
list of all tensors as specified by <code>callback_on_step_end_tensor_inputs</code>.`,name:"callback_on_step_end"},{anchor:"diffusers.CosmosVideoToWorldPipeline.__call__.callback_on_step_end_tensor_inputs",description:`<strong>callback_on_step_end_tensor_inputs</strong> (<code>List</code>, <em>optional</em>) &#x2014;
The list of tensor inputs for the <code>callback_on_step_end</code> function. The tensors specified in the list
will be passed as <code>callback_kwargs</code> argument. You will only be able to include variables listed in the
<code>._callback_tensor_inputs</code> attribute of your pipeline class.`,name:"callback_on_step_end_tensor_inputs"}],source:"https://github.com/huggingface/diffusers/blob/vr_11438/src/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py#L504",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>If <code>return_dict</code> is <code>True</code>, <code>CosmosPipelineOutput</code> is returned, otherwise a <code>tuple</code> is returned where
the first element is a list with the generated images and the second element is a list of <code>bool</code>s
indicating whether the corresponding generated image contains “not-safe-for-work” (nsfw) content.</p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>~CosmosPipelineOutput</code> or <code>tuple</code></p>
`}}),z=new He({props:{anchor:"diffusers.CosmosVideoToWorldPipeline.__call__.example",$$slots:{default:[go]},$$scope:{ctx:Z}}}),N=new He({props:{anchor:"diffusers.CosmosVideoToWorldPipeline.__call__.example-2",$$slots:{default:[fo]},$$scope:{ctx:Z}}}),D=new ee({props:{name:"encode_prompt",anchor:"diffusers.CosmosVideoToWorldPipeline.encode_prompt",parameters:[{name:"prompt",val:": typing.Union[str, typing.List[str]]"},{name:"negative_prompt",val:": typing.Union[str, typing.List[str], NoneType] = None"},{name:"do_classifier_free_guidance",val:": bool = True"},{name:"num_videos_per_prompt",val:": int = 1"},{name:"prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"negative_prompt_embeds",val:": typing.Optional[torch.Tensor] = None"},{name:"max_sequence_length",val:": int = 512"},{name:"device",val:": typing.Optional[torch.device] = None"},{name:"dtype",val:": typing.Optional[torch.dtype] = None"}],parametersDescription:[{anchor:"diffusers.CosmosVideoToWorldPipeline.encode_prompt.prompt",description:`<strong>prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) &#x2014;
prompt to be encoded`,name:"prompt"},{anchor:"diffusers.CosmosVideoToWorldPipeline.encode_prompt.negative_prompt",description:`<strong>negative_prompt</strong> (<code>str</code> or <code>List[str]</code>, <em>optional</em>) &#x2014;
The prompt or prompts not to guide the image generation. If not defined, one has to pass
<code>negative_prompt_embeds</code> instead. Ignored when not using guidance (i.e., ignored if <code>guidance_scale</code> is
less than <code>1</code>).`,name:"negative_prompt"},{anchor:"diffusers.CosmosVideoToWorldPipeline.encode_prompt.do_classifier_free_guidance",description:`<strong>do_classifier_free_guidance</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Whether to use classifier free guidance or not.`,name:"do_classifier_free_guidance"},{anchor:"diffusers.CosmosVideoToWorldPipeline.encode_prompt.num_videos_per_prompt",description:`<strong>num_videos_per_prompt</strong> (<code>int</code>, <em>optional</em>, defaults to 1) &#x2014;
Number of videos that should be generated per prompt. torch device to place the resulting embeddings on`,name:"num_videos_per_prompt"},{anchor:"diffusers.CosmosVideoToWorldPipeline.encode_prompt.prompt_embeds",description:`<strong>prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) &#x2014;
Pre-generated text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt weighting. If not
provided, text embeddings will be generated from <code>prompt</code> input argument.`,name:"prompt_embeds"},{anchor:"diffusers.CosmosVideoToWorldPipeline.encode_prompt.negative_prompt_embeds",description:`<strong>negative_prompt_embeds</strong> (<code>torch.Tensor</code>, <em>optional</em>) &#x2014;
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, <em>e.g.</em> prompt
weighting. If not provided, negative_prompt_embeds will be generated from <code>negative_prompt</code> input
argument.`,name:"negative_prompt_embeds"},{anchor:"diffusers.CosmosVideoToWorldPipeline.encode_prompt.device",description:`<strong>device</strong> &#x2014; (<code>torch.device</code>, <em>optional</em>):
torch device`,name:"device"},{anchor:"diffusers.CosmosVideoToWorldPipeline.encode_prompt.dtype",description:`<strong>dtype</strong> &#x2014; (<code>torch.dtype</code>, <em>optional</em>):
torch dtype`,name:"dtype"}],source:"https://github.com/huggingface/diffusers/blob/vr_11438/src/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py#L276"}}),q=new Je({props:{title:"CosmosPipelineOutput",local:"diffusers.pipelines.cosmos.pipeline_output.CosmosPipelineOutput",headingTag:"h2"}}),A=new ee({props:{name:"class diffusers.pipelines.cosmos.pipeline_output.CosmosPipelineOutput",anchor:"diffusers.pipelines.cosmos.pipeline_output.CosmosPipelineOutput",parameters:[{name:"frames",val:": Tensor"}],parametersDescription:[{anchor:"diffusers.pipelines.cosmos.pipeline_output.CosmosPipelineOutput.frames",description:`<strong>frames</strong> (<code>torch.Tensor</code>, <code>np.ndarray</code>, or List[List[PIL.Image.Image]]) &#x2014;
List of video outputs - It can be a nested list of length <code>batch_size,</code> with each sub-list containing
denoised PIL image sequences of length <code>num_frames.</code> It can also be a NumPy array or Torch tensor of shape
<code>(batch_size, num_frames, channels, height, width)</code>.`,name:"frames"}],source:"https://github.com/huggingface/diffusers/blob/vr_11438/src/diffusers/pipelines/cosmos/pipeline_output.py#L8"}}),O=new co({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/api/pipelines/cosmos.md"}}),{c(){t=c("meta"),v=i(),p=c("p"),l=i(),f(u.$$.fragment),o=i(),g=c("p"),g.innerHTML=Ye,me=i(),X=c("p"),X.innerHTML=Qe,ue=i(),f(V.$$.fragment),ge=i(),f(L.$$.fragment),fe=i(),M=c("div"),f(E.$$.fragment),We=i(),oe=c("p"),oe.innerHTML=Se,Ze=i(),te=c("p"),te.innerHTML=De,je=i(),j=c("div"),f(H.$$.fragment),Ge=i(),ne=c("p"),ne.textContent=qe,ke=i(),f(P.$$.fragment),Ie=i(),B=c("div"),f(F.$$.fragment),Ue=i(),se=c("p"),se.textContent=Ae,he=i(),f(Y.$$.fragment),_e=i(),x=c("div"),f(Q.$$.fragment),$e=i(),ie=c("p"),ie.innerHTML=Oe,Ve=i(),re=c("p"),re.innerHTML=Ke,Pe=i(),J=c("div"),f(S.$$.fragment),Be=i(),ae=c("p"),ae.textContent=eo,ze=i(),le=c("p"),le.textContent=oo,Ne=i(),f(z.$$.fragment),Re=i(),f(N.$$.fragment),Xe=i(),R=c("div"),f(D.$$.fragment),Le=i(),de=c("p"),de.textContent=to,be=i(),f(q.$$.fragment),Te=i(),k=c("div"),f(A.$$.fragment),Ee=i(),pe=c("p"),pe.textContent=no,ye=i(),f(O.$$.fragment),ve=i(),ce=c("p"),this.h()},l(e){const n=lo("svelte-u9bgzb",document.head);t=m(n,"META",{name:!0,content:!0}),n.forEach(s),v=r(e),p=m(e,"P",{}),U(p).forEach(s),l=r(e),h(u.$$.fragment,e),o=r(e),g=m(e,"P",{"data-svelte-h":!0}),w(g)!=="svelte-1vce33m"&&(g.innerHTML=Ye),me=r(e),X=m(e,"P",{"data-svelte-h":!0}),w(X)!=="svelte-191cpbh"&&(X.innerHTML=Qe),ue=r(e),h(V.$$.fragment,e),ge=r(e),h(L.$$.fragment,e),fe=r(e),M=m(e,"DIV",{class:!0});var C=U(M);h(E.$$.fragment,C),We=r(C),oe=m(C,"P",{"data-svelte-h":!0}),w(oe)!=="svelte-hrqmiv"&&(oe.innerHTML=Se),Ze=r(C),te=m(C,"P",{"data-svelte-h":!0}),w(te)!=="svelte-1qe5m4v"&&(te.innerHTML=De),je=r(C),j=m(C,"DIV",{class:!0});var 
I=U(j);h(H.$$.fragment,I),Ge=r(I),ne=m(I,"P",{"data-svelte-h":!0}),w(ne)!=="svelte-50j04k"&&(ne.textContent=qe),ke=r(I),h(P.$$.fragment,I),I.forEach(s),Ie=r(C),B=m(C,"DIV",{class:!0});var K=U(B);h(F.$$.fragment,K),Ue=r(K),se=m(K,"P",{"data-svelte-h":!0}),w(se)!=="svelte-16q0ax1"&&(se.textContent=Ae),K.forEach(s),C.forEach(s),he=r(e),h(Y.$$.fragment,e),_e=r(e),x=m(e,"DIV",{class:!0});var W=U(x);h(Q.$$.fragment,W),$e=r(W),ie=m(W,"P",{"data-svelte-h":!0}),w(ie)!=="svelte-v4g4y1"&&(ie.innerHTML=Oe),Ve=r(W),re=m(W,"P",{"data-svelte-h":!0}),w(re)!=="svelte-1qe5m4v"&&(re.innerHTML=Ke),Pe=r(W),J=m(W,"DIV",{class:!0});var G=U(J);h(S.$$.fragment,G),Be=r(G),ae=m(G,"P",{"data-svelte-h":!0}),w(ae)!=="svelte-50j04k"&&(ae.textContent=eo),ze=r(G),le=m(G,"P",{"data-svelte-h":!0}),w(le)!=="svelte-kvfsh7"&&(le.textContent=oo),Ne=r(G),h(z.$$.fragment,G),Re=r(G),h(N.$$.fragment,G),G.forEach(s),Xe=r(W),R=m(W,"DIV",{class:!0});var Me=U(R);h(D.$$.fragment,Me),Le=r(Me),de=m(Me,"P",{"data-svelte-h":!0}),w(de)!=="svelte-16q0ax1"&&(de.textContent=to),Me.forEach(s),W.forEach(s),be=r(e),h(q.$$.fragment,e),Te=r(e),k=m(e,"DIV",{class:!0});var xe=U(k);h(A.$$.fragment,xe),Ee=r(xe),pe=m(xe,"P",{"data-svelte-h":!0}),w(pe)!=="svelte-19ihrgn"&&(pe.textContent=no),xe.forEach(s),ye=r(e),h(O.$$.fragment,e),ve=r(e),ce=m(e,"P",{}),U(ce).forEach(s),this.h()},h(){$(t,"name","hf:doc:metadata"),$(t,"content",_o),$(j,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(B,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(M,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(J,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(R,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(x,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 
mt-8"),$(k,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,n){a(document.head,t),d(e,v,n),d(e,p,n),d(e,l,n),_(u,e,n),d(e,o,n),d(e,g,n),d(e,me,n),d(e,X,n),d(e,ue,n),_(V,e,n),d(e,ge,n),_(L,e,n),d(e,fe,n),d(e,M,n),_(E,M,null),a(M,We),a(M,oe),a(M,Ze),a(M,te),a(M,je),a(M,j),_(H,j,null),a(j,Ge),a(j,ne),a(j,ke),_(P,j,null),a(M,Ie),a(M,B),_(F,B,null),a(B,Ue),a(B,se),d(e,he,n),_(Y,e,n),d(e,_e,n),d(e,x,n),_(Q,x,null),a(x,$e),a(x,ie),a(x,Ve),a(x,re),a(x,Pe),a(x,J),_(S,J,null),a(J,Be),a(J,ae),a(J,ze),a(J,le),a(J,Ne),_(z,J,null),a(J,Re),_(N,J,null),a(x,Xe),a(x,R),_(D,R,null),a(R,Le),a(R,de),d(e,be,n),_(q,e,n),d(e,Te,n),d(e,k,n),_(A,k,null),a(k,Ee),a(k,pe),d(e,ye,n),_(O,e,n),d(e,ve,n),d(e,ce,n),we=!0},p(e,[n]){const C={};n&2&&(C.$$scope={dirty:n,ctx:e}),V.$set(C);const I={};n&2&&(I.$$scope={dirty:n,ctx:e}),P.$set(I);const K={};n&2&&(K.$$scope={dirty:n,ctx:e}),z.$set(K);const W={};n&2&&(W.$$scope={dirty:n,ctx:e}),N.$set(W)},i(e){we||(b(u.$$.fragment,e),b(V.$$.fragment,e),b(L.$$.fragment,e),b(E.$$.fragment,e),b(H.$$.fragment,e),b(P.$$.fragment,e),b(F.$$.fragment,e),b(Y.$$.fragment,e),b(Q.$$.fragment,e),b(S.$$.fragment,e),b(z.$$.fragment,e),b(N.$$.fragment,e),b(D.$$.fragment,e),b(q.$$.fragment,e),b(A.$$.fragment,e),b(O.$$.fragment,e),we=!0)},o(e){T(u.$$.fragment,e),T(V.$$.fragment,e),T(L.$$.fragment,e),T(E.$$.fragment,e),T(H.$$.fragment,e),T(P.$$.fragment,e),T(F.$$.fragment,e),T(Y.$$.fragment,e),T(Q.$$.fragment,e),T(S.$$.fragment,e),T(z.$$.fragment,e),T(N.$$.fragment,e),T(D.$$.fragment,e),T(q.$$.fragment,e),T(A.$$.fragment,e),T(O.$$.fragment,e),we=!1},d(e){e&&(s(v),s(p),s(l),s(o),s(g),s(me),s(X),s(ue),s(ge),s(fe),s(M),s(he),s(_e),s(x),s(be),s(Te),s(k),s(ye),s(ve),s(ce)),s(t),y(u,e),y(V,e),y(L,e),y(E),y(H),y(P),y(F),y(Y,e),y(Q),y(S),y(z),y(N),y(D),y(q,e),y(A),y(O,e)}}}const 
// Page metadata serialized as JSON text (title, anchor id, section outline).
// The fragment's h() hook passes it as the "content" value of the element it
// names "hf:doc:metadata".
// NOTE: this assignment completes the `const` keyword that ends the previous line.
_o =
  '{"title":"Cosmos","local":"cosmos","sections":[{"title":"CosmosTextToWorldPipeline","local":"diffusers.CosmosTextToWorldPipeline","sections":[],"depth":2},{"title":"CosmosVideoToWorldPipeline","local":"diffusers.CosmosVideoToWorldPipeline","sections":[],"depth":2},{"title":"CosmosPipelineOutput","local":"diffusers.pipelines.cosmos.pipeline_output.CosmosPipelineOutput","sections":[],"depth":2}],"depth":1}';

/**
 * Instance function handed to `ao` by the component class below.
 *
 * Registers a callback through `io` (imported from the scheduler chunk;
 * presumably Svelte's onMount - confirm against that chunk). The callback
 * looks up the `fw` query-string parameter and discards the value.
 *
 * @param {*} Z - Unused by this function.
 * @returns {Array} Always a new empty array.
 */
function bo(Z) {
  io(() => {
    const query = new URLSearchParams(window.location.search);
    // The looked-up value is intentionally unused, exactly as in the generated output.
    query.get("fw");
  });
  return [];
}

/**
 * Page component for the Cosmos pipeline documentation.
 *
 * Extends `ro` and delegates all setup to `ao` (both imported from the index
 * chunk; presumably SvelteComponent and init - confirm against that chunk),
 * wiring in `bo`, the fragment factory `ho`, and `so`, with an empty props map.
 */
class Co extends ro {
  constructor(options) {
    super();
    ao(this, options, bo, ho, so, {});
  }
}

export { Co as component };

Xet Storage Details

Size:
52.1 kB
·
Xet hash:
13e4b9f87fa318c0534478b8bc45e5e5e927c5baa6d12bfe1cf6370143c02c14

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.