Upload SeedVRPipeline

Browse files

Files changed (5) hide show

README.md +198 -0
dit/config.json +1 -1
model_index.json +5 -1
vae/config.json +22 -12
vae/model.flashpack +2 -2

README.md ADDED Viewed

	@@ -0,0 +1,198 @@

+---
+library_name: diffusers
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+This is the model card of a 🧨 diffusers pipeline that has been pushed to the Hub. This model card has been automatically generated.
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information is needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]

dit/config.json CHANGED Viewed

@@ -40,7 +40,7 @@
     "mmdit_sr",
     "mmdit_sr"
   ],
-  "dtype": "bfloat16",
   "emb_dim": 18432,
   "expand_ratio": 4,
   "head_dim": 128,

     "mmdit_sr",
     "mmdit_sr"
   ],
+  "dtype": "float32",
   "emb_dim": 18432,
   "expand_ratio": 4,
   "head_dim": 128,

model_index.json CHANGED Viewed

@@ -2,9 +2,13 @@
   "_class_name": "SeedVRPipeline",
   "_diffusers_version": "0.35.2",
   "dit": [
-    "seedvr.models.dit.nadit",
     "NaDiT"
   ],
   "sampler": [
     "seedvr.common.diffusion.samplers.euler",
     "EulerSampler"

   "_class_name": "SeedVRPipeline",
   "_diffusers_version": "0.35.2",
   "dit": [
+    "dit",
     "NaDiT"
   ],
+  "embeds": [
+    "seedvr.models.embeds",
+    "PrecomputedEmbeddings"
+  ],
   "sampler": [
     "seedvr.common.diffusion.samplers.euler",
     "EulerSampler"

vae/config.json CHANGED Viewed

@@ -10,10 +10,10 @@
     512
   ],
   "down_block_types": [
-    "DownEncoderBlock3D",
-    "DownEncoderBlock3D",
-    "DownEncoderBlock3D",
-    "DownEncoderBlock3D"
   ],
   "extra_cond_dim": null,
   "force_upcast": true,
@@ -23,13 +23,23 @@
   "in_channels": 3,
   "inflation_mode": "pad",
   "latent_channels": 16,
   "layers_per_block": 2,
-  "memory_limit": null,
   "norm_num_groups": 32,
   "out_channels": 3,
   "sample_size": 32,
   "scaling_factor": 0.9152,
-  "slicing": null,
   "slicing_sample_min_size": 4,
   "slicing_up_num": 0,
   "spatial_downsample_factor": 8,
@@ -37,11 +47,11 @@
   "temporal_scale_num": 2,
   "time_receptive_field": "full",
   "up_block_types": [
-    "UpDecoderBlock3D",
-    "UpDecoderBlock3D",
-    "UpDecoderBlock3D",
-    "UpDecoderBlock3D"
   ],
-  "use_post_quant_conv": false,
-  "use_quant_conv": false
 }

     512
   ],
   "down_block_types": [
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D"
   ],
   "extra_cond_dim": null,
   "force_upcast": true,
   "in_channels": 3,
   "inflation_mode": "pad",
   "latent_channels": 16,
+  "latents_mean": null,
+  "latents_std": null,
   "layers_per_block": 2,
+  "memory_limit": {
+    "conv_max_mem": 0.5,
+    "norm_max_mem": 0.5
+  },
+  "mid_block_add_attention": true,
   "norm_num_groups": 32,
   "out_channels": 3,
   "sample_size": 32,
   "scaling_factor": 0.9152,
+  "shift_factor": null,
+  "slicing": {
+    "memory_device": "same",
+    "split_size": 4
+  },
   "slicing_sample_min_size": 4,
   "slicing_up_num": 0,
   "spatial_downsample_factor": 8,
   "temporal_scale_num": 2,
   "time_receptive_field": "full",
   "up_block_types": [
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D"
   ],
+  "use_post_quant_conv": true,
+  "use_quant_conv": true
 }

vae/model.flashpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:78beb228cb7b11e15eddbab9e30028b7c60484acd8ccd05d196e8ebe7bc7ed85
-size 1002618763

 version https://git-lfs.github.com/spec/v1
+oid sha256:a9ff6bbe5feb92ae3fbe79b826afbf1dcb26ec77b20faabce9c90515c05026e6
+size 501322374