Upload LTXVideo2Pipeline
Browse files
- audio_decoder/config.json +1 -1
- audio_encoder/config.json +1 -1
- audio_preprocessor/config.json +1 -1
- model_index.json +5 -1
- spatial_upsampler/config.json +1 -1
- text_encoder/config.json +1 -1
- text_encoder/generation_config.json +1 -1
- transformer/config.json +1 -1
- video_decoder/config.json +1 -1
- video_encoder/config.json +1 -1
- vocoder/config.json +1 -1
audio_decoder/config.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"_class_name": "Decoder",
|
| 3 |
-
"_diffusers_version": "0.
|
| 4 |
"attn_resolutions": [],
|
| 5 |
"attn_type": "vanilla",
|
| 6 |
"causality_axis": "height",
|
|
|
|
| 1 |
{
|
| 2 |
"_class_name": "Decoder",
|
| 3 |
+
"_diffusers_version": "0.37.0",
|
| 4 |
"attn_resolutions": [],
|
| 5 |
"attn_type": "vanilla",
|
| 6 |
"causality_axis": "height",
|
audio_encoder/config.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"_class_name": "Encoder",
|
| 3 |
-
"_diffusers_version": "0.
|
| 4 |
"attn_resolutions": [],
|
| 5 |
"attn_type": "vanilla",
|
| 6 |
"causality_axis": "height",
|
|
|
|
| 1 |
{
|
| 2 |
"_class_name": "Encoder",
|
| 3 |
+
"_diffusers_version": "0.37.0",
|
| 4 |
"attn_resolutions": [],
|
| 5 |
"attn_type": "vanilla",
|
| 6 |
"causality_axis": "height",
|
audio_preprocessor/config.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"_class_name": "AudioPreprocessor",
|
| 3 |
-
"_diffusers_version": "0.
|
| 4 |
"add_ambience": true,
|
| 5 |
"add_reverb": true,
|
| 6 |
"add_room": true,
|
|
|
|
| 1 |
{
|
| 2 |
"_class_name": "AudioPreprocessor",
|
| 3 |
+
"_diffusers_version": "0.37.0",
|
| 4 |
"add_ambience": true,
|
| 5 |
"add_reverb": true,
|
| 6 |
"add_room": true,
|
model_index.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"_class_name": "LTXVideo2Pipeline",
|
| 3 |
-
"_diffusers_version": "0.
|
| 4 |
"audio_decoder": [
|
| 5 |
"ltx2.model.audio_vae.audio_vae",
|
| 6 |
"Decoder"
|
|
@@ -17,6 +17,10 @@
|
|
| 17 |
"ltx2.model.upsampler.model",
|
| 18 |
"LatentUpsampler"
|
| 19 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
"text_encoder": [
|
| 21 |
"ltx2.model.text_encoder.gemma.model",
|
| 22 |
"LTXTextEncoderModel"
|
|
|
|
| 1 |
{
|
| 2 |
"_class_name": "LTXVideo2Pipeline",
|
| 3 |
+
"_diffusers_version": "0.37.0",
|
| 4 |
"audio_decoder": [
|
| 5 |
"ltx2.model.audio_vae.audio_vae",
|
| 6 |
"Decoder"
|
|
|
|
| 17 |
"ltx2.model.upsampler.model",
|
| 18 |
"LatentUpsampler"
|
| 19 |
],
|
| 20 |
+
"spatial_upsampler_1_5": [
|
| 21 |
+
null,
|
| 22 |
+
null
|
| 23 |
+
],
|
| 24 |
"text_encoder": [
|
| 25 |
"ltx2.model.text_encoder.gemma.model",
|
| 26 |
"LTXTextEncoderModel"
|
spatial_upsampler/config.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"_class_name": "LatentUpsampler",
|
| 3 |
-
"_diffusers_version": "0.
|
| 4 |
"dims": 3,
|
| 5 |
"in_channels": 128,
|
| 6 |
"mid_channels": 1024,
|
|
|
|
| 1 |
{
|
| 2 |
"_class_name": "LatentUpsampler",
|
| 3 |
+
"_diffusers_version": "0.37.0",
|
| 4 |
"dims": 3,
|
| 5 |
"in_channels": 128,
|
| 6 |
"mid_channels": 1024,
|
text_encoder/config.json
CHANGED
|
@@ -101,7 +101,7 @@
|
|
| 101 |
"use_cache": true,
|
| 102 |
"vocab_size": 262208
|
| 103 |
},
|
| 104 |
-
"transformers_version": "4.57.
|
| 105 |
"vision_config": {
|
| 106 |
"attention_dropout": 0.0,
|
| 107 |
"dtype": "bfloat16",
|
|
|
|
| 101 |
"use_cache": true,
|
| 102 |
"vocab_size": 262208
|
| 103 |
},
|
| 104 |
+
"transformers_version": "4.57.6",
|
| 105 |
"vision_config": {
|
| 106 |
"attention_dropout": 0.0,
|
| 107 |
"dtype": "bfloat16",
|
text_encoder/generation_config.json
CHANGED
|
@@ -8,5 +8,5 @@
|
|
| 8 |
],
|
| 9 |
"max_length": 1024,
|
| 10 |
"pad_token_id": 0,
|
| 11 |
-
"transformers_version": "4.57.
|
| 12 |
}
|
|
|
|
| 8 |
],
|
| 9 |
"max_length": 1024,
|
| 10 |
"pad_token_id": 0,
|
| 11 |
+
"transformers_version": "4.57.6"
|
| 12 |
}
|
transformer/config.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"_class_name": "LTXModel",
|
| 3 |
-
"_diffusers_version": "0.
|
| 4 |
"attention_head_dim": 128,
|
| 5 |
"attention_type": "default",
|
| 6 |
"audio_attention_head_dim": 64,
|
|
|
|
| 1 |
{
|
| 2 |
"_class_name": "LTXModel",
|
| 3 |
+
"_diffusers_version": "0.37.0",
|
| 4 |
"attention_head_dim": 128,
|
| 5 |
"attention_type": "default",
|
| 6 |
"audio_attention_head_dim": 64,
|
video_decoder/config.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"_class_name": "Decoder",
|
| 3 |
-
"_diffusers_version": "0.
|
| 4 |
"causal": false,
|
| 5 |
"convolution_dimensions": 3,
|
| 6 |
"decode_noise_scale": 0.025,
|
|
|
|
| 1 |
{
|
| 2 |
"_class_name": "Decoder",
|
| 3 |
+
"_diffusers_version": "0.37.0",
|
| 4 |
"causal": false,
|
| 5 |
"convolution_dimensions": 3,
|
| 6 |
"decode_noise_scale": 0.025,
|
video_encoder/config.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"_class_name": "Encoder",
|
| 3 |
-
"_diffusers_version": "0.
|
| 4 |
"convolution_dimensions": 3,
|
| 5 |
"encoder_blocks": [
|
| 6 |
[
|
|
|
|
| 1 |
{
|
| 2 |
"_class_name": "Encoder",
|
| 3 |
+
"_diffusers_version": "0.37.0",
|
| 4 |
"convolution_dimensions": 3,
|
| 5 |
"encoder_blocks": [
|
| 6 |
[
|
vocoder/config.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"_class_name": "Vocoder",
|
| 3 |
-
"_diffusers_version": "0.
|
| 4 |
"output_sample_rate": 24000,
|
| 5 |
"resblock": "1",
|
| 6 |
"resblock_dilation_sizes": [
|
|
|
|
| 1 |
{
|
| 2 |
"_class_name": "Vocoder",
|
| 3 |
+
"_diffusers_version": "0.37.0",
|
| 4 |
"output_sample_rate": 24000,
|
| 5 |
"resblock": "1",
|
| 6 |
"resblock_dilation_sizes": [
|