Upload folder using huggingface_hub

Browse files

Files changed (6) hide show

x-scene-video_336x600-E70/can_bus_embedder/can_bus_embedder_model.bin +3 -0
x-scene-video_336x600-E70/controlnet/config.json +129 -0
x-scene-video_336x600-E70/controlnet/diffusion_pytorch_model.bin +3 -0
x-scene-video_336x600-E70/scene_embedder/scene_embedder_model.bin +3 -0
x-scene-video_336x600-E70/unet/config.json +112 -0
x-scene-video_336x600-E70/unet/diffusion_pytorch_model.bin +3 -0

x-scene-video_336x600-E70/can_bus_embedder/can_bus_embedder_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a42242d4c96cf857930eaff97ae8dae3d5555e13b858af13acd0e20d5aa0d24d
+size 2132495

x-scene-video_336x600-E70/controlnet/config.json ADDED Viewed

	@@ -0,0 +1,129 @@

+{
+  "_class_name": "BEVControlNetModel",
+  "_diffusers_version": "0.17.1",
+  "act_fn": "silu",
+  "attention_head_dim": [
+    5,
+    10,
+    20,
+    20
+  ],
+  "bbox_embedder_cls": "xscene.networks.bbox_embedder.ContinuousBBoxWithTextEmbedding",
+  "bbox_embedder_param": {
+    "class_token_dim": 1024,
+    "embedder_num_freq": 4,
+    "minmax_normalize": false,
+    "mode": "all-xyz",
+    "n_classes": 10,
+    "proj_dims": [
+      1024,
+      512,
+      512,
+      1024
+    ],
+    "trainable_class_token": false,
+    "use_text_encoder_init": true
+  },
+  "block_out_channels": [
+    320,
+    640,
+    1280,
+    1280
+  ],
+  "cam_embedder_param": {
+    "include_input": true,
+    "input_dims": 3,
+    "log_sampling": true,
+    "num_freqs": 4
+  },
+  "camera_in_dim": 189,
+  "camera_out_dim": 1024,
+  "canvas_conditioning_channels": 14,
+  "canvas_size": [
+    14,
+    224,
+    400
+  ],
+  "class_embed_type": null,
+  "conditioning_embedding_out_channels": [
+    16,
+    32,
+    96,
+    256
+  ],
+  "controlnet_conditioning_channel_order": "rgb",
+  "cross_attention_dim": 1024,
+  "down_block_types": [
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "DownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "drop_cam_num": 6,
+  "drop_cam_with_box": false,
+  "drop_cond_ratio": 0.25,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "global_pool_conditions": false,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "map_embedder_cls": "xscene.networks.map_embedder.BEVControlNetConditioningEmbedding",
+  "map_embedder_param": {
+    "block_out_channels": [
+      16,
+      32,
+      96,
+      256
+    ],
+    "conditioning_size": [
+      4,
+      200,
+      200
+    ],
+    "output_size": [
+      42,
+      75
+    ]
+  },
+  "map_size": [
+    4,
+    200,
+    200
+  ],
+  "mid_block_scale_factor": 1,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_class_embeds": null,
+  "occrender_conditioning_channels": 20,
+  "occrender_embedding_out_channels": [
+    16,
+    32,
+    64,
+    96,
+    256
+  ],
+  "occrender_output_size": null,
+  "only_cross_attention": false,
+  "projection_class_embeddings_input_dim": null,
+  "render_depth_size": [
+    1,
+    224,
+    400
+  ],
+  "render_img_size": [
+    20,
+    224,
+    400
+  ],
+  "resnet_time_scale_shift": "default",
+  "uncond_cam_in_dim": [
+    3,
+    7
+  ],
+  "upcast_attention": false,
+  "use_linear_projection": true,
+  "use_uncond_map": null,
+  "with_layout_canvas": true,
+  "with_occ_render_img": false
+}

x-scene-video_336x600-E70/controlnet/diffusion_pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c62064b481efdf232ec1546efc121720042b0877b07c305e1de4a817001b5c83
+size 1660164363

x-scene-video_336x600-E70/scene_embedder/scene_embedder_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3129e2b9e316fd8d65335a1484a435646808dd82eb6ba5abafbd9031d00da0f0
+size 13907167

x-scene-video_336x600-E70/unet/config.json ADDED Viewed

	@@ -0,0 +1,112 @@

+{
+  "_class_name": "UNet2DConditionModelMultiviewT",
+  "_diffusers_version": "0.17.1",
+  "_name_or_path": "work_dirs/x-scene-video_224x400/x-scene-video_224x400",
+  "act_fn": "silu",
+  "addition_embed_type": null,
+  "addition_embed_type_num_heads": 64,
+  "attention_head_dim": [
+    5,
+    10,
+    20,
+    20
+  ],
+  "attn1_q_trainable": true,
+  "block_out_channels": [
+    320,
+    640,
+    1280,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 1024,
+  "cross_attention_norm": null,
+  "crossview_attn_type": "temporal_t5_crossview",
+  "down_block_types": [
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "DownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "img_size": [
+    224,
+    400
+  ],
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "neighboring_attn_type": "add",
+  "neighboring_view_pair": {
+    "0": [
+      5,
+      1
+    ],
+    "1": [
+      0,
+      2
+    ],
+    "2": [
+      1,
+      3
+    ],
+    "3": [
+      2,
+      4
+    ],
+    "4": [
+      3,
+      5
+    ],
+    "5": [
+      4,
+      0
+    ]
+  },
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": null,
+  "ref_length": 2,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "sample_size": 64,
+  "scene_channels": 320,
+  "spatial_trainable": true,
+  "temp_pos_emb": "learnable",
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "trainable_state": "only_new",
+  "transformer_type": "_ff_last",
+  "up_block_types": [
+    "UpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D"
+  ],
+  "upcast_attention": false,
+  "use_linear_projection": true,
+  "video_length": 7,
+  "with_can_bus": true,
+  "with_motion": true,
+  "with_ref": true,
+  "zero_module_type": "zero_linear",
+  "zero_module_type2": "none"
+}

x-scene-video_336x600-E70/unet/diffusion_pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5e758b95dea800f55f1208f882ce135ac0e16ebad978e8cc0815d42831cc7005
+size 2975154683