ramu0e committed on
Commit
ada2828
·
verified ·
1 Parent(s): d806a42

Upload folder using huggingface_hub

Browse files
lam/config.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_depth": 5,
3
+ "action_dropout": 0.0,
4
+ "action_hidden_dim": 96,
5
+ "action_obs_dim": 0,
6
+ "action_prev_dim": 10,
7
+ "action_state_dim": 5,
8
+ "action_target_dim": 10,
9
+ "action_wide_dim": 512,
10
+ "architectures": [
11
+ "LAMModel"
12
+ ],
13
+ "decoder_attention_head_dim": 64,
14
+ "decoder_attn_implementation": "flash_attention_2",
15
+ "decoder_encoder_hidden_dim": 5,
16
+ "decoder_eps": 1e-06,
17
+ "decoder_ffn_dim": 768,
18
+ "decoder_freq_dim": 64,
19
+ "decoder_in_channels": 3,
20
+ "decoder_num_attention_heads": 3,
21
+ "decoder_num_layers": 12,
22
+ "decoder_out_channels": 3,
23
+ "decoder_patch_size": [
24
+ 4,
25
+ 4
26
+ ],
27
+ "decoder_pos_embed_seq_len": null,
28
+ "decoder_rope_max_seq_len": 1024,
29
+ "dtype": "bfloat16",
30
+ "encoder_height": 64,
31
+ "encoder_width": 64,
32
+ "fsq_levels": [
33
+ 7,
34
+ 5,
35
+ 5,
36
+ 5,
37
+ 5
38
+ ],
39
+ "initializer_range": 0.02,
40
+ "is_diffusion": true,
41
+ "latent_channels": 5,
42
+ "max_tokens": 128,
43
+ "min_tokens": 1,
44
+ "model_type": "lam",
45
+ "null_latent": 0,
46
+ "transformers_version": "4.57.1",
47
+ "use_tail_drop": true,
48
+ "videomae_config": {
49
+ "attn_drop_rate": 0.0,
50
+ "cos_attn": false,
51
+ "depth": 8,
52
+ "drop_path_rate": 0.0,
53
+ "drop_rate": 0.0,
54
+ "embed_dim": 192,
55
+ "img_size": [
56
+ 64,
57
+ 64
58
+ ],
59
+ "in_chans": 3,
60
+ "init_values": 0.0,
61
+ "layer_norm_eps": 1e-06,
62
+ "mlp_ratio": 4,
63
+ "norm_layer": "nn.LayerNorm",
64
+ "num_classes": 0,
65
+ "num_frames": 2,
66
+ "num_heads": 3,
67
+ "patch_size": 4,
68
+ "qk_scale": null,
69
+ "qkv_bias": true,
70
+ "tubelet_size": 2,
71
+ "use_learnable_pos_emb": false,
72
+ "use_mean_pooling": false,
73
+ "with_cp": false
74
+ },
75
+ "videomae_from_pretrained": null,
76
+ "vocab_size": 4375
77
+ }
lam/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6616086d63da5698d5e7835e8cb0b1adb71268943a1772f9a361d213dee6194
3
+ size 23682376
model_index.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "LAMPolicyPipeline",
3
+ "_diffusers_version": "0.35.2",
4
+ "lam": [
5
+ "flexlam_mini.models.lam.modeling_lam",
6
+ "LAMModel"
7
+ ],
8
+ "policy": [
9
+ "flexlam_mini.models.policy.modeling_policy",
10
+ "PolicyQwen3ForConditionalGeneration"
11
+ ],
12
+ "policy_processor": [
13
+ "transformers",
14
+ "Qwen2VLImageProcessor"
15
+ ],
16
+ "processor": [
17
+ "flexlam_mini.models.lam.processing_lam",
18
+ "LAMProcessorFast"
19
+ ],
20
+ "scheduler": [
21
+ "diffusers",
22
+ "FlowMatchEulerDiscreteScheduler"
23
+ ]
24
+ }
policy/config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_latent_dim": null,
3
+ "action_seq_len": 128,
4
+ "action_start_token_id": 4378,
5
+ "action_vocab_size": 4375,
6
+ "architectures": [
7
+ "PolicyQwen3ForConditionalGeneration"
8
+ ],
9
+ "attention_bias": false,
10
+ "attention_dropout": 0.0,
11
+ "dtype": "bfloat16",
12
+ "eos_token_id": 4379,
13
+ "frame_stride": 1,
14
+ "head_dim": 64,
15
+ "hidden_act": "silu",
16
+ "hidden_size": 256,
17
+ "image_token_id": 4377,
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 1024,
20
+ "layer_types": [
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention"
33
+ ],
34
+ "max_position_embeddings": 2048,
35
+ "max_window_layers": 28,
36
+ "model_type": "policy_qwen3",
37
+ "num_attention_heads": 4,
38
+ "num_frames": 2,
39
+ "num_hidden_layers": 12,
40
+ "num_key_value_heads": 4,
41
+ "pad_token_id": 0,
42
+ "policy_image_height": 64,
43
+ "policy_image_width": 64,
44
+ "predict_tokens": true,
45
+ "rms_norm_eps": 1e-06,
46
+ "rope_scaling": null,
47
+ "rope_theta": 10000.0,
48
+ "sliding_window": null,
49
+ "tie_word_embeddings": true,
50
+ "transformers_version": "4.57.1",
51
+ "use_cache": false,
52
+ "use_sliding_window": false,
53
+ "vision_end_token_id": 4376,
54
+ "vision_in_channels": 3,
55
+ "vision_merge_size": 1,
56
+ "vision_patch_size": 4,
57
+ "vision_start_token_id": 4375,
58
+ "vocab_size": 4380
59
+ }
policy/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "eos_token_id": 4379,
4
+ "pad_token_id": 0,
5
+ "transformers_version": "4.57.1",
6
+ "use_cache": false
7
+ }
policy/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:799caaaff322eb02ba852fae7cb29801f0066dc9ef7768b7b445e47b82fe9b21
3
+ size 27464120
policy_processor/preprocessor_config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": true,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.48145466,
8
+ 0.4578275,
9
+ 0.40821073
10
+ ],
11
+ "image_processor_type": "Qwen2VLImageProcessor",
12
+ "image_std": [
13
+ 0.26862954,
14
+ 0.26130258,
15
+ 0.27577711
16
+ ],
17
+ "max_pixels": 1003520,
18
+ "merge_size": 1,
19
+ "min_pixels": 3136,
20
+ "patch_size": 4,
21
+ "resample": 3,
22
+ "rescale_factor": 0.00392156862745098,
23
+ "size": {
24
+ "longest_edge": 1003520,
25
+ "shortest_edge": 3136
26
+ },
27
+ "temporal_patch_size": 1
28
+ }
processor/processor_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "encoder_height": 64,
3
+ "encoder_width": 64,
4
+ "height": 64,
5
+ "processor_class": "LAMProcessorFast",
6
+ "width": 64
7
+ }
scheduler/scheduler_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "FlowMatchEulerDiscreteScheduler",
3
+ "_diffusers_version": "0.35.2",
4
+ "base_image_seq_len": 256,
5
+ "base_shift": 0.5,
6
+ "invert_sigmas": false,
7
+ "max_image_seq_len": 4096,
8
+ "max_shift": 1.15,
9
+ "num_train_timesteps": 1000,
10
+ "shift": 1.0,
11
+ "shift_terminal": null,
12
+ "stochastic_sampling": false,
13
+ "time_shift_type": "exponential",
14
+ "use_beta_sigmas": false,
15
+ "use_dynamic_shifting": false,
16
+ "use_exponential_sigmas": false,
17
+ "use_karras_sigmas": false
18
+ }