zharer committed on
Commit
965aaa5
·
verified ·
1 Parent(s): 81a93df

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. config.json +10 -26
  2. generation_config.json +6 -3
  3. model_index.json +15 -27
  4. processor_config.json +0 -2
config.json CHANGED
@@ -5,7 +5,7 @@
5
  "params": {
6
  "depth": 2,
7
  "input_dim": 1024,
8
- "n_embed": 2048,
9
  "projector_type": "mlp_gelu"
10
  }
11
  },
@@ -15,7 +15,7 @@
15
  "params": {
16
  "depth": 2,
17
  "input_dim": 8,
18
- "n_embed": 2048,
19
  "projector_type": "mlp_gelu"
20
  }
21
  },
@@ -23,9 +23,9 @@
23
  "cls": "vision_head",
24
  "model_type": "gen_head",
25
  "params": {
26
- "image_token_embed": 2048,
27
  "image_token_size": 16384,
28
- "n_embed": 2048
29
  }
30
  },
31
  "gen_vision_config": {
@@ -43,27 +43,9 @@
43
  "torch_dtype": "bfloat16",
44
  "vocab_size": 102400
45
  },
46
- "model_type": "multi_modality",
47
  "torch_dtype": "float16",
48
- "transformers_version": "4.36.0",
49
- "transformers.js_config": {
50
- "kv_cache_dtype": {
51
- "fp16": "float16",
52
- "q4f16": "float16"
53
- },
54
- "dtype": {
55
- "prepare_inputs_embeds": "fp32",
56
- "language_model": "q4",
57
- "lm_head": "fp32",
58
- "gen_head": "fp32",
59
- "gen_img_embeds": "fp32",
60
- "image_decode": "fp32"
61
- },
62
- "use_external_data_format": {
63
- "language_model.onnx": true,
64
- "language_model_fp16.onnx": true
65
- }
66
- },
67
  "vision_config": {
68
  "cls": "CLIPVisionTower",
69
  "model_type": "vision",
@@ -75,8 +57,10 @@
75
  }
76
  },
77
  "architectures": [
78
- "MultiModalityCausalLM"
79
  ],
80
  "use_cache": true,
81
- "_name_or_path": "janus-pro-7b-webgpu-working"
 
 
82
  }
 
5
  "params": {
6
  "depth": 2,
7
  "input_dim": 1024,
8
+ "n_embed": 4096,
9
  "projector_type": "mlp_gelu"
10
  }
11
  },
 
15
  "params": {
16
  "depth": 2,
17
  "input_dim": 8,
18
+ "n_embed": 4096,
19
  "projector_type": "mlp_gelu"
20
  }
21
  },
 
23
  "cls": "vision_head",
24
  "model_type": "gen_head",
25
  "params": {
26
+ "image_token_embed": 4096,
27
  "image_token_size": 16384,
28
+ "n_embed": 4096
29
  }
30
  },
31
  "gen_vision_config": {
 
43
  "torch_dtype": "bfloat16",
44
  "vocab_size": 102400
45
  },
46
+ "model_type": "janus",
47
  "torch_dtype": "float16",
48
+ "transformers_version": "4.33.1",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  "vision_config": {
50
  "cls": "CLIPVisionTower",
51
  "model_type": "vision",
 
57
  }
58
  },
59
  "architectures": [
60
+ "JanusForConditionalGeneration"
61
  ],
62
  "use_cache": true,
63
+ "webgpu_compatible": true,
64
+ "quantization": "q4f16",
65
+ "pipeline_tag": "text-to-image"
66
  }
generation_config.json CHANGED
@@ -1,9 +1,12 @@
1
  {
2
  "bos_token_id": 100000,
3
  "eos_token_id": 100001,
 
 
 
4
  "do_sample": true,
5
- "num_image_tokens": 576,
6
- "pad_token_id": 100015,
7
  "temperature": 0.7,
8
- "top_p": 0.95
 
 
9
  }
 
1
  {
2
  "bos_token_id": 100000,
3
  "eos_token_id": 100001,
4
+ "pad_token_id": 151643,
5
+ "max_length": 2048,
6
+ "max_new_tokens": 1024,
7
  "do_sample": true,
 
 
8
  "temperature": 0.7,
9
+ "top_p": 0.9,
10
+ "use_cache": true,
11
+ "num_image_tokens": 576
12
  }
model_index.json CHANGED
@@ -1,32 +1,20 @@
1
  {
2
- "model_type": "multi_modality",
3
- "pipeline_tag": "image-to-text",
4
- "framework": "transformers.js",
5
  "format": "onnx",
 
6
  "device": "webgpu",
7
- "total_size_mb": 4743.1,
8
- "onnx_files": [
9
- {
10
- "name": "decoder.onnx",
11
- "size_mb": 320.1
12
- },
13
- {
14
- "name": "embed_tokens.onnx",
15
- "size_mb": 64.0
16
- },
17
- {
18
- "name": "encoder.onnx",
19
- "size_mb": 1159.0
20
- },
21
- {
22
- "name": "lm_head.onnx",
23
- "size_mb": 1600.0
24
- },
25
- {
26
- "name": "prepare_inputs_embeds.onnx",
27
- "size_mb": 1600.0
28
- }
29
  ],
30
- "based_on": "onnx-community/Janus-1.3B-ONNX (working structure)",
31
- "scaled_to": "Janus-Pro-7B"
 
 
32
  }
 
1
  {
2
+ "model_name": "Janus-Pro-7B",
3
+ "model_type": "multimodal",
4
+ "architecture": "janus",
5
  "format": "onnx",
6
+ "quantization": "q4f16",
7
  "device": "webgpu",
8
+ "total_size_mb": 4935.2,
9
+ "components_count": 6,
10
+ "status": "complete",
11
+ "capabilities": [
12
+ "text-to-image",
13
+ "image-to-text",
14
+ "multimodal-chat"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  ],
16
+ "transformers_js_compatible": true,
17
+ "webgpu_optimized": true,
18
+ "export_date": "2025-09-27",
19
+ "notes": "All 6 components successfully exported for WebGPU deployment"
20
  }
processor_config.json CHANGED
@@ -2,8 +2,6 @@
2
  "add_special_token": false,
3
  "ignore_id": -100,
4
  "image_tag": "<image_placeholder>",
5
- "image_start_tag": "<begin_of_image>",
6
- "image_end_tag": "<end_of_image>",
7
  "mask_prompt": true,
8
  "num_image_tokens": 576,
9
  "processor_class": "VLChatProcessor",
 
2
  "add_special_token": false,
3
  "ignore_id": -100,
4
  "image_tag": "<image_placeholder>",
 
 
5
  "mask_prompt": true,
6
  "num_image_tokens": 576,
7
  "processor_class": "VLChatProcessor",