rkazants committed on
Commit
4389c60
·
verified ·
1 Parent(s): d2ca102

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +34 -26
README.md CHANGED
@@ -7,7 +7,6 @@ license: apache-2.0
7
  from transformers import AutoConfig, AutoModel, logging
8
  from transformers import AutoModel, AutoTokenizer
9
  import torch
10
- import torch.nn as nn
11
  from PIL import Image
12
  import os
13
 
@@ -18,44 +17,53 @@ MODEL_ID = "openbmb/MiniCPM-o-2_6"
18
  device = "cpu"
19
  cfg = AutoConfig.from_pretrained(MODEL_ID, trust_remote_code=True)
20
 
21
- cfg.hidden_size = 28 * 6
22
- cfg.num_heads = 2
23
- cfg.num_hidden_layers = 2
24
- cfg.intermediate_size = 32
 
 
25
 
26
- cfg.vision_config.hidden_size = 32
27
  cfg.vision_config.num_hidden_layers = 1
28
- cfg.vision_config.num_attention_heads = 2
29
- cfg.vision_config.intermediate_size = 32
30
- cfg.vision_config.image_size = 224
31
 
32
- cfg.audio_config.encoder_layers = 2
 
 
 
 
 
33
 
34
- cfg.tts_config.llm_dim = 32
35
- cfg.tts_config.hidden_size = 24
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  model = AutoModel.from_config(cfg, trust_remote_code=True)
38
 
 
 
 
39
  print("Built tiny MiniCPM-o model on", device)
40
- print("Config summary:", {k: getattr(cfg, k) for k in ["hidden_size", "num_hidden_layers", "num_attention_heads", "vocab_size"] if hasattr(cfg, k)})
41
 
42
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
43
 
44
- image = Image.open('./image.jpg').convert('RGB')
45
- question = 'What is in the image?'
46
- msgs = [{'role': 'user', 'content': [image, question]}]
47
-
48
  output_dir = "./tiny-random-minicpmo-new-version"
49
  os.makedirs(output_dir, exist_ok=True)
50
- model.save_pretrained(output_dir)
51
  tokenizer.save_pretrained(output_dir)
52
  model.processor.save_pretrained(output_dir)
53
-
54
- print("Inference starts here")
55
- res = model.chat(
56
- image=None,
57
- msgs=msgs,
58
- tokenizer=tokenizer
59
- )
60
- print(res)
61
  ```
 
7
  from transformers import AutoConfig, AutoModel, logging
8
  from transformers import AutoModel, AutoTokenizer
9
  import torch
 
10
  from PIL import Image
11
  import os
12
 
 
17
  device = "cpu"
18
  cfg = AutoConfig.from_pretrained(MODEL_ID, trust_remote_code=True)
19
 
20
+ cfg.hidden_size = 24 * 6
21
+ #cfg.hidden_size = 128
22
+ cfg.num_heads = 1
23
+ cfg.num_hidden_layers = 28
24
+ cfg.intermediate_size = 16
25
+ cfg.num_attention_heads=24
26
 
27
+ cfg.vision_config.hidden_size = 8
28
  cfg.vision_config.num_hidden_layers = 1
29
+ cfg.vision_config.num_attention_heads = 1
30
+ cfg.vision_config.intermediate_size = 8
31
+ #cfg.vision_config.image_size = 100
32
 
33
+ cfg.audio_config.encoder_layers = 1
34
+ cfg.audio_config.decoder_layers = 1
35
+ cfg.audio_config.decoder_ffn_dim = 1024
36
+ #cfg.audio_config.d_model = 32
37
+ #cfg.audio_config.encoder_ffn_dim = 1024
38
+ #cfg.audio_config.use_bfloat16=True
39
 
40
+ cfg.tts_config.llm_dim = 16
41
+ cfg.tts_config.hidden_size = 12
42
+
43
+ cfg.tts_config.llm_dim = 4 # keep small (interface with LM)
44
+ cfg.tts_config.hidden_size = 8 # shrink internal TTS width
45
+ cfg.tts_config.intermediate_size = 4 # shrink FFN
46
+ cfg.tts_config.num_layers = 1 # minimum, keeps a single block
47
+ cfg.tts_config.num_heads = 1 # avoid multi-head blowup
48
+ cfg.tts_config.num_hidden_layers = 1
49
+ cfg.tts_config.num_mel_bins = 10
50
+ cfg.tts_config.num_attention_heads = 1
51
+ cfg.tts_config.num_text_tokens = 20
52
+ cfg.tts_config.num_audio_tokens = 10
53
+ #cfg.tts_config.use_bfloat16=True
54
 
55
  model = AutoModel.from_config(cfg, trust_remote_code=True)
56
 
57
+ # cast to bfloat16
58
+ model = model.to(dtype=torch.bfloat16, device=device)
59
+
60
  print("Built tiny MiniCPM-o model on", device)
 
61
 
62
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
63
 
 
 
 
 
64
  output_dir = "./tiny-random-minicpmo-new-version"
65
  os.makedirs(output_dir, exist_ok=True)
66
+ model.save_pretrained(output_dir, safe_serialization=True)
67
  tokenizer.save_pretrained(output_dir)
68
  model.processor.save_pretrained(output_dir)
 
 
 
 
 
 
 
 
69
  ```