---
license: apache-2.0
---

A tiny, randomly initialized MiniCPM-o 2.6 checkpoint for testing. The script below shrinks each sub-config (LM, vision, audio, TTS), builds the model with random weights, and exports it:

```py
from transformers import AutoConfig, AutoModel, AutoProcessor, AutoTokenizer, logging
import torch
import os

logging.set_verbosity_error()  # silence HF info spam

MODEL_ID = "openbmb/MiniCPM-o-2_6"
device = "cpu"

cfg = AutoConfig.from_pretrained(MODEL_ID, trust_remote_code=True)

# Shrink the language model.
cfg.hidden_size = 24 * 6   # 144; must stay divisible by num_attention_heads
cfg.num_hidden_layers = 28
cfg.intermediate_size = 16
cfg.num_attention_heads = 24

# Shrink the vision tower.
cfg.vision_config.hidden_size = 8
cfg.vision_config.num_hidden_layers = 1
cfg.vision_config.num_attention_heads = 1
cfg.vision_config.intermediate_size = 8

# Shrink the audio encoder/decoder.
cfg.audio_config.encoder_layers = 1
cfg.audio_config.decoder_layers = 1
cfg.audio_config.decoder_ffn_dim = 1024

# Shrink the TTS head.
cfg.tts_config.llm_dim = 4            # keep small (interface with the LM)
cfg.tts_config.hidden_size = 8        # shrink internal TTS width
cfg.tts_config.intermediate_size = 4  # shrink FFN
cfg.tts_config.num_layers = 1         # minimum, keeps a single block
cfg.tts_config.num_heads = 1          # avoid multi-head blowup
cfg.tts_config.num_hidden_layers = 1
cfg.tts_config.num_mel_bins = 10
cfg.tts_config.num_attention_heads = 1
cfg.tts_config.num_text_tokens = 20
cfg.tts_config.num_audio_tokens = 10

# Build with random weights, then cast to bfloat16.
model = AutoModel.from_config(cfg, trust_remote_code=True)
model = model.to(dtype=torch.bfloat16, device=device)
print("Built tiny MiniCPM-o model on", device)

# Tokenizer and processor come from the original repo; the tiny model built
# via from_config has no processor attached, so load one explicitly.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)

output_dir = "./tiny-random-minicpmo-new-version"
os.makedirs(output_dir, exist_ok=True)
model.save_pretrained(output_dir, safe_serialization=True)
tokenizer.save_pretrained(output_dir)
processor.save_pretrained(output_dir)
```
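As a quick sanity check that the export is self-contained, a minimal reload sketch (the directory name matches the script above; the parameter count is printed only for eyeballing, and the exact number depends on the config values chosen):

```py
from transformers import AutoModel, AutoTokenizer
import torch

output_dir = "./tiny-random-minicpmo-new-version"

# Reload the exported checkpoint; trust_remote_code pulls in the custom MiniCPM-o classes.
model = AutoModel.from_pretrained(output_dir, trust_remote_code=True, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(output_dir, trust_remote_code=True)

# With the shrunken config this should come out to a few million parameters at most.
n_params = sum(p.numel() for p in model.parameters())
print(f"Reloaded tiny checkpoint: {n_params / 1e6:.2f}M parameters")
```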