| from __future__ import annotations |
|
|
| from tokenizers import Tokenizer, models, pre_tokenizers |
| from transformers import AutoConfig, AutoModelForCausalLM, PreTrainedTokenizerFast |
|
|
|
|
def make_tiny_qwen2_model_and_tokenizer(
    *,
    n_layers: int = 3,
    d_model: int = 48,
    n_heads: int = 4,
    n_kv_heads: int = 2,
    max_pos: int = 128,
):
    """Build a tiny, randomly initialized Qwen2 causal LM plus a minimal
    whitespace word-level tokenizer — intended for fast unit tests.

    Keyword Args:
        n_layers: Number of transformer layers.
        d_model: Hidden size; the MLP intermediate size is ``2 * d_model``.
        n_heads: Number of attention heads.
        n_kv_heads: Number of key/value heads (grouped-query attention).
        max_pos: Maximum sequence length for the position embeddings.

    Returns:
        A ``(model, tokenizer)`` pair. The model is in eval mode; the
        tokenizer maps tokens ``t0`` .. ``t499`` to ids ``0`` .. ``499`` and
        carries a trivial chat template that concatenates message contents.
    """
    vocab_size = 500

    cfg = AutoConfig.for_model(
        "qwen2",
        vocab_size=vocab_size,
        hidden_size=d_model,
        intermediate_size=d_model * 2,
        num_hidden_layers=n_layers,
        num_attention_heads=n_heads,
        num_key_value_heads=n_kv_heads,
        max_position_embeddings=max_pos,
        use_sliding_window=False,
        attn_implementation="eager",
    )
    lm = AutoModelForCausalLM.from_config(cfg, attn_implementation="eager")
    lm.eval()

    # Word-level backend: "t<i>" -> i, splitting input on whitespace.
    # "t0" doubles as the unknown token.
    word_vocab = {f"t{i}": i for i in range(vocab_size)}
    backend = Tokenizer(models.WordLevel(vocab=word_vocab, unk_token="t0"))
    backend.pre_tokenizer = pre_tokenizers.Whitespace()

    tok = PreTrainedTokenizerFast(tokenizer_object=backend, eos_token="t1", pad_token="t2")
    tok.chat_template = "{% for m in messages %}{{ m['content'] }}{% endfor %}"
    return lm, tok
|
|