If you would like to modify a base model to add our custom reasoning tokens,
here's how to do it.

First, install the `add-tokens` extra, which provides the `transformers` package:
`pip install "ether0[add-tokens]"` (the quotes keep shells such as zsh from globbing the brackets).

Then, configure the following inputs.

In [None]:
# Model name/revisions for Hugging Face Hub
input_model_name = "mistralai/Mistral-Small-24B-Instruct-2501"
input_model_revision: str | None = None
output_model_name = "FILL ME IN"
output_model_revision: str | None = None
output_model_is_private = True
tokenizer_only = False  # Set True to only update the tokenizer
push_to_hf = False  # Set True to push to Hugging Face Hub

# Chat template file that uses the new tokens
chat_template_path = "updated_mistral_chat_template.jinja"

In [None]:
from pathlib import Path

from transformers import AutoModelForCausalLM, AutoTokenizer

from ether0.model_prompts import ANSWER_END, ANSWER_START, THINK_END, THINK_START

# Custom reasoning delimiters to register in the tokenizer's vocabulary
REASONING_TOKENS_TO_ADD = [THINK_START, THINK_END, ANSWER_START, ANSWER_END]

# --- Tokenizer: load, extend the vocabulary, install the new chat template ---
tokenizer = AutoTokenizer.from_pretrained(
    input_model_name,
    revision=input_model_revision,
)
# NOTE: add_tokens (not add_special_tokens) is deliberate: the reasoning
# tokens are normal tokens, so decoding with skip_special_tokens=True
# does not strip them
tokenizer.add_tokens(REASONING_TOKENS_TO_ADD)
_chat_template_file = Path(chat_template_path)
tokenizer.chat_template = _chat_template_file.read_text(encoding="utf-8")

# Destination settings shared by the tokenizer and model uploads
_hub_upload_kwargs = {
    "revision": output_model_revision,
    "private": output_model_is_private,
}
if push_to_hf:
    tokenizer.push_to_hub(output_model_name, **_hub_upload_kwargs)

# --- Model: skipped entirely when only the tokenizer is being updated ---
if not tokenizer_only:
    model = AutoModelForCausalLM.from_pretrained(
        input_model_name,
        revision=input_model_revision,
    )
    # Resize the embeddings to the tokenizer's current length, padded to a
    # multiple of 64 for matmul-friendly shapes.
    # SEE: https://www.thonking.ai/p/what-shapes-do-matrix-multiplications
    _resized_vocab_size = len(tokenizer)
    model.resize_token_embeddings(_resized_vocab_size, pad_to_multiple_of=64)
    if push_to_hf:
        model.push_to_hub(output_model_name, **_hub_upload_kwargs)

Loading checkpoint shards:   0%|          | 0/10 [00:00<?, ?it/s]

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`
The new lm_head weights will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`
