File size: 2,932 Bytes
97e312a
84f0b80
f45427d
97e312a
f45427d
 
 
 
 
 
 
 
b79954f
97e312a
f45427d
 
 
 
 
 
 
 
b79954f
97e312a
f45427d
 
 
 
 
 
 
 
b79954f
97e312a
f45427d
 
 
 
 
 
 
 
b79954f
97e312a
f45427d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b79954f
84f0b80
 
 
 
 
 
b79954f
f45427d
 
 
 
 
84f0b80
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
from state import Model

# Gemma 3 270M — https://huggingface.co/google/gemma-3-270m/blob/main/config.json
GEMMA3_270M = Model(
    num_layers=18,
    hidden_dim=640,
    intermediate_size=2048,
    vocab_size=262144,
    # Gemma models tie the input embedding and output (lm_head) weights.
    weight_tied_embeddings=True,
    # Dense model: the expert fields degenerate to a single always-active expert.
    is_moe=False,
    active_experts=1,
    total_experts=1,
)
# Gemma 3 1B — https://huggingface.co/google/gemma-3-1b-it/blob/main/config.json
GEMMA3_1B = Model(
    num_layers=26,
    hidden_dim=1152,
    intermediate_size=6912,
    vocab_size=262144,
    # Gemma models tie the input embedding and output (lm_head) weights.
    weight_tied_embeddings=True,
    # Dense model: the expert fields degenerate to a single always-active expert.
    is_moe=False,
    active_experts=1,
    total_experts=1,
)
# Gemma 3 4B — https://huggingface.co/google/gemma-3-4b-it/blob/main/config.json
GEMMA3_4B = Model(
    num_layers=34,
    hidden_dim=2560,
    intermediate_size=10240,
    vocab_size=262144,
    # Gemma models tie the input embedding and output (lm_head) weights.
    weight_tied_embeddings=True,
    # Dense model: the expert fields degenerate to a single always-active expert.
    is_moe=False,
    active_experts=1,
    total_experts=1,
)
# Gemma 3 12B — https://huggingface.co/google/gemma-3-12b-it/blob/main/config.json
GEMMA3_12B = Model(
    num_layers=48,
    hidden_dim=3840,
    intermediate_size=15360,
    vocab_size=262144,
    # Gemma models tie the input embedding and output (lm_head) weights.
    weight_tied_embeddings=True,
    # Dense model: the expert fields degenerate to a single always-active expert.
    is_moe=False,
    active_experts=1,
    total_experts=1,
)
# Gemma 3 27B — https://huggingface.co/google/gemma-3-27b-it/blob/main/config.json
GEMMA3_27B = Model(
    num_layers=62,
    hidden_dim=5376,
    intermediate_size=21504,
    vocab_size=262144,
    # Gemma models tie the input embedding and output (lm_head) weights.
    weight_tied_embeddings=True,
    # Dense model: the expert fields degenerate to a single always-active expert.
    is_moe=False,
    active_experts=1,
    total_experts=1,
)
# No Maverick: non-homogeneous layer stacks are not supported yet.

# Llama 4 Scout (17B active / 16 experts)
# https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct/blob/main/config.json
LLAMA4_SCOUT = Model(
    num_layers=48,
    hidden_dim=5120,
    # Per-expert FFN width (config `intermediate_size`, not `intermediate_size_mlp`).
    intermediate_size=8192,
    vocab_size=202048,
    # NOTE(review): verify against config.json `tie_word_embeddings` — large
    # Llama models typically do not tie embeddings.
    weight_tied_embeddings=True,
    is_moe=True,
    # 2 active / 17 total — presumably 16 routed experts plus the shared
    # expert counted as one; confirm against the MoE block definition.
    active_experts=2,
    total_experts=17,
)

# Llama 3.2 1B — https://huggingface.co/unsloth/Llama-3.2-1B-Instruct/blob/main/config.json
LLAMA3_1B = Model(
    num_layers=16,
    hidden_dim=2048,
    intermediate_size=8192,
    vocab_size=128256,
    # The small Llama 3.2 models (1B/3B) tie input/output embeddings.
    weight_tied_embeddings=True,
    # Dense model: the expert fields degenerate to a single always-active expert.
    is_moe=False,
    active_experts=1,
    total_experts=1,
)

# Llama 3.2 3B — https://huggingface.co/unsloth/Llama-3.2-3B-Instruct/blob/main/config.json
LLAMA3_3B = Model(
    num_layers=28,
    hidden_dim=3072,
    intermediate_size=8192,
    vocab_size=128256,
    # The small Llama 3.2 models (1B/3B) tie input/output embeddings.
    weight_tied_embeddings=True,
    # Dense model: the expert fields degenerate to a single always-active expert.
    is_moe=False,
    active_experts=1,
    total_experts=1,
)

# Llama 3 8B — https://huggingface.co/unsloth/llama-3-8b-Instruct/blob/main/config.json
LLAMA3_8B = Model(
    vocab_size=128256,
    num_layers=32,
    hidden_dim=4096,
    intermediate_size=14336,
    # FIX: the referenced config.json has "tie_word_embeddings": false —
    # unlike the 3.2 1B/3B models, Llama 3 8B keeps a separate lm_head.
    weight_tied_embeddings=False,
    # Dense model: the expert fields degenerate to a single always-active expert.
    active_experts=1,
    total_experts=1,
    is_moe=False,
)

# Llama 3.3 70B — https://huggingface.co/unsloth/Llama-3.3-70B-Instruct/blob/main/config.json
LLAMA3_70B = Model(
    vocab_size=128256,
    num_layers=80,
    hidden_dim=8192,
    intermediate_size=28672,
    # FIX: the referenced config.json has "tie_word_embeddings": false —
    # the 70B model keeps a separate lm_head rather than tying embeddings.
    weight_tied_embeddings=False,
    # Dense model: the expert fields degenerate to a single always-active expert.
    active_experts=1,
    total_experts=1,
    is_moe=False,
)

# Display-name -> preset config registry; insertion order is the menu order.
DEFAULTS = {
    name: preset
    for name, preset in [
        ("Gemma3 270M", GEMMA3_270M),
        ("Gemma3 1B", GEMMA3_1B),
        ("Gemma3 4B", GEMMA3_4B),
        ("Gemma3 12B", GEMMA3_12B),
        ("Gemma3 27B", GEMMA3_27B),
        ("Llama3 1B", LLAMA3_1B),
        ("Llama3 3B", LLAMA3_3B),
        ("Llama3 8B", LLAMA3_8B),
        ("Llama3 70B", LLAMA3_70B),
        ("Llama4 Scout", LLAMA4_SCOUT),
    ]
}