keras
/

smollm3_3b_en

Text Generation

Model card Files Files and versions

smollm3_3b_en / task.json

prasadsachin's picture

Upload folder using huggingface_hub

1936656 verified 3 months ago

history blame contribute delete

6.4 kB

	{
	"module": "keras_hub.src.models.smollm3.smollm3_causal_lm",
	"class_name": "SmolLM3CausalLM",
	"config": {
	"backbone": {
	"module": "keras_hub.src.models.smollm3.smollm3_backbone",
	"class_name": "SmolLM3Backbone",
	"config": {
	"name": "smol_lm3_backbone",
	"trainable": true,
	"dtype": {
	"module": "keras",
	"class_name": "DTypePolicy",
	"config": {
	"name": "float32"
	},
	"registered_name": null
	},
	"vocabulary_size": 128256,
	"hidden_dim": 2048,
	"intermediate_dim": 11008,
	"num_layers": 36,
	"num_attention_heads": 16,
	"num_key_value_heads": 4,
	"attention_bias": false,
	"attention_dropout": 0.0,
	"rope_layer_enabled_list": [
	true,
	true,
	true,
	false,
	true,
	true,
	true,
	false,
	true,
	true,
	true,
	false,
	true,
	true,
	true,
	false,
	true,
	true,
	true,
	false,
	true,
	true,
	true,
	false,
	true,
	true,
	true,
	false,
	true,
	true,
	true,
	false,
	true,
	true,
	true,
	false
	],
	"layer_types": [
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention",
	"full_attention"
	],
	"mlp_bias": false,
	"layer_norm_epsilon": 1e-06,
	"max_position_embeddings": 65536,
	"rope_theta": 5000000.0,
	"partial_rotary_factor": 1.0
	},
	"registered_name": "keras_hub>SmolLM3Backbone"
	},
	"preprocessor": {
	"module": "keras_hub.src.models.smollm3.smollm3_causal_lm_preprocessor",
	"class_name": "SmolLM3CausalLMPreprocessor",
	"config": {
	"name": "smol_lm3_causal_lm_preprocessor_1",
	"trainable": true,
	"dtype": {
	"module": "keras",
	"class_name": "DTypePolicy",
	"config": {
	"name": "float32"
	},
	"registered_name": null
	},
	"tokenizer": {
	"module": "keras_hub.src.models.smollm3.smollm3_tokenizer",
	"class_name": "SmolLM3Tokenizer",
	"config": {
	"name": "smol_lm3_tokenizer",
	"trainable": true,
	"dtype": {
	"module": "keras",
	"class_name": "DTypePolicy",
	"config": {
	"name": "int32"
	},
	"registered_name": null
	},
	"config_file": "tokenizer.json",
	"sequence_length": null,
	"add_prefix_space": false,
	"unsplittable_tokens": [
	"<think>",
	"<tool_call>",
	"<\|end_header_id\|>",
	"<\|eot_id\|>",
	"<\|im_end\|>",
	"<\|end_of_text\|>",
	"</think>",
	"<tool_response>",
	"<\|im_start\|>",
	"</tool_response>",
	"</tool_call>",
	"<\|start_header_id\|>",
	"<\|begin_of_text\|>",
	"<\|eom_id\|>",
	"<code>",
	"</code>",
	"<\|python_tag\|>",
	"<\|finetune_right_pad_id\|>"
	]
	},
	"registered_name": "keras_hub>SmolLM3Tokenizer"
	},
	"config_file": "preprocessor.json",
	"sequence_length": 1024,
	"add_start_token": true,
	"add_end_token": true
	},
	"registered_name": "keras_hub>SmolLM3CausalLMPreprocessor"
	},
	"name": "smol_lm3_causal_lm"
	},
	"registered_name": "keras_hub>SmolLM3CausalLM"
	}