codsworth-3.8m / codsworth /huggingface_wrapper.py

Initial upload of Codsworth model

b84d85a verified about 1 month ago

4.76 kB

	"""
	Codsworth HuggingFace Model Wrapper
	Allows loading Codsworth using HuggingFace's AutoModel
	"""

	import json
	from typing import Optional, Tuple

	import torch
	import torch.nn as nn
	from transformers import PreTrainedModel, PretrainedConfig
	from transformers.modeling_outputs import CausalLMOutputWithPast

	from .config import CodsworthConfig


	class CodsworthConfig(PretrainedConfig):
	    """HuggingFace-compatible configuration for Codsworth.

	    Every architecture hyper-parameter is stored as an attribute of the same
	    name. The special-token ids, ``torch_dtype`` and any extra keyword
	    arguments are handed to ``PretrainedConfig.__init__``.

	    Args:
	        vocab_size: Size of the token vocabulary.
	        hidden_size: Width of the model's hidden representation.
	        num_hidden_layers: Number of transformer layers.
	        num_attention_heads: Number of attention heads.
	        head_dim: Dimension of each attention head.
	        intermediate_size: Hidden width of the feed-forward sub-layer.
	        max_position_embeddings: Maximum supported sequence length.
	        rope_theta: Base value for rotary position embeddings.
	        use_rope: Whether rotary position embeddings are enabled.
	        hidden_dropout: Dropout probability for hidden states.
	        attention_dropout: Dropout probability for attention.
	        pad_token_id: Id of the padding token.
	        bos_token_id: Id of the beginning-of-sequence token.
	        eos_token_id: Id of the end-of-sequence token.
	        torch_dtype: Name of the dtype recorded in the configuration.
	        **kwargs: Forwarded unchanged to ``PretrainedConfig``.
	    """

	    model_type = "codsworth"

	    def __init__(
	        self,
	        vocab_size: int = 5004,
	        hidden_size: int = 256,
	        num_hidden_layers: int = 2,
	        num_attention_heads: int = 4,
	        head_dim: int = 64,
	        intermediate_size: int = 512,
	        max_position_embeddings: int = 128,
	        rope_theta: float = 10000.0,
	        use_rope: bool = True,
	        hidden_dropout: float = 0.1,
	        attention_dropout: float = 0.0,
	        pad_token_id: int = 0,
	        bos_token_id: int = 1,
	        eos_token_id: int = 2,
	        torch_dtype: str = "float32",
	        **kwargs,
	    ):
	        # Architecture hyper-parameters are set before the base initializer
	        # runs, so they are already present on the instance when it executes.
	        self.vocab_size = vocab_size
	        self.hidden_size = hidden_size
	        self.num_hidden_layers = num_hidden_layers
	        self.num_attention_heads = num_attention_heads
	        self.head_dim = head_dim
	        self.intermediate_size = intermediate_size
	        self.max_position_embeddings = max_position_embeddings
	        self.rope_theta = rope_theta
	        self.use_rope = use_rope
	        self.hidden_dropout = hidden_dropout
	        self.attention_dropout = attention_dropout

	        # ``torch_dtype`` is a named parameter, so it never reaches **kwargs;
	        # it must be forwarded explicitly or the value is silently dropped.
	        super().__init__(
	            pad_token_id=pad_token_id,
	            bos_token_id=bos_token_id,
	            eos_token_id=eos_token_id,
	            torch_dtype=torch_dtype,
	            **kwargs,
	        )


	class CodsworthPreTrainedModel(PreTrainedModel):
	    """Base class for Codsworth models.

	    Binds the Codsworth configuration class to the HuggingFace model
	    machinery and defines how freshly created sub-modules are initialized.
	    """

	    config_class = CodsworthConfig
	    base_model_prefix = "codsworth"

	    def _init_weights(self, module):
	        """Initialize the parameters of a single sub-module in place.

	        ``nn.Linear`` and ``nn.Embedding`` weights are drawn from a normal
	        distribution (mean 0.0, std 0.02); linear biases are zeroed. Other
	        module types are left untouched.

	        Args:
	            module: The sub-module to initialize.
	        """
	        if isinstance(module, nn.Linear):
	            module.weight.data.normal_(mean=0.0, std=0.02)
	            if module.bias is not None:
	                module.bias.data.zero_()
	        elif isinstance(module, nn.Embedding):
	            module.weight.data.normal_(mean=0.0, std=0.02)
	            # The normal init above also overwrites the padding row; restore
	            # it to zeros so the padding embedding stays a null vector.
	            if module.padding_idx is not None:
	                module.weight.data[module.padding_idx].zero_()


	class CodsworthModel(CodsworthPreTrainedModel):
	    """Codsworth causal language model exposed through the HuggingFace API.

	    Wraps the native ``CodsworthTransformer`` and translates between the
	    HuggingFace configuration/outputs and the native model's own.
	    """

	    def __init__(self, config: CodsworthConfig):
	        """Build the native transformer from a HuggingFace config.

	        Args:
	            config: HuggingFace-style configuration for the model.
	        """
	        super().__init__(config)

	        self.config = config

	        # Import the native pieces lazily. The native config is aliased
	        # because the module-level name ``CodsworthConfig`` refers to the
	        # HuggingFace config class defined in this file, which does not take
	        # the native field names used below.
	        from codsworth.config import CodsworthConfig as InternalConfig
	        from codsworth.model import CodsworthTransformer

	        # Translate HuggingFace field names to the native model's names.
	        internal_config = InternalConfig(
	            vocab_size=config.vocab_size,
	            context_length=config.max_position_embeddings,
	            embedding_dim=config.hidden_size,
	            num_layers=config.num_hidden_layers,
	            num_heads=config.num_attention_heads,
	            head_dim=config.head_dim,
	            ffn_hidden_dim=config.intermediate_size,
	            use_rope=config.use_rope,
	            rope_theta=config.rope_theta,
	            dropout=config.hidden_dropout,
	            attention_dropout=config.attention_dropout,
	            use_flash_attention=False,
	            use_gradient_checkpointing=False,
	        )

	        self.transformer = CodsworthTransformer(internal_config)

	        # Expose the transformer's output head under the conventional
	        # ``lm_head`` name; both attributes reference the same module.
	        self.lm_head = self.transformer.lm_head

	        self.post_init()

	    def forward(
	        self,
	        input_ids: Optional[torch.LongTensor] = None,
	        attention_mask: Optional[torch.Tensor] = None,
	        position_ids: Optional[torch.LongTensor] = None,
	        labels: Optional[torch.LongTensor] = None,
	    ) -> CausalLMOutputWithPast:
	        """Run the wrapped transformer and package its outputs.

	        Args:
	            input_ids: Token ids passed to the transformer.
	            attention_mask: Optional mask passed to the transformer.
	            position_ids: Optional position ids passed to the transformer.
	            labels: Optional target token ids. When given and the
	                transformer does not report a loss itself, a next-token
	                cross-entropy loss is computed; positions labelled ``-100``
	                are ignored.

	        Returns:
	            A ``CausalLMOutputWithPast`` carrying the logits, the loss (or
	            ``None``), and the transformer's ``hidden_states`` entry if it
	            provides one. ``past_key_values`` and ``attentions`` are always
	            ``None``.
	        """
	        outputs = self.transformer(
	            input_ids=input_ids,
	            attention_mask=attention_mask,
	            position_ids=position_ids,
	        )

	        logits = outputs["logits"]
	        loss = outputs.get("loss")

	        if loss is None and labels is not None:
	            # Causal-LM objective: the logits at position t predict token
	            # t + 1, so drop the last logit and the first label.
	            # NOTE(review): assumes logits are (..., seq_len, vocab) -- confirm
	            # against CodsworthTransformer.
	            shift_logits = logits[..., :-1, :].contiguous()
	            shift_labels = labels[..., 1:].contiguous().to(shift_logits.device)
	            loss = nn.functional.cross_entropy(
	                shift_logits.view(-1, shift_logits.size(-1)),
	                shift_labels.view(-1),
	                ignore_index=-100,
	            )

	        return CausalLMOutputWithPast(
	            loss=loss,
	            logits=logits,
	            past_key_values=None,
	            hidden_states=outputs.get("hidden_states"),
	            attentions=None,
	        )

	    def generate(self, inputs, **kwargs):
	        """Delegate generation to the wrapped transformer's ``generate``.

	        Args:
	            inputs: Passed through as the first positional argument.
	            **kwargs: Passed through unchanged.

	        Returns:
	            Whatever ``self.transformer.generate`` returns.
	        """
	        return self.transformer.generate(inputs, **kwargs)


	# Auto mapping: the Auto* classes are imported lazily on first use and cached
	# in these module-level names.
	AutoConfig = None
	AutoModel = None


	def from_pretrained(model_path: str, **kwargs):
	    """Load a Codsworth model through HuggingFace's ``AutoModel``.

	    On the first call the Auto classes are imported and the Codsworth
	    config/model pair is registered with them, so that a checkpoint whose
	    ``model_type`` is ``"codsworth"`` can be resolved.

	    Args:
	        model_path: Checkpoint location passed to ``AutoModel.from_pretrained``.
	        **kwargs: Forwarded unchanged to ``AutoModel.from_pretrained``.

	    Returns:
	        The model instance returned by ``AutoModel.from_pretrained``.
	    """
	    global AutoConfig, AutoModel

	    if AutoConfig is None:
	        from transformers import AutoConfig as _AutoConfig, AutoModel as _AutoModel

	        # Without registration AutoModel has no mapping for "codsworth".
	        # exist_ok avoids an error if the pair was already registered
	        # elsewhere (e.g. by a remote-code load).
	        _AutoConfig.register(CodsworthConfig.model_type, CodsworthConfig, exist_ok=True)
	        _AutoModel.register(CodsworthConfig, CodsworthModel, exist_ok=True)

	        AutoConfig = _AutoConfig
	        AutoModel = _AutoModel

	    return AutoModel.from_pretrained(model_path, **kwargs)