"""Convert a locally trained TinyLlama checkpoint and publish it to the Hugging Face Hub.

Steps performed, in order:

1. Build a ``TinyLlama`` module from the YAML model configuration.
2. Load the ``.pth`` weights into it and re-save them as ``pytorch_model.bin``.
3. Write ``config.json`` and Hugging Face tokenizer files next to the weights.
4. Create the Hub repository (if needed), write a model card, and upload
   every generated artifact.

The Hub token is read from the ``HUGGINGFACE_API_TOKEN`` environment variable.
"""

import json
import os
import shutil
import warnings

import sentencepiece as spm
import torch
import torch.nn as nn
import yaml
from huggingface_hub import HfApi, upload_file
from tokenizers import SentencePieceBPETokenizer  # noqa: F401  (kept: may be used elsewhere)
from transformers import (
    GPT2Config,
    LlamaTokenizer,
    PreTrainedTokenizerFast,  # noqa: F401  (kept: may be used elsewhere)
)


# Define the TinyLlama Model
class TinyLlama(nn.Module):
    """Embedding -> stack of ``nn.TransformerEncoderLayer`` -> linear vocabulary head.

    Args:
        config: Object exposing ``vocab_size``, ``n_embd``, ``n_head``,
            ``hidden_dim`` and ``n_layer`` attributes.
    """

    def __init__(self, config):
        super().__init__()
        self.embedding = nn.Embedding(config.vocab_size, config.n_embd)
        # NOTE(review): the layers use PyTorch's default ``batch_first=False``
        # and no attention mask is passed in ``forward`` -- confirm this matches
        # how the checkpoint was trained before relying on the outputs.
        self.transformer_blocks = nn.ModuleList([
            nn.TransformerEncoderLayer(
                d_model=config.n_embd,
                nhead=config.n_head,
                dim_feedforward=config.hidden_dim,
            )
            for _ in range(config.n_layer)
        ])
        self.output_layer = nn.Linear(config.n_embd, config.vocab_size)

    def forward(self, x):
        """Map token ids ``x`` to per-position scores over the vocabulary."""
        x = self.embedding(x)
        for block in self.transformer_blocks:
            x = block(x)
        x = self.output_layer(x)
        return x


# Destination repository on the Hub. This is the id the model card below
# tells users to load from, so the two must stay in sync.
repo_id = "jacksonstrut/tinyllama-1.1B-chat"

# Read the token once. When it is unset (None), huggingface_hub falls back to
# the locally cached login, so a missing variable is not treated as an error.
hf_token = os.getenv('HUGGINGFACE_API_TOKEN')
api = HfApi()

# Load Model Configuration from YAML file
model_config_path = "/home/jax/out/custom-model/final/model_config.yaml"
with open(model_config_path, 'r', encoding="utf-8") as file:
    # An empty YAML file parses to None; fall back to the defaults below.
    config_data = yaml.safe_load(file) or {}

# Create Model Configuration
config = GPT2Config(
    vocab_size=config_data.get("vocab_size", 32000),  # Adjust to match TinyLlama's vocab size
    n_embd=config_data.get("n_embd", 2048),  # Embedding size for TinyLlama
    n_layer=config_data.get("n_layer", 24),  # Number of transformer layers
    n_head=config_data.get("n_head", 16),  # Number of attention heads
    hidden_dim=config_data.get("hidden_dim", 8192),  # Feedforward layer dimension
)

# Instantiate the TinyLlama Model
model = TinyLlama(config)

# Load Weights from the .pth file
model_weights_path = "/home/jax/out/custom-model/final/lit_model.pth"
model_weights = torch.load(
    model_weights_path,
    map_location=torch.device('cpu'),
    weights_only=True,
)
# strict=False tolerates minor mismatches, but it also hides the case where
# NO key matches and the randomly initialised model would be uploaded.
# Surface the mismatch so that failure mode is visible.
load_result = model.load_state_dict(model_weights, strict=False)
if load_result.missing_keys or load_result.unexpected_keys:
    warnings.warn(
        "Checkpoint does not fully match the TinyLlama module: "
        f"{len(load_result.missing_keys)} parameter(s) were NOT loaded "
        f"(e.g. {load_result.missing_keys[:3]}), "
        f"{len(load_result.unexpected_keys)} checkpoint key(s) were ignored "
        f"(e.g. {load_result.unexpected_keys[:3]})."
    )

# Directory to Save the Model and Tokenizer for Hugging Face
model_dir = "./huggingface_tinyllama"
os.makedirs(model_dir, exist_ok=True)

# Save the Model Weights
model_weights_save_path = os.path.join(model_dir, "pytorch_model.bin")
torch.save(model.state_dict(), model_weights_save_path)

# Save the Configuration in JSON format
config_save_path = os.path.join(model_dir, "config.json")
with open(config_save_path, 'w', encoding="utf-8") as f:
    json.dump(config.to_dict(), f)

# Load and Convert the SentencePiece model to Hugging Face-compatible format
tokenizer_path = "/home/jax/out/custom-model/final/tokenizer.model"

# Loading with SentencePiece first validates the file and lets us check that
# the tokenizer and the embedding table agree on the vocabulary size.
sp_tokenizer = spm.SentencePieceProcessor()
sp_tokenizer.load(tokenizer_path)
if sp_tokenizer.get_piece_size() != config.vocab_size:
    warnings.warn(
        f"Tokenizer has {sp_tokenizer.get_piece_size()} pieces but the model "
        f"config declares vocab_size={config.vocab_size}."
    )

# A SentencePiece ``.model`` file is neither a vocab/merges pair for
# SentencePieceBPETokenizer nor a directory for ``from_pretrained``.
# LlamaTokenizer wraps the file directly and sets the <s>, </s> and <unk>
# special tokens itself, so no manual add_special_tokens call is needed.
hf_tokenizer = LlamaTokenizer(vocab_file=tokenizer_path)

# Save the Hugging Face tokenizer
hf_tokenizer.save_pretrained(model_dir)

# The upload step below expects tokenizer.model inside model_dir; make sure it
# is there even if save_pretrained did not write it.
tokenizer_model_save_path = os.path.join(model_dir, "tokenizer.model")
if not os.path.exists(tokenizer_model_save_path):
    shutil.copyfile(tokenizer_path, tokenizer_model_save_path)

# Push to Hugging Face Hub
api.create_repo(repo_id=repo_id, token=hf_token, exist_ok=True)

# Create a Model Card for the Model
# The YAML front matter must be the very first thing in README.md, hence the
# trailing backslash that suppresses the newline after the opening quotes.
# NOTE(review): the usage example relies on AutoModelForCausalLM; confirm that
# it can actually load this custom architecture before publishing.
model_card = """\
---
language: en
tags:
- tinyllama
- language-model
- chat
license: apache-2.0
---

# TinyLlama 1.1B Chat Model

## Model Description
TinyLlama is a lightweight LLaMA-based model with 1.1 billion parameters, designed to perform well on conversational and text generation tasks. It has been fine-tuned specifically for chat applications, providing coherent and context-aware responses.

## Training Data
The model was trained on a diverse dataset, including web text, books, and conversational data, to make it capable of handling a wide range of language styles.

## Usage
You can use this model for conversational AI, text completion, or other natural language generation tasks. Here’s a quick example:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("jacksonstrut/tinyllama-1.1B-chat")
model = AutoModelForCausalLM.from_pretrained("jacksonstrut/tinyllama-1.1B-chat")

input_ids = tokenizer("Hello, how are you?", return_tensors="pt").input_ids
output = model.generate(input_ids)
print(tokenizer.decode(output[0]))
```

## Limitations
- The model may produce biased or inappropriate outputs as it is trained on general datasets from the internet.
- It may not be suitable for all applications, especially those requiring factual accuracy.

## License
This model is licensed under the Apache 2.0 License.
"""

# Save the Model Card to README.md
# The card contains a non-ASCII apostrophe, so the encoding is set explicitly.
readme_path = os.path.join(model_dir, "README.md")
with open(readme_path, 'w', encoding="utf-8") as f:
    f.write(model_card)

# Upload files to Hugging Face Hub (weights, config, tokenizer and model card)
files_to_upload = [
    "pytorch_model.bin",
    "config.json",
    "tokenizer.model",
    "tokenizer_config.json",
    "README.md",
]
# Written by save_pretrained on most transformers versions; upload if present.
if os.path.exists(os.path.join(model_dir, "special_tokens_map.json")):
    files_to_upload.append("special_tokens_map.json")

for filename in files_to_upload:
    upload_file(
        path_or_fileobj=os.path.join(model_dir, filename),
        path_in_repo=filename,
        repo_id=repo_id,
        token=hf_token,
    )