import os
import sys

import torch
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import create_repo, upload_folder

# Add project directories to path to ensure proper imports
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from sys import path

path.append(
    os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "3d_parallel",
        "step1_modelling",
    )
)
from model import Llama


class HFWeightConverter:
    """Move weights between Hugging Face causal-LM checkpoints and the custom ``Llama``.

    The class is a stateless namespace: every method is a static or class
    method. Parameter names are translated through the table returned by
    :meth:`get_key_mapping`.
    """

    @staticmethod
    def get_key_mapping(num_layers):
        """Return the HF -> custom parameter-name mapping.

        Creates a mapping of parameter names from Hugging Face
        LlamaForCausalLM/SmolLMForCausalLM to our custom Llama implementation.

        Args:
            num_layers: Number of decoder layers to emit per-layer entries for.

        Returns:
            A dict whose keys are Hugging Face state-dict names and whose
            values are the corresponding custom-model names. It holds three
            global entries followed by nine entries per layer.
        """
        mapping = {
            "model.embed_tokens.weight": "embedding.weight",
            "model.norm.weight": "final_norm.weight",
            "lm_head.weight": "final_proj.weight",
        }
        # (HF suffix, custom suffix) for each decoder layer, in insertion order.
        per_layer_suffixes = (
            ("input_layernorm.weight", "input_layernorm.weight"),
            ("post_attention_layernorm.weight", "post_attention_layernorm.weight"),
            ("self_attn.q_proj.weight", "attention.q_proj.weight"),
            ("self_attn.k_proj.weight", "attention.k_proj.weight"),
            ("self_attn.v_proj.weight", "attention.v_proj.weight"),
            ("self_attn.o_proj.weight", "attention.out_proj.weight"),
            ("mlp.gate_proj.weight", "mlp.gate_proj.weight"),
            ("mlp.up_proj.weight", "mlp.up_proj.weight"),
            ("mlp.down_proj.weight", "mlp.down_proj.weight"),
        )
        for i in range(num_layers):
            prefix_hf = f"model.layers.{i}"
            prefix_custom = f"decoder_layers.{i}"
            for hf_suffix, custom_suffix in per_layer_suffixes:
                mapping[f"{prefix_hf}.{hf_suffix}"] = f"{prefix_custom}.{custom_suffix}"
        return mapping

    @classmethod
    def load_and_convert_hf(cls, model_name: str, device="cpu") -> tuple[Llama, AutoConfig, AutoTokenizer]:
        """Load a Hugging Face checkpoint into a freshly built custom ``Llama``.

        Downloads a Llama/SmolLM model from Hugging Face and maps its weights
        to our custom Llama model.

        Args:
            model_name: Identifier passed to the ``from_pretrained`` loaders.
            device: Device the custom model is moved to after loading.

        Returns:
            ``(custom_model, config, tokenizer)``.

        Raises:
            RuntimeError: Propagated from the strict ``load_state_dict`` call
                when the mapped names or shapes do not match the custom model.
        """
        print(f"Fetching config and tokenizer for {model_name}...")
        config = AutoConfig.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)

        # Make sure mandatory properties are in config
        # NOTE(review): presumably Llama(config) reads rope_theta - confirm.
        if not hasattr(config, "rope_theta"):
            config.rope_theta = 10000.0

        print("Downloading pre-trained weights from Hugging Face...")
        hf_model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32)
        hf_state_dict = hf_model.state_dict()

        print("Initializing custom Llama framework model...")
        custom_model = Llama(config)
        custom_state_dict = custom_model.state_dict()

        print("Mapping state dict weights...")
        key_map = cls.get_key_mapping(config.num_hidden_layers)
        mapped_state_dict = {}
        for hf_key, custom_key in key_map.items():
            if hf_key in hf_state_dict:
                mapped_state_dict[custom_key] = hf_state_dict[hf_key]
            else:
                print(f"Warning: Expected key {hf_key} not found in HF state dict!")

        # Fill any remaining keys from custom model defaults so that the
        # strict load below sees every key the custom model expects.
        for key, value in custom_state_dict.items():
            mapped_state_dict.setdefault(key, value)

        custom_model.load_state_dict(mapped_state_dict)
        custom_model.to(device)
        print("Model weight mapping completed successfully!")

        # Clean up memory. mapped_state_dict references the same tensors as
        # the HF model, so it has to be dropped too for them to be released.
        del hf_model
        del hf_state_dict
        del mapped_state_dict
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        return custom_model, config, tokenizer

    @classmethod
    def save_custom_to_hf_format(cls, custom_model: Llama, hf_config: AutoConfig, tokenizer: AutoTokenizer, save_directory: str):
        """Save a custom ``Llama`` as a standard Hugging Face checkpoint.

        Converts the custom Llama model state_dict back to Hugging Face format
        and saves it, together with the tokenizer, into ``save_directory``.

        Args:
            custom_model: Model whose ``state_dict()`` is exported.
            hf_config: Config used to build the Hugging Face template model
                and to determine the number of layers in the key mapping.
            tokenizer: Tokenizer saved alongside the model.
            save_directory: Target directory; created if it does not exist.
        """
        os.makedirs(save_directory, exist_ok=True)

        print("Initializing a native Hugging Face model template...")
        # The template starts from its own initial weights; the load below
        # overwrites every parameter that has a mapped custom counterpart.
        hf_model = AutoModelForCausalLM.from_config(hf_config)

        key_map = cls.get_key_mapping(hf_config.num_hidden_layers)
        reverse_map = {custom_key: hf_key for hf_key, custom_key in key_map.items()}

        # Custom entries with no HF counterpart are skipped (e.g. rotary
        # embeddings, which are not saved in the HF state dict).
        hf_state_dict = {
            reverse_map[custom_key]: value
            for custom_key, value in custom_model.state_dict().items()
            if custom_key in reverse_map
        }

        # strict=False keeps the export best-effort, but report what was not
        # covered so a partially initialised checkpoint is never saved silently.
        load_result = hf_model.load_state_dict(hf_state_dict, strict=False)
        if load_result.missing_keys:
            print(
                "Warning: HF parameters not provided by the custom model "
                f"(left at template initialization): {list(load_result.missing_keys)}"
            )
        if load_result.unexpected_keys:
            print(
                "Warning: mapped keys not present in the HF template "
                f"(ignored): {list(load_result.unexpected_keys)}"
            )

        print(f"Saving standard Hugging Face model and tokenizer to {save_directory}...")
        hf_model.save_pretrained(save_directory)
        tokenizer.save_pretrained(save_directory)
        print("Save completed successfully!")

    @staticmethod
    def _build_model_card(repo_id: str) -> str:
        """Return the README.md text (YAML front matter + Markdown) for ``repo_id``."""
        model_title = repo_id.split("/")[-1]
        lines = [
            "---",
            "language: en",
            "license: mit",
            "tags:",
            "- llama",
            "- text-generation",
            "- custom-framework",
            "---",
            "",
            f"# {model_title}",
            "",
            "This model was trained or fine-tuned using the "
            "**Distributed-Transformer-Training-Framework**, an advanced and "
            "intelligent distributed training system.",
            "",
            "## Model Description",
            "- **Architecture**: LLaMA-style custom transformer",
            "- **Base Model**: Loaded via pre-trained open-source parameters and "
            "mapped directly to custom modeling structures.",
            "- **Framework capabilities**: Hand-rolled rotary positional embeddings, "
            "grouped query attention, data/tensor/pipeline parallelism.",
            "",
            "## How to use",
            "You can load this model directly using Hugging Face `transformers`:",
            "",
            "```python",
            "from transformers import AutoModelForCausalLM, AutoTokenizer",
            "",
            f'model = AutoModelForCausalLM.from_pretrained("{repo_id}")',
            f'tokenizer = AutoTokenizer.from_pretrained("{repo_id}")',
            "",
            'inputs = tokenizer("Hello, I am a custom LLaMA model", return_tensors="pt")',
            "outputs = model.generate(**inputs, max_new_tokens=50)",
            "print(tokenizer.decode(outputs[0], skip_special_tokens=True))",
            "```",
            "",
            "Developed and uploaded using the **Intelligent Framework Command Center** "
            "of the Distributed-Transformer-Training-Framework.",
        ]
        return "\n".join(lines) + "\n"

    @staticmethod
    def push_to_huggingface(local_dir: str, repo_id: str, token: str) -> str:
        """Upload a saved model directory to the Hugging Face Hub.

        Pushes a saved Hugging Face model directory directly to Hugging Face
        Hub. A ``README.md`` model card is written into ``local_dir`` first
        when the directory does not already contain one.

        Args:
            local_dir: Directory whose contents are uploaded.
            repo_id: Target repository identifier.
            token: Access token passed to the Hub calls.

        Returns:
            The value returned by ``create_repo`` (the repository URL).
        """
        print(f"Connecting to Hugging Face to push model to {repo_id}...")

        # Create repo if not exist
        repo_url = create_repo(
            repo_id=repo_id,
            token=token,
            private=False,
            exist_ok=True,
        )

        # Generate custom Model Card README.md (never overwrite an existing one)
        readme_path = os.path.join(local_dir, "README.md")
        if not os.path.exists(readme_path):
            with open(readme_path, "w", encoding="utf-8") as f:
                f.write(HFWeightConverter._build_model_card(repo_id))

        print("Uploading directory contents to Hugging Face Hub...")
        upload_folder(
            folder_path=local_dir,
            repo_id=repo_id,
            token=token,
            commit_message="Upload fine-tuned model from Distributed-Transformer-Training-Framework",
        )
        print(f"Model pushed successfully! Available at: {repo_url}")
        return repo_url