Spaces:

Aravindhan11
/

Distributed-Transformer-Framework

Running

App Files Files Community

Distributed-Transformer-Framework / hf_converter.py

Aravindhan11

Deploy Intelligent Distributed LLaMA Framework

52510e8 verified 3 days ago

raw

history blame contribute delete

7.72 kB

	import os
	import sys
	import torch
	from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
	from huggingface_hub import create_repo, upload_folder

	# Add project directories to path to ensure proper imports.
	# First entry: the directory that contains this file.
	sys.path.append(os.path.dirname(os.path.abspath(__file__)))
	# `path` is the very same list object as `sys.path`; this import only adds a shorter alias.
	from sys import path
	# "3d_parallel" starts with a digit, so it cannot be named in an `import` statement;
	# its "step1_modelling" subdirectory is appended to the search path instead
	# (presumably where the `model` module imported below lives — TODO confirm).
	path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), "3d_parallel", "step1_modelling"))

	from model import Llama

	class HFWeightConverter:
	    """
	    Helpers that move weights between Hugging Face Llama-style checkpoints and
	    the custom ``Llama`` model, and publish the converted result to the Hub.

	    All methods are static/class methods; the class holds no state.
	    Type annotations are quoted so they are not evaluated at definition time.
	    """

	    @staticmethod
	    def get_key_mapping(num_layers):
	        """
	        Creates a mapping of parameter names from Hugging Face LlamaForCausalLM/SmolLMForCausalLM
	        to our custom Llama implementation.

	        Args:
	            num_layers: Number of decoder layers to generate per-layer entries for.

	        Returns:
	            dict whose keys are Hugging Face state-dict names and whose values are
	            the matching custom-model names (3 global entries + 9 per layer).
	        """
	        mapping = {
	            "model.embed_tokens.weight": "embedding.weight",
	            "model.norm.weight": "final_norm.weight",
	            "lm_head.weight": "final_proj.weight",
	        }

	        # (Hugging Face suffix, custom suffix) pairs repeated for every decoder layer.
	        per_layer_suffixes = (
	            ("input_layernorm.weight", "input_layernorm.weight"),
	            ("post_attention_layernorm.weight", "post_attention_layernorm.weight"),
	            ("self_attn.q_proj.weight", "attention.q_proj.weight"),
	            ("self_attn.k_proj.weight", "attention.k_proj.weight"),
	            ("self_attn.v_proj.weight", "attention.v_proj.weight"),
	            ("self_attn.o_proj.weight", "attention.out_proj.weight"),
	            ("mlp.gate_proj.weight", "mlp.gate_proj.weight"),
	            ("mlp.up_proj.weight", "mlp.up_proj.weight"),
	            ("mlp.down_proj.weight", "mlp.down_proj.weight"),
	        )
	        for i in range(num_layers):
	            for hf_suffix, custom_suffix in per_layer_suffixes:
	                mapping[f"model.layers.{i}.{hf_suffix}"] = f"decoder_layers.{i}.{custom_suffix}"

	        return mapping

	    @classmethod
	    def load_and_convert_hf(cls, model_name: str, device="cpu") -> "tuple[Llama, AutoConfig, AutoTokenizer]":
	        """
	        Downloads a Llama/SmolLM model from Hugging Face, maps its weights to our custom Llama model.

	        Args:
	            model_name: Identifier passed to the ``from_pretrained`` loaders.
	            device: Device the custom model is moved to after loading.

	        Returns:
	            ``(custom_model, config, tokenizer)``.

	        Raises:
	            Whatever ``load_state_dict`` raises when a mapped tensor does not fit
	            the custom model (the load is strict).
	        """
	        print(f"Fetching config and tokenizer for {model_name}...")
	        config = AutoConfig.from_pretrained(model_name)
	        tokenizer = AutoTokenizer.from_pretrained(model_name)

	        # Make sure mandatory properties are in config
	        if not hasattr(config, "rope_theta"):
	            config.rope_theta = 10000.0

	        print("Downloading pre-trained weights from Hugging Face...")
	        hf_model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32)
	        hf_state_dict = hf_model.state_dict()

	        print("Initializing custom Llama framework model...")
	        custom_model = Llama(config)
	        custom_state_dict = custom_model.state_dict()

	        print("Mapping state dict weights...")
	        key_map = cls.get_key_mapping(config.num_hidden_layers)

	        mapped_state_dict = {}
	        missing_hf_keys = []
	        for hf_key, custom_key in key_map.items():
	            if hf_key in hf_state_dict:
	                mapped_state_dict[custom_key] = hf_state_dict[hf_key]
	            else:
	                missing_hf_keys.append(hf_key)
	                print(f"Warning: Expected key {hf_key} not found in HF state dict!")

	        # Fill any remaining keys from custom model defaults so the strict load
	        # below succeeds; remember which ones so the user is told about them.
	        defaulted_keys = [k for k in custom_state_dict if k not in mapped_state_dict]
	        for k in defaulted_keys:
	            mapped_state_dict[k] = custom_state_dict[k]
	        if defaulted_keys:
	            print(
	                f"Note: {len(defaulted_keys)} custom model entries were not filled from "
	                f"Hugging Face and keep their initial values: {defaulted_keys}"
	            )

	        custom_model.load_state_dict(mapped_state_dict)
	        custom_model.to(device)
	        if missing_hf_keys:
	            # Do not claim success when pretrained weights were not found.
	            print(
	                f"Warning: Model weight mapping finished, but {len(missing_hf_keys)} expected "
	                "Hugging Face weight(s) were missing; the matching custom weights are untrained."
	            )
	        else:
	            print("Model weight mapping completed successfully!")

	        # Clean up memory. mapped_state_dict also references the HF tensors,
	        # so it has to go as well before they can be released.
	        del mapped_state_dict
	        del hf_model
	        del hf_state_dict
	        if torch.cuda.is_available():
	            torch.cuda.empty_cache()

	        return custom_model, config, tokenizer

	    @classmethod
	    def save_custom_to_hf_format(cls, custom_model: "Llama", hf_config: "AutoConfig", tokenizer: "AutoTokenizer", save_directory: str):
	        """
	        Converts custom Llama model state_dict back to Hugging Face format and saves it.

	        Args:
	            custom_model: Model whose ``state_dict()`` is exported.
	            hf_config: Config used to build the Hugging Face template model.
	            tokenizer: Tokenizer saved next to the model.
	            save_directory: Output directory; created if it does not exist.
	        """
	        os.makedirs(save_directory, exist_ok=True)
	        print("Initializing a native Hugging Face model template...")

	        # The template starts with freshly initialized weights; everything the
	        # mapping covers is overwritten below.
	        hf_model = AutoModelForCausalLM.from_config(hf_config)

	        key_map = cls.get_key_mapping(hf_config.num_hidden_layers)
	        reverse_map = {v: k for k, v in key_map.items()}

	        # Entries without a Hugging Face counterpart (e.g. rotary embeddings,
	        # which are not saved in the HF state dict) are dropped.
	        hf_state_dict = {
	            reverse_map[custom_key]: val
	            for custom_key, val in custom_model.state_dict().items()
	            if custom_key in reverse_map
	        }

	        # With tied word embeddings the template shares one tensor between the
	        # input embedding and the output head, so two different tensors cannot
	        # both survive the load.
	        if getattr(hf_config, "tie_word_embeddings", False):
	            embed = hf_state_dict.get("model.embed_tokens.weight")
	            head = hf_state_dict.get("lm_head.weight")
	            if embed is not None and head is not None and not torch.equal(embed.cpu(), head.cpu()):
	                print(
	                    "Warning: hf_config.tie_word_embeddings is set but embedding.weight and "
	                    "final_proj.weight differ; only one of them will be kept in the saved checkpoint."
	                )

	        # strict=False tolerates mismatches, so report them instead of saving
	        # a partially initialized model without notice.
	        load_result = hf_model.load_state_dict(hf_state_dict, strict=False)
	        if load_result.missing_keys:
	            print(
	                f"Warning: {len(load_result.missing_keys)} Hugging Face entries were not provided "
	                f"by the custom model and keep their template values: {list(load_result.missing_keys)}"
	            )
	        if load_result.unexpected_keys:
	            print(
	                f"Warning: {len(load_result.unexpected_keys)} converted entries do not exist in the "
	                f"Hugging Face template and were ignored: {list(load_result.unexpected_keys)}"
	            )

	        print(f"Saving standard Hugging Face model and tokenizer to {save_directory}...")
	        hf_model.save_pretrained(save_directory)
	        tokenizer.save_pretrained(save_directory)
	        print("Save completed successfully!")

	    @staticmethod
	    def _build_model_card(repo_id: str) -> str:
	        """Return the default README.md text for ``repo_id``.

	        The text is assembled from explicit lines so that the YAML front matter
	        always starts at column 0, independent of how this source is indented.
	        """
	        lines = [
	            "---",
	            "language: en",
	            "license: mit",
	            "tags:",
	            "- llama",
	            "- text-generation",
	            "- custom-framework",
	            "---",
	            "",
	            f"# {repo_id.split('/')[-1]}",
	            "",
	            "This model was trained or fine-tuned using the Distributed-Transformer-Training-Framework, an advanced and intelligent distributed training system.",
	            "",
	            "## Model Description",
	            "- Architecture: LLaMA-style custom transformer",
	            "- Base Model: Loaded via pre-trained open-source parameters and mapped directly to custom modeling structures.",
	            "- Framework capabilities: Hand-rolled rotary positional embeddings, grouped query attention, data/tensor/pipeline parallelism.",
	            "",
	            "## How to use",
	            "You can load this model directly using Hugging Face `transformers`:",
	            "```python",
	            "from transformers import AutoModelForCausalLM, AutoTokenizer",
	            "",
	            f'model = AutoModelForCausalLM.from_pretrained("{repo_id}")',
	            f'tokenizer = AutoTokenizer.from_pretrained("{repo_id}")',
	            "",
	            'inputs = tokenizer("Hello, I am a custom LLaMA model", return_tensors="pt")',
	            "outputs = model.generate(**inputs, max_new_tokens=50)",
	            "print(tokenizer.decode(outputs[0], skip_special_tokens=True))",
	            "```",
	            "",
	            "Developed and uploaded using the Intelligent Framework Command Center of the Distributed-Transformer-Training-Framework.",
	        ]
	        return "\n".join(lines) + "\n"

	    @staticmethod
	    def push_to_huggingface(local_dir: str, repo_id: str, token: str, private: bool = False) -> str:
	        """
	        Pushes a saved Hugging Face model directory directly to Hugging Face Hub.

	        Args:
	            local_dir: Existing directory whose contents are uploaded.
	            repo_id: Target repository, e.g. ``"user/model-name"``.
	            token: Hugging Face access token.
	            private: Create the repository as private. Defaults to ``False``
	                (public), which matches the previous hard-coded behavior.

	        Returns:
	            The value returned by ``create_repo`` (the repository URL).

	        Raises:
	            FileNotFoundError: If ``local_dir`` is not an existing directory.
	        """
	        # Validate before any network call so a wrong path does not leave an
	        # empty repository behind on the Hub.
	        if not os.path.isdir(local_dir):
	            raise FileNotFoundError(f"Model directory does not exist: {local_dir}")

	        print(f"Connecting to Hugging Face to push model to {repo_id}...")

	        # Create repo if not exist
	        repo_url = create_repo(
	            repo_id=repo_id,
	            token=token,
	            private=private,
	            exist_ok=True,
	        )

	        # Generate custom Model Card README.md, never overwriting an existing one
	        readme_path = os.path.join(local_dir, "README.md")
	        if not os.path.exists(readme_path):
	            with open(readme_path, "w", encoding="utf-8") as f:
	                f.write(HFWeightConverter._build_model_card(repo_id))

	        print("Uploading directory contents to Hugging Face Hub...")
	        upload_folder(
	            folder_path=local_dir,
	            repo_id=repo_id,
	            token=token,
	            commit_message="Upload fine-tuned model from Distributed-Transformer-Training-Framework",
	        )

	        print(f"Model pushed successfully! Available at: {repo_url}")
	        return repo_url