HaileyStorm
/

llama3-5.4b-instruct-unhealed

Text Generation

text-generation-inference

Model card Files Files and versions

HaileyStorm committed on May 25, 2024

Commit

7b54808

·

verified ·

1 Parent(s): 3000a46

Upload 2 files

Files changed (2) hide show

scripts/ckpt2hf.py +47 -0
scripts/full.yaml +2 -2

scripts/ckpt2hf.py ADDED Viewed

	@@ -0,0 +1,47 @@

+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+# Merge the sharded torch checkpoint files into a single state dict, load it
+# into a model built from the directory at `original_model_name`, convert the
+# weights to float16, and save model + tokenizer to `output_dir`.
+print("Loading checkpoint...")
+# Paths to the checkpoint shards that together make up the full state dict
+checkpoint_paths = [
+    './llama3-5b/model-00001-of-00003.pt',
+    './llama3-5b/model-00002-of-00003.pt',
+    './llama3-5b/model-00003-of-00003.pt',
+]
+# Accumulates the entries of every shard
+merged_state_dict = {}
+for checkpoint_path in checkpoint_paths:
+    # weights_only=True restricts unpickling to tensors and plain containers,
+    # so a tampered .pt file cannot execute arbitrary code while loading.
+    checkpoint = torch.load(checkpoint_path, map_location='cpu', weights_only=True)
+    # dict.update() overwrites silently; surface keys present in more than one shard
+    duplicate_keys = merged_state_dict.keys() & checkpoint.keys()
+    if duplicate_keys:
+        print(f"Warning: {len(duplicate_keys)} key(s) in {checkpoint_path} overwrite earlier shards")
+    merged_state_dict.update(checkpoint)
+print("Loading original model...")
+# Directory providing the model config (and, further down, the tokenizer)
+original_model_name = "../../slice_with_mergekit/merged/"
+# Build the model from that directory, but take the weights from the merged
+# state dict instead of the weight files stored there
+model = AutoModelForCausalLM.from_pretrained(original_model_name, state_dict=merged_state_dict)
+# Drop our reference to the merged dict so its memory can be reclaimed once
+# nothing else holds it; it is not used again below
+del merged_state_dict
+print("Converting to fp16...")
+# Convert model parameters to float16 (in place)
+model.half()
+print("Saving model...")
+# Save the model in the safetensors format
+output_dir = './llama3-5b/hf/'
+model.save_pretrained(output_dir, safe_serialization=True)
+print("Saving tokenizer...")
+# The tokenizer is copied unchanged from the original model directory so the
+# output directory is self-contained
+tokenizer = AutoTokenizer.from_pretrained(original_model_name)
+tokenizer.save_pretrained(output_dir)
+print(f"Merged model saved to {output_dir}")

scripts/full.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 # Tokenizer
 tokenizer:
   _component_: torchtune.models.llama3.llama3_tokenizer
-  path: ../original/tokenizer.model
 # Dataset and Sampler
 dataset:
@@ -29,7 +29,7 @@ model:
 checkpointer:
   _component_: torchtune.utils.FullModelHFCheckpointer
-  checkpoint_dir: ../merged/
   checkpoint_files: [
     model-00001-of-00003.safetensors,
     model-00002-of-00003.safetensors,

 # Tokenizer
 tokenizer:
   _component_: torchtune.models.llama3.llama3_tokenizer
+  path: ../tokenizer.model
 # Dataset and Sampler
 dataset:
 checkpointer:
   _component_: torchtune.utils.FullModelHFCheckpointer
+  checkpoint_dir: ../
   checkpoint_files: [
     model-00001-of-00003.safetensors,
     model-00002-of-00003.safetensors,