llama2.c-stories110M-pruned2.4 / README.md

Create README.md

a4779a1 verified almost 2 years ago

783 Bytes

import sparseml.transformers

# Hub id of the source checkpoint; passed to oneshot() as model_name_or_path.
original_model_name = "Xenova/llama2.c-stories110M"
# Local directory passed to oneshot() as output_dir and later uploaded as-is.
output_directory = "output/"
# Hub repo id that receives the contents of output_directory.
final_model_name = "nm-testing/llama2.c-stories110M-pruned2.4"

# Dataset name passed to oneshot() as dataset_name.
dataset = "open_platypus"

# Recipe text handed to oneshot(). It declares one SparseGPTModifier with
# sparsity 0.5, a '2:4' mask structure, and quantization turned off.
#
# The literal is a raw string because the `targets` regex contains `\d`.
# In a normal string that is an invalid escape sequence (DeprecationWarning
# since Python 3.6, SyntaxWarning since 3.12). The raw prefix keeps the exact
# same characters in the resulting string while silencing the warning.
recipe = r"""
test_stage:
  obcq_modifiers:
    SparseGPTModifier:
      sparsity: 0.5
      sequential_update: true
      quantize: false
      mask_structure: '2:4'
      targets: ['re:model.layers.\d*$']
"""

# Run the one-shot recipe (SparseGPT modifier) against the source checkpoint.
# Results are written to `output_directory`.
sparseml.transformers.oneshot(
    recipe=recipe,
    model_name_or_path=original_model_name,
    dataset_name=dataset,
    output_dir=output_directory,
)

# Push everything in the output directory to the target Hugging Face Hub repo.
from huggingface_hub import HfApi

hub_api = HfApi()
hub_api.upload_folder(repo_id=final_model_name, folder_path=output_directory)