Spaces: Runtime error
Update app.py
app.py CHANGED

@@ -1,5 +1,5 @@
 import gradio as gr
-from transformers import
+from transformers import AutoModel, AutoTokenizer, pipeline, AutoConfig
 from huggingface_hub import cached_download, hf_hub_url, list_models
 import requests
 import json
@@ -7,10 +7,9 @@ import os
 import matplotlib.pyplot as plt
 from io import BytesIO
 import base64
-from transformers.models.auto import AutoModel
-from transformers.modeling_utils import PreTrainedModel
 import torch
 from torch.nn.utils import prune
+from transformers.models.auto import AutoModelForCausalLM  # Import for CausalLM

 # Function to fetch open-weight LLM models
 def fetch_open_weight_models():
@@ -22,14 +21,23 @@ def prune_model(llm_model_name, target_size, output_dir):
     try:
         # Load the LLM model and tokenizer
         llm_tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
-
+        # Handle cases where the model is split into multiple safetensors
+        if "safetensors" in llm_tokenizer.vocab_files_names:
+            llm_model = AutoModelForCausalLM.from_pretrained(
+                llm_model_name,
+                from_safetensors=True,
+                torch_dtype=torch.float16,  # Adjust dtype as needed
+                use_auth_token=None,
+            )
+        else:
+            llm_model = AutoModel.from_pretrained(llm_model_name)

         # Get the model config
         config = AutoConfig.from_pretrained(llm_model_name)
         # Calculate the target number of parameters
         target_num_parameters = int(config.num_parameters * (target_size / 100))

-        # Use merge-kit to prune the model
+        # Use merge-kit to prune the model
         pruned_model = merge_kit_prune(llm_model, target_num_parameters)

         # Save the pruned model
@@ -49,7 +57,7 @@ def prune_model(llm_model_name, target_size, output_dir):
     except Exception as e:
         return f"Error: {e}", None

-# Merge-kit Pruning Function
+# Merge-kit Pruning Function (adjust as needed)
 def merge_kit_prune(model: PreTrainedModel, target_num_parameters: int) -> PreTrainedModel:
     """Prunes a model using a merge-kit approach.

@@ -128,7 +136,7 @@ def create_interface():
     try:
         # Load the pruned model and tokenizer
         tokenizer = AutoTokenizer.from_pretrained(model_path)
-        model =
+        model = AutoModelForCausalLM.from_pretrained(model_path)  # Load as CausalLM

         # Use the pipeline for text generation
         generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
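
A note on the new loading branch: llm_tokenizer.vocab_files_names lists tokenizer files, not model weights, so the "safetensors" check never matches weight shards, and from_safetensors is not a keyword that from_pretrained accepts (recent Transformers detects .safetensors shards on its own; use_safetensors=True forces them, and use_auth_token is deprecated in favor of token). A minimal sketch of the load this hunk seems to intend, assuming a causal-LM checkpoint:

    import torch
    from transformers import AutoModelForCausalLM

    def load_causal_lm(model_name: str):
        # use_safetensors=True forces the .safetensors weights if the repo
        # has them; omit it and Transformers picks the available format.
        return AutoModelForCausalLM.from_pretrained(
            model_name,
            use_safetensors=True,
            torch_dtype=torch.float16,  # halves memory; adjust as needed
        )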
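
A likely source of the Space's runtime error: AutoConfig objects carry no num_parameters attribute, so config.num_parameters raises AttributeError. The count has to come from the loaded model; a sketch:

    # Compute the parameter budget from the loaded model, not the config.
    # model.num_parameters() on a PreTrainedModel gives the same total.
    def target_parameters(model, target_size: float) -> int:
        total = sum(p.numel() for p in model.parameters())
        return int(total * (target_size / 100))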
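
merge_kit_prune is a local function whose body this diff does not show; mergekit itself is a model-merging toolkit, and this is not a call from its API. The file imports torch.nn.utils.prune but never uses it, so a hypothetical stand-in built on those utilities could look like the following. It zeroes the smallest-magnitude weights in place; it sparsifies the model but does not shrink the checkpoint:

    import torch.nn as nn
    from torch.nn.utils import prune

    def magnitude_prune(model: nn.Module, target_num_parameters: int) -> nn.Module:
        # Hypothetical stand-in, not mergekit's API: prune each Linear layer
        # by the global sparsity fraction. The result is approximate, since
        # non-Linear parameters are left untouched.
        total = sum(p.numel() for p in model.parameters())
        amount = min(1.0, max(0.0, 1.0 - target_num_parameters / total))
        for module in model.modules():
            if isinstance(module, nn.Linear):
                prune.l1_unstructured(module, name="weight", amount=amount)
                prune.remove(module, "weight")  # bake the zeros into the tensor
        return model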
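
The commit also removes the PreTrainedModel import while merge_kit_prune's signature still references it, which fails with NameError when the module is imported, and cached_download has been dropped from recent huggingface_hub releases, so that import can fail too. Both fixes are one-liners:

    from transformers import PreTrainedModel  # still needed for the type hints
    from huggingface_hub import hf_hub_url, list_models  # drop cached_download (removed upstream)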
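
For the generation hunk at the bottom, a self-contained usage sketch; model_path here is a hypothetical stand-in for the pruned checkpoint directory the app saves earlier:

    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

    model_path = "./pruned-model"  # hypothetical output directory
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(model_path)
    generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
    print(generator("Hello, world", max_new_tokens=32)[0]["generated_text"])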