Spaces:
Runtime error
Update app.py
app.py
CHANGED
@@ -1,6 +1,6 @@
 import gradio as gr
-from transformers import AutoModel, AutoTokenizer, pipeline, AutoConfig
-from huggingface_hub import cached_download, hf_hub_url, list_models
+from transformers import AutoModel, AutoTokenizer, pipeline, AutoConfig, AutoModelForCausalLM
+from huggingface_hub import cached_download, hf_hub_url, list_models, create_repo, HfApi
 from transformers.modeling_utils import PreTrainedModel
 import requests
 import json
@@ -10,7 +10,6 @@ from io import BytesIO
 import base64
 import torch
 from torch.nn.utils import prune
-from transformers.models.auto import AutoModelForCausalLM  # Import for CausalLM

 # Function to fetch open-weight LLM models
 def fetch_open_weight_models():
@@ -18,20 +17,15 @@ def fetch_open_weight_models():
     return models

 # Function to prune a model using the "merge-kit" approach
-def prune_model(llm_model_name, target_size, output_dir):
+def prune_model(llm_model_name, target_size, hf_write_token, repo_name):
     try:
         # Load the LLM model and tokenizer
         llm_tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
         # Handle cases where the model is split into multiple safetensors
-
-
-
-
-            torch_dtype=torch.float16,  # Adjust dtype as needed
-            use_auth_token=None,
-        )
-        else:
-            llm_model = AutoModel.from_pretrained(llm_model_name)
+        llm_model = AutoModelForCausalLM.from_pretrained(
+            llm_model_name,
+            torch_dtype=torch.float16,  # Adjust dtype as needed
+        )

         # Get the model config
         config = AutoConfig.from_pretrained(llm_model_name)
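Note on the hunk above: AutoModelForCausalLM.from_pretrained resolves checkpoints sharded across multiple safetensors files on its own (via the model.safetensors.index.json index), which is why the branch falling back to AutoModel could be dropped. A minimal sketch of the converged loading step, with "gpt2" as a stand-in model id:

    # Sketch only; "gpt2" stands in for the user-supplied llm_model_name.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    llm_tokenizer = AutoTokenizer.from_pretrained("gpt2")
    llm_model = AutoModelForCausalLM.from_pretrained(
        "gpt2",
        torch_dtype=torch.float16,  # load weights in half precision
    )
    print(sum(p.numel() for p in llm_model.parameters()))  # total parameter count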
@@ -41,8 +35,12 @@ def prune_model(llm_model_name, target_size, output_dir):
         # Use merge-kit to prune the model
         pruned_model = merge_kit_prune(llm_model, target_num_parameters)

-        # Save the pruned model
-
+        # Save the pruned model to Hugging Face repository
+        api = HfApi()
+        repo_id = f"{hf_write_token}/{repo_name}"
+        create_repo(repo_id, token=hf_write_token, private=False, exist_ok=True)
+        pruned_model.push_to_hub(repo_id, use_auth_token=hf_write_token)
+        llm_tokenizer.push_to_hub(repo_id, use_auth_token=hf_write_token)

         # Create a visualization
         fig, ax = plt.subplots(figsize=(10, 5))
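One caveat on the upload step added above: a Hub repo_id is conventionally "{username}/{repo_name}", whereas the new code builds it from the write token itself. A hedged sketch of the same flow that derives the namespace from the token (assumes pruned_model, llm_tokenizer, hf_write_token, and repo_name are in scope; recent huggingface_hub/transformers releases prefer token= over the deprecated use_auth_token=):

    from huggingface_hub import HfApi, create_repo

    api = HfApi()
    username = api.whoami(token=hf_write_token)["name"]  # owner of the write token
    repo_id = f"{username}/{repo_name}"
    create_repo(repo_id, token=hf_write_token, private=False, exist_ok=True)
    pruned_model.push_to_hub(repo_id, token=hf_write_token)
    llm_tokenizer.push_to_hub(repo_id, token=hf_write_token)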
@@ -53,7 +51,7 @@ def prune_model(llm_model_name, target_size, output_dir):
         fig.savefig(buf, format="png")
         buf.seek(0)
         image_base64 = base64.b64encode(buf.read()).decode("utf-8")
-        return f"Pruned model saved to {
+        return f"Pruned model saved to Hugging Face Hub in repository {repo_id}", f"data:image/png;base64,{image_base64}"

     except Exception as e:
         return f"Error: {e}", None
@@ -61,23 +59,19 @@ def prune_model(llm_model_name, target_size, output_dir):
 # Merge-kit Pruning Function (adjust as needed)
 def merge_kit_prune(model: PreTrainedModel, target_num_parameters: int) -> PreTrainedModel:
     """Prunes a model using a merge-kit approach.
-
     Args:
         model (PreTrainedModel): The model to be pruned.
         target_num_parameters (int): The target number of parameters after pruning.
-
     Returns:
         PreTrainedModel: The pruned model.
     """
-
     # Define the pruning method
     pruning_method = "unstructured"

     # Calculate the pruning amount
-    amount = 1 - (target_num_parameters / model.
+    amount = 1 - (target_num_parameters / sum(p.numel() for p in model.parameters()))

-    # Prune the model using the selected method
-    # Example: If Llama uses specific layers, adjust the pruning logic here
+    # Prune the model using the selected method
     for name, module in model.named_modules():
         if isinstance(module, (torch.nn.Linear, torch.nn.Conv2d)):
             prune.random_unstructured(module, name="weight", amount=amount)
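For the pruning hunk above, the arithmetic reads the target as a fraction of weights to zero: amount = 1 - target/total. Worth noting that torch's random_unstructured only attaches a mask (weight_orig plus weight_mask), so tensor shapes and memory use are unchanged until prune.remove folds the mask in, and even then the zeros are stored densely. A sketch under those assumptions (model and target_num_parameters assumed in scope):

    import torch
    from torch.nn.utils import prune

    total = sum(p.numel() for p in model.parameters())
    amount = 1.0 - (target_num_parameters / total)  # fraction of weights to zero out

    for name, module in model.named_modules():
        if isinstance(module, (torch.nn.Linear, torch.nn.Conv2d)):
            prune.random_unstructured(module, name="weight", amount=amount)
            prune.remove(module, "weight")  # fold the mask into the weight tensor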
@@ -107,22 +101,25 @@ def create_interface():
             interactive=True,
         )

-        #
-
+        # Input for Hugging Face write token
+        hf_write_token = gr.Textbox(label="Hugging Face Write Token", placeholder="Enter your HF write token", interactive=True, type="password")
+
+        # Input for repository name
+        repo_name = gr.Textbox(label="Repository Name", placeholder="Enter the name of the repository", interactive=True)

-        # Output for
-
+        # Output for pruning status
+        pruning_status = gr.Textbox(label="Pruning Status", interactive=False)

         # Button to start pruning
         prune_button = gr.Button("Prune Model")

         # Output for visualization
-        visualization = gr.Image(label="Model Size Comparison")
+        visualization = gr.Image(label="Model Size Comparison", interactive=False)

         # Connect components
         prune_button.click(
             fn=prune_model,
-            inputs=[llm_model_name, target_size,
+            inputs=[llm_model_name, target_size, hf_write_token, repo_name],
             outputs=[pruning_status, visualization],
         )
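The wiring in this hunk maps Gradio components positionally onto the handler's arguments. A self-contained sketch of the same shape (component names shortened; fake_prune is a stand-in for prune_model):

    import gradio as gr

    def fake_prune(name, size, token, repo):
        # Stand-in handler: returns (status_text, image); no image here.
        return f"Would prune {name} down to {size}", None

    with gr.Blocks() as demo:
        name = gr.Textbox(label="Model Name")
        size = gr.Textbox(label="Target Size")
        token = gr.Textbox(label="Hugging Face Write Token", type="password")
        repo = gr.Textbox(label="Repository Name")
        status = gr.Textbox(label="Pruning Status", interactive=False)
        image = gr.Image(label="Model Size Comparison")
        gr.Button("Prune Model").click(
            fn=fake_prune,
            inputs=[name, size, token, repo],
            outputs=[status, image],
        )

    demo.launch()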
@@ -133,11 +130,11 @@ def create_interface():
         # Generate text button
         generate_button = gr.Button("Generate Text")

-        def generate_text(text,
+        def generate_text(text, repo_name):
             try:
                 # Load the pruned model and tokenizer
-                tokenizer = AutoTokenizer.from_pretrained(
-                model = AutoModelForCausalLM.from_pretrained(
+                tokenizer = AutoTokenizer.from_pretrained(repo_name, use_auth_token=hf_write_token)
+                model = AutoModelForCausalLM.from_pretrained(repo_name, use_auth_token=hf_write_token)

                 # Use the pipeline for text generation
                 generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
@@ -146,7 +143,7 @@ def create_interface():
             except Exception as e:
                 return f"Error: {e}"

-        generate_button.click(fn=generate_text, inputs=[text_input,
+        generate_button.click(fn=generate_text, inputs=[text_input, repo_name], outputs=text_output)

     return demo
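One thing to watch in the generation handler above: only text_input and repo_name are wired as event inputs, so the hf_write_token referenced inside generate_text resolves to the enclosing gr.Textbox component rather than the token string. A hedged sketch that passes the token through the event instead (token= assumed available; older transformers versions use use_auth_token=):

    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

    def generate_text(text, repo_id, token):
        tokenizer = AutoTokenizer.from_pretrained(repo_id, token=token)
        model = AutoModelForCausalLM.from_pretrained(repo_id, token=token)
        generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
        return generator(text, max_new_tokens=50)[0]["generated_text"]

    # wired as:
    # generate_button.click(
    #     fn=generate_text,
    #     inputs=[text_input, repo_name, hf_write_token],
    #     outputs=text_output,
    # )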