Spaces:

Tech-Meld
/

Smaller_is_Better

Runtime error

App Files Files Community

Tech-Meld committed on Jun 1, 2024

Commit

0445e3f

verified ·

1 Parent(s): 5c5e320

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -14

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import gradio as gr
 from transformers import AutoModel, AutoTokenizer, pipeline, AutoConfig, AutoModelForCausalLM
-from huggingface_hub import cached_download, hf_hub_url, list_models, create_repo, HfApi
 from transformers.modeling_utils import PreTrainedModel
 import requests
 import json
@@ -11,11 +11,11 @@ import base64
 import torch
 from torch.nn.utils import prune
 import subprocess
-# Function to fetch open-weight LLM models
-def fetch_open_weight_models():
-    models = list_models()
-    return models
 # Ensure sentencepiece is installed
 try:
@@ -23,8 +23,14 @@ try:
 except ImportError:
     subprocess.check_call(["pip", "install", "sentencepiece"])
 # Function to prune a model using the "merge-kit" approach
-def prune_model(llm_model_name, target_size, hf_write_token, repo_name):
     try:
         # Load the LLM model and tokenizer
         llm_tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
@@ -33,12 +39,18 @@ def prune_model(llm_model_name, target_size, hf_write_token, repo_name):
             torch_dtype=torch.float16,
         )
         # Get the model config
         config = AutoConfig.from_pretrained(llm_model_name)
         target_num_parameters = int(config.num_parameters * (target_size / 100))
         # Prune the model
-        pruned_model = merge_kit_prune(llm_model, target_num_parameters)
         # Save the pruned model
         api = HfApi()
@@ -47,6 +59,9 @@ def prune_model(llm_model_name, target_size, hf_write_token, repo_name):
         pruned_model.push_to_hub(repo_id, use_auth_token=hf_write_token)
         llm_tokenizer.push_to_hub(repo_id, use_auth_token=hf_write_token)
         # Create a visualization
         fig, ax = plt.subplots(figsize=(10, 5))
         ax.bar(["Original", "Pruned"], [config.num_parameters, pruned_model.num_parameters])
@@ -57,13 +72,16 @@ def prune_model(llm_model_name, target_size, hf_write_token, repo_name):
         buf.seek(0)
         image_base64 = base64.b64encode(buf.read()).decode("utf-8")
-        return f"Pruned model saved to Hugging Face Hub in repository {repo_id}", f"data:image/png;base64,{image_base64}", None
     except Exception as e:
-        return f"Error: {e}", None, None
 # Merge-kit Pruning Function (adjust as needed)
-def merge_kit_prune(model: PreTrainedModel, target_num_parameters: int) -> PreTrainedModel:
     """Prunes a model using a merge-kit approach.
     Args:
         model (PreTrainedModel): The model to be pruned.
@@ -75,10 +93,11 @@ def merge_kit_prune(model: PreTrainedModel, target_num_parameters: int) -> PreTr
     pruning_method = "unstructured"
     # Calculate the pruning amount
-    amount = 1 - (target_num_parameters / sum(p.numel() for p in model.parameters()))
     # Prune the model using the selected method
-    for name, module in model.named_modules():
         if isinstance(module, (torch.nn.Linear, torch.nn.Conv2d)):
             prune.random_unstructured(module, name="weight", amount=amount)
@@ -101,8 +120,18 @@ def create_interface():
         pruning_status = gr.Textbox(label="Pruning Status", interactive=False)
         prune_button = gr.Button("Prune Model")
         visualization = gr.Image(label="Model Size Comparison", interactive=False)
-        prune_button.click(fn=prune_model, inputs=[llm_model_name, target_size, hf_write_token, repo_name], outputs=[pruning_status, visualization])
         text_input = gr.Textbox(label="Input Text")
         text_output = gr.Textbox(label="Generated Text")
@@ -124,4 +153,4 @@ def create_interface():
 # Create and launch the Gradio interface
 demo = create_interface()
-demo.launch(share=True)

 import gradio as gr
 from transformers import AutoModel, AutoTokenizer, pipeline, AutoConfig, AutoModelForCausalLM
+from huggingface_hub import create_repo, HfApi, list_models
 from transformers.modeling_utils import PreTrainedModel
 import requests
 import json
 import torch
 from torch.nn.utils import prune
 import subprocess
+from tqdm import tqdm
+import logging
+# Setup logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 # Ensure sentencepiece is installed
 try:
 except ImportError:
     subprocess.check_call(["pip", "install", "sentencepiece"])
+# Function to fetch open-weight LLM models
+def fetch_open_weight_models():
+    models = list_models()
+    return models
 # Function to prune a model using the "merge-kit" approach
+def prune_model(llm_model_name, target_size, hf_write_token, repo_name, progress=gr.Progress(track_tqdm=True)):
+    log_messages = []
     try:
         # Load the LLM model and tokenizer
         llm_tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
             torch_dtype=torch.float16,
         )
+        log_messages.append("Model and tokenizer loaded successfully.")
+        logging.info("Model and tokenizer loaded successfully.")
         # Get the model config
         config = AutoConfig.from_pretrained(llm_model_name)
         target_num_parameters = int(config.num_parameters * (target_size / 100))
         # Prune the model
+        pruned_model = merge_kit_prune(llm_model, target_num_parameters, progress)
+        log_messages.append("Model pruned successfully.")
+        logging.info("Model pruned successfully.")
         # Save the pruned model
         api = HfApi()
         pruned_model.push_to_hub(repo_id, use_auth_token=hf_write_token)
         llm_tokenizer.push_to_hub(repo_id, use_auth_token=hf_write_token)
+        log_messages.append(f"Pruned model saved to Hugging Face Hub in repository {repo_id}")
+        logging.info(f"Pruned model saved to Hugging Face Hub in repository {repo_id}")
         # Create a visualization
         fig, ax = plt.subplots(figsize=(10, 5))
         ax.bar(["Original", "Pruned"], [config.num_parameters, pruned_model.num_parameters])
         buf.seek(0)
         image_base64 = base64.b64encode(buf.read()).decode("utf-8")
+        return f"Pruned model saved to Hugging Face Hub in repository {repo_id}", f"data:image/png;base64,{image_base64}", "\n".join(log_messages)
     except Exception as e:
+        error_message = f"Error: {e}"
+        log_messages.append(error_message)
+        logging.error(error_message)
+        return error_message, None, "\n".join(log_messages)
 # Merge-kit Pruning Function (adjust as needed)
+def merge_kit_prune(model: PreTrainedModel, target_num_parameters: int, progress) -> PreTrainedModel:
     """Prunes a model using a merge-kit approach.
     Args:
         model (PreTrainedModel): The model to be pruned.
     pruning_method = "unstructured"
     # Calculate the pruning amount
+    total_params = sum(p.numel() for p in model.parameters())
+    amount = 1 - (target_num_parameters / total_params)
     # Prune the model using the selected method
+    for name, module in tqdm(model.named_modules(), desc="Pruning", file=sys.stdout):
         if isinstance(module, (torch.nn.Linear, torch.nn.Conv2d)):
             prune.random_unstructured(module, name="weight", amount=amount)
         pruning_status = gr.Textbox(label="Pruning Status", interactive=False)
         prune_button = gr.Button("Prune Model")
         visualization = gr.Image(label="Model Size Comparison", interactive=False)
+        progress_bar = gr.Progress()
+        logs_button = gr.Button("Show Logs")
+        logs_output = gr.Textbox(label="Logs", interactive=False)
+        def show_logs():
+            with open("pruning.log", "r") as log_file:
+                logs = log_file.read()
+            return logs
+        logs_button.click(fn=show_logs, outputs=logs_output)
+        prune_button.click(fn=prune_model, inputs=[llm_model_name, target_size, hf_write_token, repo_name, progress_bar], outputs=[pruning_status, visualization, logs_output])
         text_input = gr.Textbox(label="Input Text")
         text_output = gr.Textbox(label="Generated Text")
 # Create and launch the Gradio interface
 demo = create_interface()
+demo.launch(share=True)