Spaces:

Steelskull
/

Vis_Diff

Running

App Files Files Community

Steelskull committed on Jan 21

Commit

893fdc7

verified ·

1 Parent(s): 3aed6d3

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -29

app.py CHANGED Viewed

@@ -1,9 +1,8 @@
 import io
 import torch
 import pandas as pd
-import numpy as np
-import matplotlib.pyplot as plt
 import seaborn as sns
 import plotly.graph_objects as go
 import gradio as gr
 import PIL.Image
@@ -15,17 +14,19 @@ sns.set_theme(style="whitegrid")
 def calculate_weight_diff(base_weight, chat_weight):
     """Calculates the mean absolute difference between two tensors."""
-    return torch.abs(base_weight - chat_weight).mean().item()
 def calculate_layer_diffs(base_model, chat_model):
     """Iterates through layers and calculates differences for specific projections."""
     layer_diffs = []
-    # We zip the layers to iterate through them simultaneously
     layers = zip(base_model.model.layers, chat_model.model.layers)
     total_layers = len(base_model.model.layers)
-    # List of components we want to track
     components_to_track = [
         ('input_layernorm', lambda l: l.input_layernorm.weight),
         ('self_attn_q_proj', lambda l: l.self_attn.q_proj.weight),
@@ -46,7 +47,6 @@ def calculate_layer_diffs(base_model, chat_model):
                 val = calculate_weight_diff(getter(base_layer), getter(chat_layer))
                 layer_data[name] = val
             except AttributeError:
-                # Handle cases where architecture might differ slightly (e.g., bias terms)
                 layer_data[name] = 0.0
         layer_diffs.append(layer_data)
@@ -62,11 +62,9 @@ def visualize_2d_heatmap(layer_diffs, base_model_name, chat_model_name):
     components = list(layer_diffs[0].keys())
     num_components = len(components)
-    # Dynamically adjust figure size
     height = max(8, num_layers / 6)
     width = max(20, num_components * 2.5)
-    # Logic for subplot arrangement
     if num_components > 6:
         nrows = 2
         ncols = (num_components + 1) // 2
@@ -79,11 +77,9 @@ def visualize_2d_heatmap(layer_diffs, base_model_name, chat_model_name):
     fig.suptitle(f"Weight Differences: {base_model_name} vs {chat_model_name}", fontsize=16, y=0.98)
-    # Font sizing logic
     tick_font_size = max(6, min(10, 300 / num_layers))
     for i, component in enumerate(components):
-        # Extract data for this specific component across all layers
         data = [[row[component]] for row in layer_diffs]
         sns.heatmap(data,
@@ -97,12 +93,9 @@ def visualize_2d_heatmap(layer_diffs, base_model_name, chat_model_name):
         axs[i].set_title(component, fontsize=12, fontweight='bold')
         axs[i].set_yticks(range(num_layers))
         axs[i].set_yticklabels(range(num_layers), fontsize=tick_font_size)
-        axs[i].set_xticks([]) # Hide x-axis ticks for the single column heatmap
-        axs[i].invert_yaxis() # Layer 0 at bottom or top? Usually 0 is bottom in diagrams, but top in matrices.
-                              # Let's keep 0 at top (standard matrix view) or remove invert for 0 at bottom.
-                              # Standard heatmap has index 0 at top.
-    # Remove empty subplots
     for j in range(i + 1, len(axs)):
         fig.delaxes(axs[j])
@@ -114,19 +107,14 @@ def visualize_2d_heatmap(layer_diffs, base_model_name, chat_model_name):
     plt.close(fig)
     return PIL.Image.open(buf)
-def visualize_3d_surface(layer_diffs):
-    """Generates an interactive 3D Surface plot using Plotly."""
     if not layer_diffs:
-        return None
-    # Convert list of dicts to DataFrame for easier handling
     df = pd.DataFrame(layer_diffs)
-    # X axis: Components
     x_labels = df.columns.tolist()
-    # Y axis: Layers
     y_labels = df.index.tolist()
-    # Z axis: Values (Transposed because Plotly expects Z[y][x])
     z_data = df.values
     fig = go.Figure(data=[go.Surface(z=z_data, x=x_labels, y=y_labels, colorscale='Viridis')])
@@ -140,20 +128,25 @@ def visualize_3d_surface(layer_diffs):
             xaxis=dict(tickangle=45),
         ),
         autosize=True,
-        height=800,
         margin=dict(l=65, r=50, b=65, t=90)
     )
-    return fig
 def process_models(base_name, chat_name, hf_token):
     try:
         print(f"Loading {base_name}...")
         base_model = AutoModelForCausalLM.from_pretrained(
             base_name,
             torch_dtype=torch.bfloat16,
             token=hf_token,
-            device_map="cpu", # Force CPU to avoid GPU OOM during comparison if models are large
             trust_remote_code=True
         )
@@ -174,11 +167,13 @@ def process_models(base_name, chat_name, hf_token):
         torch.cuda.empty_cache()
         img_2d = visualize_2d_heatmap(diffs, base_name, chat_name)
-        plot_3d = visualize_3d_surface(diffs)
-        return img_2d, plot_3d
     except Exception as e:
         raise gr.Error(f"Error processing models: {str(e)}")
 # --- Gradio UI Layout ---
@@ -201,7 +196,8 @@ with gr.Blocks(title="Model Diff Visualizer") as demo:
     with gr.Row():
         with gr.Column():
             gr.Markdown("### 3D Interactive Landscape")
-            output_3d = gr.Plot(label="3D Visualization")
     submit_btn.click(
         fn=process_models,

 import io
 import torch
 import pandas as pd
 import seaborn as sns
+import matplotlib.pyplot as plt
 import plotly.graph_objects as go
 import gradio as gr
 import PIL.Image
 def calculate_weight_diff(base_weight, chat_weight):
     """Calculates the mean absolute difference between two tensors."""
+    # Detach both tensors and move them to CPU so the difference is computed off the GPU
+    b_w = base_weight.detach().cpu()
+    c_w = chat_weight.detach().cpu()
+    return torch.abs(b_w - c_w).mean().item()
 def calculate_layer_diffs(base_model, chat_model):
     """Iterates through layers and calculates differences for specific projections."""
     layer_diffs = []
     layers = zip(base_model.model.layers, chat_model.model.layers)
     total_layers = len(base_model.model.layers)
+    # Components to track
     components_to_track = [
         ('input_layernorm', lambda l: l.input_layernorm.weight),
         ('self_attn_q_proj', lambda l: l.self_attn.q_proj.weight),
                 val = calculate_weight_diff(getter(base_layer), getter(chat_layer))
                 layer_data[name] = val
             except AttributeError:
                 layer_data[name] = 0.0
         layer_diffs.append(layer_data)
     components = list(layer_diffs[0].keys())
     num_components = len(components)
     height = max(8, num_layers / 6)
     width = max(20, num_components * 2.5)
     if num_components > 6:
         nrows = 2
         ncols = (num_components + 1) // 2
     fig.suptitle(f"Weight Differences: {base_model_name} vs {chat_model_name}", fontsize=16, y=0.98)
     tick_font_size = max(6, min(10, 300 / num_layers))
     for i, component in enumerate(components):
         data = [[row[component]] for row in layer_diffs]
         sns.heatmap(data,
         axs[i].set_title(component, fontsize=12, fontweight='bold')
         axs[i].set_yticks(range(num_layers))
         axs[i].set_yticklabels(range(num_layers), fontsize=tick_font_size)
+        axs[i].set_xticks([])
+        axs[i].invert_yaxis()
     for j in range(i + 1, len(axs)):
         fig.delaxes(axs[j])
     plt.close(fig)
     return PIL.Image.open(buf)
+def generate_3d_html(layer_diffs):
+    """Generates an interactive 3D Surface plot as an HTML string."""
     if not layer_diffs:
+        return "<p>No data to display</p>"
     df = pd.DataFrame(layer_diffs)
     x_labels = df.columns.tolist()
     y_labels = df.index.tolist()
     z_data = df.values
     fig = go.Figure(data=[go.Surface(z=z_data, x=x_labels, y=y_labels, colorscale='Viridis')])
             xaxis=dict(tickangle=45),
         ),
         autosize=True,
+        height=700,
         margin=dict(l=65, r=50, b=65, t=90)
     )
+    # Return HTML string instead of Figure object to avoid Gradio schema bugs
+    return fig.to_html(include_plotlyjs='cdn', full_html=False)
 def process_models(base_name, chat_name, hf_token):
+    # Reject empty model names up front with a user-facing error instead of failing during load
+    if not base_name or not chat_name:
+        raise gr.Error("Please provide both model names.")
     try:
         print(f"Loading {base_name}...")
         base_model = AutoModelForCausalLM.from_pretrained(
             base_name,
             torch_dtype=torch.bfloat16,
             token=hf_token,
+            device_map="cpu",
             trust_remote_code=True
         )
         torch.cuda.empty_cache()
         img_2d = visualize_2d_heatmap(diffs, base_name, chat_name)
+        html_3d = generate_3d_html(diffs)
+        return img_2d, html_3d
     except Exception as e:
+        import traceback
+        traceback.print_exc()
         raise gr.Error(f"Error processing models: {str(e)}")
 # --- Gradio UI Layout ---
     with gr.Row():
         with gr.Column():
             gr.Markdown("### 3D Interactive Landscape")
+            # Using HTML component avoids Pydantic/Gradio schema validation bugs
+            output_3d = gr.HTML(label="3D Visualization")
     submit_btn.click(
         fn=process_models,