Spaces:

ExplainabiliyForAATeam
/

explainability-tool-for-aa

Running

App Files Files Community

Anisha Bhatnagar committed on Sep 9

Commit

ce95080

1 Parent(s): 40fde16

plot zoom working

Browse files

Files changed (3) hide show

app.py +73 -7
utils/interp_space_utils.py +38 -46
utils/visualizations.py +28 -53

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import gradio as gr
 import json
 import os
 os.environ["GRADIO_TEMP_DIR"] = "./datasets/temp"  # Set a custom temp directory for Gradio
@@ -55,7 +55,7 @@ def validate_ground_truth(gt1, gt2, gt3):
     return index, f"Candidate {index+1} is marked as the ground truth author."
-def app(share=False):#, use_cluster_feats=False):
     instances, instance_ids = get_instances(cfg['instances_to_explain_path'])
     interp      = load_interp_space(cfg)
@@ -392,9 +392,6 @@ def app(share=False):#, use_cluster_feats=False):
         visible_zoomed_authors = gr.State()
         gr.HTML(instruction_callout("Zoom in on the plot to select a set of background authors and see the presence of the top features from this set in candidate and mystery authors."))
-        # State to store precomputed regions
-        precomputed_regions_state = gr.State()
         # Add this after the plot generation
         gr.HTML("""
@@ -413,6 +410,17 @@ def app(share=False):#, use_cluster_feats=False):
                 Select a precomputed region to analyze, or zoom manually on the plot above
             </div>
         """)
         precomputed_regions_radio = gr.Radio(
             choices=["None"],
@@ -420,6 +428,8 @@ def app(share=False):#, use_cluster_feats=False):
             label="Precomputed Regions",
             info="Select a region to automatically zoom and analyze"
         )
         with gr.Row():
@@ -471,9 +481,65 @@ def app(share=False):#, use_cluster_feats=False):
         )
         precomputed_regions_radio.change(
-            fn=lambda region_name, precomputed_regions: trigger_precomputed_region(region_name, precomputed_regions),
             inputs=[precomputed_regions_radio, precomputed_regions_state],
-            outputs=[axis_ranges]
         )
         axis_ranges.change(

 import gradio as gr
 import json
+import ast
 import os
 os.environ["GRADIO_TEMP_DIR"] = "./datasets/temp"  # Set a custom temp directory for Gradio
     return index, f"Candidate {index+1} is marked as the ground truth author."
+def app(share=False):
     instances, instance_ids = get_instances(cfg['instances_to_explain_path'])
     interp      = load_interp_space(cfg)
         visible_zoomed_authors = gr.State()
         gr.HTML(instruction_callout("Zoom in on the plot to select a set of background authors and see the presence of the top features from this set in candidate and mystery authors."))
         # Add this after the plot generation
         gr.HTML("""
                 Select a precomputed region to analyze, or zoom manually on the plot above
             </div>
         """)
+        # State to store precomputed regions
+        precomputed_regions_state = gr.Textbox(
+            visible=True,  # Keep it visible to DOM
+            elem_id="precomputed-regions",
+            interactive=True,
+            show_label=False,
+            container=False,
+            value="",
+            elem_classes=["hidden-textbox"]  # Add custom CSS class
+        )
         precomputed_regions_radio = gr.Radio(
             choices=["None"],
             label="Precomputed Regions",
             info="Select a region to automatically zoom and analyze"
         )
+        # Add a hidden HTML component for JavaScript execution
+        js_trigger = gr.HTML(visible=False, elem_id="js-trigger")
         with gr.Row():
         )
         precomputed_regions_radio.change(
+            fn=lambda region_name, precomputed_regions_json: trigger_precomputed_region(region_name, ast.literal_eval(precomputed_regions_json)),
             inputs=[precomputed_regions_radio, precomputed_regions_state],
+            outputs=[axis_ranges],
+            js="""
+            function(region_name, regions_json_str) {
+                console.log('=== ZOOM DEBUG START ===');
+                console.log('Region selected:', region_name);
+                console.log('Regions JSON string received:', typeof regions_json_str);
+                // Check if Plotly is available
+                console.log('Plotly available:', typeof window.Plotly);
+                // Find plot element
+                const plotDiv = document.querySelector('#feature-plot .js-plotly-plot');
+                console.log('Plot element found:', !!plotDiv);
+                if (plotDiv) {
+                    console.log('Plot element exists');
+                }
+                // Try to parse regions
+                try {
+                    const precomputed_regions = JSON.parse(regions_json_str);
+                    console.log('Regions parsed successfully');
+                    console.log('Available regions:', Object.keys(precomputed_regions));
+                    if (region_name !== "None" && precomputed_regions[region_name]) {
+                        const region = precomputed_regions[region_name];
+                        const bbox = region.bbox;
+                        console.log('Bbox to apply:', bbox);
+                        if (window.Plotly && plotDiv) {
+                            console.log('Calling Plotly.relayout...');
+                            const update = {
+                                'xaxis.range': [bbox.xaxis[0], bbox.xaxis[1]],
+                                'yaxis.range': [bbox.yaxis[0], bbox.yaxis[1]],
+                                'xaxis.autorange': false,
+                                'yaxis.autorange': false
+                            };
+                            console.log('Update object:', update);
+                            window.Plotly.relayout(plotDiv, update)
+                                .then(() => console.log('✓ Relayout completed successfully'))
+                                .catch(err => console.log('✗ Relayout failed:', err));
+                        } else {
+                            console.log('Missing requirements - Plotly:', !!window.Plotly, 'PlotDiv:', !!plotDiv);
+                        }
+                    } else {
+                        console.log('Region not found or None selected');
+                    }
+                } catch(e) {
+                    console.log('Error in region processing:', e);
+                }
+                console.log('=== ZOOM DEBUG END ===');
+                return [region_name, regions_json_str];
+            }
+            """
         )
         axis_ranges.change(

utils/interp_space_utils.py CHANGED Viewed

@@ -828,7 +828,7 @@ def compute_predicted_author(task_authors_df: pd.DataFrame, col_name: str) -> in
     return predicted_author
-def compute_precomputed_regions(bg_proj, bg_ids, q_proj, c_proj, n_neighbors=7):
     """
     Compute precomputed regions for mystery author and candidates.
@@ -857,14 +857,19 @@ def compute_precomputed_regions(bg_proj, bg_ids, q_proj, c_proj, n_neighbors=7):
         print(f"Cache miss. Computing regions.")
     regions = {}
     # All points for distance calculation (mystery + candidates + background)
     all_points = np.vstack([q_proj.reshape(1, -1), c_proj, bg_proj])
     all_ids = ['mystery'] + [f'candidate_{i}' for i in range(3)] + bg_ids
-    def get_region_around_point(center_point, region_name):
         """Get region around a specific point"""
         # Calculate distances from center point to all background authors
-        distances = euclidean_distances([center_point], bg_proj)[0]
         # Get indices of closest neighbors
         closest_indices = np.argsort(distances)[:n_neighbors]
@@ -872,7 +877,14 @@ def compute_precomputed_regions(bg_proj, bg_ids, q_proj, c_proj, n_neighbors=7):
         closest_points = bg_proj[closest_indices]
         # Include the center point in the region
-        region_points = np.vstack([center_point.reshape(1, -1), closest_points])
         # Calculate bounding box with some padding
         x_min, x_max = region_points[:, 0].min(), region_points[:, 0].max()
@@ -898,10 +910,11 @@ def compute_precomputed_regions(bg_proj, bg_ids, q_proj, c_proj, n_neighbors=7):
         """Get region around the midpoint between two points"""
         midpoint = (point1 + point2) / 2
         region_name = f"{name1} & {name2}"
-        return get_region_around_point(midpoint, region_name)
     # Region 1: Around mystery author only
-    print(f"Mystery author: {q_proj}")
     regions["Mystery Author Neighborhood"] = get_region_around_point(
         q_proj, "Mystery Author"
     )
@@ -913,10 +926,8 @@ def compute_precomputed_regions(bg_proj, bg_ids, q_proj, c_proj, n_neighbors=7):
         )
     # Regions 5-7: Between mystery and each candidate
     for i in range(3):
         region_name = f"Mystery & Candidate {i+1}"
-        print(q_proj, c_proj[i])
         regions[region_name] = get_region_between_points(
             q_proj, c_proj[i], "Mystery", f"Candidate {i+1}"
         )
@@ -939,46 +950,27 @@ def compute_precomputed_regions(bg_proj, bg_ids, q_proj, c_proj, n_neighbors=7):
         task_centroid, "All Task Authors"
     )
-    # Region 12: Wider region encompassing all task authors
-    all_task_points = np.vstack([q_proj, c_proj])
-    task_centroid = np.mean(all_task_points, axis=0)
-    # Find distances from task centroid to all background authors
-    distances_from_centroid = euclidean_distances([task_centroid], bg_proj)[0]
-    # Take a larger number of neighbors (e.g., 20) for the expanded region
-    n_expanded = min(20, len(bg_ids))  # Don't exceed available authors
-    expanded_indices = np.argsort(distances_from_centroid)[:n_expanded]
-    expanded_authors = [bg_ids[i] for i in expanded_indices]
-    expanded_points = bg_proj[expanded_indices]
-    # Include all task points in the bounding box calculation
-    all_region_points = np.vstack([all_task_points, expanded_points])
-    x_min, x_max = all_region_points[:, 0].min(), all_region_points[:, 0].max()
-    y_min, y_max = all_region_points[:, 1].min(), all_region_points[:, 1].max()
-    # Add moderate padding
-    x_padding = (x_max - x_min) * 0.15
-    y_padding = (y_max - y_min) * 0.15
-    expanded_bbox = {
-        'xaxis': [x_min - x_padding, x_max + x_padding],
-        'yaxis': [y_min - y_padding, y_max + y_padding]
-    }
-    regions["Expanded Task Region"] = {
-        'bbox': expanded_bbox,
-        'authors': expanded_authors,
-        'center_point': task_centroid,
-        'description': f"Expanded region around all task authors ({len(expanded_authors)} authors)"
-    }
-    cache[key] = regions
     with open(REGION_CACHE, 'wb') as f:
         pickle.dump(cache, f)
-    return regions
 if __name__ == "__main__":
     background_corpus = pd.read_pickle('../datasets/luar_interp_space_cluster_19/train_authors.pkl')

     return predicted_author
+def compute_precomputed_regions(bg_proj, bg_ids, q_proj, c_proj, mystery_id, candidate_ids, n_neighbors=7):
     """
     Compute precomputed regions for mystery author and candidates.
         print(f"Cache miss. Computing regions.")
     regions = {}
     # All points for distance calculation (mystery + candidates + background)
     all_points = np.vstack([q_proj.reshape(1, -1), c_proj, bg_proj])
     all_ids = ['mystery'] + [f'candidate_{i}' for i in range(3)] + bg_ids
+    def get_region_around_point(center_point, region_name, include_points=None):
         """Get region around a specific point"""
+        # Ensure center_point is 2D for euclidean_distances
+        if center_point.ndim == 1:
+            center_point = center_point.reshape(1, -1)
         # Calculate distances from center point to all background authors
+        distances = euclidean_distances(center_point, bg_proj)[0]
         # Get indices of closest neighbors
         closest_indices = np.argsort(distances)[:n_neighbors]
         closest_points = bg_proj[closest_indices]
         # Include the center point in the region
+        # region_points = np.vstack([center_point.reshape(1, -1), closest_points])
+        if include_points is not None:
+            region_points = include_points.copy()
+            # Add center point and closest background authors
+            region_points = np.vstack([region_points, center_point, closest_points])
+        else:
+            # Standard case - just center point and neighbors
+            region_points = np.vstack([center_point, closest_points])
         # Calculate bounding box with some padding
         x_min, x_max = region_points[:, 0].min(), region_points[:, 0].max()
         """Get region around the midpoint between two points"""
         midpoint = (point1 + point2) / 2
         region_name = f"{name1} & {name2}"
+        # Include both original points in the region
+        include_points = np.vstack([point1.reshape(1, -1), point2.reshape(1, -1)])
+        return get_region_around_point(midpoint, region_name, include_points=include_points)
     # Region 1: Around mystery author only
     regions["Mystery Author Neighborhood"] = get_region_around_point(
         q_proj, "Mystery Author"
     )
         )
     # Regions 5-7: Between mystery and each candidate
     for i in range(3):
         region_name = f"Mystery & Candidate {i+1}"
         regions[region_name] = get_region_between_points(
             q_proj, c_proj[i], "Mystery", f"Candidate {i+1}"
         )
         task_centroid, "All Task Authors"
     )
+    def serialize_numpy_dtypes(obj):
+        if isinstance(obj, np.ndarray):
+            return obj.tolist()
+        elif isinstance(obj, (np.float32, np.float64)):
+            return float(obj)
+        elif isinstance(obj, (np.int32, np.int64)):
+            return int(obj)
+        elif isinstance(obj, dict):
+            return {key: serialize_numpy_dtypes(value) for key, value in obj.items()}
+        elif isinstance(obj, list):
+            return [serialize_numpy_dtypes(item) for item in obj]
+        else:
+            return obj
+    serializable_regions = serialize_numpy_dtypes(regions)
+    response = json.dumps(serializable_regions, default=str)
+    cache[key] = response
     with open(REGION_CACHE, 'wb') as f:
         pickle.dump(cache, f)
+    return response
 if __name__ == "__main__":
     background_corpus = pd.read_pickle('../datasets/luar_interp_space_cluster_19/train_authors.pkl')

utils/visualizations.py CHANGED Viewed

@@ -515,12 +515,16 @@ def visualize_clusters_plotly(iid, cfg, instances, model_radio, custom_model_inp
     bg_ids_for_regions = bg_ids[4:]  # Background IDs
     # Compute precomputed regions
     precomputed_regions = compute_precomputed_regions(
-        bg_proj_for_regions, bg_ids_for_regions, q_proj, c_proj
     )
     # Create choices for radio buttons
-    region_choices = ["None"] + list(precomputed_regions.keys())
     print('Done processing....')
@@ -537,60 +541,31 @@ def visualize_clusters_plotly(iid, cfg, instances, model_radio, custom_model_inp
     )
     # return fig, update(choices=feature_list, value=feature_list[0]),feature_list
-def extract_cluster_key(display_label: str) -> int:
-    """
-    Given a dropdown label like
-      "Cluster 5 (closest to mystery author; closest to Candidate 1 author)"
-    returns the integer 5.
-    """
-    m = re.match(r"Cluster\s+(\d+)", display_label)
-    if not m:
-        raise ValueError(f"Unrecognized cluster label: {display_label}")
-    return int(m.group(1))
 def trigger_precomputed_region(region_name, precomputed_regions):
     """
     Simulate a zoom event for a precomputed region.
     Returns the JSON payload that would be sent to axis_ranges.
     """
     print(f"[INFO] Triggering precomputed region: {region_name}")
-    print(f"Available regions: {list(precomputed_regions.keys())}")
-    if region_name == "None" or region_name not in precomputed_regions:
-        return ""
-    region = precomputed_regions[region_name]
-    payload = region['bbox']
-    json_payload = {
-        'xaxis': [float(payload['xaxis'][0]), float(payload['xaxis'][1])],
-        'yaxis': [float(payload['yaxis'][0]), float(payload['yaxis'][1])]
-    }
-    return json.dumps(json_payload)
-# When a cluster is selected, split features and populate radio buttons
-def on_cluster_change(selected_cluster, style_map):
-    cluster_key = extract_cluster_key(selected_cluster)
-    all_feats = style_map[cluster_key]
-    llm_feats, g2v_feats = split_features(all_feats)
-    # print(f"Selected cluster: {selected_cluster} ({cluster_key})")
-    # print(f"LLM features: {llm_feats}")
-    # Add "None" as a default selectable option
-    llm_feats = ["None"] + llm_feats
-    # filter out any g2v feature without a shorthand
-    filtered_g2v = []
-    for feat in g2v_feats:
-        if get_shorthand(feat) is None:
-            print(f"Skipping Gram2Vec feature without shorthand: {feat}")
-        else:
-            filtered_g2v.append(feat)
-    # Add "None" as a default selectable option
-    filtered_g2v = ["None"] + filtered_g2v
-    return (
-        gr.update(choices=llm_feats, value=llm_feats[0]),
-        gr.update(choices=filtered_g2v, value=filtered_g2v[0]),
-        llm_feats
-    )

     bg_ids_for_regions = bg_ids[4:]  # Background IDs
     # Compute precomputed regions
+    mystery_id = task_authors_df['authorID'].iloc[0]  # Mystery author ID
+    candidate_ids = task_authors_df['authorID'].iloc[1:4].tolist()  # 3 candidate IDs
     precomputed_regions = compute_precomputed_regions(
+        bg_proj_for_regions, bg_ids_for_regions, q_proj, c_proj, mystery_id, candidate_ids
     )
     # Create choices for radio buttons
+    pc=json.loads(precomputed_regions)
+    region_choices = ["None"] + list(pc.keys())
     print('Done processing....')
     )
     # return fig, update(choices=feature_list, value=feature_list[0]),feature_list
 def trigger_precomputed_region(region_name, precomputed_regions):
     """
     Build the axis-range payload that zooms the plot to a precomputed region.

     Args:
         region_name: Label of the selected region; the string "None" means
             that no region is selected.
         precomputed_regions: Mapping of region name to region data, where each
             region carries a 'bbox' entry with 'xaxis' and 'yaxis' [min, max]
             pairs. A JSON string (or bytes) encoding such a mapping is also
             accepted and decoded here.

     Returns:
         A JSON string of the form {"xaxis": [lo, hi], "yaxis": [lo, hi]}, or
         "" when nothing is selected, the region is unknown, or the region
         data is empty or malformed.
     """
     print(f"[INFO] Triggering precomputed region: {region_name}")

     # Nothing to zoom to: no selection, or no regions available yet
     # (None, empty dict, or an empty string).
     if region_name == "None" or not precomputed_regions:
         return ""

     try:
         # Accept the JSON-encoded form as well as an already-decoded mapping.
         if isinstance(precomputed_regions, (str, bytes, bytearray)):
             precomputed_regions = json.loads(precomputed_regions)

         if region_name not in precomputed_regions:
             return ""

         bbox = precomputed_regions[region_name]['bbox']
         # Coerce to plain floats so the payload is always JSON-serializable.
         json_payload = {
             axis: [float(bbox[axis][0]), float(bbox[axis][1])]
             for axis in ('xaxis', 'yaxis')
         }
     except (KeyError, IndexError, TypeError, ValueError) as e:
         # Best effort: malformed region data must not break the UI callback.
         # json.JSONDecodeError is a ValueError subclass, so bad JSON lands here.
         print(f"[ERROR] Failed to trigger precomputed region: {e}")
         return ""

     return json.dumps(json_payload)