Spaces:

ExplainabiliyForAATeam
/

explainability-tool-for-aa

Running

App Files Files Community

peter-zeng committed on Sep 22

Commit

a6ee680

1 Parent(s): 5d8a39b

Revert "modified to use mystery + predicted"

Browse files

This reverts commit ca61898a61c4be1dc7cfdcf034e63561228a5915.
remove changes gram2vec

Files changed (1) hide show

utils/visualizations.py +26 -57

utils/visualizations.py CHANGED Viewed

@@ -10,7 +10,7 @@ import plotly.graph_objects as go
 from plotly.colors import sample_colorscale
 from gradio import update
 import re
-from utils.interp_space_utils import compute_clusters_style_representation_3, compute_clusters_g2v_representation, compute_precomputed_regions, compute_task_only_g2v_similarity
 from utils.llm_feat_utils import split_features
 from utils.gram2vec_feat_utils import get_shorthand, get_fullform
 from gram2vec.feature_locator import find_feature_spans
@@ -204,12 +204,11 @@ def load_interp_space(cfg):
 # Function to process G2V features and create display choices
 def format_g2v_features_for_display(g2v_features_with_scores):
     """
-    Convert G2V features with a numeric score into display format for Gradio radio buttons.
-    The label uses S= for a generic similarity score (not Z).
     Args:
         g2v_features_with_scores: List of tuples like:
-            [('None', None), ('Feature Name', score), ...]
     Returns:
         tuple: (display_choices, original_values)
@@ -219,21 +218,21 @@ def format_g2v_features_for_display(g2v_features_with_scores):
     for item in g2v_features_with_scores:
         if len(item) == 2:
-            feature_name, score = item
             # Handle None case
-            if feature_name == "None" or score is None:
                 display_choices.append("None")
                 original_values.append("None")
             else:
                 # Convert numpy float to regular float if needed
-                if hasattr(score, 'item'):
-                    score = float(score.item())
                 else:
-                    score = float(score)
-                # Create display string with similarity score
-                display_string = f"{feature_name}"
                 display_choices.append(display_string)
                 original_values.append(feature_name)
         else:
@@ -276,18 +275,17 @@ def handle_zoom(event_json, bg_proj, bg_lbls, clustered_authors_df, task_authors
     print(f"Task authors: {len(task_authors_df)}, Clustered authors: {len(clustered_authors_df)}")
     merged_authors_df = pd.concat([task_authors_df, clustered_authors_df])
     print(f"Merged authors DataFrame:\n{len(merged_authors_df)}")
-    style_analysis_response = {'features': [], 'spans': []}
-    # style_analysis_response = compute_clusters_style_representation_3(
-    #     background_corpus_df=merged_authors_df,
-    #     cluster_ids=visible_authors,
-    #     cluster_label_clm_name='authorID',
-    # )
     llm_feats = ['None'] + style_analysis_response['features']
     merged_authors_df = pd.concat([task_authors_df, clustered_authors_df])
-    # Default: contrastive Gram2Vec features
     g2v_feats = compute_clusters_g2v_representation(
         background_corpus_df=merged_authors_df,
         author_ids=visible_authors,
@@ -295,34 +293,6 @@ def handle_zoom(event_json, bg_proj, bg_lbls, clustered_authors_df, task_authors
         features_clm_name='g2v_vector'
     )
-    # If both Mystery and the predicted candidate are inside the zoom, switch to task-only similarity
-    task_author_names = {'Mystery author', 'Candidate Author 1', 'Candidate Author 2', 'Candidate Author 3'}
-    visible_task_names = set(visible_authors).intersection(task_author_names)
-    predicted_in_visible = None
-    if 'predicted' in task_authors_df.columns:
-        preds = task_authors_df[task_authors_df['predicted'] == True]['authorID'].tolist()
-        if preds:
-            predicted_in_visible = preds[0] if preds[0] in visible_task_names else None
-    use_task_only = ('Mystery author' in visible_task_names) and (predicted_in_visible is not None)
-    if use_task_only:
-        print("[INFO] Using task-only Gram2Vec similarity (Mystery + Predicted candidate) within zoom")
-        try:
-            g2v_feats = compute_task_only_g2v_similarity(
-                background_corpus_df=merged_authors_df,
-                visible_author_ids=visible_authors,
-                features_clm_name='g2v_vector',
-                top_n=10,
-                require_spans=True
-            )
-            # g2v_feats already enforces spans for both authors; treat as final
-            filtered_g2v_feats = g2v_feats
-        except Exception as e:
-            print(f"[WARN] Task-only similarity failed, falling back to contrastive: {e}")
-            filtered_g2v_feats = None
-    else:
-        filtered_g2v_feats = None
     # ── Span-existence filter on task authors in the zoom ───────────────────
     # Keep only features that have at least one detected span in any of the
     # visible task authors' texts
@@ -335,17 +305,16 @@ def handle_zoom(event_json, bg_proj, bg_lbls, clustered_authors_df, task_authors
     task_texts = [_to_text(x) for x in visible_task_authors['fullText'].tolist()]
-    if filtered_g2v_feats is None:
-        filtered_g2v_feats = []
-        for feat in g2v_feats:
-            try:
-                # `feat` is shorthand already (e.g., 'pos_bigrams:NOUN PROPN')
-                if any(find_feature_spans(txt, feat[0]) for txt in task_texts):
-                    filtered_g2v_feats.append(feat)
-                else:
-                    print(f"[INFO] Dropping G2V feature with no spans in task texts: {feat}")
-            except Exception as e:
-                print(f"[WARN] Error while checking spans for {feat}: {e}")
     # Convert to human readable for display
     HR_g2v_list = []

 from plotly.colors import sample_colorscale
 from gradio import update
 import re
+from utils.interp_space_utils import compute_clusters_style_representation_3, compute_clusters_g2v_representation, compute_precomputed_regions
 from utils.llm_feat_utils import split_features
 from utils.gram2vec_feat_utils import get_shorthand, get_fullform
 from gram2vec.feature_locator import find_feature_spans
 # Function to process G2V features and create display choices
 def format_g2v_features_for_display(g2v_features_with_scores):
     """
+    Convert G2V features with z-scores into display format for Gradio radio buttons.
     Args:
         g2v_features_with_scores: List of tuples like:
+            [('None', None), ('Feature Name', z_score), ...]
     Returns:
         tuple: (display_choices, original_values)
     for item in g2v_features_with_scores:
         if len(item) == 2:
+            feature_name, z_score = item
             # Handle None case
+            if feature_name == "None" or z_score is None:
                 display_choices.append("None")
                 original_values.append("None")
             else:
                 # Convert numpy float to regular float if needed
+                if hasattr(z_score, 'item'):
+                    z_score = float(z_score.item())
                 else:
+                    z_score = float(z_score)
+                # Create display string with z-score
+                display_string = f"{feature_name} | [Z={z_score:.2f}]"
                 display_choices.append(display_string)
                 original_values.append(feature_name)
         else:
     print(f"Task authors: {len(task_authors_df)}, Clustered authors: {len(clustered_authors_df)}")
     merged_authors_df = pd.concat([task_authors_df, clustered_authors_df])
     print(f"Merged authors DataFrame:\n{len(merged_authors_df)}")
+    #style_analysis_response = {'features': [], 'spans': []}
+    style_analysis_response = compute_clusters_style_representation_3(
+        background_corpus_df=merged_authors_df,
+        cluster_ids=visible_authors,
+        cluster_label_clm_name='authorID',
+    )
     llm_feats = ['None'] + style_analysis_response['features']
     merged_authors_df = pd.concat([task_authors_df, clustered_authors_df])
     g2v_feats = compute_clusters_g2v_representation(
         background_corpus_df=merged_authors_df,
         author_ids=visible_authors,
         features_clm_name='g2v_vector'
     )
     # ── Span-existence filter on task authors in the zoom ───────────────────
     # Keep only features that have at least one detected span in any of the
     # visible task authors' texts
     task_texts = [_to_text(x) for x in visible_task_authors['fullText'].tolist()]
+    filtered_g2v_feats = []
+    for feat in g2v_feats:
+        try:
+            # `feat` is shorthand already (e.g., 'pos_bigrams:NOUN PROPN')
+            if any(find_feature_spans(txt, feat[0]) for txt in task_texts):
+                filtered_g2v_feats.append(feat)
+            else:
+                print(f"[INFO] Dropping G2V feature with no spans in task texts: {feat}")
+        except Exception as e:
+            print(f"[WARN] Error while checking spans for {feat}: {e}")
     # Convert to human readable for display
     HR_g2v_list = []