Spaces:

ExplainabiliyForAATeam
/

explainability-tool-for-aa

Running

App Files Files Community

Milad Alshomary committed on Aug 19

Commit

ac7facf

1 Parent(s): e392716

updates

Browse files

Files changed (7) hide show

app.py +1 -1
config/config.yaml +3 -3
utils/gram2vec_feat_utils.py +6 -6
utils/interp_space_utils.py +68 -18
utils/llm_feat_utils.py +1 -0
utils/ui.py +5 -4
utils/visualizations.py +1 -1

app.py CHANGED Viewed

@@ -58,7 +58,7 @@ def app(share=False, use_cluster_feats=False):
     instances, instance_ids = get_instances(cfg['instances_to_explain_path'])
     interp      = load_interp_space(cfg)
-    clustered_authors_df = interp['clustered_authors_df'][:1000]
     clustered_authors_df['fullText'] = clustered_authors_df['fullText'].map(lambda l: l[:3]) # Take at most 3 texts per author
     with gr.Blocks(title="Author Attribution Explainability Tool") as demo:

     instances, instance_ids = get_instances(cfg['instances_to_explain_path'])
     interp      = load_interp_space(cfg)
+    clustered_authors_df = interp['clustered_authors_df'][:500]
     clustered_authors_df['fullText'] = clustered_authors_df['fullText'].map(lambda l: l[:3]) # Take at most 3 texts per author
     with gr.Blocks(title="Author Attribution Explainability Tool") as demo:

config/config.yaml CHANGED Viewed

@@ -1,8 +1,8 @@
 # config.yaml
 instances_to_explain_path: "./datasets/hrs_explanations.json"
-instances_to_explain_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/raw/main/hrs_explanations.json?download"
-interp_space_path:    "./datasets/luar_interp_space_cluster_19/"
-interp_space_url:    "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/luar_interp_space_cluster.zip?download=true"
 gram2vec_feats_path:      "./datasets/gram2vec_feats.csv"
 gram2vec_feats_url:      "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/gram2vec_feats.csv?download=true"

 # config.yaml
 instances_to_explain_path: "./datasets/hrs_explanations.json"
+instances_to_explain_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/hrs_explanations_luar_clusters_18_balanced.json?/download=true"
+interp_space_path:    "./datasets/luar_interp_space_cluster_18/"
+interp_space_url:    "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/luar_interp_space_cluster_18.zip?download=true"
 gram2vec_feats_path:      "./datasets/gram2vec_feats.csv"
 gram2vec_feats_url:      "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/gram2vec_feats.csv?download=true"

utils/gram2vec_feat_utils.py CHANGED Viewed

@@ -126,7 +126,7 @@ def highlight_both_spans(text, llm_spans, gram_spans):
 def show_combined_spans_all(selected_feature_llm, selected_feature_g2v,
-                            llm_style_feats_analysis, background_authors_embeddings_df, task_authors_embeddings_df, visible_authors, predicted_author=None, ground_truth_author=None, max_num_authors=7):
     """
     For mystery + 3 candidates:
      1. get llm spans via your existing cache+API
@@ -226,15 +226,15 @@ def get_label(label: str, predicted_author=None, ground_truth_author=None, bg_id
             id = label.split("_")[0][-1] # Get the last character of the first part (a0, a1, a2)
         if predicted_author is not None and ground_truth_author is not None:
             if int(id) == predicted_author and int(id) == ground_truth_author:
-                return f"Candidate {int(id)+1} (Predicted & Ground Truth)"
             elif int(id) == predicted_author:
-                return f"Candidate {int(id)+1} (Predicted)"
             elif int(id) == ground_truth_author:
-                return f"Candidate {int(id)+1} (Ground Truth)"
             else:
-                return f"Candidate {int(id)+1}"
         else:
-            return f"Candidate {int(id)+1}"
     else:
         return f"Background Author {bg_id+1}"

 def show_combined_spans_all(selected_feature_llm, selected_feature_g2v,
+                            llm_style_feats_analysis, background_authors_embeddings_df, task_authors_embeddings_df, visible_authors, predicted_author=None, ground_truth_author=None, max_num_authors=4):
     """
     For mystery + 3 candidates:
      1. get llm spans via your existing cache+API
             id = label.split("_")[0][-1] # Get the last character of the first part (a0, a1, a2)
         if predicted_author is not None and ground_truth_author is not None:
             if int(id) == predicted_author and int(id) == ground_truth_author:
+                return f"Candidate {int(id)} (Predicted & Ground Truth)"
             elif int(id) == predicted_author:
+                return f"Candidate {int(id)} (Predicted)"
             elif int(id) == ground_truth_author:
+                return f"Candidate {int(id)} (Ground Truth)"
             else:
+                return f"Candidate {int(id)}"
         else:
+            return f"Candidate {int(id)}"
     else:
         return f"Background Author {bg_id+1}"

utils/interp_space_utils.py CHANGED Viewed

@@ -126,9 +126,9 @@ def instance_to_df(instance, predicted_author=None, ground_truth_author=None):
     #create a dataframe of the task authors
     task_authos_df  = pd.DataFrame([
         {'authorID': 'Mystery author', 'fullText': instance['Q_fullText'], 'predicted': None, 'ground_truth': None},
-        {'authorID': 'Candidate Author 1', 'fullText': instance['a0_fullText'], 'predicted': predicted_author == 0, 'ground_truth': ground_truth_author == 0},
-        {'authorID': 'Candidate Author 2', 'fullText': instance['a1_fullText'], 'predicted': predicted_author == 1, 'ground_truth': ground_truth_author == 1},
-        {'authorID': 'Candidate Author 3', 'fullText': instance['a2_fullText'], 'predicted': predicted_author == 2, 'ground_truth': ground_truth_author == 2}
     ])
@@ -479,7 +479,7 @@ def compute_clusters_style_representation_3(
     background_corpus_df: pd.DataFrame,
     cluster_ids: List[Any],
     cluster_label_clm_name: str = 'authorID',
-    max_num_feats: int = 5,
     max_num_documents_per_author=3,
     max_num_authors=5
     ):
@@ -494,35 +494,46 @@ def compute_clusters_style_representation_3(
     author_names = background_corpus_df_feat_id[cluster_label_clm_name].tolist()[:max_num_authors]
     print(f"Number of authors: {len(background_corpus_df_feat_id)}")
     print(author_names)
-    print(author_texts)
-    print(f"Number of authors: {len(author_names)}")
-    print(f"Number of authors: {len(author_texts)}")
     features = identify_style_features(author_texts, max_num_feats=max_num_feats)
     # STEP 2: Prepare author pool for span extraction
-    span_df = background_corpus_df.iloc[:7]
-    author_names = span_df[cluster_label_clm_name].tolist()[:7]
     print(f"Number of authors for span detection : {len(span_df)}")
     print(author_names)
     spans_by_author = extract_all_spans(span_df, features, cluster_label_clm_name)
     return {
         "features": features,
         "spans": spans_by_author
     }
 def compute_clusters_g2v_representation(
     background_corpus_df: pd.DataFrame,
     author_ids: List[Any],
     other_author_ids: List[Any],
     features_clm_name: str,
-    top_n: int = 10
 ) -> List[str]:
-    # Get boolean mask for documents in selected clusters
     selected_mask = background_corpus_df['authorID'].isin(author_ids).to_numpy()
     if not selected_mask.any():
@@ -530,8 +541,33 @@ def compute_clusters_g2v_representation(
     selected_feats = background_corpus_df[selected_mask][features_clm_name].tolist()
     all_g2v_feats  = list(selected_feats[0].keys())
-    all_g2v_values = np.array([list(x.values()) for x in selected_feats]).mean(axis=0)
     other_selected_feats = background_corpus_df[~selected_mask][features_clm_name].tolist()
     all_g2v_other_feats  = list(other_selected_feats[0].keys())
@@ -541,10 +577,24 @@ def compute_clusters_g2v_representation(
     top_g2v_feats = sorted(list(zip(all_g2v_feats, final_g2v_feats_values)), key=lambda x: -x[1])
-    print(top_g2v_feats[:top_n])
-    return [x[0] for x in top_g2v_feats[:top_n]]
 def generate_interpretable_space_representation(interp_space_path, styles_df_path, feat_clm, output_clm, num_feats=5):

     #create a dataframe of the task authors
     task_authos_df  = pd.DataFrame([
         {'authorID': 'Mystery author', 'fullText': instance['Q_fullText'], 'predicted': None, 'ground_truth': None},
+        {'authorID': 'Candidate Author 1', 'fullText': instance['a0_fullText'], 'predicted': int(predicted_author) == 0, 'ground_truth': int(ground_truth_author) == 0},
+        {'authorID': 'Candidate Author 2', 'fullText': instance['a1_fullText'], 'predicted': int(predicted_author) == 1, 'ground_truth': int(ground_truth_author) == 1},
+        {'authorID': 'Candidate Author 3', 'fullText': instance['a2_fullText'], 'predicted': int(predicted_author) == 2, 'ground_truth': int(ground_truth_author) == 2}
     ])
     background_corpus_df: pd.DataFrame,
     cluster_ids: List[Any],
     cluster_label_clm_name: str = 'authorID',
+    max_num_feats: int = 10,
     max_num_documents_per_author=3,
     max_num_authors=5
     ):
     author_names = background_corpus_df_feat_id[cluster_label_clm_name].tolist()[:max_num_authors]
     print(f"Number of authors: {len(background_corpus_df_feat_id)}")
     print(author_names)
     features = identify_style_features(author_texts, max_num_feats=max_num_feats)
     # STEP 2: Prepare author pool for span extraction
+    span_df = background_corpus_df.iloc[:4]
+    author_names = span_df[cluster_label_clm_name].tolist()[:4]
     print(f"Number of authors for span detection : {len(span_df)}")
     print(author_names)
     spans_by_author = extract_all_spans(span_df, features, cluster_label_clm_name)
+    # Keep only the features that appear in the extracted spans of at least one task author (mystery/candidates) present in cluster_ids
+    filtered_spans_by_author = {x[0] : x[1] for x in spans_by_author.items() if x[0] in {'Mystery author', 'Candidate Author 1', 'Candidate Author 2', 'Candidate Author 3'}.intersection(set(cluster_ids))}
+    print('Filtering in features for only the following authors: ', filtered_spans_by_author.keys())
+    filtered_features = []
+    for feature in features:
+        found_in_any_author = False
+        for author_name, author_spans in filtered_spans_by_author.items():
+            if feature in author_spans:
+                found_in_any_author = True
+                break
+        if found_in_any_author:
+            filtered_features.append(feature)
+    features = filtered_features
     return {
         "features": features,
         "spans": spans_by_author
     }
 def compute_clusters_g2v_representation(
     background_corpus_df: pd.DataFrame,
     author_ids: List[Any],
     other_author_ids: List[Any],
     features_clm_name: str,
+    top_n: int = 10,
+    mode: str = "sharedness",
+    sharedness_method: str = "mean_minus_alpha_std",
+    alpha: float = 0.5
 ) -> List[str]:
     selected_mask = background_corpus_df['authorID'].isin(author_ids).to_numpy()
     if not selected_mask.any():
     selected_feats = background_corpus_df[selected_mask][features_clm_name].tolist()
     all_g2v_feats  = list(selected_feats[0].keys())
+    # If the user requested a sharedness-based score, compute it and return top-N.
+    if mode == "sharedness":
+        selected_matrix = np.array([list(x.values()) for x in selected_feats], dtype=float)
+        if sharedness_method == "mean":
+            scores = selected_matrix.mean(axis=0)
+        elif sharedness_method in ("mean_minus_alpha_std", "mean-std", "mean_minus_std"):
+            means = selected_matrix.mean(axis=0)
+            stds  = selected_matrix.std(axis=0)
+            scores = means - float(alpha) * stds
+        elif sharedness_method == "min":
+            scores = selected_matrix.min(axis=0)
+        else:
+            # Default fallback to mean-minus-alpha*std if unknown method
+            means = selected_matrix.mean(axis=0)
+            stds  = selected_matrix.std(axis=0)
+            scores = means - float(alpha) * stds
+        # Rank and return
+        feature_scores = [(feat, score) for feat, score in zip(all_g2v_feats, scores) if score > 0]
+        feature_scores.sort(key=lambda x: x[1], reverse=True)
+        return [feat for feat, _ in feature_scores[:top_n]]
+    # Contrastive mode (any mode other than "sharedness"): compute target mean and subtract contrast mean
+    all_g2v_values = np.array([list(x.values()) for x in selected_feats]).mean(axis=0)
     other_selected_feats = background_corpus_df[~selected_mask][features_clm_name].tolist()
     all_g2v_other_feats  = list(other_selected_feats[0].keys())
     top_g2v_feats = sorted(list(zip(all_g2v_feats, final_g2v_feats_values)), key=lambda x: -x[1])
+    # Keep only features whose value is > 0 for at least one task author (mystery/candidates) listed in author_ids
+    selected_authors = {'Mystery author', 'Candidate Author 1', 'Candidate Author 2', 'Candidate Author 3'}.intersection(set(author_ids))
+    print('Filtering in g2v features for only the following authors: ', selected_authors)
+    authors_g2v_feats = background_corpus_df[background_corpus_df['authorID'].isin(selected_authors)][features_clm_name].tolist()
+    filtered_features = []
+    for feature, score in top_g2v_feats:
+        found_in_any_author = False
+        for author_g2v_feats in authors_g2v_feats:
+            if author_g2v_feats[feature] > 0:
+                found_in_any_author = True
+                break
+        if found_in_any_author:
+            filtered_features.append(feature)
+    print('Filtered G2V features: ', filtered_features)
+    return filtered_features[:top_n]
 def generate_interpretable_space_representation(interp_space_path, styles_df_path, feat_clm, output_clm, num_feats=5):

utils/llm_feat_utils.py CHANGED Viewed

@@ -90,6 +90,7 @@ def generate_feature_spans_cached(client, text: str, features: list[str], role:
     os.makedirs(CACHE_DIR, exist_ok=True)
     cache_path = os.path.join(CACHE_DIR, f"{role}.json")
     if os.path.exists(cache_path):
         with open(cache_path) as f:
             cache: dict[str, dict] = json.load(f)
     else:

     os.makedirs(CACHE_DIR, exist_ok=True)
     cache_path = os.path.join(CACHE_DIR, f"{role}.json")
     if os.path.exists(cache_path):
+        print(f"Cache hit....")
         with open(cache_path) as f:
             cache: dict[str, dict] = json.load(f)
     else:

utils/ui.py CHANGED Viewed

@@ -100,7 +100,7 @@ def update_task_display(mode, iid, instances, background_df, mystery_file, cand1
         candidate_texts = [c1_txt, c2_txt, c3_txt]
         #create a dataframe of the task authors
-        task_authors_df  = instance_to_df(instances[iid])
         print(f"\n\n\n ----> Loaded task {iid} with {len(task_authors_df)} authors\n\n\n")
         print(task_authors_df)
     else:
@@ -139,9 +139,10 @@ def update_task_display(mode, iid, instances, background_df, mystery_file, cand1
     print(background_df.columns)
-    # Computing predicted author by checking pairwise cosine similarity over luar embeddings
-    col_name = f'{model_name.split("/")[-1]}_style_embedding'
-    predicted_author = compute_predicted_author(task_authors_df, col_name)
     #generating html for the task
     header_html, mystery_html, candidate_htmls = task_HTML(mystery_txt, candidate_texts, predicted_author, ground_truth_author)

         candidate_texts = [c1_txt, c2_txt, c3_txt]
         #create a dataframe of the task authors
+        task_authors_df  = instance_to_df(instances[iid], predicted_author=predicted_author, ground_truth_author=ground_truth_author)
         print(f"\n\n\n ----> Loaded task {iid} with {len(task_authors_df)} authors\n\n\n")
         print(task_authors_df)
     else:
     print(background_df.columns)
+    if mode != "Predefined HRS Task":
+        # Computing predicted author by checking pairwise cosine similarity over luar embeddings
+        col_name = f'{model_name.split("/")[-1]}_style_embedding'
+        predicted_author = compute_predicted_author(task_authors_df, col_name)
     #generating html for the task
     header_html, mystery_html, candidate_htmls = task_HTML(mystery_txt, candidate_texts, predicted_author, ground_truth_author)

utils/visualizations.py CHANGED Viewed

@@ -290,7 +290,7 @@ def handle_zoom_with_retries(event_json, bg_proj, bg_lbls, clustered_authors_df,
     for attempt in range(3):
         try:
-            return handle_zoom(event_json, bg_proj, bg_lbls, clustered_authors_df, task_authors_df)
         except Exception as e:
             print(f"[ERROR] Attempt {attempt + 1} failed: {e}")
             if attempt < 2:

     for attempt in range(3):
         try:
+            handle_zoom(event_json, bg_proj, bg_lbls, clustered_authors_df, task_authors_df)
         except Exception as e:
             print(f"[ERROR] Attempt {attempt + 1} failed: {e}")
             if attempt < 2: