Milad Alshomary committed · Commit a5e49c0 · 1 Parent(s): dcbbcbd

updates

Browse files:
- README.md +20 -0
- app.py +1 -1
- baseline_static_explanations.py +196 -0
- prepare_data.py +3 -0
- utils/clustering_utils.py +4 -0
- utils/interp_space_utils.py +116 -33
README.md CHANGED

@@ -13,3 +13,23 @@ short_description: Interpreting the latent space of Authorship Attribution
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+
+## Useful commands
+
+### Prepare training/test data
+
+
+
+### Clustering the background corpus
+
+python cluster_corpus.py ../../iarpa-hiatus/explanation_tool_files/reddit_cluster_training.pkl ../../iarpa-hiatus/explanation_tool_files/reddit_cluster_test.pkl "AnnaWegmann/Style-Embedding" ./datasets/reddit_clustered_authors.pkl --min_samples 2 --metric cosine --pca_dimensions 100 --eps 0.04
+
+### Generate explainability sample
+
+python prepare_data.py ../explanation_tool_files/reddit_cluster_test.pkl ./datasets/reddit_explanation_sample.json
+
+
+### Generate static explanations for a sample
+
+python baseline_static_explanations.py generate_explanations ./datasets/reddit_explanation_sample.json ./datasets/reddit_explanation_sample_with_explanations.json --interp_space_path ./datasets/reddit_interp_space.json --model_name 'AnnaWegmann/Style-Embedding'
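For reference, the sample file produced by `prepare_data.py` and consumed by `baseline_static_explanations.py` (added below) is a JSON list of attribution instances, each with one query author and three candidates. A hypothetical instance, inferred from the keys that `generate_explanations` reads (all values are placeholders):

```python
# Hypothetical instance shape for reddit_explanation_sample.json; values are placeholders.
instance = {
    "Q_authorID": "author_q",   "Q_fullText": ["query document 1", "query document 2"],
    "a0_authorID": "author_a0", "a0_fullText": ["candidate 1 document"],
    "a1_authorID": "author_a1", "a1_fullText": ["candidate 2 document"],
    "a2_authorID": "author_a2", "a2_fullText": ["candidate 3 document"],
}
```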
app.py CHANGED

@@ -42,7 +42,7 @@ from utils.interp_space_utils import *
 from utils.ui import *
 
 load_dotenv()
-client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+client = OpenAI(base_url=os.getenv("OPENAI_API_BASE"), api_key=os.getenv("OPENAI_API_KEY"))
 
 
 # ── load once at startup ────────────────────────────────────────
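The app now honors a configurable API endpoint. A minimal sketch of the equivalent explicit setup, with placeholder values; note that `app.py` reads `OPENAI_API_BASE` while the helpers in `utils/interp_space_utils.py` read `OPENAI_BASE_URL`, so both names would need to be set unless they are unified:

```python
# Sketch only; endpoint and key values are placeholders, not from the commit.
import os
from openai import OpenAI

os.environ.setdefault("OPENAI_API_BASE", "https://api.openai.com/v1")  # read by app.py
os.environ.setdefault("OPENAI_BASE_URL", "https://api.openai.com/v1")  # read by utils/interp_space_utils.py

client = OpenAI(base_url=os.getenv("OPENAI_API_BASE"), api_key=os.getenv("OPENAI_API_KEY"))
```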
baseline_static_explanations.py ADDED

@@ -0,0 +1,196 @@
import argparse
import pandas as pd
import numpy as np
import os, json

from utils.interp_space_utils import cached_generate_style_embedding
from utils.clustering_utils import clustering_author
from utils.interp_space_utils import compute_clusters_style_representation_3, summarize_style_features_to_paragraph, find_closest_cluster_style

from sklearn.metrics.pairwise import cosine_distances, cosine_similarity


def build_static_interp_space(cluster_df):
    """
    Takes a dataframe whose cluster_label column indicates every author's cluster and returns a
    dict keyed by cluster_label, where each value holds the cluster's style-embedding representation and its style description.

    Example cluster_df:
            fullText                                           authorID         Style-Embedding_style_embedding                    cluster_label
        4   [I've play them all (D3, Torchlight 1&2, P...      HaxRyter         [0.7126333904811682, -0.5076461933032986, -0.1...  0
        10  [Back in Texas. Buddy had a kid in an up and ...   OaklandHellBent  [0.11238726238181786, 0.9263576185812101, -0.2...  1
    """
    # Find the embedding column (assuming it's the only one ending with '_style_embedding')
    embedding_clm = next((col for col in cluster_df.columns if col.endswith('_style_embedding')), None)
    if not embedding_clm:
        raise ValueError("No style embedding column found in the DataFrame.")

    print(f"Using embedding column: {embedding_clm}")

    # Group by cluster label and calculate the average embedding for each cluster.
    # We also aggregate authorIDs to use them for style representation.
    cluster_groups = cluster_df.groupby('cluster_label').agg({
        embedding_clm: lambda embs: np.mean(np.vstack(embs), axis=0).tolist(),
        'authorID': list
    }).reset_index()

    interpretable_space = {}

    for _, row in cluster_groups.iterrows():
        cluster_label = row['cluster_label']
        avg_embedding = row[embedding_clm]
        author_ids_in_cluster = row['authorID']

        print(f"\nProcessing cluster {cluster_label} with {len(author_ids_in_cluster)} authors...")

        # Generate a style description using an LLM.
        # We reuse the utility function from the interactive tool for consistency.
        style_analysis = compute_clusters_style_representation_3(
            background_corpus_df=cluster_df,
            cluster_ids=author_ids_in_cluster,
            cluster_label_clm_name='authorID',
            max_num_feats=5,     # Requesting 5 top features
            max_num_authors=20,  # Use up to 20 authors from the cluster for analysis
            return_only_feats=True
        )

        # When return_only_feats=True, style_analysis is a list of features
        style_features_list = style_analysis
        print(f"  Generated style features: {style_features_list}")

        # Summarize the list of features into a coherent paragraph
        style_paragraph = summarize_style_features_to_paragraph(style_features_list)
        print(f"  Summarized paragraph: {style_paragraph}")

        # JSON cannot serialize numpy integers, so convert cluster_label
        interpretable_space[int(cluster_label)] = (avg_embedding, style_paragraph)

    return interpretable_space


def generate_explanations(args):
    input_file = args.input_file
    interp_space_path = args.interp_space_path
    output_file = args.output_file
    model_name = args.model_name if args.model_name else 'AnnaWegmann/Style-Embedding'

    instances_for_ex = json.load(open(input_file))
    interp_space = json.load(open(interp_space_path))

    output = []
    for instance in instances_for_ex:
        json_obj = {}
        json_obj['Q_authorID'] = instance['Q_authorID']
        json_obj['Q_fullText'] = instance['Q_fullText']
        style_description, q_embeddings = find_closest_cluster_style(instance['Q_fullText'], interp_space, model_name=model_name)
        json_obj['Q_top_style_feats'] = style_description

        json_obj['a0_authorID'] = instance['a0_authorID']
        json_obj['a0_fullText'] = instance['a0_fullText']
        style_description, a0_embeddings = find_closest_cluster_style(instance['a0_fullText'], interp_space, model_name=model_name)
        json_obj['a0_top_style_feats'] = style_description

        json_obj['a1_authorID'] = instance['a1_authorID']
        json_obj['a1_fullText'] = instance['a1_fullText']
        style_description, a1_embeddings = find_closest_cluster_style(instance['a1_fullText'], interp_space, model_name=model_name)
        json_obj['a1_top_style_feats'] = style_description

        json_obj['a2_authorID'] = instance['a2_authorID']
        json_obj['a2_fullText'] = instance['a2_fullText']
        style_description, a2_embeddings = find_closest_cluster_style(instance['a2_fullText'], interp_space, model_name=model_name)
        json_obj['a2_top_style_feats'] = style_description

        # Compute pairwise similarity between q_embeddings and all a_embeddings.
        # Ensure embeddings are 2D arrays for cosine_similarity.
        q_emb_2d = np.array(q_embeddings).reshape(1, -1)
        a0_emb_2d = np.array(a0_embeddings).reshape(1, -1)
        a1_emb_2d = np.array(a1_embeddings).reshape(1, -1)
        a2_emb_2d = np.array(a2_embeddings).reshape(1, -1)

        similarity_q_a0 = cosine_similarity(q_emb_2d, a0_emb_2d)[0][0]
        similarity_q_a1 = cosine_similarity(q_emb_2d, a1_emb_2d)[0][0]
        similarity_q_a2 = cosine_similarity(q_emb_2d, a2_emb_2d)[0][0]

        ranked_candidates = [
            {'authorID': instance['a0_authorID'], 'similarity': float(similarity_q_a0)},
            {'authorID': instance['a1_authorID'], 'similarity': float(similarity_q_a1)},
            {'authorID': instance['a2_authorID'], 'similarity': float(similarity_q_a2)},
        ]

        # Sort candidate indices by similarity, most similar first
        json_obj['latent_rank'] = np.argsort([x['similarity'] for x in ranked_candidates])[::-1].tolist()
        json_obj['model_pred'] = 'Candidate {}'.format(json_obj['latent_rank'][0] + 1)

        output.append(json_obj)

    json.dump(output, open(output_file, 'w'), indent=4)


def main():
    """
    Dispatch to the requested task: build the static interpretable space or generate explanations.
    """
    parser = argparse.ArgumentParser(
        description="Build a static interpretable space from clustered author data."
    )

    parser.add_argument(
        "task",
        type=str,
        help="Task to run: build_static_interp_space or generate_explanations.",
        choices=["build_static_interp_space", "generate_explanations"]
    )

    parser.add_argument(
        "input_file",
        type=str,
        help="Path to the input: the clustered DataFrame (.pkl) for build_static_interp_space, or the explanation sample (.json) for generate_explanations."
    )

    parser.add_argument(
        "output_file",
        type=str,
        help="Path to save the output."
    )

    parser.add_argument(
        "--interp_space_path",
        type=str,
        help="Path to the interpretable space (.json file); required for generate_explanations."
    )

    parser.add_argument(
        "--model_name",
        type=str,
        help="Style analysis model name."
    )

    args = parser.parse_args()

    if args.task == "build_static_interp_space":
        return build_and_save_static_interp_space(args)
    elif args.task == "generate_explanations":
        return generate_explanations(args)
    else:
        raise ValueError(f"Unknown task: {args.task}")


def build_and_save_static_interp_space(args):
    print(f"Loading clustered data from {args.input_file}...")
    clustered_df = pd.read_pickle(args.input_file)

    interpretable_space = build_static_interp_space(clustered_df)

    print(f"\nSaving interpretable space to {args.output_file}...")
    with open(args.output_file, 'w') as f:
        json.dump(interpretable_space, f, indent=4)

    print("Done.")


if __name__ == "__main__":
    main()
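Taken together, the script is a two-step pipeline: `build_static_interp_space` writes a JSON dict shaped like `{cluster_label: [avg_embedding, style_paragraph]}`, and `generate_explanations` matches each text against those cluster centroids. A usage sketch, assuming the dataset paths used elsewhere in this commit:

```python
# Step 1: build the static interpretable space from the clustered background corpus.
#   python baseline_static_explanations.py build_static_interp_space \
#       ./datasets/reddit_clustered_authors.pkl ./datasets/reddit_interp_space.json
# Step 2: attach style descriptions and a latent ranking to each sampled instance.
#   python baseline_static_explanations.py generate_explanations \
#       ./datasets/reddit_explanation_sample.json \
#       ./datasets/reddit_explanation_sample_with_explanations.json \
#       --interp_space_path ./datasets/reddit_interp_space.json
import json

# Inspect the resulting space: each entry is [mean embedding, style paragraph]
interp_space = json.load(open("./datasets/reddit_interp_space.json"))
for label, (avg_embedding, style_paragraph) in interp_space.items():
    print(label, len(avg_embedding), style_paragraph[:60])
```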
prepare_data.py CHANGED

@@ -44,6 +44,9 @@ def sample_ds(input_file, output_file, num_insts=10000, min_num_text_per_inst=0,
     df = pd.DataFrame(out_list)
     df.to_pickle(output_file)
 
+    df = df.explode('fullText').reset_index()
+    df.to_json(output_file.replace('.pkl', '.json'))
+
 def get_reddit_data(input_path, random_seed=123, num_instances=100, num_documents_per_author=8, min_instance_len=10):
 
     df = pd.read_pickle(open(input_path, 'rb'))
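The new `explode` call is what flattens the per-author document lists into one row per document before the JSON export. A toy illustration of that pandas behavior (data invented for the example):

```python
import pandas as pd

# One row per author, with a list of documents in 'fullText' (as in the pickled output)
df = pd.DataFrame({'authorID': ['a1', 'a2'], 'fullText': [['doc1', 'doc2'], ['doc3']]})

flat = df.explode('fullText').reset_index()
print(flat[['authorID', 'fullText']])
#   authorID fullText
# 0       a1     doc1
# 1       a1     doc2
# 2       a2     doc3
```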
utils/clustering_utils.py CHANGED

@@ -128,6 +128,7 @@ def clustering_author(background_corpus_df: pd.DataFrame,
         return background_corpus_df
 
     X = np.array(X_list)  # Creates a 2D array from the list of 1D arrays
+    original_embeddings_list = [embeddings_list[i] for i in original_indices]
 
     if X.shape[0] == 1:
         print("Only one valid embedding found. Assigning cluster label 0 to it.")

@@ -279,6 +280,9 @@ def clustering_author(background_corpus_df: pd.DataFrame,
         print("No suitable DBSCAN clustering found meeting criteria. All processed embeddings marked as noise (-1).")
 
     background_corpus_df['cluster_label'] = final_labels_for_df
+    # restore the original embeddings
+    print(original_embeddings_list[0].shape)
+    background_corpus_df[embedding_clm] = original_embeddings_list
     return background_corpus_df
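These additions save the full-dimensional embeddings before clustering and write them back afterwards, presumably because the embedding column is replaced by reduced vectors during clustering (the README's `--pca_dimensions 100` flag suggests PCA). A minimal sketch of the pattern, with hypothetical names:

```python
import numpy as np
from sklearn.decomposition import PCA

def cluster_with_restore(df, embedding_clm, n_components=100):
    original = list(df[embedding_clm])  # keep full-dimensional embeddings

    X = np.vstack(original)
    X_reduced = PCA(n_components=min(n_components, *X.shape)).fit_transform(X)

    labels = np.zeros(len(X_reduced), dtype=int)  # placeholder for DBSCAN labels

    df['cluster_label'] = labels
    df[embedding_clm] = original  # restore so downstream averaging uses full vectors
    return df
```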
utils/interp_space_utils.py CHANGED

@@ -25,6 +25,7 @@ from sklearn.decomposition import PCA
 CACHE_DIR = "datasets/embeddings_cache"
 ZOOM_CACHE = "datasets/zoom_cache/features_cache.json"
 REGION_CACHE = "datasets/region_cache/regions_cache.pkl"
+SUMMARY_CACHE = "datasets/summary_cache/summaries.json"
 os.makedirs(CACHE_DIR, exist_ok=True)
 os.makedirs(os.path.dirname(ZOOM_CACHE), exist_ok=True)
 os.makedirs(os.path.dirname(REGION_CACHE), exist_ok=True)

@@ -41,6 +42,9 @@ class FeatureIdentificationSchema(BaseModel):
 class SpanExtractionSchema(BaseModel):
     spans: dict[str, dict[str, list[str]]]  # {author_name: {feature: [spans]}}
 
+class StyleSummarySchema(BaseModel):
+    summary_paragraph: str
+
 
 
 def compute_g2v_features(clustered_authors_df: pd.DataFrame, task_authors_df: pd.DataFrame=None, text_clm='fullText') -> pd.DataFrame:

@@ -398,7 +402,7 @@ def compute_clusters_style_representation_2(
     """
     Call openAI to analyze the common writing style features of the given list of texts
     """
-    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+    client = OpenAI(base_url=os.getenv("OPENAI_BASE_URL", None), api_key=os.getenv("OPENAI_API_KEY"))
 
     background_corpus_df['fullText'] = background_corpus_df['fullText'].map(lambda x: '\n\n'.join(x[:max_num_documents_per_author]) if isinstance(x, list) else x)
     background_corpus_df = background_corpus_df[background_corpus_df[cluster_label_clm_name].isin(cluster_ids)]

@@ -430,7 +434,7 @@ def compute_clusters_style_representation_2(
     else:  # Else compute and cache
 
         response = client.chat.completions.create(
-            model="gpt-4o"
+            model="gpt-4o",
             messages=[
                 {"role":"assistant","content":"You are a forensic linguistic who knows how to analyze similarites in writing styles."},
                 {"role":"user","content":prompt}],

@@ -472,7 +476,7 @@ def identify_style_features(author_texts: list[str], author_names: list[str], ma
     else:
         print(f"Cache miss. Computing features for authors: {author_names}")
 
-        client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+        client = OpenAI(base_url=os.getenv("OPENAI_BASE_URL", None), api_key=os.getenv("OPENAI_API_KEY"))
         prompt = f"""Identify {max_num_feats} writing style features that are common between the authors texts.
         Author Texts:

@@ -530,7 +534,7 @@ def extract_all_spans(authors_df: pd.DataFrame, features: list[str], cluster_lab
     For each author, use `generate_feature_spans_cached` to get feature->span mappings.
     Returns a dict: {author_name: {feature: [spans]}}
     """
-    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+    client = OpenAI(base_url=os.getenv("OPENAI_BASE_URL", None), api_key=os.getenv("OPENAI_API_KEY"))
 
     spans_by_author = {}

@@ -552,7 +556,8 @@ def compute_clusters_style_representation_3(
     max_num_documents_per_author=10,
     max_num_authors=10,
     max_authors_for_span_extraction=4,
-    top_k: int = 10
+    top_k: int = 10,
+    return_only_feats=False,
 ):
 
     print(f"Computing style representation for visible clusters: {len(cluster_ids)}")

@@ -567,6 +572,9 @@ def compute_clusters_style_representation_3(
     print(author_names)
     features = identify_style_features(author_texts, author_names, max_num_feats=max_num_feats)
 
+    if return_only_feats:
+        return features
+
     print("Features: ", features)
     # STEP 2: Prepare author pool for span extraction
     span_df = background_corpus_df.iloc[:max_authors_for_span_extraction]

@@ -577,34 +585,6 @@ def compute_clusters_style_representation_3(
 
     # Filter-in only task authors that are part of the current selection
     task_author_names = {'Mystery author', 'Candidate Author 1', 'Candidate Author 2', 'Candidate Author 3'}
-    #filtered_task_authors = {author: feat_map for author, feat_map in spans_by_author.items() if author in task_author_names.intersection(set(cluster_ids))}
-
-    # Build per-author sets of features that have at least one span
-    # author_present_feature_sets = [
-    #     {feature for feature, spans in feature_map.items() if spans and len(spans) > 0}
-    #     for _, feature_map in filtered_task_authors.items()
-    # ]
-
-    # print(filtered_task_authors.keys(), author_present_feature_sets)
-
-    # if len(author_present_feature_sets) > 0:  # we have more than one task author
-    #     coverage_counter = Counter()
-    #     for present_set in author_present_feature_sets:
-    #         coverage_counter.update(present_set)
-
-    #     # Keep features present in at least `min_authors_required` authors
-    #     eligible_features = [feat for feat, cnt in coverage_counter.items() if cnt >= len(author_present_feature_sets)]
-
-    #     # Preserve original LLM feature ordering as a secondary key where possible
-    #     feature_original_index = {feat: idx for idx, feat in enumerate(features)} if features else {}
-
-    #     selected_features_ranked = sorted(
-    #         eligible_features,
-    #         key=lambda f: (-coverage_counter[f], feature_original_index.get(f, 10**9))
-    #     )[:int(top_k)]
-    # else:
-    #     selected_features_ranked = features
 
 
     feature_importance = {f : 0 for f in features}

@@ -627,6 +607,109 @@ def compute_clusters_style_representation_3(
         "spans": spans_by_author
     }
 
+def summarize_style_features_to_paragraph(features: list[str]) -> str:
+    """
+    Takes a list of writing style features and uses an LLM to generate a
+    coherent, descriptive paragraph summarizing the style.
+
+    Args:
+        features (list[str]): A list of style features.
+
+    Returns:
+        str: A single paragraph summarizing the writing style.
+    """
+    if not features:
+        return "No style features were identified for this selection."
+
+    # Generate a cache key based on the sorted features to ensure consistency
+    feature_key = hashlib.md5(json.dumps(sorted(features)).encode()).hexdigest()
+
+    os.makedirs(os.path.dirname(SUMMARY_CACHE), exist_ok=True)
+    if os.path.exists(SUMMARY_CACHE):
+        with open(SUMMARY_CACHE, 'r') as f:
+            try:
+                cache = json.load(f)
+            except json.JSONDecodeError:
+                cache = {}
+    else:
+        cache = {}
+
+    if feature_key in cache:
+        print(f"Cache hit for style summary. Key: {feature_key}")
+        return cache[feature_key]
+
+    print("Cache miss for style summary. Generating new summary...")
+    client = OpenAI(base_url=os.getenv("OPENAI_BASE_URL", None), api_key=os.getenv("OPENAI_API_KEY"))
+
+    feature_list_str = "\n".join([f"- {feat}" for feat in features])
+    prompt = f"""You are a linguistic analyst. Your task is to synthesize the following list of writing style features into a single, coherent, and descriptive paragraph. The paragraph should flow naturally and explain the overall writing style of an author based on these features. Be concise and only mention the features without referring to example spans.
+
+Style Features:
+{feature_list_str}
+
+Please provide the summary as a single paragraph.
+"""
+
+    def _make_call():
+        response = client.chat.completions.create(
+            model="gpt-4o",
+            messages=[{"role": "user", "content": prompt}],
+            response_format={"type": "json_schema", "json_schema": {"name": "StyleSummarySchema", "schema": to_strict_json_schema(StyleSummarySchema)}}
+        )
+        return json.loads(response.choices[0].message.content)
+
+    summary_paragraph = retry_call(_make_call, StyleSummarySchema).summary_paragraph
+
+    # Save to cache
+    cache[feature_key] = summary_paragraph
+    with open(SUMMARY_CACHE, 'w') as f:
+        json.dump(cache, f, indent=2)
+
+    return summary_paragraph
+
+
+def find_closest_cluster_style(texts: list[str], interp_space: dict, model_name: str) -> tuple:
+    """
+    Computes the average embedding for a list of texts and finds the most similar
+    cluster from the interpretable space, returning its style description.
+
+    Args:
+        texts (list[str]): A list of texts for which to find a style description.
+        interp_space (dict): The loaded interpretable space, mapping cluster labels to (embedding, description) pairs.
+        model_name (str): The name of the sentence transformer model to use for embeddings.
+
+    Returns:
+        tuple: The style description paragraph of the most similar cluster and the input embedding.
+    """
+    if not texts:
+        return "No texts provided for analysis.", None
+
+    # 1. Compute the average embedding for the input texts.
+    # We create a temporary DataFrame to use the existing embedding generation utility.
+    temp_df = pd.DataFrame([{'fullText': texts}])
+    input_embedding_list = generate_style_embedding(temp_df, 'fullText', model_name, dimensionality_reduction=False)
+
+    if not input_embedding_list:
+        return "Could not generate an embedding for the provided texts.", None
+
+    input_embedding = np.array(input_embedding_list[0]).reshape(1, -1)
+
+    # 2. Find the most similar cluster
+    cluster_embeddings = {int(k): np.array(v[0]) for k, v in interp_space.items()}
+
+    best_cluster_label = -1
+    max_similarity = -1
+
+    for label, cluster_emb in cluster_embeddings.items():
+        similarity = cosine_similarity(input_embedding, cluster_emb.reshape(1, -1))[0][0]
+        if similarity > max_similarity:
+            max_similarity = similarity
+            best_cluster_label = label
+
+    # 3. Return the style description of the closest cluster, plus the query embedding
+    return interp_space.get(str(best_cluster_label), [None, "Could not find a matching style description."])[1], input_embedding[0]
+
+
 def compute_clusters_g2v_representation(
     background_corpus_df: pd.DataFrame,
     author_ids: List[Any],
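The summary cache added here keys entries by an MD5 of the sorted feature list, so the same set of features reuses one cached paragraph regardless of order. A small self-contained check of that keying scheme (feature strings are invented for the example):

```python
import hashlib, json

def summary_cache_key(features: list[str]) -> str:
    # Sorting first makes the key insensitive to feature order
    return hashlib.md5(json.dumps(sorted(features)).encode()).hexdigest()

assert summary_cache_key(["short sentences", "frequent emoji"]) == \
       summary_cache_key(["frequent emoji", "short sentences"])
```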