Milad Alshomary committed on
Commit
d1e7150
·
1 Parent(s): 23e934a
config/config.yaml CHANGED
@@ -1,6 +1,6 @@
1
  # config.yaml
2
- instances_to_explain_path: "./datasets/hrs_explanations_luar_clusters_2_35_balanced.json"
3
- instances_to_explain_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/hrs_explanations_luar_clusters_2_35_balanced.json?download=true"
4
  interp_space_path: "./datasets/sentence_luar_interp_space_2_35/"
5
  interp_space_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/sentence_luar_interp_space_2_35.zip?download=true"
6
  gram2vec_feats_path: "./datasets/gram2vec_feats.csv"
@@ -10,5 +10,5 @@ style_feat_clm: "llm_tfidf_weights"
10
  top_k: 10
11
  only_llm_feats: false
12
  only_gram2vec_feats: false
13
- max_num_docs_per_authors: 1
14
- max_num_bg_authors: 1000
 
1
  # config.yaml
2
+ instances_to_explain_path: "./datasets/hrs_explanations_luar_clusters_18_balanced.json"
3
+ instances_to_explain_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/hrs_explanations_luar_clusters_18_balanced.json?download=true"
4
  interp_space_path: "./datasets/sentence_luar_interp_space_2_35/"
5
  interp_space_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/sentence_luar_interp_space_2_35.zip?download=true"
6
  gram2vec_feats_path: "./datasets/gram2vec_feats.csv"
 
10
  top_k: 10
11
  only_llm_feats: false
12
  only_gram2vec_feats: false
13
+ max_num_docs_per_authors: 3
14
+ max_num_bg_authors: 500
utils/interp_space_utils.py CHANGED
@@ -271,7 +271,7 @@ def cached_generate_style_embedding(background_corpus_df: pd.DataFrame,
271
  else:
272
  # Otherwise, compute, cache, and return
273
  print(f"Computing embeddings for {model_name} on column '{text_clm}', saving to {cache_path}")
274
- task_and_background_embeddings = generate_style_embedding(background_corpus_df, text_clm, model_name, dimensionality_reduction=True)
275
  # Create a clean column name from the model name
276
  col_name = f'{model_name.split("/")[-1]}_style_embedding'
277
  background_corpus_df[col_name] = task_and_background_embeddings
 
271
  else:
272
  # Otherwise, compute, cache, and return
273
  print(f"Computing embeddings for {model_name} on column '{text_clm}', saving to {cache_path}")
274
+ task_and_background_embeddings = generate_style_embedding(background_corpus_df, text_clm, model_name, dimensionality_reduction=False)
275
  # Create a clean column name from the model name
276
  col_name = f'{model_name.split("/")[-1]}_style_embedding'
277
  background_corpus_df[col_name] = task_and_background_embeddings
utils/ui.py CHANGED
@@ -136,10 +136,10 @@ def update_task_display(mode, iid, instances, background_df, mystery_file, cand1
136
  task_authors_df['g2v_vector'] = task_authors_g2v
137
  print(f"Gram2Vec feature generation complete")
138
 
139
- #if mode != "Predefined HRS Task":
140
- # Computing predicted author by checking pairwise cosine similarity over luar embeddings
141
- col_name = f'{model_name.split("/")[-1]}_style_embedding'
142
- predicted_author = compute_predicted_author(task_authors_df, col_name)
143
 
144
  #generating html for the task
145
  header_html, mystery_html, candidate_htmls = task_HTML(mystery_txt, candidate_texts, predicted_author, ground_truth_author)
 
136
  task_authors_df['g2v_vector'] = task_authors_g2v
137
  print(f"Gram2Vec feature generation complete")
138
 
139
+ if mode != "Predefined HRS Task":
140
+ # Computing predicted author by checking pairwise cosine similarity over luar embeddings
141
+ col_name = f'{model_name.split("/")[-1]}_style_embedding'
142
+ predicted_author = compute_predicted_author(task_authors_df, col_name)
143
 
144
  #generating html for the task
145
  header_html, mystery_html, candidate_htmls = task_HTML(mystery_txt, candidate_texts, predicted_author, ground_truth_author)
utils/visualizations.py CHANGED
@@ -132,7 +132,7 @@ def compute_tsne_with_cache(embeddings: np.ndarray, cache_path: str = 'datasets/
132
  else:
133
  print("Computing t-SNE")
134
  tsne_result = TSNE(n_components=2, learning_rate='auto',
135
- init='random', perplexity=10, random_state=42,metric='cosine').fit_transform(embeddings)
136
  #tsne_result = umap.UMAP(n_components=2, n_neighbors=30, min_dist=0.3, metric='cosine').fit_transform(embeddings)
137
 
138
  cache[hash_key] = tsne_result
 
132
  else:
133
  print("Computing t-SNE")
134
  tsne_result = TSNE(n_components=2, learning_rate='auto',
135
+ init='random', perplexity=10, random_state=42, metric='cosine').fit_transform(embeddings)
136
  #tsne_result = umap.UMAP(n_components=2, n_neighbors=30, min_dist=0.3, metric='cosine').fit_transform(embeddings)
137
 
138
  cache[hash_key] = tsne_result