Milad Alshomary
committed on
Commit
·
d1e7150
1
Parent(s):
23e934a
updates
Browse files
- config/config.yaml +4 -4
- utils/interp_space_utils.py +1 -1
- utils/ui.py +4 -4
- utils/visualizations.py +1 -1
config/config.yaml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
# config.yaml
|
| 2 |
-
instances_to_explain_path: "./datasets/
|
| 3 |
-
instances_to_explain_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/
|
| 4 |
interp_space_path: "./datasets/sentence_luar_interp_space_2_35/"
|
| 5 |
interp_space_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/sentence_luar_interp_space_2_35.zip?download=true"
|
| 6 |
gram2vec_feats_path: "./datasets/gram2vec_feats.csv"
|
|
@@ -10,5 +10,5 @@ style_feat_clm: "llm_tfidf_weights"
|
|
| 10 |
top_k: 10
|
| 11 |
only_llm_feats: false
|
| 12 |
only_gram2vec_feats: false
|
| 13 |
-
max_num_docs_per_authors:
|
| 14 |
-
max_num_bg_authors:
|
|
|
|
| 1 |
# config.yaml
|
| 2 |
+
instances_to_explain_path: "./datasets/hrs_explanations_luar_clusters_18_balanced.json"
|
| 3 |
+
instances_to_explain_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/hrs_explanations_luar_clusters_18_balanced.json?download=true"
|
| 4 |
interp_space_path: "./datasets/sentence_luar_interp_space_2_35/"
|
| 5 |
interp_space_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/sentence_luar_interp_space_2_35.zip?download=true"
|
| 6 |
gram2vec_feats_path: "./datasets/gram2vec_feats.csv"
|
|
|
|
| 10 |
top_k: 10
|
| 11 |
only_llm_feats: false
|
| 12 |
only_gram2vec_feats: false
|
| 13 |
+
max_num_docs_per_authors: 3
|
| 14 |
+
max_num_bg_authors: 500
|
utils/interp_space_utils.py
CHANGED
|
@@ -271,7 +271,7 @@ def cached_generate_style_embedding(background_corpus_df: pd.DataFrame,
|
|
| 271 |
else:
|
| 272 |
# Otherwise, compute, cache, and return
|
| 273 |
print(f"Computing embeddings for {model_name} on column '{text_clm}', saving to {cache_path}")
|
| 274 |
-
task_and_background_embeddings = generate_style_embedding(background_corpus_df, text_clm, model_name, dimensionality_reduction=
|
| 275 |
# Create a clean column name from the model name
|
| 276 |
col_name = f'{model_name.split("/")[-1]}_style_embedding'
|
| 277 |
background_corpus_df[col_name] = task_and_background_embeddings
|
|
|
|
| 271 |
else:
|
| 272 |
# Otherwise, compute, cache, and return
|
| 273 |
print(f"Computing embeddings for {model_name} on column '{text_clm}', saving to {cache_path}")
|
| 274 |
+
task_and_background_embeddings = generate_style_embedding(background_corpus_df, text_clm, model_name, dimensionality_reduction=False)
|
| 275 |
# Create a clean column name from the model name
|
| 276 |
col_name = f'{model_name.split("/")[-1]}_style_embedding'
|
| 277 |
background_corpus_df[col_name] = task_and_background_embeddings
|
utils/ui.py
CHANGED
|
@@ -136,10 +136,10 @@ def update_task_display(mode, iid, instances, background_df, mystery_file, cand1
|
|
| 136 |
task_authors_df['g2v_vector'] = task_authors_g2v
|
| 137 |
print(f"Gram2Vec feature generation complete")
|
| 138 |
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
|
| 144 |
#generating html for the task
|
| 145 |
header_html, mystery_html, candidate_htmls = task_HTML(mystery_txt, candidate_texts, predicted_author, ground_truth_author)
|
|
|
|
| 136 |
task_authors_df['g2v_vector'] = task_authors_g2v
|
| 137 |
print(f"Gram2Vec feature generation complete")
|
| 138 |
|
| 139 |
+
if mode != "Predefined HRS Task":
|
| 140 |
+
# Computing predicted author by checking pairwise cosine similarity over luar embeddings
|
| 141 |
+
col_name = f'{model_name.split("/")[-1]}_style_embedding'
|
| 142 |
+
predicted_author = compute_predicted_author(task_authors_df, col_name)
|
| 143 |
|
| 144 |
#generating html for the task
|
| 145 |
header_html, mystery_html, candidate_htmls = task_HTML(mystery_txt, candidate_texts, predicted_author, ground_truth_author)
|
utils/visualizations.py
CHANGED
|
@@ -132,7 +132,7 @@ def compute_tsne_with_cache(embeddings: np.ndarray, cache_path: str = 'datasets/
|
|
| 132 |
else:
|
| 133 |
print("Computing t-SNE")
|
| 134 |
tsne_result = TSNE(n_components=2, learning_rate='auto',
|
| 135 |
-
init='random', perplexity=10, random_state=42,metric='cosine').fit_transform(embeddings)
|
| 136 |
#tsne_result = umap.UMAP(n_components=2, n_neighbors=30, min_dist=0.3, metric='cosine').fit_transform(embeddings)
|
| 137 |
|
| 138 |
cache[hash_key] = tsne_result
|
|
|
|
| 132 |
else:
|
| 133 |
print("Computing t-SNE")
|
| 134 |
tsne_result = TSNE(n_components=2, learning_rate='auto',
|
| 135 |
+
init='random', perplexity=10, random_state=42, metric='cosine').fit_transform(embeddings)
|
| 136 |
#tsne_result = umap.UMAP(n_components=2, n_neighbors=30, min_dist=0.3, metric='cosine').fit_transform(embeddings)
|
| 137 |
|
| 138 |
cache[hash_key] = tsne_result
|