Milad Alshomary committed on
Commit
d1e7150
·
1 Parent(s): 23e934a
config/config.yaml CHANGED
@@ -1,6 +1,6 @@
1
  # config.yaml
2
- instances_to_explain_path: "./datasets/hrs_explanations_luar_clusters_2_35_balanced.json"
3
- instances_to_explain_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/hrs_explanations_luar_clusters_2_35_balanced.json?download=true"
4
  interp_space_path: "./datasets/sentence_luar_interp_space_2_35/"
5
  interp_space_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/sentence_luar_interp_space_2_35.zip?download=true"
6
  gram2vec_feats_path: "./datasets/gram2vec_feats.csv"
@@ -10,5 +10,5 @@ style_feat_clm: "llm_tfidf_weights"
10
  top_k: 10
11
  only_llm_feats: false
12
  only_gram2vec_feats: false
13
- max_num_docs_per_authors: 1
14
- max_num_bg_authors: 1000
 
1
  # config.yaml
2
+ instances_to_explain_path: "./datasets/hrs_explanations_luar_clusters_18_balanced.json"
3
+ instances_to_explain_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/hrs_explanations_luar_clusters_18_balanced.json?download=true"
4
  interp_space_path: "./datasets/sentence_luar_interp_space_2_35/"
5
  interp_space_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/sentence_luar_interp_space_2_35.zip?download=true"
6
  gram2vec_feats_path: "./datasets/gram2vec_feats.csv"
 
10
  top_k: 10
11
  only_llm_feats: false
12
  only_gram2vec_feats: false
13
+ max_num_docs_per_authors: 3
14
+ max_num_bg_authors: 500
utils/interp_space_utils.py CHANGED
@@ -271,7 +271,7 @@ def cached_generate_style_embedding(background_corpus_df: pd.DataFrame,
271
  else:
272
  # Otherwise, compute, cache, and return
273
  print(f"Computing embeddings for {model_name} on column '{text_clm}', saving to {cache_path}")
274
- task_and_background_embeddings = generate_style_embedding(background_corpus_df, text_clm, model_name, dimensionality_reduction=True)
275
  # Create a clean column name from the model name
276
  col_name = f'{model_name.split("/")[-1]}_style_embedding'
277
  background_corpus_df[col_name] = task_and_background_embeddings
 
271
  else:
272
  # Otherwise, compute, cache, and return
273
  print(f"Computing embeddings for {model_name} on column '{text_clm}', saving to {cache_path}")
274
+ task_and_background_embeddings = generate_style_embedding(background_corpus_df, text_clm, model_name, dimensionality_reduction=False)
275
  # Create a clean column name from the model name
276
  col_name = f'{model_name.split("/")[-1]}_style_embedding'
277
  background_corpus_df[col_name] = task_and_background_embeddings
utils/ui.py CHANGED
@@ -136,10 +136,10 @@ def update_task_display(mode, iid, instances, background_df, mystery_file, cand1
136
  task_authors_df['g2v_vector'] = task_authors_g2v
137
  print(f"Gram2Vec feature generation complete")
138
 
139
- #if mode != "Predefined HRS Task":
140
- # Computing predicted author by checking pairwise cosine similarity over luar embeddings
141
- col_name = f'{model_name.split("/")[-1]}_style_embedding'
142
- predicted_author = compute_predicted_author(task_authors_df, col_name)
143
 
144
  #generating html for the task
145
  header_html, mystery_html, candidate_htmls = task_HTML(mystery_txt, candidate_texts, predicted_author, ground_truth_author)
 
136
  task_authors_df['g2v_vector'] = task_authors_g2v
137
  print(f"Gram2Vec feature generation complete")
138
 
139
+ if mode != "Predefined HRS Task":
140
+ # Computing predicted author by checking pairwise cosine similarity over luar embeddings
141
+ col_name = f'{model_name.split("/")[-1]}_style_embedding'
142
+ predicted_author = compute_predicted_author(task_authors_df, col_name)
143
 
144
  #generating html for the task
145
  header_html, mystery_html, candidate_htmls = task_HTML(mystery_txt, candidate_texts, predicted_author, ground_truth_author)
utils/visualizations.py CHANGED
@@ -132,7 +132,7 @@ def compute_tsne_with_cache(embeddings: np.ndarray, cache_path: str = 'datasets/
132
  else:
133
  print("Computing t-SNE")
134
  tsne_result = TSNE(n_components=2, learning_rate='auto',
135
- init='random', perplexity=10, random_state=42,metric='cosine').fit_transform(embeddings)
136
  #tsne_result = umap.UMAP(n_components=2, n_neighbors=30, min_dist=0.3, metric='cosine').fit_transform(embeddings)
137
 
138
  cache[hash_key] = tsne_result
 
132
  else:
133
  print("Computing t-SNE")
134
  tsne_result = TSNE(n_components=2, learning_rate='auto',
135
+ init='random', perplexity=10, random_state=42, metric='cosine').fit_transform(embeddings)
136
  #tsne_result = umap.UMAP(n_components=2, n_neighbors=30, min_dist=0.3, metric='cosine').fit_transform(embeddings)
137
 
138
  cache[hash_key] = tsne_result