Anisha Bhatnagar
committed on
Commit
·
8c133f5
1
Parent(s):
08f53a7
reduced logging
Browse files
- utils/gram2vec_feat_utils.py +5 -3
- utils/visualizations.py +5 -5
utils/gram2vec_feat_utils.py
CHANGED
|
@@ -172,6 +172,8 @@ def show_combined_spans_all(selected_feature_llm, selected_feature_g2v,
|
|
| 172 |
# print(llm_style_feats_analysis)
|
| 173 |
print(f"{len(llm_style_feats_analysis['spans'].values())}")
|
| 174 |
author_list = list(llm_style_feats_analysis['spans'].values())
|
|
|
|
|
|
|
| 175 |
llm_spans_list = []
|
| 176 |
for i, (_, txt) in enumerate(texts):
|
| 177 |
print(f"{i}/{len(texts)}")
|
|
@@ -228,12 +230,12 @@ def show_combined_spans_all(selected_feature_llm, selected_feature_g2v,
|
|
| 228 |
bg_start = 4
|
| 229 |
bg_indices = list(range(bg_start, len(texts)))
|
| 230 |
kept_indices = [i for i in bg_indices if gram_spans_list[i]]
|
| 231 |
-
print(f"\n---> {kept_indices}")
|
| 232 |
filtered_texts_bg = [texts[i] for i in kept_indices]
|
| 233 |
filtered_llm_bg = [llm_spans_list[i] for i in kept_indices]
|
| 234 |
filtered_gram_bg = [gram_spans_list[i] for i in kept_indices]
|
| 235 |
|
| 236 |
-
print(filtered_texts_bg)
|
| 237 |
|
| 238 |
html_background_authors = create_html(
|
| 239 |
filtered_texts_bg,
|
|
@@ -279,7 +281,7 @@ def get_label(label: str, predicted_author=None, ground_truth_author=None, bg_id
|
|
| 279 |
def create_html(texts, llm_spans_list, gram_spans_list, selected_feature_llm, selected_feature_g2v, short=None, background = False, predicted_author=None, ground_truth_author=None):
|
| 280 |
html = []
|
| 281 |
for i, (label, txt) in enumerate(texts):
|
| 282 |
-
print(i, label, txt[:30])
|
| 283 |
label = get_label(label, predicted_author, ground_truth_author, i) if background else get_label(label, predicted_author, ground_truth_author)
|
| 284 |
combined = highlight_both_spans(txt, llm_spans_list[i], gram_spans_list[i])
|
| 285 |
notice = ""
|
|
|
|
| 172 |
# print(llm_style_feats_analysis)
|
| 173 |
print(f"{len(llm_style_feats_analysis['spans'].values())}")
|
| 174 |
author_list = list(llm_style_feats_analysis['spans'].values())
|
| 175 |
+
# print(f"Author list length: {len(author_list)}")
|
| 176 |
+
# print(f"Author list: {author_list}")
|
| 177 |
llm_spans_list = []
|
| 178 |
for i, (_, txt) in enumerate(texts):
|
| 179 |
print(f"{i}/{len(texts)}")
|
|
|
|
| 230 |
bg_start = 4
|
| 231 |
bg_indices = list(range(bg_start, len(texts)))
|
| 232 |
kept_indices = [i for i in bg_indices if gram_spans_list[i]]
|
| 233 |
+
# print(f"\n---> {kept_indices}")
|
| 234 |
filtered_texts_bg = [texts[i] for i in kept_indices]
|
| 235 |
filtered_llm_bg = [llm_spans_list[i] for i in kept_indices]
|
| 236 |
filtered_gram_bg = [gram_spans_list[i] for i in kept_indices]
|
| 237 |
|
| 238 |
+
# print(filtered_texts_bg)
|
| 239 |
|
| 240 |
html_background_authors = create_html(
|
| 241 |
filtered_texts_bg,
|
|
|
|
| 281 |
def create_html(texts, llm_spans_list, gram_spans_list, selected_feature_llm, selected_feature_g2v, short=None, background = False, predicted_author=None, ground_truth_author=None):
|
| 282 |
html = []
|
| 283 |
for i, (label, txt) in enumerate(texts):
|
| 284 |
+
# print(i, label, txt[:30])
|
| 285 |
label = get_label(label, predicted_author, ground_truth_author, i) if background else get_label(label, predicted_author, ground_truth_author)
|
| 286 |
combined = highlight_both_spans(txt, llm_spans_list[i], gram_spans_list[i])
|
| 287 |
notice = ""
|
utils/visualizations.py
CHANGED
|
@@ -309,7 +309,7 @@ def handle_zoom(event_json, bg_proj, bg_lbls, clustered_authors_df, task_authors
|
|
| 309 |
|
| 310 |
task_texts = [_to_text(x) for x in task_only_df['fullText'].tolist()]
|
| 311 |
|
| 312 |
-
print(f"task_texts: {task_texts}")
|
| 313 |
filtered_g2v_feats = []
|
| 314 |
for feat in g2v_feats:
|
| 315 |
try:
|
|
@@ -333,7 +333,7 @@ def handle_zoom(event_json, bg_proj, bg_lbls, clustered_authors_df, task_authors
|
|
| 333 |
HR_g2v_list = []
|
| 334 |
for feat in filtered_g2v_feats:
|
| 335 |
HR_g2v = get_fullform(feat[0])
|
| 336 |
-
print(f"\n\n feat: {feat} ---> Human Readable: {HR_g2v}")
|
| 337 |
if HR_g2v is None:
|
| 338 |
print(f"Skipping Gram2Vec feature without human readable form: {feat}")
|
| 339 |
else:
|
|
@@ -342,11 +342,11 @@ def handle_zoom(event_json, bg_proj, bg_lbls, clustered_authors_df, task_authors
|
|
| 342 |
HR_g2v_list = [("None", None)] + HR_g2v_list
|
| 343 |
|
| 344 |
print(f"[INFO] Found {len(llm_feats)} LLM features and {len(g2v_feats)} Gram2Vec features in the zoomed region.")
|
| 345 |
-
print(f"[INFO] unfiltered g2v features: {g2v_feats}")
|
| 346 |
|
| 347 |
print(f"[INFO] LLM features: {llm_feats}")
|
| 348 |
HR_g2v_list, _ = format_g2v_features_for_display(HR_g2v_list)
|
| 349 |
-
print(f"[INFO] Gram2Vec features: {HR_g2v_list}")
|
| 350 |
|
| 351 |
return (
|
| 352 |
gr.update(choices=llm_feats, value=llm_feats[0]),
|
|
@@ -386,7 +386,7 @@ def handle_zoom_with_retries(event_json, bg_proj, bg_lbls, clustered_authors_df,
|
|
| 386 |
def visualize_clusters_plotly(iid, cfg, instances, model_radio, custom_model_input, task_authors_df, background_authors_embeddings_df, pred_idx=None, gt_idx=None):
|
| 387 |
model_name = model_radio if model_radio != "Other" else custom_model_input
|
| 388 |
embedding_col_name = f'{model_name.split("/")[-1]}_style_embedding'
|
| 389 |
-
print(background_authors_embeddings_df.columns)
|
| 390 |
print("Generating cluster visualization")
|
| 391 |
iid = int(iid)
|
| 392 |
#interp = load_interp_space(cfg)
|
|
|
|
| 309 |
|
| 310 |
task_texts = [_to_text(x) for x in task_only_df['fullText'].tolist()]
|
| 311 |
|
| 312 |
+
print(f"len task_texts: {len(task_texts)}")
|
| 313 |
filtered_g2v_feats = []
|
| 314 |
for feat in g2v_feats:
|
| 315 |
try:
|
|
|
|
| 333 |
HR_g2v_list = []
|
| 334 |
for feat in filtered_g2v_feats:
|
| 335 |
HR_g2v = get_fullform(feat[0])
|
| 336 |
+
# print(f"\n\n feat: {feat} ---> Human Readable: {HR_g2v}")
|
| 337 |
if HR_g2v is None:
|
| 338 |
print(f"Skipping Gram2Vec feature without human readable form: {feat}")
|
| 339 |
else:
|
|
|
|
| 342 |
HR_g2v_list = [("None", None)] + HR_g2v_list
|
| 343 |
|
| 344 |
print(f"[INFO] Found {len(llm_feats)} LLM features and {len(g2v_feats)} Gram2Vec features in the zoomed region.")
|
| 345 |
+
# print(f"[INFO] unfiltered g2v features: {g2v_feats}")
|
| 346 |
|
| 347 |
print(f"[INFO] LLM features: {llm_feats}")
|
| 348 |
HR_g2v_list, _ = format_g2v_features_for_display(HR_g2v_list)
|
| 349 |
+
# print(f"[INFO] Gram2Vec features: {HR_g2v_list}")
|
| 350 |
|
| 351 |
return (
|
| 352 |
gr.update(choices=llm_feats, value=llm_feats[0]),
|
|
|
|
| 386 |
def visualize_clusters_plotly(iid, cfg, instances, model_radio, custom_model_input, task_authors_df, background_authors_embeddings_df, pred_idx=None, gt_idx=None):
|
| 387 |
model_name = model_radio if model_radio != "Other" else custom_model_input
|
| 388 |
embedding_col_name = f'{model_name.split("/")[-1]}_style_embedding'
|
| 389 |
+
# print(background_authors_embeddings_df.columns)
|
| 390 |
print("Generating cluster visualization")
|
| 391 |
iid = int(iid)
|
| 392 |
#interp = load_interp_space(cfg)
|