Anisha Bhatnagar committed on
Commit
8c133f5
·
1 Parent(s): 08f53a7

reduced logging

Browse files
utils/gram2vec_feat_utils.py CHANGED
@@ -172,6 +172,8 @@ def show_combined_spans_all(selected_feature_llm, selected_feature_g2v,
172
  # print(llm_style_feats_analysis)
173
  print(f"{len(llm_style_feats_analysis['spans'].values())}")
174
  author_list = list(llm_style_feats_analysis['spans'].values())
 
 
175
  llm_spans_list = []
176
  for i, (_, txt) in enumerate(texts):
177
  print(f"{i}/{len(texts)}")
@@ -228,12 +230,12 @@ def show_combined_spans_all(selected_feature_llm, selected_feature_g2v,
228
  bg_start = 4
229
  bg_indices = list(range(bg_start, len(texts)))
230
  kept_indices = [i for i in bg_indices if gram_spans_list[i]]
231
- print(f"\n---> {kept_indices}")
232
  filtered_texts_bg = [texts[i] for i in kept_indices]
233
  filtered_llm_bg = [llm_spans_list[i] for i in kept_indices]
234
  filtered_gram_bg = [gram_spans_list[i] for i in kept_indices]
235
 
236
- print(filtered_texts_bg)
237
 
238
  html_background_authors = create_html(
239
  filtered_texts_bg,
@@ -279,7 +281,7 @@ def get_label(label: str, predicted_author=None, ground_truth_author=None, bg_id
279
  def create_html(texts, llm_spans_list, gram_spans_list, selected_feature_llm, selected_feature_g2v, short=None, background = False, predicted_author=None, ground_truth_author=None):
280
  html = []
281
  for i, (label, txt) in enumerate(texts):
282
- print(i, label, txt[:30])
283
  label = get_label(label, predicted_author, ground_truth_author, i) if background else get_label(label, predicted_author, ground_truth_author)
284
  combined = highlight_both_spans(txt, llm_spans_list[i], gram_spans_list[i])
285
  notice = ""
 
172
  # print(llm_style_feats_analysis)
173
  print(f"{len(llm_style_feats_analysis['spans'].values())}")
174
  author_list = list(llm_style_feats_analysis['spans'].values())
175
+ # print(f"Author list length: {len(author_list)}")
176
+ # print(f"Author list: {author_list}")
177
  llm_spans_list = []
178
  for i, (_, txt) in enumerate(texts):
179
  print(f"{i}/{len(texts)}")
 
230
  bg_start = 4
231
  bg_indices = list(range(bg_start, len(texts)))
232
  kept_indices = [i for i in bg_indices if gram_spans_list[i]]
233
+ # print(f"\n---> {kept_indices}")
234
  filtered_texts_bg = [texts[i] for i in kept_indices]
235
  filtered_llm_bg = [llm_spans_list[i] for i in kept_indices]
236
  filtered_gram_bg = [gram_spans_list[i] for i in kept_indices]
237
 
238
+ # print(filtered_texts_bg)
239
 
240
  html_background_authors = create_html(
241
  filtered_texts_bg,
 
281
  def create_html(texts, llm_spans_list, gram_spans_list, selected_feature_llm, selected_feature_g2v, short=None, background = False, predicted_author=None, ground_truth_author=None):
282
  html = []
283
  for i, (label, txt) in enumerate(texts):
284
+ # print(i, label, txt[:30])
285
  label = get_label(label, predicted_author, ground_truth_author, i) if background else get_label(label, predicted_author, ground_truth_author)
286
  combined = highlight_both_spans(txt, llm_spans_list[i], gram_spans_list[i])
287
  notice = ""
utils/visualizations.py CHANGED
@@ -309,7 +309,7 @@ def handle_zoom(event_json, bg_proj, bg_lbls, clustered_authors_df, task_authors
309
 
310
  task_texts = [_to_text(x) for x in task_only_df['fullText'].tolist()]
311
 
312
- print(f"task_texts: {task_texts}")
313
  filtered_g2v_feats = []
314
  for feat in g2v_feats:
315
  try:
@@ -333,7 +333,7 @@ def handle_zoom(event_json, bg_proj, bg_lbls, clustered_authors_df, task_authors
333
  HR_g2v_list = []
334
  for feat in filtered_g2v_feats:
335
  HR_g2v = get_fullform(feat[0])
336
- print(f"\n\n feat: {feat} ---> Human Readable: {HR_g2v}")
337
  if HR_g2v is None:
338
  print(f"Skipping Gram2Vec feature without human readable form: {feat}")
339
  else:
@@ -342,11 +342,11 @@ def handle_zoom(event_json, bg_proj, bg_lbls, clustered_authors_df, task_authors
342
  HR_g2v_list = [("None", None)] + HR_g2v_list
343
 
344
  print(f"[INFO] Found {len(llm_feats)} LLM features and {len(g2v_feats)} Gram2Vec features in the zoomed region.")
345
- print(f"[INFO] unfiltered g2v features: {g2v_feats}")
346
 
347
  print(f"[INFO] LLM features: {llm_feats}")
348
  HR_g2v_list, _ = format_g2v_features_for_display(HR_g2v_list)
349
- print(f"[INFO] Gram2Vec features: {HR_g2v_list}")
350
 
351
  return (
352
  gr.update(choices=llm_feats, value=llm_feats[0]),
@@ -386,7 +386,7 @@ def handle_zoom_with_retries(event_json, bg_proj, bg_lbls, clustered_authors_df,
386
  def visualize_clusters_plotly(iid, cfg, instances, model_radio, custom_model_input, task_authors_df, background_authors_embeddings_df, pred_idx=None, gt_idx=None):
387
  model_name = model_radio if model_radio != "Other" else custom_model_input
388
  embedding_col_name = f'{model_name.split("/")[-1]}_style_embedding'
389
- print(background_authors_embeddings_df.columns)
390
  print("Generating cluster visualization")
391
  iid = int(iid)
392
  #interp = load_interp_space(cfg)
 
309
 
310
  task_texts = [_to_text(x) for x in task_only_df['fullText'].tolist()]
311
 
312
+ print(f"len task_texts: {len(task_texts)}")
313
  filtered_g2v_feats = []
314
  for feat in g2v_feats:
315
  try:
 
333
  HR_g2v_list = []
334
  for feat in filtered_g2v_feats:
335
  HR_g2v = get_fullform(feat[0])
336
+ # print(f"\n\n feat: {feat} ---> Human Readable: {HR_g2v}")
337
  if HR_g2v is None:
338
  print(f"Skipping Gram2Vec feature without human readable form: {feat}")
339
  else:
 
342
  HR_g2v_list = [("None", None)] + HR_g2v_list
343
 
344
  print(f"[INFO] Found {len(llm_feats)} LLM features and {len(g2v_feats)} Gram2Vec features in the zoomed region.")
345
+ # print(f"[INFO] unfiltered g2v features: {g2v_feats}")
346
 
347
  print(f"[INFO] LLM features: {llm_feats}")
348
  HR_g2v_list, _ = format_g2v_features_for_display(HR_g2v_list)
349
+ # print(f"[INFO] Gram2Vec features: {HR_g2v_list}")
350
 
351
  return (
352
  gr.update(choices=llm_feats, value=llm_feats[0]),
 
386
  def visualize_clusters_plotly(iid, cfg, instances, model_radio, custom_model_input, task_authors_df, background_authors_embeddings_df, pred_idx=None, gt_idx=None):
387
  model_name = model_radio if model_radio != "Other" else custom_model_input
388
  embedding_col_name = f'{model_name.split("/")[-1]}_style_embedding'
389
+ # print(background_authors_embeddings_df.columns)
390
  print("Generating cluster visualization")
391
  iid = int(iid)
392
  #interp = load_interp_space(cfg)