Milad Alshomary committed on
Commit
64aa784
·
1 Parent(s): f18e0e7
Files changed (1) hide show
  1. utils/interp_space_utils.py +2 -2
utils/interp_space_utils.py CHANGED
@@ -549,14 +549,14 @@ def compute_clusters_style_representation_3(
549
  cluster_ids: List[Any],
550
  cluster_label_clm_name: str = 'authorID',
551
  max_num_feats: int = 25,
552
- max_num_documents_per_author=1,
553
  max_num_authors=10,
554
  max_authors_for_span_extraction=4,
555
  top_k: int = 10
556
  ):
557
 
558
  print(f"Computing style representation for visible clusters: {len(cluster_ids)}")
559
- # STEP 1: Identify features on 5 visible authors
560
  background_corpus_df['fullText'] = background_corpus_df['fullText'].map(lambda x: '\n\n'.join(x[:max_num_documents_per_author]) if isinstance(x, list) else x)
561
  background_corpus_df_feat_id = background_corpus_df[background_corpus_df[cluster_label_clm_name].isin(cluster_ids)]
562
 
 
549
  cluster_ids: List[Any],
550
  cluster_label_clm_name: str = 'authorID',
551
  max_num_feats: int = 25,
552
+ max_num_documents_per_author=10,
553
  max_num_authors=10,
554
  max_authors_for_span_extraction=4,
555
  top_k: int = 10
556
  ):
557
 
558
  print(f"Computing style representation for visible clusters: {len(cluster_ids)}")
559
+ # STEP 1: Identify features using up to max_num_documents_per_author documents from each of up to max_num_authors authors
560
  background_corpus_df['fullText'] = background_corpus_df['fullText'].map(lambda x: '\n\n'.join(x[:max_num_documents_per_author]) if isinstance(x, list) else x)
561
  background_corpus_df_feat_id = background_corpus_df[background_corpus_df[cluster_label_clm_name].isin(cluster_ids)]
562