Milad Alshomary
committed on
Commit
·
64aa784
1
Parent(s):
f18e0e7
updates
Browse files
utils/interp_space_utils.py
CHANGED
|
@@ -549,14 +549,14 @@ def compute_clusters_style_representation_3(
|
|
| 549 |
cluster_ids: List[Any],
|
| 550 |
cluster_label_clm_name: str = 'authorID',
|
| 551 |
max_num_feats: int = 25,
|
| 552 |
-
max_num_documents_per_author=
|
| 553 |
max_num_authors=10,
|
| 554 |
max_authors_for_span_extraction=4,
|
| 555 |
top_k: int = 10
|
| 556 |
):
|
| 557 |
|
| 558 |
print(f"Computing style representation for visible clusters: {len(cluster_ids)}")
|
| 559 |
-
# STEP 1: Identify features on
|
| 560 |
background_corpus_df['fullText'] = background_corpus_df['fullText'].map(lambda x: '\n\n'.join(x[:max_num_documents_per_author]) if isinstance(x, list) else x)
|
| 561 |
background_corpus_df_feat_id = background_corpus_df[background_corpus_df[cluster_label_clm_name].isin(cluster_ids)]
|
| 562 |
|
|
|
|
| 549 |
cluster_ids: List[Any],
|
| 550 |
cluster_label_clm_name: str = 'authorID',
|
| 551 |
max_num_feats: int = 25,
|
| 552 |
+
max_num_documents_per_author=10,
|
| 553 |
max_num_authors=10,
|
| 554 |
max_authors_for_span_extraction=4,
|
| 555 |
top_k: int = 10
|
| 556 |
):
|
| 557 |
|
| 558 |
print(f"Computing style representation for visible clusters: {len(cluster_ids)}")
|
| 559 |
+
# STEP 1: Identify features on at most max_num_documents_per_author documents from each of at most max_num_authors authors
|
| 560 |
background_corpus_df['fullText'] = background_corpus_df['fullText'].map(lambda x: '\n\n'.join(x[:max_num_documents_per_author]) if isinstance(x, list) else x)
|
| 561 |
background_corpus_df_feat_id = background_corpus_df[background_corpus_df[cluster_label_clm_name].isin(cluster_ids)]
|
| 562 |
|