Spaces:
Configuration error
Configuration error
Update cross_encoder_reranking_train.py
Browse files
cross_encoder_reranking_train.py
CHANGED
|
@@ -32,7 +32,7 @@ def rank_by_centrality(texts):
|
|
| 32 |
ranked = sorted(zip(texts, centrality_scores), key=lambda x: x[1], reverse=True)
|
| 33 |
return [text for text, _ in ranked]
|
| 34 |
|
| 35 |
-
def cluster_and_rank(texts, threshold=0.
|
| 36 |
if len(texts) < 2:
|
| 37 |
return texts
|
| 38 |
|
|
@@ -145,8 +145,8 @@ def extract_text(content_dict, text_type="full"):
|
|
| 145 |
filtered_dict = process_single_patent(content_dict)
|
| 146 |
all_text = []
|
| 147 |
# Start with abstract for better context at the beginning
|
| 148 |
-
if "pa01" in content_dict:
|
| 149 |
-
|
| 150 |
|
| 151 |
# For claims, paragraphs and features, we take only the top-10 most relevant
|
| 152 |
# Add claims
|
|
|
|
| 32 |
ranked = sorted(zip(texts, centrality_scores), key=lambda x: x[1], reverse=True)
|
| 33 |
return [text for text, _ in ranked]
|
| 34 |
|
| 35 |
+
def cluster_and_rank(texts, threshold=0.75):
|
| 36 |
if len(texts) < 2:
|
| 37 |
return texts
|
| 38 |
|
|
|
|
| 145 |
filtered_dict = process_single_patent(content_dict)
|
| 146 |
all_text = []
|
| 147 |
# Start with abstract for better context at the beginning
|
| 148 |
+
# if "pa01" in content_dict:
|
| 149 |
+
# all_text.append(content_dict["pa01"])
|
| 150 |
|
| 151 |
# For claims, paragraphs and features, we take only the top-10 most relevant
|
| 152 |
# Add claims
|