Spaces:

darpanaswal
/

Patent_Retrieval

Configuration error

darpanaswal committed on Apr 11, 2025

Commit

101821f

verified ·

1 Parent(s): f7855d2

Update cross_encoder_reranking_train.py

Files changed (1) hide show

cross_encoder_reranking_train.py CHANGED Viewed

@@ -32,7 +32,7 @@ def rank_by_centrality(texts):
     ranked = sorted(zip(texts, centrality_scores), key=lambda x: x[1], reverse=True)
     return [text for text, _ in ranked]
-def cluster_and_rank(texts, threshold=0.6):
     if len(texts) < 2:
         return texts
@@ -145,8 +145,8 @@ def extract_text(content_dict, text_type="full"):
         filtered_dict = process_single_patent(content_dict)
         all_text = []
         # Start with abstract for better context at the beginning
-        if "pa01" in content_dict:
-            all_text.append(content_dict["pa01"])
         # For claims, paragraphs and features, we take only the top-10 most relevant
         # Add claims

     ranked = sorted(zip(texts, centrality_scores), key=lambda x: x[1], reverse=True)
     return [text for text, _ in ranked]
+def cluster_and_rank(texts, threshold=0.75):
     if len(texts) < 2:
         return texts
         filtered_dict = process_single_patent(content_dict)
         all_text = []
         # Start with abstract for better context at the beginning
+        # if "pa01" in content_dict:
+        #     all_text.append(content_dict["pa01"])
         # For claims, paragraphs and features, we take only the top-10 most relevant
         # Add claims