fahmiaziz98 committed on
Commit
9e5acab
·
1 Parent(s): fb8f5fc
src/api/routers/rerank.py CHANGED
@@ -62,11 +62,8 @@ async def rerank_documents(
62
  )
63
 
64
  try:
65
- # Extract kwargs but exclude rerank-specific fields
66
  kwargs = extract_embedding_kwargs(request)
67
 
68
- # Remove fields that are already passed as positional arguments
69
- # to avoid "got multiple values for argument" error
70
  kwargs.pop("query", None)
71
  kwargs.pop("documents", None)
72
  kwargs.pop("top_k", None)
@@ -80,19 +77,10 @@ async def rerank_documents(
80
  detail=f"Model '{request.model_id}' is not a rerank model. Type: {config.type}",
81
  )
82
 
83
- # Debug logs BEFORE calling rank_document
84
- logger.debug(f"Rerank request - Query: '{request.query}'")
85
- logger.debug(f"Documents to rank: {len(valid_docs)}")
86
- if valid_docs:
87
- logger.debug(f"First document: {valid_docs[0][1][:100]}...")
88
- logger.debug(f"Top K: {request.top_k}")
89
-
90
  start = time.time()
91
 
92
- # Extract documents for ranking
93
  documents_list = [doc for _, doc in valid_docs]
94
-
95
- # Call rank_document - returns only top_k results
96
  ranking_results = model.rank_document(
97
  query=request.query,
98
  documents=documents_list,
@@ -102,18 +90,10 @@ async def rerank_documents(
102
 
103
  processing_time = time.time() - start
104
 
105
- # Debug logs AFTER rank_document
106
- logger.debug(f"Ranking returned {len(ranking_results)} results")
107
- if ranking_results:
108
- logger.debug(f"Top result score: {ranking_results[0]}")
109
-
110
- # Build results from ranking_results
111
- # ranking_results already contains top_k items with scores
112
  results = []
113
 
114
  for rank_result in ranking_results:
115
- # Get original index from valid_docs
116
- doc_idx = rank_result.get('corpus_id', 0) # Index in filtered list
117
  if doc_idx < len(valid_docs):
118
  original_idx = valid_docs[doc_idx][0] # Original index
119
  doc_text = documents_list[doc_idx]
 
62
  )
63
 
64
  try:
 
65
  kwargs = extract_embedding_kwargs(request)
66
 
 
 
67
  kwargs.pop("query", None)
68
  kwargs.pop("documents", None)
69
  kwargs.pop("top_k", None)
 
77
  detail=f"Model '{request.model_id}' is not a rerank model. Type: {config.type}",
78
  )
79
 
 
 
 
 
 
 
 
80
  start = time.time()
81
 
 
82
  documents_list = [doc for _, doc in valid_docs]
83
+
 
84
  ranking_results = model.rank_document(
85
  query=request.query,
86
  documents=documents_list,
 
90
 
91
  processing_time = time.time() - start
92
 
 
 
 
 
 
 
 
93
  results = []
94
 
95
  for rank_result in ranking_results:
96
+ doc_idx = rank_result.get('corpus_id', 0)
 
97
  if doc_idx < len(valid_docs):
98
  original_idx = valid_docs[doc_idx][0] # Original index
99
  doc_text = documents_list[doc_idx]
src/models/embeddings/rank.py CHANGED
@@ -115,8 +115,6 @@ class RerankModel:
115
  self.load()
116
 
117
  try:
118
- # model.rank returns List[Dict] with 'corpus_id' and 'score'
119
- # Already sorted by score (highest first) and limited to top_k
120
  ranking_results = self.model.rank(
121
  query,
122
  documents,
@@ -157,14 +155,11 @@ class RerankModel:
157
  if not rankings:
158
  return []
159
 
160
- # Extract raw scores
161
  raw_scores = [ranking["score"] for ranking in rankings]
162
 
163
- # Min-Max normalization
164
  min_score = min(raw_scores)
165
  max_score = max(raw_scores)
166
 
167
- # If all scores are the same, return max target value
168
  if max_score == min_score:
169
  return [
170
  {
@@ -174,7 +169,6 @@ class RerankModel:
174
  for r in rankings
175
  ]
176
 
177
- # Normalize to target range
178
  target_min, target_max = target_range
179
  normalized_rankings = []
180
 
 
115
  self.load()
116
 
117
  try:
 
 
118
  ranking_results = self.model.rank(
119
  query,
120
  documents,
 
155
  if not rankings:
156
  return []
157
 
 
158
  raw_scores = [ranking["score"] for ranking in rankings]
159
 
 
160
  min_score = min(raw_scores)
161
  max_score = max(raw_scores)
162
 
 
163
  if max_score == min_score:
164
  return [
165
  {
 
169
  for r in rankings
170
  ]
171
 
 
172
  target_min, target_max = target_range
173
  normalized_rankings = []
174