Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -312,54 +312,54 @@ class MistralRAGChatbot:
|
|
| 312 |
|
| 313 |
return reranked_docs
|
| 314 |
|
| 315 |
-
def reciprocal_rank_fusion(self, user_query: str, docs: List[dict]) -> List[dict]:
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
def weighted_score_fusion(self, user_query: str, docs: List[dict]) -> List[dict]:
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
def semantic_similarity_reranking(self, user_query: str, docs: List[dict]) -> List[dict]:
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
|
| 364 |
def build_prompt(self, context: str, user_query: str, response_style: str) -> str:
|
| 365 |
styles = {
|
|
|
|
| 312 |
|
| 313 |
return reranked_docs
|
| 314 |
|
| 315 |
+
# def reciprocal_rank_fusion(self, user_query: str, docs: List[dict]) -> List[dict]:
|
| 316 |
+
# k = 60
|
| 317 |
+
# method_ranks = {}
|
| 318 |
+
# fused_scores = {}
|
| 319 |
+
# for doc in docs:
|
| 320 |
+
# method = doc['method']
|
| 321 |
+
# if method not in method_ranks:
|
| 322 |
+
# method_ranks[method] = {doc['index']: 1}
|
| 323 |
+
# else:
|
| 324 |
+
# method_ranks[method][doc['index']] = len(method_ranks[method]) + 1
|
| 325 |
+
# for doc in docs:
|
| 326 |
+
# idx = doc['index']
|
| 327 |
+
# if idx not in fused_scores:
|
| 328 |
+
# fused_scores[idx] = sum(1 / (k + rank) for method_rank in method_ranks.values() for i, rank in method_rank.items() if i == idx)
|
| 329 |
+
# reranked_docs = sorted(docs, key=lambda x: fused_scores.get(x['index'], 0), reverse=True)
|
| 330 |
+
# for doc in reranked_docs:
|
| 331 |
+
# doc['rrf_score'] = fused_scores.get(doc['index'], 0)
|
| 332 |
+
# return reranked_docs
|
| 333 |
+
|
| 334 |
+
# def weighted_score_fusion(self, user_query: str, docs: List[dict]) -> List[dict]:
|
| 335 |
+
# method_weights = {
|
| 336 |
+
# 'annoy': 0.3,
|
| 337 |
+
# 'tfidf': 0.2,
|
| 338 |
+
# 'bm25': 0.2,
|
| 339 |
+
# 'word2vec': 0.1,
|
| 340 |
+
# 'euclidean': 0.1,
|
| 341 |
+
# 'jaccard': 0.1
|
| 342 |
+
# }
|
| 343 |
+
# fused_scores = {}
|
| 344 |
+
# for doc in docs:
|
| 345 |
+
# idx = doc['index']
|
| 346 |
+
# if idx not in fused_scores:
|
| 347 |
+
# fused_scores[idx] = doc['score'] * method_weights[doc['method']]
|
| 348 |
+
# else:
|
| 349 |
+
# fused_scores[idx] += doc['score'] * method_weights[doc['method']]
|
| 350 |
+
|
| 351 |
+
# reranked_docs = sorted(docs, key=lambda x: fused_scores[x['index']], reverse=True)
|
| 352 |
+
# for doc in reranked_docs:
|
| 353 |
+
# doc['wsf_score'] = fused_scores[doc['index']]
|
| 354 |
+
# return reranked_docs
|
| 355 |
+
|
| 356 |
+
# def semantic_similarity_reranking(self, user_query: str, docs: List[dict]) -> List[dict]:
|
| 357 |
+
# query_embedding = np.mean([self.word2vec_model.wv[token] for token in user_query.split() if token in self.word2vec_model.wv], axis=0)
|
| 358 |
+
# for doc in docs:
|
| 359 |
+
# doc_embedding = np.mean([self.word2vec_model.wv[token] for token in doc['text'].split() if token in self.word2vec_model.wv], axis=0)
|
| 360 |
+
# doc_embedding = doc_embedding if doc_embedding.shape == query_embedding.shape else np.zeros(query_embedding.shape)
|
| 361 |
+
# doc['semantic_score'] = cosine_similarity([query_embedding], [doc_embedding])[0][0]
|
| 362 |
+
# return sorted(docs, key=lambda x: x['semantic_score'], reverse=True)
|
| 363 |
|
| 364 |
def build_prompt(self, context: str, user_query: str, response_style: str) -> str:
|
| 365 |
styles = {
|