Spaces:

Rabbitt-AI
/

ChanceRAG

Sleeping

App Files Files Community

Rabbitt-AI committed on Oct 1, 2024

Commit

8c91fb0

verified ·

1 Parent(s): 7341dff

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -48

app.py CHANGED Viewed

@@ -312,54 +312,54 @@ class MistralRAGChatbot:
         return reranked_docs
-    def reciprocal_rank_fusion(self, user_query: str, docs: List[dict]) -> List[dict]:
-        k = 60
-        method_ranks = {}
-        fused_scores = {}
-        for doc in docs:
-            method = doc['method']
-            if method not in method_ranks:
-                method_ranks[method] = {doc['index']: 1}
-            else:
-                method_ranks[method][doc['index']] = len(method_ranks[method]) + 1
-        for doc in docs:
-            idx = doc['index']
-            if idx not in fused_scores:
-                fused_scores[idx] = sum(1 / (k + rank) for method_rank in method_ranks.values() for i, rank in method_rank.items() if i == idx)
-        reranked_docs = sorted(docs, key=lambda x: fused_scores.get(x['index'], 0), reverse=True)
-        for doc in reranked_docs:
-            doc['rrf_score'] = fused_scores.get(doc['index'], 0)
-        return reranked_docs
-    def weighted_score_fusion(self, user_query: str, docs: List[dict]) -> List[dict]:
-        method_weights = {
-            'annoy': 0.3,
-            'tfidf': 0.2,
-            'bm25': 0.2,
-            'word2vec': 0.1,
-            'euclidean': 0.1,
-            'jaccard': 0.1
-        }
-        fused_scores = {}
-        for doc in docs:
-            idx = doc['index']
-            if idx not in fused_scores:
-                fused_scores[idx] = doc['score'] * method_weights[doc['method']]
-            else:
-                fused_scores[idx] += doc['score'] * method_weights[doc['method']]
-        reranked_docs = sorted(docs, key=lambda x: fused_scores[x['index']], reverse=True)
-        for doc in reranked_docs:
-            doc['wsf_score'] = fused_scores[doc['index']]
-        return reranked_docs
-    def semantic_similarity_reranking(self, user_query: str, docs: List[dict]) -> List[dict]:
-        query_embedding = np.mean([self.word2vec_model.wv[token] for token in user_query.split() if token in self.word2vec_model.wv], axis=0)
-        for doc in docs:
-            doc_embedding = np.mean([self.word2vec_model.wv[token] for token in doc['text'].split() if token in self.word2vec_model.wv], axis=0)
-            doc_embedding = doc_embedding if doc_embedding.shape == query_embedding.shape else np.zeros(query_embedding.shape)
-            doc['semantic_score'] = cosine_similarity([query_embedding], [doc_embedding])[0][0]
-        return sorted(docs, key=lambda x: x['semantic_score'], reverse=True)
     def build_prompt(self, context: str, user_query: str, response_style: str) -> str:
         styles = {

         return reranked_docs
+    # def reciprocal_rank_fusion(self, user_query: str, docs: List[dict]) -> List[dict]:
+    #     k = 60
+    #     method_ranks = {}
+    #     fused_scores = {}
+    #     for doc in docs:
+    #         method = doc['method']
+    #         if method not in method_ranks:
+    #             method_ranks[method] = {doc['index']: 1}
+    #         else:
+    #             method_ranks[method][doc['index']] = len(method_ranks[method]) + 1
+    #     for doc in docs:
+    #         idx = doc['index']
+    #         if idx not in fused_scores:
+    #             fused_scores[idx] = sum(1 / (k + rank) for method_rank in method_ranks.values() for i, rank in method_rank.items() if i == idx)
+    #     reranked_docs = sorted(docs, key=lambda x: fused_scores.get(x['index'], 0), reverse=True)
+    #     for doc in reranked_docs:
+    #         doc['rrf_score'] = fused_scores.get(doc['index'], 0)
+    #     return reranked_docs
+    # def weighted_score_fusion(self, user_query: str, docs: List[dict]) -> List[dict]:
+    #     method_weights = {
+    #         'annoy': 0.3,
+    #         'tfidf': 0.2,
+    #         'bm25': 0.2,
+    #         'word2vec': 0.1,
+    #         'euclidean': 0.1,
+    #         'jaccard': 0.1
+    #     }
+    #     fused_scores = {}
+    #     for doc in docs:
+    #         idx = doc['index']
+    #         if idx not in fused_scores:
+    #             fused_scores[idx] = doc['score'] * method_weights[doc['method']]
+    #         else:
+    #             fused_scores[idx] += doc['score'] * method_weights[doc['method']]
+    #     reranked_docs = sorted(docs, key=lambda x: fused_scores[x['index']], reverse=True)
+    #     for doc in reranked_docs:
+    #         doc['wsf_score'] = fused_scores[doc['index']]
+    #     return reranked_docs
+    # def semantic_similarity_reranking(self, user_query: str, docs: List[dict]) -> List[dict]:
+    #     query_embedding = np.mean([self.word2vec_model.wv[token] for token in user_query.split() if token in self.word2vec_model.wv], axis=0)
+    #     for doc in docs:
+    #         doc_embedding = np.mean([self.word2vec_model.wv[token] for token in doc['text'].split() if token in self.word2vec_model.wv], axis=0)
+    #         doc_embedding = doc_embedding if doc_embedding.shape == query_embedding.shape else np.zeros(query_embedding.shape)
+    #         doc['semantic_score'] = cosine_similarity([query_embedding], [doc_embedding])[0][0]
+    #     return sorted(docs, key=lambda x: x['semantic_score'], reverse=True)
     def build_prompt(self, context: str, user_query: str, response_style: str) -> str:
         styles = {