Spaces:

cicboy
/

AI_Blog_Writer

Sleeping

App Files Files Community

cicboy committed on Oct 14, 2025

Commit

2237728

1 Parent(s): 9ab1b3b

update hybrid_retriever_tool file

Browse files

Files changed (1) hide show

tools/hybrid_retriever_tool.py +36 -0

tools/hybrid_retriever_tool.py CHANGED Viewed

@@ -8,6 +8,7 @@ from pydantic import Field, PrivateAttr
 import os
 from html import unescape
 import re
 class HybridRetrieverTool(RagTool):
     name: str = "Hybrid Retriever Tool"
@@ -89,6 +90,38 @@ class HybridRetrieverTool(RagTool):
         all_urls = list(dict.fromkeys(all_urls))[:5]
         return corpus, all_urls
     def _run(self, query: str, top_k: int = 8) -> str:
         """
         Run hybrid search: BM25 + semantic similarity.
@@ -119,6 +152,9 @@ class HybridRetrieverTool(RagTool):
         top_indices= np.argsort(hybrid_scores)[::-1][:top_k]
         top_passages = [corpus[i] for i in top_indices]
         return "\n\n".join(top_passages)
     def summarize_passages(self, topic: str, passages):

 import os
 from html import unescape
 import re
+import json
 class HybridRetrieverTool(RagTool):
     name: str = "Hybrid Retriever Tool"
         all_urls = list(dict.fromkeys(all_urls))[:5]
         return corpus, all_urls
+    def _rerank(self, query:str, passages: list[str]) -> list[str]:
+                """Use LLM to rerank received passages for contextual relevance"""
+                try:
+                    prompt = f"""
+                You are a research assistant. Rank the following passages by how relevant they are to the topic:
+                "{query}"
+                Return a JSON array of the top 5 passages(most to least relevant).
+                Passages:
+                {json.dumps(passages, indent=2)}
+                """
+                    response = self. _client.chat.completions.create(
+                        model = "gpt-4o-mini",
+                        messages=[
+                            {"role": "system", "content": "You are an expert re-ranker for information retrieval."},
+                            {"role": "user", "content": prompt}
+                    ],
+                    temperature=0
+                )
+                    content = response.choices[0].message.conten.strip()
+                    try:
+                        ranked = json.loads(response.choices[0].message.content)
+                        # Keep only valid strings
+                        ranked = [p for p in ranked if isinstance(p, str)]
+                        return ranked if ranked else passages
+                    except json.JSONDecodeError:
+                        print("⚠️ Reranker returned non-JSON output, using original order.")
+                except Exception as e:
+                    print(f"Re-ranker failed: {e}")
+                    return passages
     def _run(self, query: str, top_k: int = 8) -> str:
         """
         Run hybrid search: BM25 + semantic similarity.
         top_indices= np.argsort(hybrid_scores)[::-1][:top_k]
         top_passages = [corpus[i] for i in top_indices]
+        #LLM-based re-ranker
+        reranked = self._rerank(query, top_passages)
         return "\n\n".join(top_passages)
     def summarize_passages(self, topic: str, passages):