Spaces:

cicboy
/

AI_Blog_Writer

Sleeping

App Files Community

cicboy committed on Oct 14, 2025

Commit

f158f29

1 Parent(s): 8b28e9a

update hybrid_retriever_tool file

Browse files

Files changed (1) hide show

tools/hybrid_retriever_tool.py +3 -38

tools/hybrid_retriever_tool.py CHANGED Viewed

@@ -8,7 +8,6 @@ from pydantic import Field, PrivateAttr
 import os
 from html import unescape
 import re
-import json
 class HybridRetrieverTool(RagTool):
     name: str = "Hybrid Retriever Tool"
@@ -88,39 +87,8 @@ class HybridRetrieverTool(RagTool):
         #Deduplicate and keep top unique URLs
         all_urls = list(dict.fromkeys(all_urls))[:5]
-        return corpus, all_urls
-    def _rerank(self, query:str, passages: list[str]) -> list[str]:
-                """Use LLM to rerank received passages for contextual relevance"""
-                try:
-                    prompt = f"""
-                You are a research assistant. Rank the following passages by how relevant they are to the topic:
-                "{query}"
-                Return a JSON array of the top 5 passages(most to least relevant).
-                Passages:
-                {json.dumps(passages, indent=2)}
-                """
-                    response = self. _client.chat.completions.create(
-                        model = "gpt-4o-mini",
-                        messages=[
-                            {"role": "system", "content": "You are an expert re-ranker for information retrieval."},
-                            {"role": "user", "content": prompt}
-                    ],
-                    temperature=0
-                )
-                    content = response.choices[0].message.content.strip()
-                    try:
-                        ranked = json.loads(response.choices[0].message.content)
-                        # Keep only valid strings
-                        ranked = [p for p in ranked if isinstance(p, str)]
-                        return ranked if ranked else passages
-                    except json.JSONDecodeError:
-                        print("⚠️ Reranker returned non-JSON output, using original order.")
-                except Exception as e:
-                    print(f"Re-ranker failed: {e}")
-                    return passages
     def _run(self, query: str, top_k: int = 8) -> str:
         """
@@ -152,9 +120,6 @@ class HybridRetrieverTool(RagTool):
         top_indices= np.argsort(hybrid_scores)[::-1][:top_k]
         top_passages = [corpus[i] for i in top_indices]
-        #LLM-based re-ranker
-        reranked = self._rerank(query, top_passages)
         return "\n\n".join(top_passages)
     def summarize_passages(self, topic: str, passages):
@@ -214,7 +179,7 @@ Return output in Markdown format.
             summary = response.choices[0].message.content.strip()
             if unique_urls:
-                summary += "\n\n**Sources:**\n" + "\n".join(f"- {u}" for u in unique_urls)
             return summary

 import os
 from html import unescape
 import re
 class HybridRetrieverTool(RagTool):
     name: str = "Hybrid Retriever Tool"
         #Deduplicate and keep top unique URLs
         all_urls = list(dict.fromkeys(all_urls))[:5]
+        print(f"[HybridRetrieverTool] Retrieved {len(corpus)} docs, {len(all_urls)} unique URLs for '{topic}'")
+        return corpus, all_urls
     def _run(self, query: str, top_k: int = 8) -> str:
         """
         top_indices= np.argsort(hybrid_scores)[::-1][:top_k]
         top_passages = [corpus[i] for i in top_indices]
         return "\n\n".join(top_passages)
     def summarize_passages(self, topic: str, passages):
             summary = response.choices[0].message.content.strip()
             if unique_urls:
+                summary += "\n\n**Sources:**\n" + "\n".join(f"- {u}" for u in unique_urls) + "\n"
             return summary