Spaces:

cicboy
/

AI_Blog_Writer

Sleeping

App Files Files Community

cicboy committed on Oct 14, 2025

Commit

5f02fec

1 Parent(s): f7a28da

update hybrid_retriever_tool file

Browse files

Files changed (1) hide show

tools/hybrid_retriever_tool.py +8 -5

tools/hybrid_retriever_tool.py CHANGED Viewed

@@ -7,10 +7,11 @@ from crewai_tools import RagTool
 import os
 class HybridRetrieverTool(RagTool):
-    name = "Hybrid Retriever Tool"
-    description = "Combines BM25 keyword scoring with semantic similarity for hybrid retrieval"
     def __init__(self, alpha=0.6):
         self.alpha = alpha
         self.embedder = SentenceTransformer("all-MiniLM-L6-v2")
         self.tavily = TavilyClient(api_key=os.getenv("TAVILITY_API_KEY"))
@@ -26,17 +27,19 @@ class HybridRetrieverTool(RagTool):
                 corpus.append(content)
         return corpus
-    def _run(self, query, top_k=8):
         """
         Run hybrid search: BM25 + semantic similarity.
         """
         corpus = self._build_corpus(query)
         if not corpus:
             return "No relevant content found."
         bm25 = BM25Okapi([doc.split() for doc in corpus])
         bm25_scores = np.array(bm25.get(query.split()))
         emb_corpus = self.embedder.encode(corpus, convert_to_numpy=True, normalize_embeddings=True)
         emb_query = self.embedder.encode(query, convert_to_numpy=True, normalize_embeddings=True)
         sem_scores = np.dot(emb_corpus, emb_query)
@@ -61,7 +64,7 @@ class HybridRetrieverTool(RagTool):
                 model="gpt-4o-mini",
                 messages=[
                     {"role": "system", "content": "You are an expert summarizer."},
-                    {"role": "user", "content": f"Summarize these passages about {topic}"}
                 ],
                 temperature=0.3
             )

 import os
 class HybridRetrieverTool(RagTool):
+    name: str = "Hybrid Retriever Tool"
+    description: str = "Combines BM25 keyword scoring with semantic similarity for hybrid retrieval"
     def __init__(self, alpha=0.6):
+        super().__init__()
         self.alpha = alpha
         self.embedder = SentenceTransformer("all-MiniLM-L6-v2")
         self.tavily = TavilyClient(api_key=os.getenv("TAVILITY_API_KEY"))
                 corpus.append(content)
         return corpus
+    def _run(self, query: str, top_k=8) -> str:
         """
         Run hybrid search: BM25 + semantic similarity.
         """
         corpus = self._build_corpus(query)
         if not corpus:
             return "No relevant content found."
+        # Lexical relevance
         bm25 = BM25Okapi([doc.split() for doc in corpus])
         bm25_scores = np.array(bm25.get(query.split()))
+        # semantic relevance
         emb_corpus = self.embedder.encode(corpus, convert_to_numpy=True, normalize_embeddings=True)
         emb_query = self.embedder.encode(query, convert_to_numpy=True, normalize_embeddings=True)
         sem_scores = np.dot(emb_corpus, emb_query)
                 model="gpt-4o-mini",
                 messages=[
                     {"role": "system", "content": "You are an expert summarizer."},
+                    {"role": "user", "content": f"Summarize these passages about {topic}:\n\n{text_block}"}
                 ],
                 temperature=0.3
             )