Spaces:

cicboy
/

AI_Blog_Writer

Sleeping

cicboy committed on Oct 14, 2025

Commit

bb696dc

1 Parent(s): 5f02fec

update hybrid_retriever_tool and add pydantic as a dependency

Files changed (1) hide show

tools/hybrid_retriever_tool.py CHANGED Viewed

@@ -4,30 +4,27 @@ from sentence_transformers import SentenceTransformer
 from tavily import TavilyClient
 from openai import OpenAI
 from crewai_tools import RagTool
 import os
 class HybridRetrieverTool(RagTool):
     name: str = "Hybrid Retriever Tool"
     description: str = "Combines BM25 keyword scoring with semantic similarity for hybrid retrieval"
-    def __init__(self, alpha=0.6):
-        super().__init__()
-        self.alpha = alpha
         self.embedder = SentenceTransformer("all-MiniLM-L6-v2")
-        self.tavily = TavilyClient(api_key=os.getenv("TAVILITY_API_KEY"))
         self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
     def _build_corpus(self, topic):
         """Fetch up-to-date search results."""
         results = self.tavily.search(query=topic, max_results=30)
-        corpus = []
-        for r in results.get("results", []):
-            content = r.get("content") or ""
-            if len(content.strip()) > 0:
-                corpus.append(content)
         return corpus
-    def _run(self, query: str, top_k=8) -> str:
         """
         Run hybrid search: BM25 + semantic similarity.
         """
@@ -37,7 +34,7 @@ class HybridRetrieverTool(RagTool):
         # Lexical relevance
         bm25 = BM25Okapi([doc.split() for doc in corpus])
-        bm25_scores = np.array(bm25.get(query.split()))
         # semantic relevance
         emb_corpus = self.embedder.encode(corpus, convert_to_numpy=True, normalize_embeddings=True)

 from tavily import TavilyClient
 from openai import OpenAI
 from crewai_tools import RagTool
+from pydantic import Field
 import os
 class HybridRetrieverTool(RagTool):
     name: str = "Hybrid Retriever Tool"
     description: str = "Combines BM25 keyword scoring with semantic similarity for hybrid retrieval"
+    alpha: float = Field(default=0.6, description="Weight between semantic and lexical scores")
+    def __init__(self, **data):
+        super().__init__(**data)
         self.embedder = SentenceTransformer("all-MiniLM-L6-v2")
+        self.tavily = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
         self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
     def _build_corpus(self, topic):
         """Fetch up-to-date search results."""
         results = self.tavily.search(query=topic, max_results=30)
+        corpus = [r.get("content", "").strip() for r in results.get("results", []) if r.get("content")]
         return corpus
+    def _run(self, query: str, top_k: int = 8) -> str:
         """
         Run hybrid search: BM25 + semantic similarity.
         """
         # Lexical relevance
         bm25 = BM25Okapi([doc.split() for doc in corpus])
+        bm25_scores = np.array(bm25.get_scores(query.split()))
         # semantic relevance
         emb_corpus = self.embedder.encode(corpus, convert_to_numpy=True, normalize_embeddings=True)