cicboy committed on
Commit
5f02fec
·
1 Parent(s): f7a28da

update hybrid_retriever_tool file

Browse files
Files changed (1) hide show
  1. tools/hybrid_retriever_tool.py +8 -5
tools/hybrid_retriever_tool.py CHANGED
@@ -7,10 +7,11 @@ from crewai_tools import RagTool
7
  import os
8
 
9
  class HybridRetrieverTool(RagTool):
10
- name = "Hybrid Retriever Tool"
11
- description = "Combines BM25 keyword scoring with semantic similarity for hybrid retrieval"
12
 
13
  def __init__(self, alpha=0.6):
 
14
  self.alpha = alpha
15
  self.embedder = SentenceTransformer("all-MiniLM-L6-v2")
16
  self.tavily = TavilyClient(api_key=os.getenv("TAVILITY_API_KEY"))
@@ -26,17 +27,19 @@ class HybridRetrieverTool(RagTool):
26
  corpus.append(content)
27
  return corpus
28
 
29
- def _run(self, query, top_k=8):
30
  """
31
  Run hybrid search: BM25 + semantic similarity.
32
  """
33
  corpus = self._build_corpus(query)
34
  if not corpus:
35
  return "No relevant content found."
36
-
 
37
  bm25 = BM25Okapi([doc.split() for doc in corpus])
38
  bm25_scores = np.array(bm25.get(query.split()))
39
 
 
40
  emb_corpus = self.embedder.encode(corpus, convert_to_numpy=True, normalize_embeddings=True)
41
  emb_query = self.embedder.encode(query, convert_to_numpy=True, normalize_embeddings=True)
42
  sem_scores = np.dot(emb_corpus, emb_query)
@@ -61,7 +64,7 @@ class HybridRetrieverTool(RagTool):
61
  model="gpt-4o-mini",
62
  messages=[
63
  {"role": "system", "content": "You are an expert summarizer."},
64
- {"role": "user", "content": f"Summarize these passages about {topic}"}
65
  ],
66
  temperature=0.3
67
  )
 
7
  import os
8
 
9
  class HybridRetrieverTool(RagTool):
10
+ name: str = "Hybrid Retriever Tool"
11
+ description: str = "Combines BM25 keyword scoring with semantic similarity for hybrid retrieval"
12
 
13
  def __init__(self, alpha=0.6):
14
+ super().__init__()
15
  self.alpha = alpha
16
  self.embedder = SentenceTransformer("all-MiniLM-L6-v2")
17
  self.tavily = TavilyClient(api_key=os.getenv("TAVILITY_API_KEY"))
 
27
  corpus.append(content)
28
  return corpus
29
 
30
+ def _run(self, query: str, top_k=8) -> str:
31
  """
32
  Run hybrid search: BM25 + semantic similarity.
33
  """
34
  corpus = self._build_corpus(query)
35
  if not corpus:
36
  return "No relevant content found."
37
+
38
+ # Lexical relevance
39
  bm25 = BM25Okapi([doc.split() for doc in corpus])
40
  bm25_scores = np.array(bm25.get(query.split()))
41
 
42
+ # semantic relevance
43
  emb_corpus = self.embedder.encode(corpus, convert_to_numpy=True, normalize_embeddings=True)
44
  emb_query = self.embedder.encode(query, convert_to_numpy=True, normalize_embeddings=True)
45
  sem_scores = np.dot(emb_corpus, emb_query)
 
64
  model="gpt-4o-mini",
65
  messages=[
66
  {"role": "system", "content": "You are an expert summarizer."},
67
+ {"role": "user", "content": f"Summarize these passages about {topic}:\n\n{text_block}"}
68
  ],
69
  temperature=0.3
70
  )