cicboy committed on
Commit
8ec15d6
·
1 Parent(s): 506506f

update hybrid_retriever_tool file

Browse files
Files changed (1) hide show
  1. tools/hybrid_retriever_tool.py +19 -2
tools/hybrid_retriever_tool.py CHANGED
@@ -6,6 +6,7 @@ from openai import OpenAI
6
  from crewai_tools import RagTool
7
  from pydantic import Field, PrivateAttr
8
  import os
 
9
 
10
  class HybridRetrieverTool(RagTool):
11
  name: str = "Hybrid Retriever Tool"
@@ -60,12 +61,28 @@ class HybridRetrieverTool(RagTool):
60
  def summarize_passages(self, topic: str, passages):
61
  if isinstance(passages, str):
62
  passages = [passages]
63
- text_block = "\n".join(passages)
 
 
 
 
 
 
 
 
64
  try:
65
  response = self._client.chat.completions.create(
66
  model="gpt-4o-mini",
67
  messages=[
68
- {"role": "system", "content": "You are an expert summarizer."},
 
 
 
 
 
 
 
 
69
  {"role": "user", "content": f"Summarize these passages about {topic}:\n\n{text_block}"}
70
  ],
71
  temperature=0.3
 
6
  from crewai_tools import RagTool
7
  from pydantic import Field, PrivateAttr
8
  import os
9
+ import re
10
 
11
  class HybridRetrieverTool(RagTool):
12
  name: str = "Hybrid Retriever Tool"
 
61
  def summarize_passages(self, topic: str, passages):
62
  if isinstance(passages, str):
63
  passages = [passages]
64
+ # 🧹 Clean each passage (remove URLs, collapse whitespace, strip characters outside a basic alphanumeric/punctuation set)
65
+ clean_passages = []
66
+ for p in passages:
67
+ p = re.sub(r"http\S+", "", p) # remove URLs
68
+ p = re.sub(r"\s+", " ", p).strip() # normalize spaces
69
+ p = re.sub(r"[^A-Za-z0-9.,!?;:()\-\s]", "", p) # strip stray symbols
70
+ clean_passages.append(p)
71
+ # Build condensed input (first 5 passages, truncated to 4000 characters)
72
+ text_block = " ".join(clean_passages[:5])[:4000]
73
  try:
74
  response = self._client.chat.completions.create(
75
  model="gpt-4o-mini",
76
  messages=[
77
+ {
78
+ "role": "system",
79
+ "content": (
80
+ "You are a concise research summarizer. "
81
+ "Produce a 1–2 paragraph overview that highlights key facts, "
82
+ "themes, and findings relevant to the topic. "
83
+ "Exclude URLs, lists, HTML remnants, or boilerplate text."
84
+ ),
85
+ },
86
  {"role": "user", "content": f"Summarize these passages about {topic}:\n\n{text_block}"}
87
  ],
88
  temperature=0.3