Spaces:
Sleeping
Sleeping
update hybrid_retriever_tool file
Browse files
tools/hybrid_retriever_tool.py
CHANGED
|
@@ -6,6 +6,7 @@ from openai import OpenAI
|
|
| 6 |
from crewai_tools import RagTool
|
| 7 |
from pydantic import Field, PrivateAttr
|
| 8 |
import os
|
|
|
|
| 9 |
|
| 10 |
class HybridRetrieverTool(RagTool):
|
| 11 |
name: str = "Hybrid Retriever Tool"
|
|
@@ -60,12 +61,28 @@ class HybridRetrieverTool(RagTool):
|
|
| 60 |
def summarize_passages(self, topic: str, passages):
|
| 61 |
if isinstance(passages, str):
|
| 62 |
passages = [passages]
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
try:
|
| 65 |
response = self._client.chat.completions.create(
|
| 66 |
model="gpt-4o-mini",
|
| 67 |
messages=[
|
| 68 |
-
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
{"role": "user", "content": f"Summarize these passages about {topic}:\n\n{text_block}"}
|
| 70 |
],
|
| 71 |
temperature=0.3
|
|
|
|
| 6 |
from crewai_tools import RagTool
|
| 7 |
from pydantic import Field, PrivateAttr
|
| 8 |
import os
|
| 9 |
+
import re
|
| 10 |
|
| 11 |
class HybridRetrieverTool(RagTool):
|
| 12 |
name: str = "Hybrid Retriever Tool"
|
|
|
|
| 61 |
def summarize_passages(self, topic: str, passages):
|
| 62 |
if isinstance(passages, str):
|
| 63 |
passages = [passages]
|
| 64 |
+
# 🧹 Clean each passage (remove links, HTML tags, redundant whitespace)
|
| 65 |
+
clean_passages = []
|
| 66 |
+
for p in passages:
|
| 67 |
+
p = re.sub(r"http\S+", "", p) # remove URLs
|
| 68 |
+
p = re.sub(r"\s+", " ", p).strip() # normalize spaces
|
| 69 |
+
p = re.sub(r"[^A-Za-z0-9.,!?;:()\-\s]", "", p) # strip stray symbols
|
| 70 |
+
clean_passages.append(p)
|
| 71 |
+
# Build condensed input (limit total tokens)
|
| 72 |
+
text_block = " ".join(clean_passages[:5])[:4000]
|
| 73 |
try:
|
| 74 |
response = self._client.chat.completions.create(
|
| 75 |
model="gpt-4o-mini",
|
| 76 |
messages=[
|
| 77 |
+
{
|
| 78 |
+
"role": "system",
|
| 79 |
+
"content": (
|
| 80 |
+
"You are a concise research summarizer. "
|
| 81 |
+
"Produce a 1–2 paragraph overview that highlights key facts, "
|
| 82 |
+
"themes, and findings relevant to the topic. "
|
| 83 |
+
"Exclude URLs, lists, HTML remnants, or boilerplate text."
|
| 84 |
+
),
|
| 85 |
+
},
|
| 86 |
{"role": "user", "content": f"Summarize these passages about {topic}:\n\n{text_block}"}
|
| 87 |
],
|
| 88 |
temperature=0.3
|