Spaces:
Sleeping
Sleeping
File size: 5,061 Bytes
b27eb78 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
from openai import OpenAI
from opik import track
from smolagents import Tool
from second_brain_online.config import settings
class HuggingFaceEndpointSummarizerTool(Tool):
    """Summarize a piece of text using a dedicated Hugging Face TGI endpoint.

    The dedicated endpoint speaks the OpenAI chat-completions protocol, so the
    standard ``OpenAI`` client is pointed at the endpoint URL and authenticated
    with the Hugging Face access token.
    """

    name = "huggingface_summarizer"
    description = """Use this tool to summarize a piece of text. Especially useful when you need to summarize a document."""
    inputs = {
        "text": {
            "type": "string",
            "description": """The text to summarize.""",
        }
    }
    output_type = "string"

    # Alpaca-style instruction prompt; ``{content}`` is filled with the input text.
    SYSTEM_PROMPT = """
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
You are a helpful assistant specialized in summarizing documents. Generate a concise TL;DR summary in markdown format having a maximum of 512 characters of the key findings from the provided documents, highlighting the most significant insights
### Input:
{content}
### Response:
"""

    def __init__(self, *args, **kwargs) -> None:
        """Validate required settings and build the endpoint client.

        Raises:
            ValueError: If ``HUGGINGFACE_ACCESS_TOKEN`` or
                ``HUGGINGFACE_DEDICATED_ENDPOINT`` is not configured.
        """
        super().__init__(*args, **kwargs)
        # Raise explicitly instead of using `assert`: assertions are stripped
        # when Python runs with -O, which would silently skip this validation.
        if settings.HUGGINGFACE_ACCESS_TOKEN is None:
            raise ValueError(
                "HUGGINGFACE_ACCESS_TOKEN is required to use the dedicated endpoint. Add it to the .env file."
            )
        if settings.HUGGINGFACE_DEDICATED_ENDPOINT is None:
            raise ValueError(
                "HUGGINGFACE_DEDICATED_ENDPOINT is required to use the dedicated endpoint. Add it to the .env file."
            )
        self.__client = OpenAI(
            base_url=settings.HUGGINGFACE_DEDICATED_ENDPOINT,
            api_key=settings.HUGGINGFACE_ACCESS_TOKEN,
        )

    @track
    def forward(self, text: str) -> str:
        """Return a TL;DR markdown summary of ``text`` from the TGI endpoint.

        Args:
            text: The document text to summarize.

        Returns:
            The summary produced by the model.
        """
        result = self.__client.chat.completions.create(
            # TGI dedicated endpoints expose a single model under the "tgi" alias.
            model="tgi",
            messages=[
                {
                    "role": "user",
                    "content": self.SYSTEM_PROMPT.format(content=text),
                },
            ],
        )
        return result.choices[0].message.content
class OpenAISummarizerTool(Tool):
    """Summarize XML-formatted search results with an OpenAI chat model.

    Parses XML search results, keeps only topics relevant to the user's
    query, and produces a structured summary with document references.
    """

    name = "openai_summarizer"
    description = """Use this tool to summarize search results in XML format. This tool is especially useful when you need to analyze multiple documents from search results. The tool will parse XML search results, identify topics that are directly relevant to the user's query, and create a focused summary with document references. It filters out irrelevant topics to ensure the summary directly answers the user's question."""
    inputs = {
        "text": {
            "type": "string",
            "description": """The text to summarize.""",
        }
    }
    output_type = "string"

    # User-turn prompt template; ``{content}`` is filled with the XML search results.
    SYSTEM_PROMPT = """You are an expert document analyst specialized in query-focused summarization.
Your task is to analyze search results and create a focused summary that directly answers the user's question.
When you receive XML search results, you should:
1. Parse ALL documents from the XML structure
2. Identify topics that are directly relevant to the user's query
3. Filter out irrelevant topics that don't relate to the question
4. Group related information by relevant topics
5. Extract key insights that directly answer the user's question
6. Include document references with titles and dates when available
Analysis Guidelines:
- Focus on information that directly answers the user's question
- Only include topics that are relevant to the query
- Use specific document titles and dates from the XML metadata when available
- Ignore irrelevant information like cookie policies, privacy policies, HTTP errors, etc.
- Create a well-structured, readable summary
- Group similar topics together when appropriate
Document content:
{content}
Generate a focused summary that directly answers the user's question, organized by relevant topics with document references. Exclude any topics that don't directly relate to the question."""

    def __init__(self, *args, **kwargs) -> None:
        """Validate the API key setting and build the OpenAI client.

        Raises:
            ValueError: If ``OPENAI_API_KEY`` is not configured. Checked up
                front (consistent with ``HuggingFaceEndpointSummarizerTool``)
                so a missing key fails fast instead of surfacing later as an
                opaque authentication error on the first request.
        """
        super().__init__(*args, **kwargs)
        if settings.OPENAI_API_KEY is None:
            raise ValueError(
                "OPENAI_API_KEY is required to use the OpenAI summarizer. Add it to the .env file."
            )
        self.__client = OpenAI(
            base_url="https://api.openai.com/v1",
            api_key=settings.OPENAI_API_KEY,
        )

    @track
    def forward(self, text: str) -> str:
        """Return a query-focused summary of XML search results.

        Args:
            text: XML-formatted search results to analyze and summarize.

        Returns:
            A structured summary organized by relevant topics.
        """
        result = self.__client.chat.completions.create(
            model=settings.OPENAI_MODEL_ID,
            messages=[
                {
                    "role": "system",
                    "content": "You are an expert document analyst specialized in query-focused topic-based summarization. You excel at parsing XML search results, identifying relevant topics, and creating structured summaries with proper document references."
                },
                {
                    "role": "user",
                    "content": self.SYSTEM_PROMPT.format(content=text),
                },
            ],
            temperature=0.1,  # Lower temperature for more consistent, focused output
            max_tokens=2000,  # Increased token limit for more detailed summaries
        )
        return result.choices[0].message.content
|