File size: 5,061 Bytes
b27eb78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
from openai import OpenAI
from opik import track
from smolagents import Tool

from second_brain_online.config import settings


class HuggingFaceEndpointSummarizerTool(Tool):
    """Summarization tool that calls a Hugging Face dedicated endpoint.

    The endpoint is reached through the ``OpenAI`` client, configured with
    ``settings.HUGGINGFACE_DEDICATED_ENDPOINT`` as base URL and
    ``settings.HUGGINGFACE_ACCESS_TOKEN`` as API key.
    """

    name = "huggingface_summarizer"
    description = """Use this tool to summarize a piece of text. Especially useful when you need to summarize a document."""

    inputs = {
        "text": {
            "type": "string",
            "description": """The text to summarize.""",
        }
    }
    output_type = "string"

    # Instruction-style template; ``{content}`` is replaced by the text to
    # summarize and the result is sent as a single user message.
    SYSTEM_PROMPT = """
    
    Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

    ### Instruction:
    You are a helpful assistant specialized in summarizing documents. Generate a concise TL;DR summary in markdown format having a maximum of 512 characters of the key findings from the provided documents, highlighting the most significant insights

    ### Input:
    {content}

    ### Response:
    """

    def __init__(self, *args, **kwargs) -> None:
        """Validate the Hugging Face settings and build the API client.

        Args:
            *args: Forwarded unchanged to ``Tool.__init__``.
            **kwargs: Forwarded unchanged to ``Tool.__init__``.

        Raises:
            ValueError: If ``HUGGINGFACE_ACCESS_TOKEN`` or
                ``HUGGINGFACE_DEDICATED_ENDPOINT`` is not set.
        """
        super().__init__(*args, **kwargs)

        # Explicit raises rather than ``assert``: assertions are stripped when
        # Python runs with ``-O``, which would let a missing setting reach the
        # client as ``None`` without any error here.
        if settings.HUGGINGFACE_ACCESS_TOKEN is None:
            raise ValueError(
                "HUGGINGFACE_ACCESS_TOKEN is required to use the dedicated endpoint. Add it to the .env file."
            )
        if settings.HUGGINGFACE_DEDICATED_ENDPOINT is None:
            raise ValueError(
                "HUGGINGFACE_DEDICATED_ENDPOINT is required to use the dedicated endpoint. Add it to the .env file."
            )

        self.__client = OpenAI(
            base_url=settings.HUGGINGFACE_DEDICATED_ENDPOINT,
            api_key=settings.HUGGINGFACE_ACCESS_TOKEN,
        )

    @track
    def forward(self, text: str) -> str:
        """Summarize ``text`` with the dedicated endpoint.

        Args:
            text: The text to summarize.

        Returns:
            The message content of the first completion choice, or an empty
            string when the response carries no content.
        """
        result = self.__client.chat.completions.create(
            model="tgi",
            messages=[
                {
                    "role": "user",
                    "content": self.SYSTEM_PROMPT.format(content=text),
                },
            ],
        )

        # ``message.content`` is nullable in the chat-completion schema;
        # normalise so the declared ``str`` return type always holds.
        return result.choices[0].message.content or ""


class OpenAISummarizerTool(Tool):
    """Query-focused summarization tool that calls the OpenAI chat API.

    Uses ``settings.OPENAI_API_KEY`` for authentication and
    ``settings.OPENAI_MODEL_ID`` as the model.
    """

    name = "openai_summarizer"
    description = """Use this tool to summarize search results in XML format. This tool is especially useful when you need to analyze multiple documents from search results. The tool will parse XML search results, identify topics that are directly relevant to the user's query, and create a focused summary with document references. It filters out irrelevant topics to ensure the summary directly answers the user's question."""

    inputs = {
        "text": {
            "type": "string",
            "description": """The text to summarize.""",
        }
    }
    output_type = "string"

    # NOTE: despite its name, this template is sent as the *user* message in
    # ``forward``; a separate, shorter system message is defined inline there.
    # ``{content}`` is replaced by the text passed to the tool.
    SYSTEM_PROMPT = """You are an expert document analyst specialized in query-focused summarization.

Your task is to analyze search results and create a focused summary that directly answers the user's question.

When you receive XML search results, you should:
1. Parse ALL documents from the XML structure
2. Identify topics that are directly relevant to the user's query
3. Filter out irrelevant topics that don't relate to the question
4. Group related information by relevant topics
5. Extract key insights that directly answer the user's question
6. Include document references with titles and dates when available

Analysis Guidelines:
- Focus on information that directly answers the user's question
- Only include topics that are relevant to the query
- Use specific document titles and dates from the XML metadata when available
- Ignore irrelevant information like cookie policies, privacy policies, HTTP errors, etc.
- Create a well-structured, readable summary
- Group similar topics together when appropriate

Document content:
{content}

Generate a focused summary that directly answers the user's question, organized by relevant topics with document references. Exclude any topics that don't directly relate to the question."""

    def __init__(self, *args, **kwargs) -> None:
        """Build the OpenAI client used by ``forward``.

        Args:
            *args: Forwarded unchanged to ``Tool.__init__``.
            **kwargs: Forwarded unchanged to ``Tool.__init__``.
        """
        super().__init__(*args, **kwargs)

        self.__client = OpenAI(
            base_url="https://api.openai.com/v1",
            api_key=settings.OPENAI_API_KEY,
        )

    @track
    def forward(self, text: str) -> str:
        """Summarize ``text`` with the configured OpenAI model.

        Args:
            text: The text to summarize.

        Returns:
            The message content of the first completion choice, or an empty
            string when the response carries no content.
        """
        result = self.__client.chat.completions.create(
            model=settings.OPENAI_MODEL_ID,
            messages=[
                {
                    "role": "system",
                    "content": "You are an expert document analyst specialized in query-focused topic-based summarization. You excel at parsing XML search results, identifying relevant topics, and creating structured summaries with proper document references."
                },
                {
                    "role": "user",
                    "content": self.SYSTEM_PROMPT.format(content=text),
                },
            ],
            temperature=0.1,  # Lower temperature for more consistent, focused output
            max_tokens=2000,  # Increased token limit for more detailed summaries
        )

        # ``message.content`` is nullable in the chat-completion schema;
        # normalise so the declared ``str`` return type always holds.
        return result.choices[0].message.content or ""