Spaces:

chinmayjha
/

context-ai

Sleeping

App Files Files Community

context-ai / src /second_brain_online /application /agents /tools /summarizer.py

chinmayjha

Improve RAG agent response quality and UX

150cd80 unverified 2 months ago

raw

history blame

6.68 kB

	from openai import OpenAI
	from opik import track
	from smolagents import Tool

	from second_brain_online.config import settings


	class HuggingFaceEndpointSummarizerTool(Tool):
	    """Tool that summarizes text through a dedicated Hugging Face endpoint.

	    The endpoint is called with the OpenAI client, configured from
	    ``settings.HUGGINGFACE_DEDICATED_ENDPOINT`` (base URL) and
	    ``settings.HUGGINGFACE_ACCESS_TOKEN`` (API key).
	    """

	    name = "huggingface_summarizer"
	    description = """Use this tool to summarize a piece of text. Especially useful when you need to summarize a document."""

	    inputs = {
	        "text": {
	            "type": "string",
	            "description": """The text to summarize.""",
	        }
	    }
	    output_type = "string"

	    # Prompt template; ``{content}`` is replaced with the text to summarize
	    # via ``str.format`` in ``forward``.
	    SYSTEM_PROMPT = """

	Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

	### Instruction:
	You are a helpful assistant specialized in summarizing documents. Generate a concise TL;DR summary in markdown format having a maximum of 512 characters of the key findings from the provided documents, highlighting the most significant insights

	### Input:
	{content}

	### Response:
	"""

	    def __init__(self, *args, **kwargs) -> None:
	        """Initialise the tool and build the endpoint client.

	        All positional and keyword arguments are forwarded unchanged to
	        ``Tool.__init__``.

	        Raises:
	            ValueError: If ``settings.HUGGINGFACE_ACCESS_TOKEN`` or
	                ``settings.HUGGINGFACE_DEDICATED_ENDPOINT`` is ``None``.
	        """
	        super().__init__(*args, **kwargs)

	        # Explicit raises rather than ``assert`` so the configuration checks
	        # are still performed when Python runs with ``-O``.
	        if settings.HUGGINGFACE_ACCESS_TOKEN is None:
	            raise ValueError(
	                "HUGGINGFACE_ACCESS_TOKEN is required to use the dedicated endpoint. Add it to the .env file."
	            )
	        if settings.HUGGINGFACE_DEDICATED_ENDPOINT is None:
	            raise ValueError(
	                "HUGGINGFACE_DEDICATED_ENDPOINT is required to use the dedicated endpoint. Add it to the .env file."
	            )

	        self.__client = OpenAI(
	            base_url=settings.HUGGINGFACE_DEDICATED_ENDPOINT,
	            api_key=settings.HUGGINGFACE_ACCESS_TOKEN,
	        )

	    @track
	    def forward(self, text: str) -> str:
	        """Summarize ``text`` with the dedicated endpoint.

	        Args:
	            text: The text to summarize; it is inserted into ``SYSTEM_PROMPT``.

	        Returns:
	            The message content of the first completion choice, or an empty
	            string when the endpoint returned no content.
	        """
	        prompt = self.SYSTEM_PROMPT.format(content=text)
	        result = self.__client.chat.completions.create(
	            # NOTE(review): "tgi" is presumably the model name the dedicated
	            # endpoint expects - confirm against the endpoint configuration.
	            model="tgi",
	            messages=[
	                {
	                    "role": "user",
	                    "content": prompt,
	                },
	            ],
	        )

	        content = result.choices[0].message.content
	        # The SDK types ``content`` as optional; honour the ``str`` return type.
	        return content if content is not None else ""


	class OpenAISummarizerTool(Tool):
	    """Tool that turns retriever output into a final answer with sources.

	    The search results are inserted into ``SYSTEM_PROMPT`` and sent to the
	    OpenAI chat-completions API using ``settings.OPENAI_MODEL_ID`` and
	    ``settings.OPENAI_API_KEY``.
	    """

	    name = "answer_with_sources"
	    description = """Use this tool to generate the complete final answer to the user's question based on search results.

	After retrieving documents with mongodb_vector_search_retriever, use this tool to synthesize a comprehensive answer with a Sources section.

	CRITICAL: This tool's output is the complete answer - after getting results from this tool, you MUST call the built-in final_answer tool and pass this output EXACTLY as-is without any modifications."""

	    inputs = {
	        "search_results": {
	            "type": "string",
	            "description": """The complete search results from mongodb_vector_search_retriever to analyze and synthesize into an answer. Pass the ENTIRE output from the retriever tool.""",
	        }
	    }
	    output_type = "string"

	    # Prompt template; ``{content}`` is replaced with the search results via
	    # ``str.format`` in ``forward``. Field separators are plain ``|``: writing
	    # them as ``\|`` is an invalid escape sequence in a normal string literal
	    # and would put literal backslashes into the format the model must copy.
	    SYSTEM_PROMPT = """Based on the search results below, create a comprehensive answer to the user's question.

	{content}

	Create a two-part response:

	1. ANSWER (with inline citations):
	- Focus on the core issues, concerns, or highlights identified
	- DO NOT mention specific customer names or personal identifiers
	- Group related insights by topic with bullet points
	- Be concise and general, highlighting the problem/concern rather than individuals
	- Add INLINE CITATIONS at the end of each point using format: [Doc X]
	- Number each unique document sequentially (Doc 1, Doc 2, etc.)

	Example:
	• Organizations are planning phone number porting transitions, but custom porting is expensive (~$1,000) and should be done in bulk [Doc 1]
	• Questions about additional license requirements for integrations ($45 per user) [Doc 1]
	• Ringtone volume issues in embedded Salesforce app [Doc 2]

	2. 📚 Sources (at the end):
	- List ONLY UNIQUE documents (de-duplicate by Document ID)
	- Number each unique source to match the inline citations (Doc 1, Doc 2, etc.)
	- Format URLs as markdown links: [View Chat](url) or [View Recording](url)

	For EACH unique document, use this EXACT structure with proper spacing and NO bold/italic formatting:

	Doc X: [Title (Date)]
	Source: [Type] | Document ID: [ID] | [Hyperlinked URL if available] | [User ID if available]

	Summary: [One-line summary of the conversation]

	Key Findings:
	- [Type/Impact] Finding text here
	- [Type/Impact] Finding text here

	Example:

	Doc 1: JustCall Checkin (2025-10-07)
	Source: Justcall Meeting Recordings | Document ID: 4f6f9cee4f

	Summary: Discussion about phone number porting timeline and costs

	Key Findings:
	- [Technical Issue/High] Custom porting is expensive at $1,000 per request
	- [Feature Request/Medium] Need bulk porting option to reduce costs

	Doc 2: Intercom Conversation (2025-10-05)
	Source: Intercom Chats | Document ID: 7a6678783fea06d | [View Chat](https://app.intercom.com/...) | User ID: 432830

	Summary: Customer requesting billing discount due to service interruption

	Key Findings:
	- [Pricing Concern/High] Request for discount due to porting delays
	- [Policy Gap/Medium] No current policy for inactivity-based discounts

	Provide a focused answer with inline citations followed by the well-formatted Sources section with conversation insights."""

	    def __init__(self, *args, **kwargs) -> None:
	        """Initialise the tool and build the OpenAI client.

	        All positional and keyword arguments are forwarded unchanged to
	        ``Tool.__init__``.
	        """
	        super().__init__(*args, **kwargs)

	        self.__client = OpenAI(
	            base_url="https://api.openai.com/v1",
	            api_key=settings.OPENAI_API_KEY,
	        )

	    def forward(self, search_results: str) -> str:
	        """Generate final answer with sources based on search results.

	        Args:
	            search_results: The complete search results to analyze (includes the original query)

	        Returns:
	            Complete answer with Sources section, or an empty string when the
	            API returned no message content.
	        """
	        prompt = self.SYSTEM_PROMPT.format(content=search_results)

	        result = self.__client.chat.completions.create(
	            model=settings.OPENAI_MODEL_ID,
	            messages=[
	                {
	                    "role": "system",
	                    "content": "You are an expert analyst. Answer the user's question based on the search results provided. Create a comprehensive answer with a Sources section.",
	                },
	                {
	                    "role": "user",
	                    "content": prompt,
	                },
	            ],
	            temperature=0.0,  # Deterministic output
	            max_tokens=1500,  # Reduced for faster response
	            timeout=45.0,  # Reduced timeout
	        )

	        content = result.choices[0].message.content
	        # The SDK types ``content`` as optional; honour the ``str`` return type.
	        return content if content is not None else ""