rohitchandra committed on
Commit
d6ea378
·
1 Parent(s): 597af3c

Track pdfs with git lfs

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.pdf filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+ .env
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.11.8
README.md CHANGED
@@ -6,6 +6,7 @@ colorTo: pink
6
  sdk: gradio
7
  sdk_version: 5.44.1
8
  app_file: app.py
 
9
  pinned: false
10
  license: mit
11
  ---
 
6
  sdk: gradio
7
  sdk_version: 5.44.1
8
  app_file: app.py
9
+ python_version: 3.11.8
10
  pinned: false
11
  license: mit
12
  ---
agents/__init__.py ADDED
File without changes
agents/factory.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Factory for creating agent implementations.
2
+
3
+ This module contains factory methods for creating the appropriate chat implementation
4
+ based on the selected agent mode.
5
+ """
6
+
7
+ from enum import Enum
8
+ from nexus_ai.core.chat_interface import ChatInterface
9
+ from nexus_ai.agents.research_agent import DeepResearchChat
10
+ from nexus_ai.agents.rag_agent import AgenticRAGChat
11
+ from nexus_ai.agents.tool_agent import ToolUsingAgentChat
12
+ from nexus_ai.agents.unified_chat import UnifiedChat
13
+
14
+
15
class AgentMode(Enum):
    """Enum for different agent implementation modes."""

    TOOL_AGENT = "tool"
    RAG_AGENT = "rag"
    RESEARCH_AGENT = "research"
    UNIFIED = "unified"  # Default unified mode


def create_chat_implementation(mode: AgentMode = AgentMode.UNIFIED) -> "ChatInterface":
    """Create a chat implementation for the specified agent mode.

    Args:
        mode: Which agent implementation to use (defaults to UNIFIED)

    Returns:
        ChatInterface: The initialized chat implementation
    """
    # Dispatch table keeps the mode -> class mapping in one place; any
    # mode without an entry (i.e. UNIFIED) falls back to UnifiedChat.
    implementations = {
        AgentMode.TOOL_AGENT: ToolUsingAgentChat,
        AgentMode.RAG_AGENT: AgenticRAGChat,
        AgentMode.RESEARCH_AGENT: DeepResearchChat,
    }
    return implementations.get(mode, UnifiedChat)()
agents/prompts.py ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Prompts for various agents in the Nexus AI system.
2
+
3
+ This module contains all the prompt templates used by different agents
4
+ for their specific tasks.
5
+ """
6
+
7
+ from langchain_core.prompts import PromptTemplate
8
+
9
+ # ============================================================================
10
+ # RAG Agent Prompts
11
+ # ============================================================================
12
+
13
+ DOCUMENT_EVALUATOR_PROMPT = PromptTemplate.from_template(
14
+ """
15
+ You are a grader assessing relevance and completeness of retrieved documents
16
+ to answer a user question.
17
+
18
+ Here is the user question: {question}
19
+
20
+ Here are the retrieved documents:
21
+ {retrieved_docs}
22
+
23
+ Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.
24
+ If the document contains keyword(s) or semantic meaning related to the user question, and is useful
25
+ to answer the user question, grade it as relevant.
26
+
27
+ If the answer is NO, then provide feedback on what information is missing from the document and
28
+ what additional information is needed.
29
+ """
30
+ )
31
+
32
+ DOCUMENT_SYNTHESIZER_PROMPT = PromptTemplate.from_template(
33
+ """
34
+ You are a document synthesizer. Create a comprehensive answer using
35
+ the retrieved documents. Focus on accuracy and clarity.
36
+
37
+ Here is the user question: {question}
38
+
39
+ Here are the retrieved documents:
40
+ {retrieved_docs}
41
+
42
+ Provide a detailed and accurate answer based solely on the information in the documents.
43
+ If the documents don't contain enough information to fully answer the question,
44
+ clearly state what information is available and what is missing.
45
+ """
46
+ )
47
+
48
+ QUERY_REWRITER_PROMPT = PromptTemplate.from_template(
49
+ """
50
+ You are a query rewriter. Rewrite the user question based on the feedback.
51
+ The new query should maintain the same semantic meaning as the original
52
+ query but augment it with more specific information to improve retrieval.
53
+
54
+ The new query should not be very long - it should be a single sentence since
55
+ it'll be used to query the vector database or a web search.
56
+
57
+ Here is the user question: {question}
58
+ Here is the previously retrieved documents: {retrieved_docs}
59
+ Here is the feedback: {feedback}
60
+
61
+ New query:
62
+ """
63
+ )
64
+
65
+ # ============================================================================
66
+ # Deep Research Agent Prompts
67
+ # ============================================================================
68
+
69
+ RESEARCH_MANAGER_PROMPT = PromptTemplate.from_template(
70
+ """
71
+ You are a Research Manager responsible for planning comprehensive research reports.
72
+
73
+ Your task is to:
74
+ 1. Take a broad research topic
75
+ 2. Break it down into 3-5 specific research questions/sections
76
+ 3. Create a research plan with a clear structure
77
+
78
+ For each research question, provide:
79
+ - A clear title
80
+ - A description of what should be researched
81
+
82
+ DO NOT conduct the actual research. You are only planning the structure.
83
+
84
+ The report structure should follow:
85
+ - Executive Summary
86
+ - Key Findings
87
+ - Detailed Analysis (sections for each research question)
88
+ - Limitations and Further Research
89
+
90
+ Return your answer as a structured research plan.
91
+
92
+ Research Topic: {topic}
93
+ """
94
+ )
95
+
96
+ RESEARCH_SPECIALIST_PROMPT = PromptTemplate.from_template(
97
+ """
98
+ You are a Specialized Research Agent responsible for thoroughly researching a specific topic section.
99
+
100
+ Process:
101
+ 1. Analyze the research question and description
102
+ 2. Generate effective search queries to gather information
103
+ 3. Use the web_search tool to find relevant information
104
+ 4. Synthesize findings into a comprehensive section
105
+ 5. Include proper citations to your sources
106
+
107
+ Your response should be:
108
+ - Thorough (at least 500 words)
109
+ - Well-structured with subsections
110
+ - Based on factual information (not made up)
111
+ - Include proper citations to sources
112
+
113
+ Always critically evaluate information and ensure you cover the topic comprehensively.
114
+
115
+ Research Question: {question}
116
+ Description: {description}
117
+ """
118
+ )
119
+
120
+ REPORT_FINALIZER_PROMPT = PromptTemplate.from_template(
121
+ """
122
+ You are a Report Finalizer responsible for completing a research report.
123
+
124
+ Based on the detailed analysis sections that have been researched, you need to generate:
125
+
126
+ 1. Executive Summary (Brief overview of the entire report, ~150 words)
127
+ 2. Key Findings (3-5 most important insights, in bullet points)
128
+ 3. Limitations and Further Research (Identify gaps and suggest future areas of study)
129
+
130
+ Your content should be:
131
+ - Concise and clear
132
+ - Properly formatted
133
+ - Based strictly on the researched content
134
+
135
+ Do not introduce new information not found in the research.
136
+
137
+ Research Topic: {topic}
138
+
139
+ Detailed Analysis Sections:
140
+ {detailed_analysis}
141
+
142
+ Generate the Executive Summary, Key Findings, and Limitations sections to complete the report.
143
+ """
144
+ )
145
+
146
+ # ============================================================================
147
+ # Tool Agent Prompts (if needed in the future)
148
+ # ============================================================================
149
+
150
+ TOOL_SELECTION_PROMPT = PromptTemplate.from_template(
151
+ """
152
+ You are an intelligent assistant with access to various tools.
153
+ Based on the user's query, select and use the appropriate tool(s) to provide an accurate response.
154
+
155
+ Available tools:
156
+ - Calculator: For mathematical computations
157
+ - DateTime: For date and time related queries
158
+ - Weather: For weather information
159
+
160
+ User Query: {query}
161
+
162
+ Think step by step about which tool(s) to use and how to best answer the query.
163
+ """
164
+ )
165
+
166
+ # ============================================================================
167
+ # Query Classification Prompt (used in unified_chat.py)
168
+ # ============================================================================
169
+
170
+ QUERY_CLASSIFIER_PROMPT = PromptTemplate.from_template(
171
+ """
172
+ You are a query classifier that determines which system should handle a user's query.
173
+
174
+ Analyze the user's query and classify it into one of these categories:
175
+
176
+ 1. SIMPLE_TOOL - Use for:
177
+ - Mathematical calculations or expressions
178
+ - Date/time queries
179
+ - Weather queries
180
+ - Any query that can be answered with a simple tool call
181
+
182
+ 2. AGENTIC_RAG - Use for:
183
+ - Questions about specific documents
184
+ - Queries requiring document retrieval
185
+ - Questions about content from your knowledge base
186
+
187
+ 3. DEEP_RESEARCH - Use for:
188
+ - Requests for comprehensive research or analysis
189
+ - Topics requiring multiple sources and detailed investigation
190
+ - Keywords: "deep dive", "comprehensive analysis", "research", "detailed report"
191
+
192
+ 4. GENERAL - Use for:
193
+ - General conversation and questions
194
+ - Simple factual queries
195
+ - Anything that doesn't fit the above categories
196
+
197
+ Return ONLY one of these exact words: SIMPLE_TOOL, AGENTIC_RAG, DEEP_RESEARCH, or GENERAL
198
+
199
+ User Query: {query}
200
+
201
+ Classification:
202
+ """
203
+ )
agents/rag_agent.py ADDED
@@ -0,0 +1,380 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Agentic RAG implementation.
2
+
3
+ This implementation focuses on:
4
+ - Building an Agentic RAG system with dynamic search strategy
5
+ - Using LangGraph for controlling the RAG workflow
6
+ - Evaluating retrieved information quality
7
+ """
8
+
9
+ import os.path as osp
10
+ from typing import Dict, List, Optional, Any
11
+ from langchain_core.messages import HumanMessage, AIMessage
12
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
13
+ from langchain_core.documents import Document
14
+ from pydantic import BaseModel, Field
15
+
16
+ from langgraph.graph import StateGraph, END, START, MessagesState
17
+ from langgraph.graph.message import add_messages
18
+
19
+ # For document retrieval
20
+ from langchain_core.vectorstores import InMemoryVectorStore
21
+ from langchain_community.tools.tavily_search import TavilySearchResults
22
+ from langchain_core.tools import tool
23
+ from langchain_core.tools.retriever import create_retriever_tool
24
+ from langchain_community.document_loaders import PyPDFLoader
25
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
26
+ from core.chat_interface import ChatInterface
27
+ from agents.prompts import (
28
+ DOCUMENT_EVALUATOR_PROMPT,
29
+ DOCUMENT_SYNTHESIZER_PROMPT,
30
+ QUERY_REWRITER_PROMPT,
31
+ )
32
+ from langchain_core.prompts import PromptTemplate
33
+ from langgraph.prebuilt import ToolNode, tools_condition
34
+ from dotenv import load_dotenv
35
+ load_dotenv()
36
+
37
# NOTE: Update this to the path of your documents
# For Hugging Face Spaces, use: "/home/user/app/documents/"
# For local development, use your local path
BASE_DIR = "data/"

# Add your document files here.
# Paths are joined against BASE_DIR; files that do not exist are skipped
# with a warning by _load_and_process_documents.
FILE_PATHS = [
    # Example for OPM documents (you can replace with your own documents):
    osp.join(BASE_DIR, "2019-annual-performance-report.pdf"),
    osp.join(BASE_DIR, "2020-annual-performance-report.pdf"),
    osp.join(BASE_DIR, "2021-annual-performance-report.pdf"),
    osp.join(BASE_DIR, "2022-annual-performance-report.pdf"),
]
52
+
53
+
54
class DocumentEvaluation(BaseModel):
    """Evaluation result for retrieved documents.

    Produced via the evaluator LLM's structured output
    (llm.with_structured_output(DocumentEvaluation)) and used by the graph
    to choose between synthesizing an answer and rewriting the query.
    """

    # True when the retrieved documents can answer the question.
    is_sufficient: bool = Field(description="Whether the documents provide sufficient information")
    # What is missing; fed to the query rewriter when is_sufficient is False.
    feedback: str = Field(description="Feedback about the document quality and what's missing")
58
+
59
+
60
class AgenticRAGState(MessagesState):
    """State for the Agentic RAG workflow using MessagesState as base."""

    # MessagesState already handles messages with add_messages reducer
    feedback: str = ""  # evaluator feedback, consumed by _query_rewriter
    is_sufficient: bool = False  # set by _evaluate_documents
    retry_count: int = 0  # Track number of retries to prevent infinite loops
    max_retries: int = 3  # Maximum number of query rewrites allowed
    # NOTE(review): retry_count/max_retries are never read by the graph built
    # in _create_graph — loop termination currently relies solely on the
    # recursion limit passed to graph.invoke(); confirm whether an explicit
    # retry cap was intended.
67
+
68
+
69
class AgenticRAGChat(ChatInterface):
    """Agentic RAG implementation with dynamic retrieval and evaluation."""

    def __init__(self):
        # All heavy components are created in initialize(); constructing the
        # object itself performs no I/O or API calls.
        self.llm = None  # ChatOpenAI chat model
        self.embeddings = None  # OpenAIEmbeddings for the vector store
        self.evaluator_llm = None  # llm with structured DocumentEvaluation output
        self.vector_store = None  # InMemoryVectorStore over loaded documents
        self.tools = []  # retriever tool + web search tool
        self.graph = None  # compiled LangGraph workflow
79
+
80
    def initialize(self) -> None:
        """Initialize components for the Agentic RAG system.

        Builds the chat model, embeddings, vector store (loading the
        configured PDFs when they all exist), tools, and compiled graph.
        Must be called before process_message().
        """
        # Initialize models
        self.llm = ChatOpenAI(model="gpt-4o", temperature=0)
        self.embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
        # Evaluator returns a DocumentEvaluation object instead of free text.
        self.evaluator_llm = self.llm.with_structured_output(DocumentEvaluation)

        # Check if documents are configured (all paths must exist)
        if FILE_PATHS and all(osp.exists(f) for f in FILE_PATHS):
            # Load documents and create vector store
            docs = self._load_and_process_documents()
            print(f"Loading {len(docs)} documents into vector store")
            self.vector_store = InMemoryVectorStore(embedding=self.embeddings)
            self.vector_store.add_documents(docs)
        else:
            print("Warning: No documents configured for RAG. Add document paths to FILE_PATHS.")
            # Create empty vector store so the retriever tool still exists
            self.vector_store = InMemoryVectorStore(embedding=self.embeddings)

        # Create tools
        self.tools = self._create_tools()

        # Create the graph
        self.graph = self._create_graph()
104
+
105
+ def _load_and_process_documents(self) -> List[Document]:
106
+ """Load and process documents for RAG."""
107
+ docs = []
108
+ for file_path in FILE_PATHS:
109
+ if not osp.exists(file_path):
110
+ print(f"Warning: File not found - {file_path}")
111
+ continue
112
+
113
+ print(f"Loading document from {file_path}")
114
+ try:
115
+ loader = PyPDFLoader(file_path)
116
+ page_docs = loader.load()
117
+
118
+ # Combine all pages and split into chunks
119
+ combined_doc = "\n".join([doc.page_content for doc in page_docs])
120
+
121
+ # Use RecursiveCharacterTextSplitter for better chunking
122
+ text_splitter = RecursiveCharacterTextSplitter(
123
+ chunk_size=1000,
124
+ chunk_overlap=200,
125
+ separators=["\n\n", "\n", ".", " ", ""]
126
+ )
127
+ chunks = text_splitter.split_text(combined_doc)
128
+
129
+ # Convert chunks to Document objects with metadata
130
+ docs.extend([
131
+ Document(
132
+ page_content=chunk,
133
+ metadata={"source": osp.basename(file_path)}
134
+ ) for chunk in chunks
135
+ ])
136
+ except Exception as e:
137
+ print(f"Error loading {file_path}: {e}")
138
+
139
+ return docs
140
+
141
    def _create_tools(self) -> List[Any]:
        """Create retriever and search tools.

        Returns:
            Tools for the agent LLM: a similarity-search retriever tool over
            the vector store (when one exists) plus a Tavily-backed web
            search tool.
        """
        tools = []

        # Create retriever tool if we have documents
        if self.vector_store:
            retriever = self.vector_store.as_retriever(
                search_type="similarity",
                search_kwargs={"k": 3}  # top-3 chunks per query
            )
            retriever_tool = create_retriever_tool(
                retriever,
                name="search_documents",
                description=(
                    "Search through the document database. "
                    "Use this for questions about content in the loaded documents."
                )
            )
            tools.append(retriever_tool)

        # Create web search tool
        # (the docstring below doubles as the tool description shown to the LLM)
        @tool("web_search")
        def search_web(query: str) -> list[dict]:
            """
            Search the web for the latest information on any topic.

            Args:
                query: The search query to look up

            Returns:
                List of search results with title, content, and URL
            """
            search = TavilySearchResults(max_results=3)
            return search.invoke(query)

        tools.append(search_web)

        return tools
179
+
180
    def _generate_query_or_respond(self, state: AgenticRAGState):
        """Let the LLM either invoke a tool or answer directly.

        The LLM is bound to the retriever/web-search tools; downstream,
        tools_condition routes on whether the response carries tool calls.
        """
        print("Generating query or responding...")

        prompt = PromptTemplate.from_template(
            """
You are a helpful assistant that can answer questions using the provided tools.

Available tools:
- search_documents: Search through loaded documents
- web_search: Search the web for information

Based on the user's query, decide whether to use tools or respond directly.
Use tools when you need specific information to answer the question accurately.

Query: {question}
"""
        )

        # Get the latest message (either original or rewritten query)
        question = state["messages"][-1].content
        chain = prompt | self.llm.bind_tools(self.tools)

        response = chain.invoke({"question": question})
        return {"messages": [response]}
205
+
206
+ def _evaluate_documents(self, state: AgenticRAGState):
207
+ """Evaluate the documents retrieved from the retriever tool."""
208
+ print("Evaluating documents...")
209
+
210
+ # Get original user question and retrieved docs
211
+ user_question = state["messages"][0].content
212
+ retrieved_docs = state["messages"][-1].content
213
+
214
+ chain = DOCUMENT_EVALUATOR_PROMPT | self.evaluator_llm
215
+ evaluation = chain.invoke({
216
+ "question": user_question,
217
+ "retrieved_docs": retrieved_docs
218
+ })
219
+
220
+ print(f"Evaluation result: {evaluation}")
221
+ return {
222
+ "is_sufficient": evaluation.is_sufficient,
223
+ "feedback": evaluation.feedback
224
+ }
225
+
226
+ def _synthesize_answer(self, state: AgenticRAGState):
227
+ """Synthesize the final answer from retrieved documents."""
228
+ print("Synthesizing answer...")
229
+
230
+ user_question = state["messages"][0].content
231
+ retrieved_docs = state["messages"][-1].content
232
+
233
+ chain = DOCUMENT_SYNTHESIZER_PROMPT | self.llm
234
+ answer = chain.invoke({
235
+ "question": user_question,
236
+ "retrieved_docs": retrieved_docs
237
+ })
238
+
239
+ return {"messages": [answer]}
240
+
241
+ def _query_rewriter(self, state: AgenticRAGState):
242
+ """Rewrite the query based on evaluation feedback."""
243
+ print("Rewriting query...")
244
+
245
+ user_question = state["messages"][0].content
246
+ retrieved_docs = state["messages"][-1].content
247
+ feedback = state["feedback"]
248
+
249
+ chain = QUERY_REWRITER_PROMPT | self.llm
250
+ new_query = chain.invoke({
251
+ "question": user_question,
252
+ "feedback": feedback,
253
+ "retrieved_docs": retrieved_docs
254
+ })
255
+
256
+ print(f"Rewritten query: {new_query.content}")
257
+ return {"messages": [new_query]}
258
+
259
    def _create_graph(self) -> Any:
        """Create the agentic RAG graph.

        Topology:
            START -> generate_query_or_respond
                  -> (tool call)   retrieve_documents -> evaluate_documents
                  -> (sufficient)  synthesize_answer -> END
                  -> (insufficient) query_rewriter -> generate_query_or_respond
                  -> (no tool call) END
        """
        # Create the graph builder
        graph_builder = StateGraph(AgenticRAGState)

        # Add nodes
        graph_builder.add_node("generate_query_or_respond", self._generate_query_or_respond)
        graph_builder.add_node("retrieve_documents", ToolNode(self.tools))
        graph_builder.add_node("evaluate_documents", self._evaluate_documents)
        graph_builder.add_node("synthesize_answer", self._synthesize_answer)
        graph_builder.add_node("query_rewriter", self._query_rewriter)

        # Add edges
        graph_builder.add_edge(START, "generate_query_or_respond")

        # Conditional edge: if tools were called, retrieve documents; else end
        graph_builder.add_conditional_edges(
            "generate_query_or_respond",
            tools_condition,
            {
                "tools": "retrieve_documents",
                END: END,
            },
        )

        # After retrieval, evaluate documents
        graph_builder.add_edge("retrieve_documents", "evaluate_documents")

        # Conditional edge: if sufficient, synthesize; else rewrite query
        # NOTE(review): state's retry_count/max_retries are never checked
        # here, so repeated insufficiency loops until the caller's recursion
        # limit trips — confirm whether an explicit retry cap was intended.
        graph_builder.add_conditional_edges(
            "evaluate_documents",
            lambda x: "synthesize_answer" if x["is_sufficient"] else "query_rewriter",
            {
                "synthesize_answer": "synthesize_answer",
                "query_rewriter": "query_rewriter",
            },
        )

        # After rewriting, generate new query
        graph_builder.add_edge("query_rewriter", "generate_query_or_respond")

        # After synthesizing, end
        graph_builder.add_edge("synthesize_answer", END)

        return graph_builder.compile()
304
+
305
+
306
+ def _convert_history_to_messages(self, chat_history: Optional[List[Dict[str, str]]]) -> List:
307
+ """Convert chat history to LangChain message format.
308
+
309
+ Args:
310
+ chat_history: List of dicts with 'role' and 'content' keys
311
+
312
+ Returns:
313
+ List of LangChain message objects
314
+ """
315
+ messages = []
316
+ if chat_history:
317
+ for msg in chat_history:
318
+ if msg["role"] == "user":
319
+ messages.append(HumanMessage(content=msg["content"]))
320
+ elif msg["role"] == "assistant":
321
+ messages.append(AIMessage(content=msg["content"]))
322
+ return messages
323
+
324
+
325
+
326
+ def process_message(self, message: str, chat_history: Optional[List[Dict[str, str]]] = None) -> str:
327
+ """Process a message using the Agentic RAG system."""
328
+ print("\n=== STARTING AGENTIC RAG QUERY ===")
329
+ print(f"Query: {message}")
330
+
331
+ # Convert chat history to messages
332
+ history_messages = self._convert_history_to_messages(chat_history)
333
+
334
+ # Add the current message
335
+ history_messages.append(HumanMessage(content=message))
336
+
337
+ # Create initial state with full conversation history
338
+ state = AgenticRAGState(
339
+ messages=history_messages, # Include full history instead of just current message
340
+ feedback="",
341
+ is_sufficient=False,
342
+ retry_count=0,
343
+ max_retries=3
344
+ )
345
+
346
+ # state = AgenticRAGState(
347
+ # messages=[HumanMessage(content=message)],
348
+ # feedback="",
349
+ # is_sufficient=False,
350
+ # retry_count=0,
351
+ # max_retries=3 # Limit to 3 retries to prevent infinite loops
352
+ # )
353
+
354
+ try:
355
+ # Run the workflow
356
+ # Run the workflow with increased recursion limit
357
+ config = {"recursion_limit": 3} # Increased but reasonable limit
358
+ result = self.graph.invoke(state, config=config)
359
+
360
+ print("\n=== RAG QUERY COMPLETED ===")
361
+
362
+ # Return the final answer
363
+ if result.get("messages"):
364
+ final_message = result["messages"][-1]
365
+ if hasattr(final_message, 'content'):
366
+ return final_message.content
367
+ else:
368
+ return str(final_message)
369
+ else:
370
+ return "I couldn't find relevant information to answer your question."
371
+
372
+ except Exception as e:
373
+ print(f"Error in RAG processing: {e}")
374
+ # Provide a more helpful fallback response
375
+ if "recursion" in str(e).lower():
376
+ return ("I had difficulty finding the exact information you're looking for in the documents. "
377
+ "Based on the available documents, I can see references to various offices and services, "
378
+ "but I couldn't find specific details about Mission Support Services. "
379
+ "You might want to try asking about a specific aspect or department.")
380
+ return f"I encountered an error while searching for information: {str(e)}"
agents/research_agent.py ADDED
@@ -0,0 +1,451 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Deep Research Multi-Agent System implementation.
2
+
3
+ This implementation focuses on:
4
+ - Building a multi-agent system for comprehensive research
5
+ - Using LangGraph with MessagesState for proper state management
6
+ - Synthesizing research findings into structured reports
7
+ """
8
+
9
+ from typing import Dict, List, Optional, Any, Annotated
10
+ from langchain_core.messages import HumanMessage, AIMessage, BaseMessage
11
+ from langchain_core.prompts import ChatPromptTemplate
12
+ from langchain_core.output_parsers import StrOutputParser
13
+ from langchain_openai import ChatOpenAI
14
+ from langchain_community.tools.tavily_search import TavilySearchResults
15
+ from langchain_core.tools import tool
16
+ from langgraph.graph import StateGraph, END, START, MessagesState
17
+ from langgraph.prebuilt import create_react_agent
18
+ from pydantic import BaseModel, Field
19
+
20
+ from core.chat_interface import ChatInterface
21
+ from opik.integrations.langchain import OpikTracer
22
+ from agents.prompts import (
23
+ RESEARCH_MANAGER_PROMPT,
24
+ REPORT_FINALIZER_PROMPT,
25
+ )
26
+ from dotenv import load_dotenv
27
+ load_dotenv()
28
+
29
+
30
class ResearchQuestion(BaseModel):
    """A research question with a title and description.

    One instance per section of the planned report; produced by the
    Research Manager as part of a ResearchPlan.
    """

    # Section title shown in the final report.
    title: str = Field(description="The title of the research question/section")
    # What the specialist agent should investigate for this section.
    description: str = Field(description="Description of what to research for this section")
    # Progress flag for the workflow.
    completed: bool = Field(default=False, description="Whether research has been completed for this section")
35
+
36
+
37
class ResearchPlan(BaseModel):
    """The overall research plan created by the Research Manager."""

    topic: str = Field(description="The main research topic")
    questions: List[ResearchQuestion] = Field(description="The list of research questions to investigate")
    # Cursor into `questions` for the section currently being researched.
    current_question_index: int = Field(default=0, description="Index of the current question being researched")
42
+
43
+
44
class Report(BaseModel):
    """The final research report structure.

    Initialized by the research manager node with one detailed_analysis
    entry per planned question; the other sections default to None until
    generated later in the workflow.
    """

    executive_summary: Optional[str] = Field(default=None, description="Executive summary of the research")
    key_findings: Optional[str] = Field(default=None, description="Key findings from the research")
    # One dict per research question: {"title": ..., "content": ..., "sources": [...]}
    # (shape set up in research_manager_node).
    detailed_analysis: List[Dict[str, Any]] = Field(default_factory=list, description="Detailed analysis sections")
    limitations: Optional[str] = Field(default=None, description="Limitations and further research")
50
+
51
+
52
class ResearchState(MessagesState):
    """State tracking for the deep research workflow using MessagesState as base."""

    # Plan produced by the research manager node; None until planning runs.
    research_plan: Optional[ResearchPlan] = None
    # Report skeleton created alongside the plan; None until planning runs.
    report: Optional[Report] = None
    # Name of the next workflow node to execute.
    next_step: str = "research_manager"

    # MessagesState already handles messages with add_messages reducer
59
+
60
+
61
class DeepResearchChat(ChatInterface):
    """Deep research implementation using multi-agent system with proper state management."""

    def __init__(self):
        # All components are built in initialize(); the constructor itself
        # performs no network calls.
        self.llm = None  # ChatOpenAI model
        self.research_manager = None  # planning chain -> ResearchPlan
        self.specialized_research_agent = None  # ReAct agent with web search
        self.finalizer = None  # report-completion chain -> str
        self.workflow = None  # compiled LangGraph workflow
        self.tavily_search_tool = None  # TavilySearchResults instance
71
+
72
+ def initialize(self) -> None:
73
+ """Initialize components for the deep research system."""
74
+ # Initialize LLM model
75
+ self.llm = ChatOpenAI(model="gpt-4o", temperature=0)
76
+
77
+ # Create Tavily search tool for agents
78
+ self.tavily_search_tool = TavilySearchResults(max_results=5)
79
+
80
+ # Create components
81
+ self.research_manager = self._create_research_manager()
82
+ self.specialized_research_agent = self._create_specialized_research_agent()
83
+ self.finalizer = self._create_finalizer()
84
+
85
+ # Create the workflow graph using these agents
86
+ self.workflow = self._create_workflow()
87
+
88
+ # Optional: Create Opik Tracer for monitoring
89
+ try:
90
+ self.tracer = OpikTracer(
91
+ graph=self.workflow.get_graph(xray=True),
92
+ project_name="nexus-research-workflow"
93
+ )
94
+ except:
95
+ self.tracer = None
96
+ print("Opik tracer not available, continuing without monitoring")
97
+
98
+ def _create_research_manager(self) -> Any:
99
+ """Create the research manager agent."""
100
+ research_manager = (
101
+ RESEARCH_MANAGER_PROMPT
102
+ | self.llm.with_structured_output(ResearchPlan)
103
+ )
104
+
105
+ return research_manager
106
+
107
    def _create_specialized_research_agent(self) -> Any:
        """Create specialized research agents.

        Returns:
            A prebuilt ReAct agent that plans searches, calls the web_search
            tool, and writes a cited section for one research question.
        """
        # Create search tool for the agent
        @tool("web_search")
        def search_web(query: str) -> str:
            """Search the web for information on the research topic."""
            results = self.tavily_search_tool.invoke(query)
            formatted_results = []

            # Flatten Tavily's result dicts into a readable numbered list,
            # tolerating missing keys.
            for i, result in enumerate(results, 1):
                formatted_results.append(f"Result {i}:")
                formatted_results.append(f"Title: {result.get('title', 'N/A')}")
                formatted_results.append(f"Content: {result.get('content', 'N/A')}")
                formatted_results.append(f"URL: {result.get('url', 'N/A')}")
                formatted_results.append("")

            return "\n".join(formatted_results)

        # Create the specialized agent
        tools = [search_web]

        # Define the system message for the specialized research agent
        # NOTE(review): this duplicates RESEARCH_SPECIALIST_PROMPT in
        # agents/prompts.py (minus the question/description slots); consider
        # keeping a single copy.
        system_message = """You are a Specialized Research Agent responsible for thoroughly researching a specific topic section.

Process:
1. Analyze the research question and description
2. Generate effective search queries to gather information
3. Use the web_search tool to find relevant information
4. Synthesize findings into a comprehensive section
5. Include proper citations to your sources

Your response should be:
- Thorough (at least 500 words)
- Well-structured with subsections
- Based on factual information (not made up)
- Include proper citations to sources

Always critically evaluate information and ensure you cover the topic comprehensively.
"""

        # Create the specialized research agent
        specialized_agent = create_react_agent(
            model=self.llm,
            tools=tools,
            prompt=system_message
        )

        return specialized_agent
155
+
156
+ def _create_finalizer(self) -> Any:
157
+ """Create the finalizer component."""
158
+ finalizer = REPORT_FINALIZER_PROMPT | self.llm | StrOutputParser()
159
+ return finalizer
160
+
161
    def _create_workflow(self) -> Any:
        """Create the multi-agent deep research workflow with proper MessagesState usage.

        Graph shape: START -> research_manager -> specialized_research -> evaluate,
        with evaluate looping back to specialized_research until every planned
        question is completed, then evaluate -> finalizer -> END.

        Returns:
            The compiled LangGraph workflow, ready to ``invoke``.
        """
        # Create a state graph
        workflow = StateGraph(ResearchState)

        # Define the nodes

        # Research Manager Node
        def research_manager_node(state: ResearchState) -> Dict:
            """Create the research plan and update messages."""
            print("\n=== RESEARCH MANAGER NODE ===")

            # Get the topic from the LAST user message (not first)
            # This handles conversation context better
            user_messages = [msg for msg in state["messages"] if isinstance(msg, HumanMessage)]
            topic = user_messages[-1].content if user_messages else state["messages"][-1].content

            print(f"Planning research for topic: {topic}")

            # Generate research plan
            research_plan = self.research_manager.invoke({"topic": topic})
            print(f"Created research plan with {len(research_plan.questions)} questions")

            # Initialize empty report structure: one placeholder section per
            # planned question, filled in later by the specialized researcher.
            report = Report(
                detailed_analysis=[
                    {"title": q.title, "content": None, "sources": []}
                    for q in research_plan.questions
                ]
            )

            # Add planning message to state
            planning_msg = AIMessage(
                content=f"Research plan created with {len(research_plan.questions)} sections to investigate."
            )

            return {
                "messages": [planning_msg],
                "research_plan": research_plan,
                "report": report,
            }

        # Specialized Research Node
        def specialized_research_node(state: ResearchState) -> Dict:
            """Conduct research on the current question and update messages."""
            print("\n=== SPECIALIZED RESEARCH NODE ===")

            research_plan = state["research_plan"]
            assert research_plan is not None, "Research plan is None"
            current_index = research_plan.current_question_index

            # Defensive guard; normally the evaluator routes to the finalizer
            # before the index runs past the question list.
            if current_index >= len(research_plan.questions):
                print("All research questions completed")
                return {}

            current_question = research_plan.questions[current_index]
            print(f"Researching question {current_index + 1}/{len(research_plan.questions)}: "
                  f"{current_question.title}")

            # Create input for the specialized agent
            research_input = {
                "messages": [
                    ("user", f"""Research the following topic thoroughly:

Topic: {current_question.title}

Description: {current_question.description}

Provide a detailed analysis with proper citations to sources.
""")
                ]
            }

            # Invoke the specialized agent
            result = self.specialized_research_agent.invoke(research_input)

            # Extract content from the result
            last_message = result["messages"][-1]
            if isinstance(last_message, tuple):
                content = last_message[1]
            else:
                content = last_message.content

            # Parse out sources from the content
            # NOTE(review): heuristic — any line containing a URL is treated
            # as a source citation.
            sources = []
            for line in content.split("\n"):
                if "http" in line and "://" in line:
                    sources.append(line.strip())

            # Update the research plan (mutated in place and also returned)
            research_plan.questions[current_index].completed = True

            # Update the report
            report = state["report"]
            assert report is not None, "Report is None"
            report.detailed_analysis[current_index]["content"] = content
            report.detailed_analysis[current_index]["sources"] = sources

            # Move to the next question
            research_plan.current_question_index += 1

            # Add research progress message
            progress_msg = AIMessage(
                content=f"Completed research for section: {current_question.title}"
            )

            return {
                "messages": [progress_msg],
                "research_plan": research_plan,
                "report": report,
            }

        # Research Evaluator Node
        def evaluator_node(state: ResearchState) -> Dict:
            """Evaluate the research progress and determine next steps."""
            print("\n=== EVALUATOR NODE ===")

            research_plan = state["research_plan"]
            assert research_plan is not None, "Research plan is None"

            # Check if we've completed all questions
            all_completed = research_plan.current_question_index >= len(research_plan.questions)

            if all_completed:
                print("All research questions have been addressed. Moving to finalizer.")
                eval_msg = AIMessage(content="All research sections completed. Finalizing report...")
                return {
                    "messages": [eval_msg],
                    "next_step": "finalize"
                }
            else:
                # We have more sections to research
                next_section = research_plan.questions[research_plan.current_question_index].title
                print(f"More research needed. Moving to next section: {next_section}")
                eval_msg = AIMessage(content=f"Moving to research section: {next_section}")
                return {
                    "messages": [eval_msg],
                    "next_step": "research"
                }

        # Finalizer Node
        def finalizer_node(state: ResearchState) -> Dict:
            """Finalize the research report and update messages."""
            print("\n=== FINALIZER NODE ===")

            research_plan = state["research_plan"]
            report = state["report"]
            assert report is not None, "Report is None"
            assert research_plan is not None, "Research plan is None"

            # Prepare the detailed analysis for the finalizer
            detailed_analysis = "\n\n".join([
                f"## {section['title']}\n{section['content']}"
                for section in report.detailed_analysis
                if section['content'] is not None
            ])

            # Generate the final sections
            final_sections = self.finalizer.invoke({
                "topic": research_plan.topic,
                "detailed_analysis": detailed_analysis
            })

            # Parse the final sections
            # NOTE(review): assumes the LLM separates the three summary
            # sections with blank lines; if fewer than 3 chunks come back the
            # summary fields stay None and render as "N/A" — confirm this is
            # acceptable.
            sections = final_sections.split("\n\n")

            # Update the report
            if len(sections) >= 3:
                report.executive_summary = sections[0].replace("# Executive Summary", "").strip()
                report.key_findings = sections[1].replace("# Key Findings", "").strip()
                report.limitations = sections[2].replace("# Limitations and Further Research", "").strip()

            # Format the final report
            report_message = self._format_report(report)

            return {
                "messages": [report_message],
            }

        # Add nodes to the graph
        workflow.add_node("research_manager", research_manager_node)
        workflow.add_node("specialized_research", specialized_research_node)
        workflow.add_node("evaluate", evaluator_node)
        workflow.add_node("finalizer", finalizer_node)

        # Add edges
        workflow.add_edge(START, "research_manager")
        workflow.add_edge("research_manager", "specialized_research")
        workflow.add_edge("specialized_research", "evaluate")

        # Add conditional edges from evaluator: loop while sections remain,
        # otherwise hand off to the finalizer.
        workflow.add_conditional_edges(
            "evaluate",
            lambda x: x["next_step"],
            {
                "research": "specialized_research",
                "finalize": "finalizer"
            }
        )
        workflow.add_edge("finalizer", END)

        # Compile the workflow
        return workflow.compile()
364
+
365
+ def _format_report(self, report: Report) -> AIMessage:
366
+ """Format the research report for presentation."""
367
+ sections = [
368
+ "# Research Report\n",
369
+
370
+ "## Executive Summary\n" + (report.executive_summary or "N/A"),
371
+
372
+ "## Key Findings\n" + (report.key_findings or "N/A"),
373
+
374
+ "## Detailed Analysis"
375
+ ]
376
+
377
+ # Add detailed analysis sections
378
+ for section in report.detailed_analysis:
379
+ if section["content"]:
380
+ sections.append(f"### {section['title']}\n{section['content']}")
381
+
382
+ if section["sources"]:
383
+ sources = "\n".join([f"- {source}" for source in section["sources"]])
384
+ sections.append(f"**Sources:**\n{sources}")
385
+
386
+ # Add limitations
387
+ sections.append("## Limitations and Further Research\n" + (report.limitations or "N/A"))
388
+
389
+ return AIMessage(content="\n\n".join(sections))
390
+
391
+ def _convert_history_to_messages(self, chat_history: Optional[List[Dict[str, str]]]) -> List:
392
+ """Convert chat history to LangChain message format.
393
+
394
+ Args:
395
+ chat_history: List of dicts with 'role' and 'content' keys
396
+
397
+ Returns:
398
+ List of LangChain message objects
399
+ """
400
+ messages = []
401
+ if chat_history:
402
+ for msg in chat_history:
403
+ if msg["role"] == "user":
404
+ messages.append(HumanMessage(content=msg["content"]))
405
+ elif msg["role"] == "assistant":
406
+ messages.append(AIMessage(content=msg["content"]))
407
+ return messages
408
+
409
+
410
+ def process_message(self, message: str, chat_history: Optional[List[Dict[str, str]]] = None) -> str:
411
+ """Process a message using the deep research system with proper state management."""
412
+ print("\n=== STARTING DEEP RESEARCH ===")
413
+ print(f"Research Topic: {message}")
414
+
415
+ # Convert chat history to messages
416
+ history_messages = self._convert_history_to_messages(chat_history)
417
+
418
+ # Add the current message
419
+ history_messages.append(HumanMessage(content=message))
420
+
421
+ # Create initial state with full conversation history
422
+ initial_state = ResearchState(
423
+ messages=history_messages, # Include full history instead of just current message
424
+ research_plan=None,
425
+ report=None,
426
+ next_step="research_manager"
427
+ )
428
+ # # Create initial state using MessagesState
429
+ # initial_state = ResearchState(
430
+ # messages=[HumanMessage(content=message)],
431
+ # research_plan=None,
432
+ # report=None,
433
+ # next_step="research_manager"
434
+ # )
435
+
436
+ # Run workflow with optional tracing
437
+ config = {"callbacks": [self.tracer]} if self.tracer else {}
438
+ result = self.workflow.invoke(initial_state, config=config)
439
+
440
+ print("\n=== RESEARCH COMPLETED ===")
441
+
442
+ # Write the final report to a file
443
+ try:
444
+ with open("final_report.md", "w") as f:
445
+ f.write(result["messages"][-1].content)
446
+ print("Report saved to final_report.md")
447
+ except Exception as e:
448
+ print(f"Could not save report to file: {e}")
449
+
450
+ # Return the final report
451
+ return result["messages"][-1].content
agents/tool_agent.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tool-Using Agent implementation.
2
+
3
+ This implementation focuses on:
4
+ - Converting tools to use with LangGraph
5
+ - Using the ReAct pattern for autonomous tool selection
6
+ - Handling calculator, datetime, and weather queries
7
+ """
8
+
9
+ from typing import Dict, List, Optional, Any, Annotated
10
+ import io
11
+ import contextlib
12
+ from langchain_core.tools import tool
13
+ from langchain.chat_models import init_chat_model
14
+ from langgraph.prebuilt import create_react_agent
15
+ from langchain_community.tools.tavily_search import TavilySearchResults
16
+ from langchain_core.messages import HumanMessage, AIMessage
17
+
18
+ from core.chat_interface import ChatInterface
19
+ from tools.calculator import Calculator
20
+ from dotenv import load_dotenv
21
+ load_dotenv()
22
+
23
+
24
class ToolUsingAgentChat(ChatInterface):
    """Tool-using agent implementation with calculator, datetime, and weather tools."""

    def __init__(self):
        # Heavy setup is deferred to initialize() so construction stays cheap.
        self.llm = None
        self.tools = []
        self.graph = None

    def initialize(self) -> None:
        """Initialize components for the tool-using agent.

        This sets up:
        - The chat model
        - Tools for calculator, DateTime, and weather
        - The ReAct agent using LangGraph
        """
        # Initialize chat model
        self.llm = init_chat_model("gpt-4o", model_provider="openai")

        # Create tools
        self.tools = self._create_tools()

        # Create the ReAct agent graph with the tools.
        # The agent autonomously decides which tools to use based on the query.
        self.graph = create_react_agent(
            model=self.llm,
            tools=self.tools,
        )

    def _create_tools(self) -> List[Any]:
        """Create and return the list of tools for the agent.

        Returns:
            List: calculator, datetime code runner, and weather search tools.
        """
        # Calculator tool for mathematical operations
        @tool
        def calculator(expression: Annotated[str, "The mathematical expression to evaluate"]) -> str:
            """Evaluate a mathematical expression using basic arithmetic operations (+, -, *, /, %, //).
            Examples: '5 + 3', '10 * (2 + 3)', '15 / 3', '17 % 5', '20 // 3'
            """
            result = Calculator.evaluate_expression(expression)
            # Calculator signals failure via an "Error..." string; surface it
            # as an exception so the ReAct loop sees the tool call failed.
            if isinstance(result, str) and result.startswith("Error"):
                raise ValueError(result)
            return str(result)

        # DateTime tool for date/time operations
        @tool
        def execute_datetime_code(code: Annotated[str, "Python code to execute for datetime operations"]) -> str:
            """Execute Python code for datetime operations. The code should use datetime or time modules.
            Examples:
            - 'print(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))'
            - 'print(datetime.datetime.now().strftime("%A, %B %d, %Y"))'
            - 'print(datetime.datetime.now().year)'
            - 'print((datetime.datetime(2025, 12, 25) - datetime.datetime.now()).days)'
            """
            output_buffer = io.StringIO()
            # Pre-import datetime/time so the model's snippet need not.
            code = f"import datetime\nimport time\n{code}"
            try:
                # SECURITY NOTE: exec() runs arbitrary model-generated Python
                # with full builtins. Tolerable only because the code comes
                # from our own LLM, not end users; sandbox before exposing
                # this surface more widely.
                with contextlib.redirect_stdout(output_buffer):
                    exec(code)
                return output_buffer.getvalue().strip()
            except Exception as e:
                raise ValueError(f"Error executing datetime code: {str(e)}")

        # Weather tool using Tavily search
        @tool
        def get_weather(location: Annotated[str, "The location to get weather for (city, country)"]) -> str:
            """Get the current weather for a given location using web search.
            Examples: 'New York, USA', 'London, UK', 'Tokyo, Japan', 'Paris, France'
            """
            search = TavilySearchResults(max_results=3)
            query = f"current weather temperature conditions {location} today"
            results = search.invoke(query)

            if not results:
                return f"Could not find weather information for {location}"

            # Combine results for better coverage
            weather_info = []
            for result in results[:2]:  # Use top 2 results
                content = result.get("content", "")
                if content:
                    weather_info.append(content)

            if weather_info:
                return " ".join(weather_info)
            else:
                return f"Could not find detailed weather information for {location}"

        return [calculator, execute_datetime_code, get_weather]

    def _convert_history_to_messages(self, chat_history: Optional[List[Dict[str, str]]]) -> List:
        """Convert chat history to LangChain message format.

        Args:
            chat_history: List of dicts with 'role' and 'content' keys.

        Returns:
            List of LangChain message objects; unknown roles are dropped.
        """
        messages = []
        if chat_history:
            for msg in chat_history:
                if msg["role"] == "user":
                    messages.append(HumanMessage(content=msg["content"]))
                elif msg["role"] == "assistant":
                    messages.append(AIMessage(content=msg["content"]))
        return messages

    def process_message(self, message: str, chat_history: Optional[List[Dict[str, str]]] = None) -> str:
        """Process a message using the tool-using agent.

        The ReAct agent will:
        1. Consider the full conversation history
        2. Analyze the query in context
        3. Decide which tool(s) to use
        4. Execute the tool(s)
        5. Formulate a response

        Args:
            message: The user's input message
            chat_history: List of previous chat messages

        Returns:
            str: The assistant's response (or an error explanation).
        """
        try:
            # Convert chat history to messages
            history_messages = self._convert_history_to_messages(chat_history)

            # Add the current message
            history_messages.append(HumanMessage(content=message))

            # Run the graph with the full conversation history
            result = self.graph.invoke({"messages": history_messages})

            # Extract the final response
            if result.get("messages"):
                final_message = result["messages"][-1]
                # Handle different message formats
                if hasattr(final_message, 'content'):
                    return final_message.content
                else:
                    return str(final_message)
            else:
                return "I couldn't process that request. Please try rephrasing."

        except Exception as e:
            print(f"Error in tool agent: {e}")
            return f"I encountered an error while processing your request: {str(e)}. Please try again."
agents/unified_chat.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Unified Chat Implementation combining all agent functionalities.
2
+
3
+ This implementation combines:
4
+ - Tool-using agent (calculator, datetime, weather)
5
+ - Agentic RAG for document queries
6
+ - Deep research for comprehensive analysis
7
+ """
8
+
9
+ import os.path as osp
10
+ import io
11
+ import contextlib
12
+ from typing import Dict, List, Optional, Any, Annotated
13
+ from enum import Enum
14
+
15
+ from langchain_core.messages import HumanMessage, AIMessage
16
+ from langchain_core.tools import tool
17
+ from langchain.chat_models import init_chat_model
18
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
19
+ from langgraph.prebuilt import create_react_agent
20
+ from langgraph.graph import MessagesState
21
+ from langchain_community.tools.tavily_search import TavilySearchResults
22
+ from langchain_core.prompts import PromptTemplate
23
+
24
+ from core.chat_interface import ChatInterface
25
+ from tools.calculator import Calculator
26
+ from agents.prompts import QUERY_CLASSIFIER_PROMPT
27
+
28
+ # Import components from individual agents
29
+ from agents.tool_agent import ToolUsingAgentChat
30
+ from agents.rag_agent import AgenticRAGChat
31
+ from agents.research_agent import DeepResearchChat
32
+ from dotenv import load_dotenv
33
+ load_dotenv()
34
+
35
+
36
class QueryType(Enum):
    """Types of queries the system can handle.

    The classifier LLM emits one of these labels (uppercased); routing in
    UnifiedChat.process_message dispatches on the resulting member.
    """
    SIMPLE_TOOL = "simple_tool"      # Calculator, datetime, weather
    AGENTIC_RAG = "agentic_rag"      # OPM document queries
    DEEP_RESEARCH = "deep_research"  # Comprehensive research
    GENERAL = "general"              # General conversation (fallback)
42
+
43
+
44
class UnifiedChatState(MessagesState):
    """Unified state that uses MessagesState for proper message handling."""
    # Value of the classified QueryType (e.g. "simple_tool"); None until set.
    query_type: Optional[str] = None
    # Name of the sub-agent handling the request ("tool_agent", "rag_agent", ...).
    current_agent: Optional[str] = None
48
+
49
+
50
class UnifiedChat(ChatInterface):
    """Unified chat implementation that routes queries to appropriate handlers.

    A lightweight classifier LLM labels each query; the query is then handed
    to one of three sub-agents (tools, RAG, deep research).
    """

    def __init__(self):
        # All components are built in initialize(); construction is cheap.
        self.router_llm = None
        self.tool_agent = None
        self.rag_agent = None
        self.research_agent = None
        self.query_classifier = None

    def initialize(self) -> None:
        """Initialize all components for the unified system."""
        print("Initializing Nexus AI Unified System...")

        # Router LLM for query classification; temperature 0 for stable labels.
        self.router_llm = ChatOpenAI(model="gpt-4o", temperature=0)

        # Initialize query classifier
        self._create_query_classifier()

        # Initialize all sub-agents
        print("Initializing Tool-Using Agent...")
        self.tool_agent = ToolUsingAgentChat()
        self.tool_agent.initialize()

        print("Initializing Agentic RAG...")
        self.rag_agent = AgenticRAGChat()
        self.rag_agent.initialize()

        print("Initializing Deep Research Agent...")
        self.research_agent = DeepResearchChat()
        self.research_agent.initialize()

        print("Nexus AI System initialized successfully!")

    def _create_query_classifier(self):
        """Create the query classifier that routes to appropriate handlers."""
        self.query_classifier = QUERY_CLASSIFIER_PROMPT | self.router_llm

    def _classify_query(self, query: str) -> QueryType:
        """Classify the query to determine which handler to use.

        Returns:
            QueryType: the matched label; GENERAL for unknown labels or any
            classifier failure (fail-safe routing).
        """
        try:
            result = self.query_classifier.invoke({"query": query})
            classification = result.content.strip().upper()

            print(f"Query Classification: {classification}")

            # Table lookup replaces the previous if/elif chain; unrecognized
            # labels deliberately degrade to GENERAL rather than raising.
            label_map = {
                "SIMPLE_TOOL": QueryType.SIMPLE_TOOL,
                "AGENTIC_RAG": QueryType.AGENTIC_RAG,
                "DEEP_RESEARCH": QueryType.DEEP_RESEARCH,
            }
            return label_map.get(classification, QueryType.GENERAL)

        except Exception as e:
            print(f"Error in query classification: {e}")
            # Default to general tool agent for safety
            return QueryType.GENERAL

    def process_message(self, message: str, chat_history: Optional[List[Dict[str, str]]] = None) -> str:
        """Process a message by routing to the appropriate handler.

        Args:
            message: The user's input message
            chat_history: List of previous chat messages

        Returns:
            str: The assistant's response
        """
        print(f"\n{'='*50}")
        print(f"Processing query: {message}")
        print(f"{'='*50}")

        # Classify the query
        query_type = self._classify_query(message)

        # Route to the appropriate handler. SIMPLE_TOOL and GENERAL both go
        # to the tool agent, which doubles as the conversational fallback.
        try:
            if query_type in (QueryType.SIMPLE_TOOL, QueryType.GENERAL):
                print("→ Routing to Tool-Using Agent")
                return self.tool_agent.process_message(message, chat_history)

            elif query_type == QueryType.AGENTIC_RAG:
                print("→ Routing to Agentic RAG")
                return self.rag_agent.process_message(message, chat_history)

            else:  # QueryType.DEEP_RESEARCH — _classify_query returns no other members
                print("→ Routing to Deep Research Agent")
                return self.research_agent.process_message(message, chat_history)

        except Exception as e:
            error_msg = f"Error processing message: {str(e)}"
            print(error_msg)
            return f"I encountered an error while processing your request: {str(e)}. Please try rephrasing your question."
app.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Main application file for Nexus AI Assistant.
2
+
3
+ This file handles the Gradio interface and orchestrates the chat implementations.
4
+ """
5
+
6
+
7
+ from typing import List, Dict, Tuple
8
+ import os
9
+ import sys
10
+ from pathlib import Path
11
+ from dotenv import load_dotenv
12
+
13
+ # Add the parent directory to Python path so imports work correctly
14
+ # root_path = Path(__file__).resolve().parent
15
+ # sys.path.append(str(root_path))
16
+ # print(f"root path: {root_path}")
17
+
18
+ import gradio as gr
19
+ # Import the unified chat implementation
20
+ from agents.unified_chat import UnifiedChat
21
+ load_dotenv()
22
+
23
def create_demo():
    """Create the Gradio demo for the unified chat system.

    Returns:
        gr.ChatInterface: Configured chat UI backed by UnifiedChat.
    """

    # Initialize the unified chat implementation
    chat_impl = UnifiedChat()

    # Initialize eagerly so startup failures surface in the UI banner
    # instead of on the first message.
    try:
        chat_impl.initialize()
        init_status = "✅ All systems ready!"
    except Exception as e:
        init_status = f"❌ Error initializing: {str(e)}"
        print(init_status)

    def respond(message: str, history) -> str:
        """Process a message and return the response.

        Args:
            message: The user's input message
            history: Prior turns. Depending on the Gradio version/config this
                is either a list of (user, assistant) tuples or a list of
                {"role", "content"} dicts (messages format) — handle both.

        Returns:
            str: The assistant's response
        """
        if not message:
            return "Please enter a message."

        # Normalize history to the role/content dicts the chat implementation expects.
        history_dicts: List[Dict[str, str]] = []
        for turn in history or []:
            if isinstance(turn, dict):
                # Gradio "messages" format: already role/content shaped.
                history_dicts.append({"role": turn["role"], "content": turn["content"]})
            else:
                # Legacy "tuples" format: (user_message, assistant_response).
                user_msg, assistant_msg = turn
                history_dicts.append({"role": "user", "content": user_msg})
                history_dicts.append({"role": "assistant", "content": assistant_msg})

        try:
            # Process the message
            return chat_impl.process_message(message, history_dicts)
        except Exception as e:
            return f"Error processing message: {str(e)}"

    # Create the Gradio interface using ChatInterface
    demo = gr.ChatInterface(
        fn=respond,
        title="🤖 Nexus AI - Unified Intelligent Assistant",
        description=f"""
    {init_status}

    I combine multiple AI capabilities:
    • 🧮 **Calculator & Math** - Complex calculations
    • 📅 **Date & Time** - Current date, time calculations
    • 🌤️ **Weather** - Real-time weather information
    • 📚 **Document Analysis** - RAG-powered document search
    • 🔬 **Deep Research** - Comprehensive multi-source analysis
    • 💬 **General Chat** - Conversational AI

    The system automatically routes your query to the most appropriate handler.
    """,
        examples=[
            "What is 847 * 293?",
            "What's today's date?",
            "What's the weather in San Francisco?",
            "Explain quantum computing in simple terms",
            "Research the impact of AI on healthcare",
        ],
        theme=gr.themes.Soft(),
        analytics_enabled=False,
    )

    return demo
93
+
94
+
95
if __name__ == "__main__":
    # Create and launch the demo
    demo = create_demo()

    # Check if running in Hugging Face Spaces (the platform sets SPACE_ID)
    if os.environ.get("SPACE_ID"):
        # Hugging Face Spaces configuration: bind all interfaces on the
        # platform-assigned port, defaulting to 7860.
        demo.launch(
            server_name="0.0.0.0",
            server_port=int(os.environ.get("PORT", 7860))
        )
    else:
        # Local development - use simple defaults
        demo.launch()
core/__init__.py ADDED
File without changes
core/chat_interface.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from abc import ABC, abstractmethod
from typing import Dict, List, Optional
3
+
4
class ChatInterface(ABC):
    """Abstract base class defining the core chat interface functionality.

    This interface is designed to be flexible enough to support different
    implementations across various assignments, from basic query handling
    to complex information retrieval and tool usage.
    """

    @abstractmethod
    def initialize(self) -> None:
        """Initialize any models, tools, or components needed for chat processing.

        This method should be called after instantiation to set up any necessary
        components like language models, memory, tools, etc. This separation allows
        for proper error handling during initialization and lazy loading of resources.
        """
        pass

    @abstractmethod
    def process_message(self, message: str, chat_history: Optional[List[Dict[str, str]]] = None) -> str:
        """Process a message and return a response.

        This is the core method that all implementations must define. Different
        implementations can handle the message processing in their own way, such as:
        - Week 1: Query classification, basic tools, and memory
        - Week 2: RAG, web search, and knowledge synthesis
        - Week 3: Advanced tool calling and agentic behavior

        Args:
            message: The user's input message
            chat_history: Optional list of previous chat messages, where each message
                is a dict with 'role' (user/assistant) and 'content' keys.
                Defaults to None, matching concrete implementations.

        Returns:
            str: The assistant's response
        """
        pass
data/2019-annual-performance-report.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9c03ac0b77ef7bc82433049e4dd7783cd56855b51f62d3255cf0d2ee7c8e7c2
3
+ size 2357476
data/2020-annual-performance-report.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5de716f9e6bd1bad9b0b64768a8af4a7e528b1ea73798ef3f630d1b079091e7b
3
+ size 2838017
data/2021-annual-performance-report.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11e216c3fe1ff2d31ef7c06eb019b65723cfc53b7a935d68b5c7dea2638bd830
3
+ size 9822451
data/2022-annual-performance-report.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6200aff36bfcbfa955af57d8fdbebf01a233474949421adb1dc59efd8c4ea8d
3
+ size 4134068
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ tavily-python==0.7.5
2
+ langchain-community==0.3.25
3
+ pypdf==5.1.0
4
+ langchain==0.3.25
5
+ langgraph==0.4.5
6
+ langchain-openai==0.3.18
7
+ python-dotenv==1.1.0
8
+ gradio==5.34.2
9
+ langgraph-prebuilt==0.2.2
10
+ opik
11
+ fastmcp==2.8.1
12
+ langchain-mcp-adapters==0.1.7
13
+ # (removed duplicate "gradio==5.34.2" pin — gradio is already pinned above)
14
+ huggingface_hub[cli]
tools/__init__.py ADDED
File without changes
tools/calculator.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from typing import Union
3
+
4
class Calculator:
    """A simple calculator tool for evaluating basic arithmetic expressions."""

    # Whitelist of permitted characters: digits, whitespace, + - * / %,
    # parentheses, and decimal points. '//' is built from '/' characters.
    # '%' is included so modulo works, matching the calculator tool's
    # advertised examples (e.g. '17 % 5').
    _ALLOWED = re.compile(r'^[\d\s\+\-\*\/\(\)\.\%]*$')

    @staticmethod
    def evaluate_expression(expression: str) -> Union[float, str]:
        """Evaluate a basic arithmetic expression.

        Supports basic arithmetic operations (+, -, *, /, %, //) and
        parentheses. Returns an error message if the expression is invalid or
        cannot be evaluated safely.

        Args:
            expression: A string containing a mathematical expression
                        e.g. "5 + 3" or "10 * (2 + 3)"

        Returns:
            Union[float, str]: The result of the evaluation, or an error message
            if the expression is invalid

        Examples:
            >>> Calculator.evaluate_expression("5 + 3")
            8.0
            >>> Calculator.evaluate_expression("10 * (2 + 3)")
            50.0
            >>> Calculator.evaluate_expression("17 % 5")
            2.0
        """
        try:
            # Clean up the expression
            expression = expression.strip()

            # Character whitelist runs BEFORE eval sees the input; anything
            # with letters or underscores (names, dunders) is rejected here.
            if not Calculator._ALLOWED.match(expression):
                return "Error: Invalid characters in expression"

            # eval with empty __builtins__ so names like open/__import__ are
            # unavailable; combined with the whitelist this confines input
            # to arithmetic.
            result = eval(expression, {"__builtins__": {}})

            # Convert to float for a uniform numeric return type
            return float(result)

        except ZeroDivisionError:
            return "Error: Division by zero"
        except (SyntaxError, TypeError, NameError):
            return "Error: Invalid expression"
        except Exception as e:
            return f"Error: {str(e)}"
+ return f"Error: {str(e)}"