Spaces:
Sleeping
Sleeping
Commit
·
d6ea378
1
Parent(s):
597af3c
Track pdfs with git lfs
Browse files- .gitattributes +1 -0
- .gitignore +11 -0
- .python-version +1 -0
- README.md +1 -0
- agents/__init__.py +0 -0
- agents/factory.py +40 -0
- agents/prompts.py +203 -0
- agents/rag_agent.py +380 -0
- agents/research_agent.py +451 -0
- agents/tool_agent.py +177 -0
- agents/unified_chat.py +166 -0
- app.py +108 -0
- core/__init__.py +0 -0
- core/chat_interface.py +40 -0
- data/2019-annual-performance-report.pdf +3 -0
- data/2020-annual-performance-report.pdf +3 -0
- data/2021-annual-performance-report.pdf +3 -0
- data/2022-annual-performance-report.pdf +3 -0
- requirements.txt +14 -0
- tools/__init__.py +0 -0
- tools/calculator.py +50 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.pdf filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python-generated files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[oc]
|
| 4 |
+
build/
|
| 5 |
+
dist/
|
| 6 |
+
wheels/
|
| 7 |
+
*.egg-info
|
| 8 |
+
|
| 9 |
+
# Virtual environments
|
| 10 |
+
.venv
|
| 11 |
+
.env
|
.python-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
3.11.8
|
README.md
CHANGED
|
@@ -6,6 +6,7 @@ colorTo: pink
|
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.44.1
|
| 8 |
app_file: app.py
|
|
|
|
| 9 |
pinned: false
|
| 10 |
license: mit
|
| 11 |
---
|
|
|
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.44.1
|
| 8 |
app_file: app.py
|
| 9 |
+
python_version: 3.11.8
|
| 10 |
pinned: false
|
| 11 |
license: mit
|
| 12 |
---
|
agents/__init__.py
ADDED
|
File without changes
|
agents/factory.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Factory for creating agent implementations.
|
| 2 |
+
|
| 3 |
+
This module contains factory methods for creating the appropriate chat implementation
|
| 4 |
+
based on the selected agent mode.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from enum import Enum
|
| 8 |
+
from nexus_ai.core.chat_interface import ChatInterface
|
| 9 |
+
from nexus_ai.agents.research_agent import DeepResearchChat
|
| 10 |
+
from nexus_ai.agents.rag_agent import AgenticRAGChat
|
| 11 |
+
from nexus_ai.agents.tool_agent import ToolUsingAgentChat
|
| 12 |
+
from nexus_ai.agents.unified_chat import UnifiedChat
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class AgentMode(Enum):
    """Enumerates the selectable agent implementation modes.

    The string values are stable identifiers (e.g. usable as CLI/config
    flags) mapping each mode to its chat implementation.
    """

    TOOL_AGENT = "tool"
    RAG_AGENT = "rag"
    RESEARCH_AGENT = "research"
    # Default mode: routes between the other agents automatically.
    UNIFIED = "unified"
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def create_chat_implementation(mode: AgentMode = AgentMode.UNIFIED) -> ChatInterface:
    """Create a chat implementation for the specified agent mode.

    Args:
        mode: Which agent implementation to use (defaults to UNIFIED)

    Returns:
        ChatInterface: The initialized chat implementation
    """
    # Dispatch table instead of an if/elif chain; any mode not listed
    # (including UNIFIED) falls back to the unified implementation,
    # exactly as the final `else` branch did.
    implementations = {
        AgentMode.TOOL_AGENT: ToolUsingAgentChat,
        AgentMode.RAG_AGENT: AgenticRAGChat,
        AgentMode.RESEARCH_AGENT: DeepResearchChat,
    }
    return implementations.get(mode, UnifiedChat)()
|
agents/prompts.py
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Prompts for various agents in the Nexus AI system.
|
| 2 |
+
|
| 3 |
+
This module contains all the prompt templates used by different agents
|
| 4 |
+
for their specific tasks.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from langchain_core.prompts import PromptTemplate
|
| 8 |
+
|
| 9 |
+
# ============================================================================
|
| 10 |
+
# RAG Agent Prompts
|
| 11 |
+
# ============================================================================
|
| 12 |
+
|
| 13 |
+
# Input variables: {question}, {retrieved_docs}. Consumed by
# AgenticRAGChat._evaluate_documents with structured output (DocumentEvaluation).
DOCUMENT_EVALUATOR_PROMPT = PromptTemplate.from_template(
    """
You are a grader assessing relevance and completeness of retrieved documents
to answer a user question.

Here is the user question: {question}

Here are the retrieved documents:
{retrieved_docs}

Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.
If the document contains keyword(s) or semantic meaning related to the user question, and is useful
to answer the user question, grade it as relevant.

If the answer is NO, then provide feedback on what information is missing from the document and
what additional information is needed.
"""
)

# Input variables: {question}, {retrieved_docs}. Produces the final grounded
# answer in AgenticRAGChat._synthesize_answer.
DOCUMENT_SYNTHESIZER_PROMPT = PromptTemplate.from_template(
    """
You are a document synthesizer. Create a comprehensive answer using
the retrieved documents. Focus on accuracy and clarity.

Here is the user question: {question}

Here are the retrieved documents:
{retrieved_docs}

Provide a detailed and accurate answer based solely on the information in the documents.
If the documents don't contain enough information to fully answer the question,
clearly state what information is available and what is missing.
"""
)

# Input variables: {question}, {retrieved_docs}, {feedback}. Used by
# AgenticRAGChat._query_rewriter to reformulate failed retrieval queries.
QUERY_REWRITER_PROMPT = PromptTemplate.from_template(
    """
You are a query rewriter. Rewrite the user question based on the feedback.
The new query should maintain the same semantic meaning as the original
query but augment it with more specific information to improve retrieval.

The new query should not be very long - it should be a single sentence since
it'll be used to query the vector database or a web search.

Here is the user question: {question}
Here is the previously retrieved documents: {retrieved_docs}
Here is the feedback: {feedback}

New query:
"""
)

# ============================================================================
# Deep Research Agent Prompts
# ============================================================================

# Input variables: {topic}. Plans the report structure only (no research).
RESEARCH_MANAGER_PROMPT = PromptTemplate.from_template(
    """
You are a Research Manager responsible for planning comprehensive research reports.

Your task is to:
1. Take a broad research topic
2. Break it down into 3-5 specific research questions/sections
3. Create a research plan with a clear structure

For each research question, provide:
- A clear title
- A description of what should be researched

DO NOT conduct the actual research. You are only planning the structure.

The report structure should follow:
- Executive Summary
- Key Findings
- Detailed Analysis (sections for each research question)
- Limitations and Further Research

Return your answer as a structured research plan.

Research Topic: {topic}
"""
)

# Input variables: {question}, {description}. Drives one web-search-backed
# research section per planned question.
RESEARCH_SPECIALIST_PROMPT = PromptTemplate.from_template(
    """
You are a Specialized Research Agent responsible for thoroughly researching a specific topic section.

Process:
1. Analyze the research question and description
2. Generate effective search queries to gather information
3. Use the web_search tool to find relevant information
4. Synthesize findings into a comprehensive section
5. Include proper citations to your sources

Your response should be:
- Thorough (at least 500 words)
- Well-structured with subsections
- Based on factual information (not made up)
- Include proper citations to sources

Always critically evaluate information and ensure you cover the topic comprehensively.

Research Question: {question}
Description: {description}
"""
)

# Input variables: {topic}, {detailed_analysis}. Wraps researched sections
# with summary / key findings / limitations.
REPORT_FINALIZER_PROMPT = PromptTemplate.from_template(
    """
You are a Report Finalizer responsible for completing a research report.

Based on the detailed analysis sections that have been researched, you need to generate:

1. Executive Summary (Brief overview of the entire report, ~150 words)
2. Key Findings (3-5 most important insights, in bullet points)
3. Limitations and Further Research (Identify gaps and suggest future areas of study)

Your content should be:
- Concise and clear
- Properly formatted
- Based strictly on the researched content

Do not introduce new information not found in the research.

Research Topic: {topic}

Detailed Analysis Sections:
{detailed_analysis}

Generate the Executive Summary, Key Findings, and Limitations sections to complete the report.
"""
)

# ============================================================================
# Tool Agent Prompts (if needed in the future)
# ============================================================================

# Input variables: {query}. Currently unreferenced placeholder for the tool agent.
TOOL_SELECTION_PROMPT = PromptTemplate.from_template(
    """
You are an intelligent assistant with access to various tools.
Based on the user's query, select and use the appropriate tool(s) to provide an accurate response.

Available tools:
- Calculator: For mathematical computations
- DateTime: For date and time related queries
- Weather: For weather information

User Query: {query}

Think step by step about which tool(s) to use and how to best answer the query.
"""
)

# ============================================================================
# Query Classification Prompt (used in unified_chat.py)
# ============================================================================

# Input variables: {query}. Must yield exactly one of the four routing labels.
QUERY_CLASSIFIER_PROMPT = PromptTemplate.from_template(
    """
You are a query classifier that determines which system should handle a user's query.

Analyze the user's query and classify it into one of these categories:

1. SIMPLE_TOOL - Use for:
   - Mathematical calculations or expressions
   - Date/time queries
   - Weather queries
   - Any query that can be answered with a simple tool call

2. AGENTIC_RAG - Use for:
   - Questions about specific documents
   - Queries requiring document retrieval
   - Questions about content from your knowledge base

3. DEEP_RESEARCH - Use for:
   - Requests for comprehensive research or analysis
   - Topics requiring multiple sources and detailed investigation
   - Keywords: "deep dive", "comprehensive analysis", "research", "detailed report"

4. GENERAL - Use for:
   - General conversation and questions
   - Simple factual queries
   - Anything that doesn't fit the above categories

Return ONLY one of these exact words: SIMPLE_TOOL, AGENTIC_RAG, DEEP_RESEARCH, or GENERAL

User Query: {query}

Classification:
"""
)
|
agents/rag_agent.py
ADDED
|
@@ -0,0 +1,380 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Agentic RAG implementation.
|
| 2 |
+
|
| 3 |
+
This implementation focuses on:
|
| 4 |
+
- Building an Agentic RAG system with dynamic search strategy
|
| 5 |
+
- Using LangGraph for controlling the RAG workflow
|
| 6 |
+
- Evaluating retrieved information quality
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import os.path as osp
|
| 10 |
+
from typing import Dict, List, Optional, Any
|
| 11 |
+
from langchain_core.messages import HumanMessage, AIMessage
|
| 12 |
+
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
| 13 |
+
from langchain_core.documents import Document
|
| 14 |
+
from pydantic import BaseModel, Field
|
| 15 |
+
|
| 16 |
+
from langgraph.graph import StateGraph, END, START, MessagesState
|
| 17 |
+
from langgraph.graph.message import add_messages
|
| 18 |
+
|
| 19 |
+
# For document retrieval
|
| 20 |
+
from langchain_core.vectorstores import InMemoryVectorStore
|
| 21 |
+
from langchain_community.tools.tavily_search import TavilySearchResults
|
| 22 |
+
from langchain_core.tools import tool
|
| 23 |
+
from langchain_core.tools.retriever import create_retriever_tool
|
| 24 |
+
from langchain_community.document_loaders import PyPDFLoader
|
| 25 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 26 |
+
from core.chat_interface import ChatInterface
|
| 27 |
+
from agents.prompts import (
|
| 28 |
+
DOCUMENT_EVALUATOR_PROMPT,
|
| 29 |
+
DOCUMENT_SYNTHESIZER_PROMPT,
|
| 30 |
+
QUERY_REWRITER_PROMPT,
|
| 31 |
+
)
|
| 32 |
+
from langchain_core.prompts import PromptTemplate
|
| 33 |
+
from langgraph.prebuilt import ToolNode, tools_condition
|
| 34 |
+
from dotenv import load_dotenv
|
| 35 |
+
load_dotenv()
|
| 36 |
+
|
| 37 |
+
# NOTE: Update this to the path of your documents
|
| 38 |
+
# For Hugging Face Spaces, use: "/home/user/app/documents/"
|
| 39 |
+
# For local development, use your local path
|
| 40 |
+
# NOTE: Update this to the path of your documents
# For Hugging Face Spaces, use: "/home/user/app/documents/"
# For local development, use your local path
BASE_DIR = "data/"

# Add your document files here.
# Example for OPM documents (you can replace with your own documents):
# one annual performance report per year, 2019 through 2022.
FILE_PATHS = [
    osp.join(BASE_DIR, f"{year}-annual-performance-report.pdf")
    for year in range(2019, 2023)
]
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
# Structured-output schema bound to the evaluator LLM in
# AgenticRAGChat.initialize via `with_structured_output(DocumentEvaluation)`.
class DocumentEvaluation(BaseModel):
    """Evaluation result for retrieved documents."""
    # True when retrieval is good enough to synthesize a final answer;
    # False routes the workflow to the query rewriter.
    is_sufficient: bool = Field(description="Whether the documents provide sufficient information")
    # Free-text critique passed to QUERY_REWRITER_PROMPT as {feedback}.
    feedback: str = Field(description="Feedback about the document quality and what's missing")
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
class AgenticRAGState(MessagesState):
    """State for the Agentic RAG workflow using MessagesState as base."""
    # MessagesState already handles messages with add_messages reducer
    feedback: str = ""  # evaluator critique consumed by the query rewriter
    is_sufficient: bool = False  # set by the document-evaluation node
    retry_count: int = 0  # Track number of retries to prevent infinite loops
    max_retries: int = 3  # Maximum number of query rewrites allowed
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
class AgenticRAGChat(ChatInterface):
    """Agentic RAG implementation with dynamic retrieval and evaluation.

    Workflow (LangGraph):
        1. generate_query_or_respond - LLM decides to call tools or answer.
        2. retrieve_documents        - ToolNode (vector store / web search).
        3. evaluate_documents        - LLM grades retrieval sufficiency.
        4. synthesize_answer         - final grounded answer, OR
           query_rewriter            - reformulate the query and loop back,
                                       bounded by the state's max_retries.
    """

    def __init__(self):
        # Heavy components are built in initialize() so construction stays
        # cheap and free of network side effects.
        self.llm = None
        self.embeddings = None
        self.evaluator_llm = None
        self.vector_store = None
        self.tools = []
        self.graph = None

    def initialize(self) -> None:
        """Initialize components for the Agentic RAG system."""
        # Initialize models
        self.llm = ChatOpenAI(model="gpt-4o", temperature=0)
        self.embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
        self.evaluator_llm = self.llm.with_structured_output(DocumentEvaluation)

        # Load whichever configured documents actually exist.
        # Bug fix: the gate previously required *all* files to exist
        # (all(...)), which silently skipped loading everything when a single
        # file was missing, even though _load_and_process_documents already
        # tolerates and warns about missing files.
        if FILE_PATHS and any(osp.exists(f) for f in FILE_PATHS):
            docs = self._load_and_process_documents()
            print(f"Loading {len(docs)} documents into vector store")
            self.vector_store = InMemoryVectorStore(embedding=self.embeddings)
            self.vector_store.add_documents(docs)
        else:
            print("Warning: No documents configured for RAG. Add document paths to FILE_PATHS.")
            # Create empty vector store
            self.vector_store = InMemoryVectorStore(embedding=self.embeddings)

        # Create tools
        self.tools = self._create_tools()

        # Create the graph
        self.graph = self._create_graph()

    def _load_and_process_documents(self) -> List[Document]:
        """Load the configured PDFs, chunk them, and return Document objects.

        Missing files are skipped with a warning; per-file load errors are
        caught so one corrupt PDF does not abort the whole ingest.
        """
        docs = []
        for file_path in FILE_PATHS:
            if not osp.exists(file_path):
                print(f"Warning: File not found - {file_path}")
                continue

            print(f"Loading document from {file_path}")
            try:
                loader = PyPDFLoader(file_path)
                page_docs = loader.load()

                # Combine all pages and split into chunks.
                # NOTE(review): joining pages discards per-page metadata
                # (page numbers); only the source filename is retained below.
                combined_doc = "\n".join([doc.page_content for doc in page_docs])

                # Use RecursiveCharacterTextSplitter for better chunking
                text_splitter = RecursiveCharacterTextSplitter(
                    chunk_size=1000,
                    chunk_overlap=200,
                    separators=["\n\n", "\n", ".", " ", ""]
                )
                chunks = text_splitter.split_text(combined_doc)

                # Convert chunks to Document objects with metadata
                docs.extend([
                    Document(
                        page_content=chunk,
                        metadata={"source": osp.basename(file_path)}
                    ) for chunk in chunks
                ])
            except Exception as e:
                print(f"Error loading {file_path}: {e}")

        return docs

    def _create_tools(self) -> List[Any]:
        """Create retriever and search tools.

        Returns:
            List of tools: a vector-store retriever tool (when a store
            exists) plus a Tavily-backed web_search tool.
        """
        tools = []

        # Create retriever tool if we have documents
        if self.vector_store:
            retriever = self.vector_store.as_retriever(
                search_type="similarity",
                search_kwargs={"k": 3}
            )
            retriever_tool = create_retriever_tool(
                retriever,
                name="search_documents",
                description=(
                    "Search through the document database. "
                    "Use this for questions about content in the loaded documents."
                )
            )
            tools.append(retriever_tool)

        # Create web search tool
        @tool("web_search")
        def search_web(query: str) -> list[dict]:
            """
            Search the web for the latest information on any topic.

            Args:
                query: The search query to look up

            Returns:
                List of search results with title, content, and URL
            """
            search = TavilySearchResults(max_results=3)
            return search.invoke(query)

        tools.append(search_web)

        return tools

    def _generate_query_or_respond(self, state: AgenticRAGState):
        """LLM entry node: either call a tool or answer directly.

        Uses the last message as the question so that rewritten queries
        (appended by _query_rewriter) drive subsequent retrieval passes.
        """
        print("Generating query or responding...")

        prompt = PromptTemplate.from_template(
            """
You are a helpful assistant that can answer questions using the provided tools.

Available tools:
- search_documents: Search through loaded documents
- web_search: Search the web for information

Based on the user's query, decide whether to use tools or respond directly.
Use tools when you need specific information to answer the question accurately.

Query: {question}
"""
        )

        # Get the latest message (either original or rewritten query)
        question = state["messages"][-1].content
        chain = prompt | self.llm.bind_tools(self.tools)

        response = chain.invoke({"question": question})
        return {"messages": [response]}

    def _evaluate_documents(self, state: AgenticRAGState):
        """Grade retrieval output against the original user question.

        Returns state updates `is_sufficient` and `feedback` used by the
        routing decision after this node.
        """
        print("Evaluating documents...")

        # Get original user question and retrieved docs
        user_question = state["messages"][0].content
        retrieved_docs = state["messages"][-1].content

        chain = DOCUMENT_EVALUATOR_PROMPT | self.evaluator_llm
        evaluation = chain.invoke({
            "question": user_question,
            "retrieved_docs": retrieved_docs
        })

        print(f"Evaluation result: {evaluation}")
        return {
            "is_sufficient": evaluation.is_sufficient,
            "feedback": evaluation.feedback
        }

    def _synthesize_answer(self, state: AgenticRAGState):
        """Synthesize the final answer from retrieved documents."""
        print("Synthesizing answer...")

        user_question = state["messages"][0].content
        retrieved_docs = state["messages"][-1].content

        chain = DOCUMENT_SYNTHESIZER_PROMPT | self.llm
        answer = chain.invoke({
            "question": user_question,
            "retrieved_docs": retrieved_docs
        })

        return {"messages": [answer]}

    def _query_rewriter(self, state: AgenticRAGState):
        """Rewrite the query based on evaluation feedback.

        Also increments retry_count so the max_retries guard in the graph
        can actually fire. (Bug fix: retry_count was declared in the state
        but never updated anywhere, making the loop guard dead code.)
        """
        print("Rewriting query...")

        user_question = state["messages"][0].content
        retrieved_docs = state["messages"][-1].content
        feedback = state["feedback"]

        chain = QUERY_REWRITER_PROMPT | self.llm
        new_query = chain.invoke({
            "question": user_question,
            "feedback": feedback,
            "retrieved_docs": retrieved_docs
        })

        print(f"Rewritten query: {new_query.content}")
        return {
            "messages": [new_query],
            "retry_count": state.get("retry_count", 0) + 1,
        }

    def _create_graph(self) -> Any:
        """Create and compile the agentic RAG graph."""

        def route_after_evaluation(state: AgenticRAGState) -> str:
            """Synthesize when docs suffice OR the retry budget is spent;
            otherwise rewrite the query for another retrieval pass."""
            out_of_retries = (
                state.get("retry_count", 0) >= state.get("max_retries", 3)
            )
            if state["is_sufficient"] or out_of_retries:
                return "synthesize_answer"
            return "query_rewriter"

        # Create the graph builder
        graph_builder = StateGraph(AgenticRAGState)

        # Add nodes
        graph_builder.add_node("generate_query_or_respond", self._generate_query_or_respond)
        graph_builder.add_node("retrieve_documents", ToolNode(self.tools))
        graph_builder.add_node("evaluate_documents", self._evaluate_documents)
        graph_builder.add_node("synthesize_answer", self._synthesize_answer)
        graph_builder.add_node("query_rewriter", self._query_rewriter)

        # Add edges
        graph_builder.add_edge(START, "generate_query_or_respond")

        # Conditional edge: if tools were called, retrieve documents; else end
        graph_builder.add_conditional_edges(
            "generate_query_or_respond",
            tools_condition,
            {
                "tools": "retrieve_documents",
                END: END,
            },
        )

        # After retrieval, evaluate documents
        graph_builder.add_edge("retrieve_documents", "evaluate_documents")

        # Conditional edge: synthesize when sufficient or retries exhausted,
        # else rewrite the query. (Previously only is_sufficient was checked,
        # so insufficient retrievals looped until the recursion limit.)
        graph_builder.add_conditional_edges(
            "evaluate_documents",
            route_after_evaluation,
            {
                "synthesize_answer": "synthesize_answer",
                "query_rewriter": "query_rewriter",
            },
        )

        # After rewriting, generate new query
        graph_builder.add_edge("query_rewriter", "generate_query_or_respond")

        # After synthesizing, end
        graph_builder.add_edge("synthesize_answer", END)

        return graph_builder.compile()

    def _convert_history_to_messages(self, chat_history: Optional[List[Dict[str, str]]]) -> List:
        """Convert chat history to LangChain message format.

        Args:
            chat_history: List of dicts with 'role' and 'content' keys

        Returns:
            List of LangChain message objects
        """
        messages = []
        if chat_history:
            for msg in chat_history:
                if msg["role"] == "user":
                    messages.append(HumanMessage(content=msg["content"]))
                elif msg["role"] == "assistant":
                    messages.append(AIMessage(content=msg["content"]))
        return messages

    def process_message(self, message: str, chat_history: Optional[List[Dict[str, str]]] = None) -> str:
        """Process a message using the Agentic RAG system.

        Args:
            message: The user's query.
            chat_history: Optional prior turns as {'role', 'content'} dicts.

        Returns:
            The synthesized answer, or a graceful error message.
        """
        print("\n=== STARTING AGENTIC RAG QUERY ===")
        print(f"Query: {message}")

        # Convert chat history to messages and append the current query so
        # the graph sees the full conversation context.
        history_messages = self._convert_history_to_messages(chat_history)
        history_messages.append(HumanMessage(content=message))

        state = AgenticRAGState(
            messages=history_messages,
            feedback="",
            is_sufficient=False,
            retry_count=0,
            max_retries=3
        )

        try:
            # Each retrieval pass traverses ~4 graph super-steps, so the
            # limit must cover (max_retries + 1) passes plus synthesis.
            # Bug fix: the previous limit of 3 raised GraphRecursionError
            # before a single retrieval pass could complete.
            config = {"recursion_limit": 25}
            result = self.graph.invoke(state, config=config)

            print("\n=== RAG QUERY COMPLETED ===")

            # Return the final answer
            if result.get("messages"):
                final_message = result["messages"][-1]
                if hasattr(final_message, 'content'):
                    return final_message.content
                else:
                    return str(final_message)
            else:
                return "I couldn't find relevant information to answer your question."

        except Exception as e:
            print(f"Error in RAG processing: {e}")
            # Graceful fallback. Bug fix: the old message hard-coded text
            # about "Mission Support Services" — leftover from debugging one
            # specific query — and was returned for *any* recursion failure.
            if "recursion" in str(e).lower():
                return ("I had difficulty finding the exact information you're looking for "
                        "in the documents. Try rephrasing your question or asking about a "
                        "more specific aspect of the topic.")
            return f"I encountered an error while searching for information: {str(e)}"
|
agents/research_agent.py
ADDED
|
@@ -0,0 +1,451 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Deep Research Multi-Agent System implementation.
|
| 2 |
+
|
| 3 |
+
This implementation focuses on:
|
| 4 |
+
- Building a multi-agent system for comprehensive research
|
| 5 |
+
- Using LangGraph with MessagesState for proper state management
|
| 6 |
+
- Synthesizing research findings into structured reports
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from typing import Dict, List, Optional, Any, Annotated
|
| 10 |
+
from langchain_core.messages import HumanMessage, AIMessage, BaseMessage
|
| 11 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 12 |
+
from langchain_core.output_parsers import StrOutputParser
|
| 13 |
+
from langchain_openai import ChatOpenAI
|
| 14 |
+
from langchain_community.tools.tavily_search import TavilySearchResults
|
| 15 |
+
from langchain_core.tools import tool
|
| 16 |
+
from langgraph.graph import StateGraph, END, START, MessagesState
|
| 17 |
+
from langgraph.prebuilt import create_react_agent
|
| 18 |
+
from pydantic import BaseModel, Field
|
| 19 |
+
|
| 20 |
+
from core.chat_interface import ChatInterface
|
| 21 |
+
from opik.integrations.langchain import OpikTracer
|
| 22 |
+
from agents.prompts import (
|
| 23 |
+
RESEARCH_MANAGER_PROMPT,
|
| 24 |
+
REPORT_FINALIZER_PROMPT,
|
| 25 |
+
)
|
| 26 |
+
from dotenv import load_dotenv
|
| 27 |
+
load_dotenv()
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class ResearchQuestion(BaseModel):
    """A research question with a title and description."""
    # Section heading used for this question in the final report.
    title: str = Field(description="The title of the research question/section")
    # Guidance handed to the specialized research agent for this section.
    description: str = Field(description="Description of what to research for this section")
    # Flipped to True by the workflow once this section has been researched.
    completed: bool = Field(default=False, description="Whether research has been completed for this section")
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class ResearchPlan(BaseModel):
    """The overall research plan created by the Research Manager."""
    # The user-supplied topic the whole plan is built around.
    topic: str = Field(description="The main research topic")
    # Ordered sections to investigate; one specialized-agent run per question.
    questions: List[ResearchQuestion] = Field(description="The list of research questions to investigate")
    # Cursor advanced by the specialized research node as sections complete.
    current_question_index: int = Field(default=0, description="Index of the current question being researched")
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class Report(BaseModel):
    """The final research report structure."""
    # Filled in by the finalizer node from the generated summary text.
    executive_summary: Optional[str] = Field(default=None, description="Executive summary of the research")
    # Filled in by the finalizer node.
    key_findings: Optional[str] = Field(default=None, description="Key findings from the research")
    # One dict per research question: {"title": str, "content": str | None, "sources": list[str]}.
    detailed_analysis: List[Dict[str, Any]] = Field(default_factory=list, description="Detailed analysis sections")
    # Filled in by the finalizer node.
    limitations: Optional[str] = Field(default=None, description="Limitations and further research")
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
class ResearchState(MessagesState):
    """State tracking for the deep research workflow using MessagesState as base."""
    # Plan produced by the research manager node; None until planning has run.
    research_plan: Optional[ResearchPlan] = None
    # Report accumulated section-by-section by the specialized research node.
    report: Optional[Report] = None
    # Routing key consumed by the evaluator's conditional edges.
    next_step: str = "research_manager"

    # MessagesState already handles messages with add_messages reducer
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
class DeepResearchChat(ChatInterface):
    """Deep research implementation using multi-agent system with proper state management.

    Pipeline: a Research Manager plans sections, a specialized ReAct agent
    researches each section via web search, an evaluator loops until all
    sections are done, and a finalizer assembles the report.
    """

    def __init__(self):
        self.llm = None
        self.research_manager = None
        self.specialized_research_agent = None
        self.finalizer = None
        self.workflow = None
        self.tavily_search_tool = None
        # Initialized here so process_message never hits an AttributeError
        # if initialize() fails (or is skipped) before the tracer is set.
        self.tracer = None

    def initialize(self) -> None:
        """Initialize components for the deep research system."""
        # Initialize LLM model
        self.llm = ChatOpenAI(model="gpt-4o", temperature=0)

        # Create Tavily search tool for agents
        self.tavily_search_tool = TavilySearchResults(max_results=5)

        # Create components
        self.research_manager = self._create_research_manager()
        self.specialized_research_agent = self._create_specialized_research_agent()
        self.finalizer = self._create_finalizer()

        # Create the workflow graph using these agents
        self.workflow = self._create_workflow()

        # Optional: Create Opik Tracer for monitoring
        try:
            self.tracer = OpikTracer(
                graph=self.workflow.get_graph(xray=True),
                project_name="nexus-research-workflow"
            )
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # still propagate; monitoring is best-effort only.
            self.tracer = None
            print("Opik tracer not available, continuing without monitoring")

    def _create_research_manager(self) -> Any:
        """Create the research manager agent.

        Returns a runnable chain that turns a topic into a structured
        ResearchPlan via the LLM's structured-output mode.
        """
        research_manager = (
            RESEARCH_MANAGER_PROMPT
            | self.llm.with_structured_output(ResearchPlan)
        )

        return research_manager

    def _create_specialized_research_agent(self) -> Any:
        """Create the specialized (per-section) research agent.

        Returns a ReAct agent equipped with a web-search tool.
        """
        # Create search tool for the agent
        @tool("web_search")
        def search_web(query: str) -> str:
            """Search the web for information on the research topic."""
            results = self.tavily_search_tool.invoke(query)
            formatted_results = []

            # Flatten Tavily's result dicts into a readable plain-text list.
            for i, result in enumerate(results, 1):
                formatted_results.append(f"Result {i}:")
                formatted_results.append(f"Title: {result.get('title', 'N/A')}")
                formatted_results.append(f"Content: {result.get('content', 'N/A')}")
                formatted_results.append(f"URL: {result.get('url', 'N/A')}")
                formatted_results.append("")

            return "\n".join(formatted_results)

        # Create the specialized agent
        tools = [search_web]

        # Define the system message for the specialized research agent
        system_message = """You are a Specialized Research Agent responsible for thoroughly researching a specific topic section.

Process:
1. Analyze the research question and description
2. Generate effective search queries to gather information
3. Use the web_search tool to find relevant information
4. Synthesize findings into a comprehensive section
5. Include proper citations to your sources

Your response should be:
- Thorough (at least 500 words)
- Well-structured with subsections
- Based on factual information (not made up)
- Include proper citations to sources

Always critically evaluate information and ensure you cover the topic comprehensively.
"""

        # Create the specialized research agent
        specialized_agent = create_react_agent(
            model=self.llm,
            tools=tools,
            prompt=system_message
        )

        return specialized_agent

    def _create_finalizer(self) -> Any:
        """Create the finalizer chain (prompt -> LLM -> plain string)."""
        finalizer = REPORT_FINALIZER_PROMPT | self.llm | StrOutputParser()
        return finalizer

    def _create_workflow(self) -> Any:
        """Create the multi-agent deep research workflow with proper MessagesState usage.

        Returns the compiled LangGraph state graph.
        """
        # Create a state graph
        workflow = StateGraph(ResearchState)

        # Research Manager Node
        def research_manager_node(state: ResearchState) -> Dict:
            """Create the research plan and update messages."""
            print("\n=== RESEARCH MANAGER NODE ===")

            # Use the LAST user message as the topic so conversation
            # context (follow-up requests) is handled correctly.
            user_messages = [msg for msg in state["messages"] if isinstance(msg, HumanMessage)]
            topic = user_messages[-1].content if user_messages else state["messages"][-1].content

            print(f"Planning research for topic: {topic}")

            # Generate research plan
            research_plan = self.research_manager.invoke({"topic": topic})
            print(f"Created research plan with {len(research_plan.questions)} questions")

            # Initialize an empty report slot for each planned question.
            report = Report(
                detailed_analysis=[
                    {"title": q.title, "content": None, "sources": []}
                    for q in research_plan.questions
                ]
            )

            planning_msg = AIMessage(
                content=f"Research plan created with {len(research_plan.questions)} sections to investigate."
            )

            return {
                "messages": [planning_msg],
                "research_plan": research_plan,
                "report": report,
            }

        # Specialized Research Node
        def specialized_research_node(state: ResearchState) -> Dict:
            """Conduct research on the current question and update messages."""
            print("\n=== SPECIALIZED RESEARCH NODE ===")

            research_plan = state["research_plan"]
            assert research_plan is not None, "Research plan is None"
            current_index = research_plan.current_question_index

            if current_index >= len(research_plan.questions):
                print("All research questions completed")
                return {}

            current_question = research_plan.questions[current_index]
            print(f"Researching question {current_index + 1}/{len(research_plan.questions)}: "
                  f"{current_question.title}")

            # Create input for the specialized agent
            research_input = {
                "messages": [
                    ("user", f"""Research the following topic thoroughly:

Topic: {current_question.title}

Description: {current_question.description}

Provide a detailed analysis with proper citations to sources.
""")
                ]
            }

            # Invoke the specialized agent
            result = self.specialized_research_agent.invoke(research_input)

            # Extract content from the result (tuple or message object).
            last_message = result["messages"][-1]
            if isinstance(last_message, tuple):
                content = last_message[1]
            else:
                content = last_message.content

            # Heuristically collect source lines (anything containing a URL).
            sources = []
            for line in content.split("\n"):
                if "http" in line and "://" in line:
                    sources.append(line.strip())

            # Mark this question done and record its section in the report.
            research_plan.questions[current_index].completed = True

            report = state["report"]
            assert report is not None, "Report is None"
            report.detailed_analysis[current_index]["content"] = content
            report.detailed_analysis[current_index]["sources"] = sources

            # Move to the next question
            research_plan.current_question_index += 1

            progress_msg = AIMessage(
                content=f"Completed research for section: {current_question.title}"
            )

            return {
                "messages": [progress_msg],
                "research_plan": research_plan,
                "report": report,
            }

        # Research Evaluator Node
        def evaluator_node(state: ResearchState) -> Dict:
            """Evaluate the research progress and determine next steps."""
            print("\n=== EVALUATOR NODE ===")

            research_plan = state["research_plan"]
            assert research_plan is not None, "Research plan is None"

            # Check if we've completed all questions
            all_completed = research_plan.current_question_index >= len(research_plan.questions)

            if all_completed:
                print("All research questions have been addressed. Moving to finalizer.")
                eval_msg = AIMessage(content="All research sections completed. Finalizing report...")
                return {
                    "messages": [eval_msg],
                    "next_step": "finalize"
                }
            else:
                # We have more sections to research
                next_section = research_plan.questions[research_plan.current_question_index].title
                print(f"More research needed. Moving to next section: {next_section}")
                eval_msg = AIMessage(content=f"Moving to research section: {next_section}")
                return {
                    "messages": [eval_msg],
                    "next_step": "research"
                }

        # Finalizer Node
        def finalizer_node(state: ResearchState) -> Dict:
            """Finalize the research report and update messages."""
            print("\n=== FINALIZER NODE ===")

            research_plan = state["research_plan"]
            report = state["report"]
            assert report is not None, "Report is None"
            assert research_plan is not None, "Research plan is None"

            # Concatenate all completed sections for the finalizer prompt.
            detailed_analysis = "\n\n".join([
                f"## {section['title']}\n{section['content']}"
                for section in report.detailed_analysis
                if section['content'] is not None
            ])

            # Generate the final sections
            final_sections = self.finalizer.invoke({
                "topic": research_plan.topic,
                "detailed_analysis": detailed_analysis
            })

            # The finalizer is expected to emit three blank-line-separated
            # sections; anything short of that leaves the fields as None.
            sections = final_sections.split("\n\n")

            if len(sections) >= 3:
                report.executive_summary = sections[0].replace("# Executive Summary", "").strip()
                report.key_findings = sections[1].replace("# Key Findings", "").strip()
                report.limitations = sections[2].replace("# Limitations and Further Research", "").strip()

            # Format the final report
            report_message = self._format_report(report)

            return {
                "messages": [report_message],
            }

        # Add nodes to the graph
        workflow.add_node("research_manager", research_manager_node)
        workflow.add_node("specialized_research", specialized_research_node)
        workflow.add_node("evaluate", evaluator_node)
        workflow.add_node("finalizer", finalizer_node)

        # Add edges
        workflow.add_edge(START, "research_manager")
        workflow.add_edge("research_manager", "specialized_research")
        workflow.add_edge("specialized_research", "evaluate")

        # Add conditional edges from evaluator
        workflow.add_conditional_edges(
            "evaluate",
            lambda x: x["next_step"],
            {
                "research": "specialized_research",
                "finalize": "finalizer"
            }
        )
        workflow.add_edge("finalizer", END)

        # Compile the workflow
        return workflow.compile()

    def _format_report(self, report: Report) -> AIMessage:
        """Format the research report for presentation.

        Args:
            report: The populated Report model.

        Returns:
            AIMessage containing the full markdown report.
        """
        sections = [
            "# Research Report\n",

            "## Executive Summary\n" + (report.executive_summary or "N/A"),

            "## Key Findings\n" + (report.key_findings or "N/A"),

            "## Detailed Analysis"
        ]

        # Add detailed analysis sections
        for section in report.detailed_analysis:
            if section["content"]:
                sections.append(f"### {section['title']}\n{section['content']}")

                if section["sources"]:
                    sources = "\n".join([f"- {source}" for source in section["sources"]])
                    sections.append(f"**Sources:**\n{sources}")

        # Add limitations
        sections.append("## Limitations and Further Research\n" + (report.limitations or "N/A"))

        return AIMessage(content="\n\n".join(sections))

    def _convert_history_to_messages(self, chat_history: Optional[List[Dict[str, str]]]) -> List:
        """Convert chat history to LangChain message format.

        Args:
            chat_history: List of dicts with 'role' and 'content' keys

        Returns:
            List of LangChain message objects
        """
        messages = []
        if chat_history:
            for msg in chat_history:
                if msg["role"] == "user":
                    messages.append(HumanMessage(content=msg["content"]))
                elif msg["role"] == "assistant":
                    messages.append(AIMessage(content=msg["content"]))
        return messages

    def process_message(self, message: str, chat_history: Optional[List[Dict[str, str]]] = None) -> str:
        """Process a message using the deep research system with proper state management.

        Args:
            message: The user's research topic/request.
            chat_history: Prior turns as {'role', 'content'} dicts.

        Returns:
            str: The final markdown research report.
        """
        print("\n=== STARTING DEEP RESEARCH ===")
        print(f"Research Topic: {message}")

        # Convert chat history to messages and append the current request.
        history_messages = self._convert_history_to_messages(chat_history)
        history_messages.append(HumanMessage(content=message))

        # Create initial state with full conversation history
        initial_state = ResearchState(
            messages=history_messages,
            research_plan=None,
            report=None,
            next_step="research_manager"
        )

        # Run workflow with optional tracing
        config = {"callbacks": [self.tracer]} if self.tracer else {}
        result = self.workflow.invoke(initial_state, config=config)

        print("\n=== RESEARCH COMPLETED ===")

        # Best-effort: persist the report; failure to write is non-fatal.
        try:
            # Explicit encoding avoids platform-dependent default codecs.
            with open("final_report.md", "w", encoding="utf-8") as f:
                f.write(result["messages"][-1].content)
            print("Report saved to final_report.md")
        except Exception as e:
            print(f"Could not save report to file: {e}")

        # Return the final report
        return result["messages"][-1].content
|
agents/tool_agent.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tool-Using Agent implementation.
|
| 2 |
+
|
| 3 |
+
This implementation focuses on:
|
| 4 |
+
- Converting tools to use with LangGraph
|
| 5 |
+
- Using the ReAct pattern for autonomous tool selection
|
| 6 |
+
- Handling calculator, datetime, and weather queries
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from typing import Dict, List, Optional, Any, Annotated
|
| 10 |
+
import io
|
| 11 |
+
import contextlib
|
| 12 |
+
from langchain_core.tools import tool
|
| 13 |
+
from langchain.chat_models import init_chat_model
|
| 14 |
+
from langgraph.prebuilt import create_react_agent
|
| 15 |
+
from langchain_community.tools.tavily_search import TavilySearchResults
|
| 16 |
+
from langchain_core.messages import HumanMessage, AIMessage
|
| 17 |
+
|
| 18 |
+
from core.chat_interface import ChatInterface
|
| 19 |
+
from tools.calculator import Calculator
|
| 20 |
+
from dotenv import load_dotenv
|
| 21 |
+
load_dotenv()
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class ToolUsingAgentChat(ChatInterface):
    """Tool-using agent implementation with calculator, datetime, and weather tools."""

    def __init__(self):
        self.llm = None
        self.tools = []
        self.graph = None

    def initialize(self) -> None:
        """Initialize components for the tool-using agent.

        This sets up:
        - The chat model
        - Tools for calculator, DateTime, and weather
        - The ReAct agent using LangGraph
        """
        # Initialize chat model
        self.llm = init_chat_model("gpt-4o", model_provider="openai")

        # Create tools
        self.tools = self._create_tools()

        # Create the ReAct agent graph with the tools.
        # The agent will autonomously decide which tools to use based on the query.
        self.graph = create_react_agent(
            model=self.llm,
            tools=self.tools,
        )

    def _create_tools(self) -> List[Any]:
        """Create and return the list of tools for the agent.

        Returns:
            List: List of tool objects
        """
        # Calculator tool for mathematical operations
        @tool
        def calculator(expression: Annotated[str, "The mathematical expression to evaluate"]) -> str:
            """Evaluate a mathematical expression using basic arithmetic operations (+, -, *, /, %, //).
            Examples: '5 + 3', '10 * (2 + 3)', '15 / 3', '17 % 5', '20 // 3'
            """
            result = Calculator.evaluate_expression(expression)
            # Calculator signals failure via an "Error..." string; raise so
            # the ReAct agent observes the tool failure instead of an answer.
            if isinstance(result, str) and result.startswith("Error"):
                raise ValueError(result)
            return str(result)

        # DateTime tool for date/time operations
        @tool
        def execute_datetime_code(code: Annotated[str, "Python code to execute for datetime operations"]) -> str:
            """Execute Python code for datetime operations. The code should use datetime or time modules.
            Examples:
            - 'print(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))'
            - 'print(datetime.datetime.now().strftime("%A, %B %d, %Y"))'
            - 'print(datetime.datetime.now().year)'
            - 'print((datetime.datetime(2025, 12, 25) - datetime.datetime.now()).days)'
            """
            output_buffer = io.StringIO()
            code = f"import datetime\nimport time\n{code}"
            try:
                with contextlib.redirect_stdout(output_buffer):
                    # SECURITY: exec() runs arbitrary LLM-generated code.
                    # An explicit empty globals dict isolates it from this
                    # function's locals and the module namespace, but it is
                    # still unrestricted code execution — do not expose this
                    # tool to untrusted callers without sandboxing.
                    exec(code, {})
                return output_buffer.getvalue().strip()
            except Exception as e:
                raise ValueError(f"Error executing datetime code: {str(e)}")

        # Weather tool using Tavily search
        @tool
        def get_weather(location: Annotated[str, "The location to get weather for (city, country)"]) -> str:
            """Get the current weather for a given location using web search.
            Examples: 'New York, USA', 'London, UK', 'Tokyo, Japan', 'Paris, France'
            """
            search = TavilySearchResults(max_results=3)
            query = f"current weather temperature conditions {location} today"
            results = search.invoke(query)

            if not results:
                return f"Could not find weather information for {location}"

            # Combine the top results for better coverage
            weather_info = []
            for result in results[:2]:  # Use top 2 results
                content = result.get("content", "")
                if content:
                    weather_info.append(content)

            if weather_info:
                return " ".join(weather_info)
            else:
                return f"Could not find detailed weather information for {location}"

        return [calculator, execute_datetime_code, get_weather]

    def _convert_history_to_messages(self, chat_history: Optional[List[Dict[str, str]]]) -> List:
        """Convert chat history to LangChain message format.

        Args:
            chat_history: List of dicts with 'role' and 'content' keys

        Returns:
            List of LangChain message objects
        """
        messages = []
        if chat_history:
            for msg in chat_history:
                if msg["role"] == "user":
                    messages.append(HumanMessage(content=msg["content"]))
                elif msg["role"] == "assistant":
                    messages.append(AIMessage(content=msg["content"]))
        return messages

    def process_message(self, message: str, chat_history: Optional[List[Dict[str, str]]] = None) -> str:
        """Process a message using the tool-using agent.

        The ReAct agent will:
        1. Consider the full conversation history
        2. Analyze the query in context
        3. Decide which tool(s) to use
        4. Execute the tool(s)
        5. Formulate a response

        Args:
            message: The user's input message
            chat_history: List of previous chat messages

        Returns:
            str: The assistant's response
        """
        try:
            # Convert chat history to messages and append the current turn.
            history_messages = self._convert_history_to_messages(chat_history)
            history_messages.append(HumanMessage(content=message))

            # Run the graph with the full conversation history
            result = self.graph.invoke({"messages": history_messages})

            # Extract the final response
            if result.get("messages"):
                final_message = result["messages"][-1]
                # Handle different message formats
                if hasattr(final_message, 'content'):
                    return final_message.content
                else:
                    return str(final_message)
            else:
                return "I couldn't process that request. Please try rephrasing."

        except Exception as e:
            # Top-level boundary: log and return a user-facing error string.
            print(f"Error in tool agent: {e}")
            return f"I encountered an error while processing your request: {str(e)}. Please try again."
|
agents/unified_chat.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Unified Chat Implementation combining all agent functionalities.
|
| 2 |
+
|
| 3 |
+
This implementation combines:
|
| 4 |
+
- Tool-using agent (calculator, datetime, weather)
|
| 5 |
+
- Agentic RAG for document queries
|
| 6 |
+
- Deep research for comprehensive analysis
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import os.path as osp
|
| 10 |
+
import io
|
| 11 |
+
import contextlib
|
| 12 |
+
from typing import Dict, List, Optional, Any, Annotated
|
| 13 |
+
from enum import Enum
|
| 14 |
+
|
| 15 |
+
from langchain_core.messages import HumanMessage, AIMessage
|
| 16 |
+
from langchain_core.tools import tool
|
| 17 |
+
from langchain.chat_models import init_chat_model
|
| 18 |
+
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
| 19 |
+
from langgraph.prebuilt import create_react_agent
|
| 20 |
+
from langgraph.graph import MessagesState
|
| 21 |
+
from langchain_community.tools.tavily_search import TavilySearchResults
|
| 22 |
+
from langchain_core.prompts import PromptTemplate
|
| 23 |
+
|
| 24 |
+
from core.chat_interface import ChatInterface
|
| 25 |
+
from tools.calculator import Calculator
|
| 26 |
+
from agents.prompts import QUERY_CLASSIFIER_PROMPT
|
| 27 |
+
|
| 28 |
+
# Import components from individual agents
|
| 29 |
+
from agents.tool_agent import ToolUsingAgentChat
|
| 30 |
+
from agents.rag_agent import AgenticRAGChat
|
| 31 |
+
from agents.research_agent import DeepResearchChat
|
| 32 |
+
from dotenv import load_dotenv

# Load API keys (e.g. OPENAI_API_KEY, TAVILY_API_KEY) from a local .env
# file at import time so the agents below can authenticate.
load_dotenv()
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class QueryType(Enum):
    """Types of queries the system can handle.

    The router LLM classifies every incoming message into one of these
    categories; the category decides which sub-agent answers it.
    """
    SIMPLE_TOOL = "simple_tool"      # Calculator, datetime, weather
    AGENTIC_RAG = "agentic_rag"      # OPM document queries
    DEEP_RESEARCH = "deep_research"  # Comprehensive research
    GENERAL = "general"              # General conversation
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class UnifiedChatState(MessagesState):
    """Unified state that uses MessagesState for proper message handling."""
    # QueryType.<member>.value assigned after classification; None before.
    query_type: Optional[str] = None
    # Internal key of the sub-agent the query was routed to
    # ("tool_agent", "rag_agent", "research_agent"); None until routed.
    current_agent: Optional[str] = None
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
class UnifiedChat(ChatInterface):
    """Unified chat implementation that routes queries to appropriate handlers.

    A deterministic router LLM classifies each message into a :class:`QueryType`
    and the message is delegated to one of three sub-agents:

    - ``ToolUsingAgentChat``: calculator, datetime, weather, general chat
    - ``AgenticRAGChat``: questions over the indexed OPM documents
    - ``DeepResearchChat``: comprehensive multi-source research
    """

    def __init__(self):
        # Components are created lazily in initialize() so construction never
        # fails and setup errors surface at startup, not at import time.
        self.router_llm = None
        self.tool_agent = None
        self.rag_agent = None
        self.research_agent = None
        self.query_classifier = None

    def initialize(self) -> None:
        """Initialize the router LLM, the classifier chain, and all sub-agents.

        Must be called once before process_message(). Propagates whatever the
        sub-agents raise if their setup fails (e.g. missing API keys).
        """
        print("Initializing Nexus AI Unified System...")

        # temperature=0 keeps classification deterministic.
        self.router_llm = ChatOpenAI(model="gpt-4o", temperature=0)
        self._create_query_classifier()

        print("Initializing Tool-Using Agent...")
        self.tool_agent = ToolUsingAgentChat()
        self.tool_agent.initialize()

        print("Initializing Agentic RAG...")
        self.rag_agent = AgenticRAGChat()
        self.rag_agent.initialize()

        print("Initializing Deep Research Agent...")
        self.research_agent = DeepResearchChat()
        self.research_agent.initialize()

        print("Nexus AI System initialized successfully!")

    def _create_query_classifier(self):
        """Build the classification chain: classifier prompt piped into the router LLM."""
        self.query_classifier = QUERY_CLASSIFIER_PROMPT | self.router_llm

    def _classify_query(self, query: str) -> QueryType:
        """Classify a query to determine which handler to use.

        Any unrecognized label and any classification error fall back to
        QueryType.GENERAL so the tool agent can still produce an answer.
        """
        # Recognized classifier labels -> query types; anything else is GENERAL.
        label_map = {
            "SIMPLE_TOOL": QueryType.SIMPLE_TOOL,
            "AGENTIC_RAG": QueryType.AGENTIC_RAG,
            "DEEP_RESEARCH": QueryType.DEEP_RESEARCH,
        }
        try:
            result = self.query_classifier.invoke({"query": query})
            classification = result.content.strip().upper()
            print(f"Query Classification: {classification}")
            return label_map.get(classification, QueryType.GENERAL)
        except Exception as e:
            print(f"Error in query classification: {e}")
            # Default to the general tool agent for safety.
            return QueryType.GENERAL

    def process_message(self, message: str, chat_history: Optional[List[Dict[str, str]]] = None) -> str:
        """Process a message by routing to the appropriate handler.

        Uses MessagesState internally for proper state management when needed.

        Args:
            message: The user's input message
            chat_history: List of previous chat messages

        Returns:
            str: The assistant's response
        """
        print(f"\n{'='*50}")
        print(f"Processing query: {message}")
        print(f"{'='*50}")

        # Classify the query, then look up its handler. SIMPLE_TOOL and
        # GENERAL both go to the tool agent; every QueryType member has an
        # entry, so no unreachable fallback branch is needed.
        query_type = self._classify_query(message)
        routes = {
            QueryType.SIMPLE_TOOL: ("tool_agent", "Tool-Using Agent", self.tool_agent),
            QueryType.GENERAL: ("tool_agent", "Tool-Using Agent", self.tool_agent),
            QueryType.AGENTIC_RAG: ("rag_agent", "Agentic RAG", self.rag_agent),
            QueryType.DEEP_RESEARCH: ("research_agent", "Deep Research Agent", self.research_agent),
        }
        agent_key, agent_label, agent = routes[query_type]

        # Track the exchange in a MessagesState-compatible structure.
        state = UnifiedChatState(
            messages=[HumanMessage(content=message)],
            query_type=query_type.value,
            current_agent=None,
        )

        try:
            print(f"→ Routing to {agent_label}")
            state["current_agent"] = agent_key
            return agent.process_message(message, chat_history)

        except Exception as e:
            error_msg = f"Error processing message: {str(e)}"
            print(error_msg)
            # Record the failure in the state messages for traceability.
            state["messages"].append(AIMessage(content=f"I encountered an error: {str(e)}"))
            return f"I encountered an error while processing your request: {str(e)}. Please try rephrasing your question."
|
app.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Main application file for Nexus AI Assistant.
|
| 2 |
+
|
| 3 |
+
This file handles the Gradio interface and orchestrates the chat implementations.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
from typing import List, Dict, Tuple
|
| 8 |
+
import os
|
| 9 |
+
import sys
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from dotenv import load_dotenv
|
| 12 |
+
|
| 13 |
+
# Add the parent directory to Python path so imports work correctly
|
| 14 |
+
# root_path = Path(__file__).resolve().parent
|
| 15 |
+
# sys.path.append(str(root_path))
|
| 16 |
+
# print(f"root path: {root_path}")
|
| 17 |
+
|
| 18 |
+
import gradio as gr
|
| 19 |
+
# Import the unified chat implementation
|
| 20 |
+
from agents.unified_chat import UnifiedChat

# Load environment variables (API keys) from .env before any agent is built.
load_dotenv()
|
| 22 |
+
|
| 23 |
+
def create_demo():
    """Create the Gradio demo for the unified chat system.

    Returns:
        gr.ChatInterface: the configured chat UI. If backend initialization
        fails, the UI is still created and the failure is shown in its
        description.
    """

    # Initialize the unified chat implementation up front so startup errors
    # are visible in the interface description instead of on first message.
    chat_impl = UnifiedChat()
    try:
        chat_impl.initialize()
        init_status = "✅ All systems ready!"
    except Exception as e:
        init_status = f"❌ Error initializing: {str(e)}"
        print(init_status)

    def respond(message: str, history) -> str:
        """Process a message and return the response.

        Args:
            message: The user's input message
            history: Previous turns. Depending on the Gradio version /
                ``type`` setting, ChatInterface may supply either
                ``(user, assistant)`` tuples or openai-style message dicts,
                so both formats are accepted here.

        Returns:
            str: The assistant's response
        """
        if not message:
            return "Please enter a message."

        # Normalize history to [{"role": ..., "content": ...}, ...] as
        # expected by the chat implementation.
        history_dicts = []
        for item in history or []:
            if isinstance(item, dict):
                # Already messages-format (Gradio 5 `type="messages"`).
                history_dicts.append(
                    {"role": item.get("role"), "content": item.get("content")}
                )
            else:
                # Legacy tuples-format: (user_message, assistant_response).
                user_msg, assistant_msg = item
                history_dicts.append({"role": "user", "content": user_msg})
                history_dicts.append({"role": "assistant", "content": assistant_msg})

        try:
            return chat_impl.process_message(message, history_dicts)
        except Exception as e:
            return f"Error processing message: {str(e)}"

    # Create the Gradio interface using ChatInterface
    demo = gr.ChatInterface(
        fn=respond,
        title="🤖 Nexus AI - Unified Intelligent Assistant",
        description=f"""
    {init_status}

    I combine multiple AI capabilities:
    • 🧮 **Calculator & Math** - Complex calculations
    • 📅 **Date & Time** - Current date, time calculations
    • 🌤️ **Weather** - Real-time weather information
    • 📚 **Document Analysis** - RAG-powered document search
    • 🔬 **Deep Research** - Comprehensive multi-source analysis
    • 💬 **General Chat** - Conversational AI

    The system automatically routes your query to the most appropriate handler.
    """,
        examples=[
            "What is 847 * 293?",
            "What's today's date?",
            "What's the weather in San Francisco?",
            "Explain quantum computing in simple terms",
            "Research the impact of AI on healthcare",
        ],
        theme=gr.themes.Soft(),
        analytics_enabled=False,
    )

    return demo
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
if __name__ == "__main__":
    # Build the Gradio app and start serving it.
    demo = create_demo()

    if os.environ.get("SPACE_ID"):
        # Hugging Face Spaces: bind to all interfaces on the assigned port.
        demo.launch(
            server_name="0.0.0.0",
            server_port=int(os.environ.get("PORT", 7860)),
        )
    else:
        # Local development: rely on Gradio's defaults (localhost, auto port).
        demo.launch()
|
core/__init__.py
ADDED
|
File without changes
|
core/chat_interface.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from abc import ABC, abstractmethod
|
| 2 |
+
from typing import Dict, List
|
| 3 |
+
|
| 4 |
+
class ChatInterface(ABC):
    """Abstract base class defining the core chat interface functionality.

    This interface is designed to be flexible enough to support different
    implementations across various assignments, from basic query handling
    to complex information retrieval and tool usage.
    """

    @abstractmethod
    def initialize(self) -> None:
        """Initialize any models, tools, or components needed for chat processing.

        This method should be called after instantiation to set up any necessary
        components like language models, memory, tools, etc. This separation allows
        for proper error handling during initialization and lazy loading of resources.
        """
        pass

    @abstractmethod
    def process_message(self, message: str, chat_history: List[Dict[str, str]]) -> str:
        """Process a message and return a response.

        This is the core method that all implementations must define. Different
        implementations can handle the message processing in their own way, such as:
        - Week 1: Query classification, basic tools, and memory
        - Week 2: RAG, web search, and knowledge synthesis
        - Week 3: Advanced tool calling and agentic behavior

        Args:
            message: The user's input message
            chat_history: List of previous chat messages, where each message
                is a dict with 'role' (user/assistant) and 'content' keys.
                NOTE(review): the annotation here requires a list, but concrete
                implementations declare it Optional with a None default —
                consider aligning the signatures.

        Returns:
            str: The assistant's response
        """
        pass
|
data/2019-annual-performance-report.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d9c03ac0b77ef7bc82433049e4dd7783cd56855b51f62d3255cf0d2ee7c8e7c2
|
| 3 |
+
size 2357476
|
data/2020-annual-performance-report.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5de716f9e6bd1bad9b0b64768a8af4a7e528b1ea73798ef3f630d1b079091e7b
|
| 3 |
+
size 2838017
|
data/2021-annual-performance-report.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:11e216c3fe1ff2d31ef7c06eb019b65723cfc53b7a935d68b5c7dea2638bd830
|
| 3 |
+
size 9822451
|
data/2022-annual-performance-report.pdf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b6200aff36bfcbfa955af57d8fdbebf01a233474949421adb1dc59efd8c4ea8d
|
| 3 |
+
size 4134068
|
requirements.txt
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
tavily-python==0.7.5
|
| 2 |
+
langchain-community==0.3.25
|
| 3 |
+
pypdf==5.1.0
|
| 4 |
+
langchain==0.3.25
|
| 5 |
+
langgraph==0.4.5
|
| 6 |
+
langchain-openai==0.3.18
|
| 7 |
+
python-dotenv==1.1.0
|
| 8 |
+
gradio==5.34.2
|
| 9 |
+
langgraph-prebuilt==0.2.2
|
| 10 |
+
opik
|
| 11 |
+
fastmcp==2.8.1
|
| 12 |
+
langchain-mcp-adapters==0.1.7
|
| 13 |
+
gradio==5.34.2
|
| 14 |
+
huggingface_hub[cli]
|
tools/__init__.py
ADDED
|
File without changes
|
tools/calculator.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
from typing import Union
|
| 3 |
+
|
| 4 |
+
class Calculator:
    """A simple calculator tool for evaluating basic arithmetic expressions."""

    @staticmethod
    def evaluate_expression(expression: str) -> Union[float, str]:
        """Evaluate a basic arithmetic expression.

        Supports only basic arithmetic operations (+, -, *, /), unary +/-,
        and parentheses. The expression is parsed into an AST and evaluated
        node by node instead of being passed to ``eval``, so constructs the
        character whitelist cannot exclude — e.g. ``**`` (two ``*``
        characters), which allowed huge exponentiations like ``9**9**9`` —
        are rejected.

        Args:
            expression: A string containing a mathematical expression
                        e.g. "5 + 3" or "10 * (2 + 3)"

        Returns:
            Union[float, str]: The result of the evaluation, or an error message
                if the expression is invalid

        Examples:
            >>> Calculator.evaluate_expression("5 + 3")
            8.0
            >>> Calculator.evaluate_expression("10 * (2 + 3)")
            50.0
            >>> Calculator.evaluate_expression("15 / 3")
            5.0
        """
        import ast

        # AST operator node -> the arithmetic it performs.
        binary_ops = {
            ast.Add: lambda a, b: a + b,
            ast.Sub: lambda a, b: a - b,
            ast.Mult: lambda a, b: a * b,
            ast.Div: lambda a, b: a / b,
        }
        unary_ops = {
            ast.UAdd: lambda a: +a,
            ast.USub: lambda a: -a,
        }

        def _eval_node(node):
            """Recursively evaluate an allowed AST node; raise ValueError otherwise."""
            if isinstance(node, ast.Expression):
                return _eval_node(node.body)
            if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
                return node.value
            if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
                return binary_ops[type(node.op)](_eval_node(node.left), _eval_node(node.right))
            if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
                return unary_ops[type(node.op)](_eval_node(node.operand))
            raise ValueError("disallowed expression element")

        try:
            # Clean up the expression
            expression = expression.strip()

            # Keep the original character whitelist so invalid characters
            # still produce the same, specific error message.
            if not re.match(r'^[\d\s\+\-\*\/\(\)\.]*$', expression):
                return "Error: Invalid characters in expression"

            # Parse and evaluate safely via the AST (no eval()).
            result = _eval_node(ast.parse(expression, mode="eval"))

            # Convert to float for a uniform numeric return type.
            return float(result)

        except ZeroDivisionError:
            return "Error: Division by zero"
        except (SyntaxError, TypeError, NameError, ValueError):
            return "Error: Invalid expression"
        except Exception as e:
            return f"Error: {str(e)}"
|