|
|
""" |
|
|
Research Agent - Information gathering and research tasks

The Research Agent is responsible for:
1. Gathering information from multiple sources (web, Wikipedia, arXiv)
2. Searching for relevant context and facts
3. Compiling research results in a structured format
4. Returning citations and source information
|
|
""" |
|
|
|
|
|
import os |
|
|
from typing import Dict, Any, List |
|
|
from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage, AIMessage |
|
|
from langgraph.types import Command |
|
|
from langchain_groq import ChatGroq |
|
|
from langchain_core.tools import BaseTool |
|
|
from observability import agent_span, tool_span |
|
|
from dotenv import load_dotenv |
|
|
|
|
|
|
|
|
from langgraph_tools import get_research_tools |
|
|
|
|
|
load_dotenv("env.local") |
|
|
|
|
|
|
|
|
# Location of the optional prompt override. The path is relative, so it is
# resolved against the process's current working directory.
_RESEARCH_PROMPT_PATH = "archive/prompts/retrieval_prompt.txt"

# Built-in prompt used when the override file does not exist.
_DEFAULT_RESEARCH_PROMPT = """
You are a research specialist focused on gathering accurate information.

Your goals:
1. Search for factual, current, and relevant information
2. Use multiple sources to verify facts
3. Provide clear citations and sources
4. Structure findings in an organized manner

When researching:
- Use web search for current information and facts
- Use Wikipedia for encyclopedic knowledge
- Use ArXiv for academic and technical topics
- Cross-reference information across sources
- Note any conflicting information found

Format your response as:
### Research Strategy
[Describe what searches are needed]

### Findings
[Key information discovered]

### Key Facts
- Fact 1
- Fact 2
- Fact 3

### Sources
- Source 1
- Source 2
"""


def load_research_prompt() -> str:
    """Load the research-specific system prompt.

    Reads ``archive/prompts/retrieval_prompt.txt`` (relative to the current
    working directory) and returns its full contents. If that file does not
    exist, the built-in default prompt is returned instead.

    Returns:
        The prompt text, either from the file or the built-in default.

    Raises:
        OSError: For read failures other than a missing file (for example a
            permission error); only ``FileNotFoundError`` triggers the
            fallback.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding.
        with open(_RESEARCH_PROMPT_PATH, "r", encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError:
        return _DEFAULT_RESEARCH_PROMPT
|
|
|
|
|
|
|
|
# Searches run directly with the user's query when the LLM requests no tools.
# Each entry: (tool name, progress message, label used in the failure message).
_FORCED_SEARCHES = (
    ("tavily_search_results_json", "🌐 Forcing Tavily web search...", "Tavily"),
    ("wikipedia_search", "📚 Forcing Wikipedia search...", "Wikipedia"),
)


def _first_human_query(messages: List[Any]) -> Any:
    """Return the content of the first HumanMessage in *messages*, or "" if none."""
    return next(
        (msg.content for msg in messages if isinstance(msg, HumanMessage)),
        "",
    )


def _build_research_request(user_query: Any, notes_length: int) -> str:
    """Build the human message that instructs the LLM to call research tools."""
    return f"""
You must research the following question using the available tools. Do not answer from memory alone.

Question: {user_query}

Current research status: {notes_length} characters already gathered

CRITICAL: You MUST use the available research tools to gather information. Do not provide an answer without using tools.

Available tools:
- tavily_search_results_json: For current web information
- wikipedia_search: For encyclopedic knowledge
- arxiv_search: For academic papers

Instructions:
1. ALWAYS use tavily_search_results_json for current information
2. Use wikipedia_search for general knowledge topics
3. Use arxiv_search for academic/technical topics if relevant
4. You must call at least one tool - preferably multiple tools
5. Analyze and synthesize the information from the tools
6. Provide structured findings with sources

Start by calling the appropriate research tools to gather information about this question.
"""


def _build_analysis_request(user_query: Any, research_findings: str, notes_length: int) -> str:
    """Build the human message that asks the LLM to analyze raw tool output."""
    return f"""
Based on the research results below, provide a structured analysis:

Original Question: {user_query}

Research Results:
{research_findings}

Current research status: {notes_length} characters already gathered

Instructions:
1. Analyze the search results for relevant information
2. Extract key facts that help answer the question
3. Note any important details or findings
4. Identify if additional specific searches might be needed
5. Structure your findings clearly with citations

Please provide a comprehensive analysis of the research findings.
"""


def _run_requested_tools(tool_calls: List[Any], research_tools: List[Any]) -> List[str]:
    """Execute each LLM-requested tool call and return one formatted entry per call."""
    results = []
    for tool_call in tool_calls:
        # .get avoids a KeyError on a malformed call that carries no name.
        name = tool_call.get('name', 'unknown')
        args = tool_call.get('args', {})
        try:
            tool = next((t for t in research_tools if t.name == name), None)
            if not tool:
                results.append(f"**{name}**: Tool not found")
                continue

            with tool_span(tool.name, metadata={"args": args}) as tool_span_ctx:
                result = tool.invoke(args)
                results.append(f"**{tool.name}**: {result}")
                if tool_span_ctx:
                    tool_span_ctx.update_trace(output={"result": str(result)[:200] + "..."})
        except Exception as e:
            # A single failing tool must not abort the remaining calls; the
            # failure is recorded in the results instead.
            print(f"⚠️ Tool {name} failed: {e}")
            results.append(f"**{name}**: Error - {str(e)}")
    return results


def _run_forced_searches(user_query: Any, research_tools: List[Any]) -> List[str]:
    """Run the fallback searches with the raw user query; skip tools that are not available."""
    results = []
    for tool_name, start_message, label in _FORCED_SEARCHES:
        tool = next((t for t in research_tools if t.name == tool_name), None)
        if not tool:
            continue
        try:
            print(start_message)
            result = tool.invoke({"query": user_query})
            results.append(f"**{tool_name} (forced)**: {result}")
        except Exception as e:
            print(f"⚠️ Forced {label} search failed: {e}")
            results.append(f"**{tool_name} (forced)**: Error - {str(e)}")
    return results


def research_agent(state: Dict[str, Any]) -> Command:
    """
    Research Agent node that gathers information using LangChain tools.

    Flow:
    1. Ask a tool-bound ChatGroq model to research the first HumanMessage
       found in ``state["messages"]``.
    2. Execute whichever tools the model requested. If it requested none,
       run the Tavily and Wikipedia tools directly with the user's query
       (when those tools are available).
    3. If any tool output (including recorded errors) was collected, ask the
       model, without tools bound, to analyze it; otherwise fall back to the
       content of the first response.

    Args:
        state: Graph state. Keys read here: ``messages``, ``research_notes``,
            ``loop_counter``, ``user_id`` and ``session_id``. Missing or
            ``None`` values for the first three are treated as empty / zero.

    Returns:
        A ``Command`` routing to the ``"lead"`` node with ``research_notes``
        set to this iteration's formatted results, or to an error section if
        anything raised. NOTE(review): whether the update appends to or
        replaces existing notes depends on the graph's state reducer, which
        is not visible here.
    """
    print("🔍 Research Agent: Gathering information...")

    try:
        research_prompt = load_research_prompt()

        llm = ChatGroq(
            model="llama-3.3-70b-versatile",
            temperature=0.3,
            max_tokens=2048
        )

        research_tools = get_research_tools()
        llm_with_tools = llm.bind_tools(research_tools)

        with agent_span(
            "research",
            metadata={
                "tools_available": len(research_tools),
                "user_id": state.get("user_id", "unknown"),
                "session_id": state.get("session_id", "unknown")
            }
        ) as span:
            # `or` guards cover keys that are present but set to None, which
            # a plain .get(key, default) would not.
            user_query = _first_human_query(state.get("messages") or [])
            notes_length = len(state.get('research_notes') or '')
            iteration = (state.get('loop_counter') or 0) + 1

            research_messages = [
                SystemMessage(content=research_prompt),
                HumanMessage(content=_build_research_request(user_query, notes_length))
            ]
            response = llm_with_tools.invoke(research_messages)

            # Debug output describing what the model returned.
            print(f"🔍 Research response type: {type(response)}")
            print(f"🔍 Has tool_calls attribute: {hasattr(response, 'tool_calls')}")
            if hasattr(response, 'tool_calls'):
                print(f"🔍 Tool calls: {response.tool_calls}")
            else:
                print(f"🔍 Response content preview: {str(response)[:200]}...")

            requested_calls = getattr(response, 'tool_calls', None)
            if requested_calls:
                print(f"🛠️ Executing {len(requested_calls)} research tools")
                tool_results = _run_requested_tools(requested_calls, research_tools)
            else:
                print("⚠️ No tool calls detected - LLM did not choose to use any tools")
                print("🔧 Forcing tool usage for research...")
                tool_results = _run_forced_searches(user_query, research_tools)

            if tool_results:
                research_findings = "\n\n".join(tool_results)

                analysis_messages = [
                    SystemMessage(content=research_prompt),
                    HumanMessage(
                        content=_build_analysis_request(user_query, research_findings, notes_length)
                    )
                ]
                # The second pass uses the plain model so it summarizes
                # rather than requesting more tools.
                analysis_response = llm.invoke(analysis_messages)
                analysis_content = (
                    analysis_response.content
                    if hasattr(analysis_response, 'content')
                    else str(analysis_response)
                )

                formatted_results = f"""
### Research Iteration {iteration}

{analysis_content}

### Raw Tool Results
{research_findings}

---
"""
            else:
                # No tool produced any entry: use the model's direct answer.
                response_content = response.content if hasattr(response, 'content') else str(response)
                formatted_results = f"""
### Research Iteration {iteration}

{response_content}

---
"""

            print(f"📝 Research Agent: Gathered {len(formatted_results)} characters")

            if span:
                span.update_trace(output={
                    "research_length": len(formatted_results),
                    # Counts every collected entry, including recorded errors.
                    "tools_used": len(tool_results),
                    "findings_preview": formatted_results[:300] + "..."
                })

            return Command(
                goto="lead",
                update={
                    "research_notes": formatted_results
                }
            )

    except Exception as e:
        # Top-level boundary: report the failure through research_notes so
        # the graph can continue to the lead node instead of crashing.
        print(f"❌ Research Agent Error: {e}")

        error_result = f"""
### Research Error
An error occurred during research: {str(e)}

"""
        return Command(
            goto="lead",
            update={
                "research_notes": error_result
            }
        )