| import os |
| from llama_index.core.tools import QueryEngineTool, ToolMetadata |
| from llama_index.core.agent import ReActAgent |
| from llama_index.tools.tavily_research import TavilyToolSpec |
|
|
| |
|
|
# Role prompt for a methodology-focused agent: restricts the model to the
# paper's 'Methods' section (methodology, experimental setup, datasets).
| METHODOLOGY_PROMPT = """You are a world-class expert in scientific methodology. |
| Your sole purpose is to analyze the 'Methods' section of the provided research paper. |
| Break down the methodology, experimental setup, and any datasets used. Be critical and precise in your analysis.""" |
|
|
# Role prompt for a results-focused agent: restricts the model to the
# 'Results' and 'Discussion' sections (findings, metrics, interpretation).
| RESULTS_PROMPT = """You are a data-driven analyst. |
| Your only job is to scrutinize the 'Results' and 'Discussion' sections of the paper. |
| Summarize the key findings, reported performance metrics, and the authors' interpretation of the results.""" |
|
|
# Role prompt for a citation-focused agent: restricts the model to the
# 'Introduction' and 'Related Work' sections and asks for the 2-3 most
# foundational cited papers.
| CITATION_PROMPT = """You are a seasoned academic with a deep knowledge of this field. |
| Your task is to analyze the 'Introduction' and 'Related Work' sections. |
| Identify the 2-3 most foundational papers cited and explain why they are critical for understanding this work's context.""" |
|
|
# Role prompt for a future-work-focused agent: restricts the model to the
# 'Conclusion' and 'Future Work' sections (gaps, open questions, next steps).
| FUTURE_WORK_PROMPT = """You are a creative and forward-thinking researcher. |
| Your goal is to find opportunities for new research based on the 'Conclusion' and 'Future Work' sections. |
| List the potential research gaps, open questions, and suggested next steps identified by the authors.""" |
|
|
# Prompt handed to the agent built by create_scout_agent. It asks for two
# recent papers and requires the reply to be ONLY newline-separated direct
# PDF links, so any downstream code presumably parses the output line by
# line -- confirm against the caller before changing the output format.
| SCOUT_PROMPT = """You are a highly skilled research scout for an AI research team. |
| Your sole purpose is to find the most relevant and recent research papers based on a user's query. |
| You must use your search tool to find 2 of the most relevant papers from the last 2-3 years, prioritizing sources like arxiv.org. |
| 
| **If the user's query is very broad (e.g., 'Software Engineering', 'Machine Learning'), you should refine your search to look for 'survey papers' or 'review articles' on that topic.** |
| 
| You MUST return ONLY a list of direct links to the PDF versions of these papers, separated by newlines. Do not add any commentary or explanation. |
| 
| 
| Example Output: |
| https://arxiv.org/pdf/2305.12345.pdf |
| https://arxiv.org/pdf/2401.54321.pdf |
| """ |
|
|
# Prompt for extracting bibliographic fields (title, authors, year) from the
# text of a paper's first page. It demands a single minified JSON object with
# the keys "title", "authors" (list) and "year" (string in the example).
# NOTE(review): not referenced by any function visible in this section;
# presumably consumed elsewhere -- verify before editing the JSON schema.
| CITATION_EXTRACTOR_PROMPT = """You are a machine designed for bibliographic data extraction. |
| From the provided text of a research paper's first page, identify the following fields: Title, a list of all Authors, and the Year of publication. |
| You MUST respond ONLY with a single, minified JSON object containing these fields. Do not add any explanation or conversational text. |
| 
| Example Input: |
| 'RepairAgent: An Autonomous, LLM-Based Agent for Program Repair Islem Bouzenia University of Stuttgart... Michael Pradel University of Stuttgart... 2024...' |
| 
| Example Output: |
| {"title": "RepairAgent: An Autonomous, LLM-Based Agent for Program Repair", "authors": ["Islem Bouzenia", "Michael Pradel"], "year": "2024"} |
| """ |
|
|
| |
|
|
def get_query_tool(index, similarity_top_k: int = 3):
    """Build the tool that lets an agent query the indexed research paper.

    Args:
        index: Object exposing ``as_query_engine(similarity_top_k=...)``;
            presumably a LlamaIndex index built over the paper -- confirm
            against the caller.
        similarity_top_k: Value forwarded to ``index.as_query_engine``.
            Defaults to 3, the value that used to be hard-coded, so existing
            callers see no change.

    Returns:
        A ``QueryEngineTool`` named ``research_paper_query_tool`` that wraps
        the query engine obtained from ``index``.
    """
    query_engine = index.as_query_engine(similarity_top_k=similarity_top_k)
    return QueryEngineTool(
        query_engine=query_engine,
        metadata=ToolMetadata(
            name="research_paper_query_tool",
            description="A tool for querying specific information from the indexed research paper.",
        ),
    )
|
|
def create_specialist_agent(system_prompt: str, llm, query_tool, verbose=False):
    """Create a ReAct agent that plays a single specialist role.

    Args:
        system_prompt: Role instructions the agent should follow.
        llm: Language model handed to the agent.
        query_tool: The only tool the agent is given.
        verbose: Forwarded to ``ReActAgent.from_tools``.

    Returns:
        The agent produced by ``ReActAgent.from_tools``.
    """
    # ``ReActAgent.from_tools`` has no ``system_prompt`` parameter: passing it
    # lands in ``**kwargs`` and the text never reaches the LLM. ``context`` is
    # the supported hook for injecting role instructions into the ReAct
    # system header, so the specialist prompt is routed through it.
    return ReActAgent.from_tools(
        tools=[query_tool],
        llm=llm,
        context=system_prompt,
        verbose=verbose,
    )
|
|
def create_scout_agent(llm, verbose=True):
    """Create the agent responsible for finding papers via Tavily search.

    Args:
        llm: Language model handed to the agent.
        verbose: Forwarded to ``ReActAgent.from_tools``.

    Returns:
        A ``ReActAgent`` equipped with the tools from ``TavilyToolSpec`` and
        instructed by ``SCOUT_PROMPT``.

    Raises:
        ValueError: If ``TAVILY_API_KEY`` is unset, empty, or contains only
            whitespace.
    """
    # Strip before validating: a secret pasted with a stray space or newline
    # would otherwise pass the emptiness check and be sent to Tavily as-is.
    tavily_api_key = (os.getenv("TAVILY_API_KEY") or "").strip()
    if not tavily_api_key:
        raise ValueError("TAVILY_API_KEY environment variable not set. Please add it to your Space secrets.")

    tavily_tool_spec = TavilyToolSpec(api_key=tavily_api_key)

    # ``ReActAgent.from_tools`` silently drops an unknown ``system_prompt``
    # keyword; ``context`` is the parameter that actually injects the scout
    # instructions into the ReAct system header.
    return ReActAgent.from_tools(
        tools=tavily_tool_spec.to_tool_list(),
        llm=llm,
        context=SCOUT_PROMPT,
        verbose=verbose,
    )