"""Gradio front end for a multi-agent research assistant.

A user query is passed through four agents in sequence (retriever,
summarizer, critique, synthesizer).  Between the retriever and summarizer
steps the text of the retrieved papers is chunked, embedded, and saved as a
local FAISS index.
"""

import logging
import os

import gradio as gr
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.agents import initialize_agent, AgentType
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.tools import Tool
from langchain.tools import DuckDuckGoSearchRun
from langchain_core.documents import Document

# Import agents and functions
from retriever import retriever_agent
from retriever import retrieve_and_extract_papers
from summarizer import summarizer_agent
from critique import critique_agent
from synthesizer import synthesizer_agent

logger = logging.getLogger(__name__)

# Directory the FAISS index is written to on every query.
# NOTE(review): nothing in this file reads the saved index back; presumably the
# summarizer/critique agents load it from this path -- confirm before renaming.
FAISS_INDEX_DIR = "faiss_index"

# Initialize embedding model
embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")

# Define prompt templates
#
# NOTE(review): the template text below is reproduced character-for-character
# from the file as received, where it is single-spaced apart from the explicit
# "\n" breaks.  If the prompts originally had more line breaks that were lost
# in transit, restore them here.
retriever_template = PromptTemplate(
    input_variables=["user_prompt"],
    template=(
        "You are a retriever agent tasked with creating an efficient search small "
        "query to retrieve academic papers from arxiv relevant to a user’s request. "
        "Based on the user’s input prompt, generate a concise and precise search "
        "query (a string of keywords or phrases) that will be used by the function "
        "`retrieve_and_extract_papers(query: str, max_papers: int = 3) -> str` to "
        "fetch up to 3 relevant papers. The query should focus on key concepts, "
        "avoid ambiguity, and prioritize relevance to ensure the extracted text is "
        "suitable for summarization. "
        "User Input Prompt: {user_prompt} "
        "Instructions: "
        "1. Identify the core concepts, topics, or questions in the user prompt. "
        "2. Formulate a search query using relevant keywords or short phrases. "
        "3. Exclude overly broad or irrelevant terms to improve precision. "
        "4. Output only the search query as a string. \n"
        "Example: "
        '- User Prompt: "Recent advancements in large language models for natural '
        'language processing" '
        '- Search Query: "large language models NLP advancements" '
        "Generate the search query for the provided user prompt. "
    ),
)

summarizer_template = PromptTemplate(
    input_variables=["user_prompt", "retriever_query"],
    template=(
        "You are a summarizer agent tasked with generating a concise summary of "
        "academic papers retrieved from a RAG pipeline. Using the user’s prompt and "
        "the retriever query used for searching, fetch relevant document content "
        "with the RAG tool and summarize the key points, findings, or insights "
        "relevant to the user’s request. "
        "User Input Prompt: {user_prompt} "
        "Retriever Query: {retriever_query} "
        "Instructions: "
        "1. Use the RAG tool to retrieve document content based on the retriever "
        "query. "
        "2. Analyze the user prompt to identify the focus or specific aspects of "
        "interest. "
        "3. Summarize the main ideas, results, or trends from the retrieved "
        "documents, excluding unnecessary details. "
        "4. Ensure the summary is clear, coherent, and no longer than 1500 words. "
        "5. If the RAG tool returns irrelevant or insufficient documents, note this "
        "briefly and provide a general response based on the prompt. "
        "6. Output only the summary as a string. "
        "Example: "
        '- User Prompt: "Recent advancements in large language models for natural '
        'language processing" '
        '- Retriever Query: "large language models NLP advancements" '
        '- Summary: "Recent advancements in large language models (LLMs) focus on '
        "improved efficiency and performance in NLP tasks. Techniques like "
        "fine-tuning and transformer architectures enhance accuracy in text "
        'generation and understanding." '
        "Generate the summary for the provided user prompt and retriever query. "
    ),
)

critique_template = PromptTemplate(
    input_variables=["user_query", "summary"],
    template=(
        "You are a critique agent tasked with strictly evaluating a summary of "
        "academic papers based on a user query. \n"
        "Using the provided user query and summary, analyze the content for "
        "accuracy, completeness, and biases. Use the FAISSRetriever and WebSearch "
        "tools to cross-reference information and validate claims. Provide a "
        "detailed critique, including recommendations, identified biases, and a "
        "relevance rating (1–5, where 5 is highly relevant and accurate). "
        "User Query: {user_query} "
        "Summary: {summary} "
        "Instructions: "
        "1. Use the FAISSRetriever tool to fetch document chunks related to the "
        "user query for additional context from academic papers. "
        "2. Use the WebSearch tool to verify claims in the summary or find recent "
        "developments. "
        "3. Evaluate the summary for: "
        "- Accuracy: Are claims supported by evidence from the tools? "
        "- Completeness: Does it cover key aspects of the user query? "
        "- Biases: Identify methodological, dataset, author, or other biases "
        "(e.g., overemphasis on positive results, lack of diverse perspectives). "
        "4. Provide recommendations to improve the summary (e.g., additional "
        "topics, clearer explanations). "
        "5. Assign a relevance rating (1–5) based on how well the summary addresses "
        "the user query and its reliability. "
        "6. Be strict in identifying biases and unsupported claims. "
        "7. Output a structured response with sections: Critique, Recommendations, "
        "Biases, Relevance Rating. "
        "Example: "
        '- User Query: "Advancements in diffusion models for image generation" '
        '- Summary: "Diffusion models outperform GANs in image quality." '
        "- Output: "
        "Critique: The summary claims diffusion models outperform GANs but lacks "
        "evidence or metrics. "
        "Recommendations: Include specific metrics (e.g., FID scores) and compare "
        "computational costs. "
        "Biases: Potential bias toward diffusion models; ignores GANs’ strengths in "
        "training speed. "
        "Relevance Rating: 2/5 (lacks depth and evidence). "
        "Generate the critique for the provided user query and summary. \n"
    ),
)

synthesizer_template = PromptTemplate(
    input_variables=["user_prompt", "summaries", "critiques"],
    template=(
        "You are a Synthesizer Agent assisting in generating a structured research "
        "overview for a given topic. "
        "You are provided with: "
        "1. The user’s original prompt. "
        "2. A set of summaries extracted from multiple research papers. "
        "3. Critique analysis, including commentary on contradictions, outdated "
        "information, and biased claims, along with associated critique scores "
        "(from 1 to 5, where 5 is most critical). "
        "Your task is to: "
        "- Analyze the summaries to extract core findings and organize them into a "
        "clear, topic-wise outline. "
        "- Incorporate the critiques along with their scores in a dedicated section "
        "to highlight research limitations or cautionary notes. "
        "User Prompt: {user_prompt} "
        "Paper Summaries: {summaries} "
        "Critique Analysis (with scores): {critiques} "
        "Instructions: "
        "1. Begin with a concise introduction to the research topic based on the "
        "user prompt. "
        "2. Generate a structured outline capturing key themes, methodologies, "
        "findings, and notable contributions. "
        '3. Follow up with a "Critical Reflections" section that includes: '
        "- A list of critical observations. "
        "- Their corresponding critique scores. "
        "- Any specific recommendations or warnings. "
        "4. Maintain a professional and academic tone throughout. "
        "5. The final response should be suitable for inclusion in a research "
        "literature review or academic discussion. "
        "Output Format: "
        "- **Introduction** "
        "- **Outline** "
        "- Theme 1: ... "
        "- Theme 2: ... "
        "- **Critical Reflections** "
        "- Observation 1 (Score: X): ... "
        "- Observation 2 (Score: X): ... "
        "- Outdated or Contradictory Claims (if any): ... "
        "- Recommendations: ... \n"
        "Now generate the structured synthesis below: "
    ),
)


def _build_paper_index(paper_text: str) -> FAISS:
    """Chunk *paper_text*, embed the chunks, and save a FAISS index to disk.

    Args:
        paper_text: Concatenated text of the retrieved papers.

    Returns:
        The in-memory FAISS index that was also written to ``FAISS_INDEX_DIR``.

    Raises:
        ValueError: If splitting the text yields no chunks to index.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
    chunks = splitter.split_documents([Document(page_content=paper_text)])
    if not chunks:
        # Fail here with a readable message instead of letting the FAISS build
        # run on an empty document list.
        raise ValueError("Retrieved paper text produced no chunks to index.")
    index = FAISS.from_documents(chunks, embedding_model)
    index.save_local(FAISS_INDEX_DIR)
    return index


# Main processing function
def process_research_query(user_prompt: str) -> str:
    """Run the retriever -> summarizer -> critique -> synthesizer pipeline.

    Args:
        user_prompt: The research question or topic entered by the user.

    Returns:
        The synthesizer agent's ``"output"`` value for the query.

    Raises:
        ValueError: If *user_prompt* is blank, or if no paper text could be
            retrieved for the generated search query.
    """
    if not user_prompt or not user_prompt.strip():
        raise ValueError("The research query is empty; please enter a topic.")

    # Step 1: Retriever Agent -- turn the user prompt into a search query.
    retriever_prompt = retriever_template.format(user_prompt=user_prompt)
    retriever_output = retriever_agent.invoke({"input": retriever_prompt})["output"]

    # Step 2: Retrieve papers and create FAISS index
    pdftext = retrieve_and_extract_papers(retriever_output)
    if not pdftext or not pdftext.strip():
        raise ValueError(
            f"No paper text could be retrieved for search query: {retriever_output!r}"
        )
    _build_paper_index(pdftext)

    # Step 3: Summarizer Agent
    summarizer_prompt = summarizer_template.format(
        user_prompt=user_prompt,
        retriever_query=retriever_output,
    )
    summary = summarizer_agent.invoke({"input": summarizer_prompt})["output"]

    # Step 4: Critique Agent
    critique_prompt = critique_template.format(user_query=user_prompt, summary=summary)
    critique = critique_agent.invoke({"input": critique_prompt})["output"]

    # Step 5: Synthesizer Agent
    synthesizer_prompt = synthesizer_template.format(
        user_prompt=user_prompt,
        summaries=summary,
        critiques=critique,
    )
    return synthesizer_agent.invoke({"input": synthesizer_prompt})["output"]


# Gradio chat interface function
def research_assistant_chat(message, history):
    """Chat callback: answer *message* via ``process_research_query``.

    Args:
        message: The text the user typed into the chat box.
        history: Prior chat turns; part of the callback signature but not
            used here.

    Returns:
        The pipeline's response, or ``"Error: <message>"`` if any step raised.
    """
    try:
        return process_research_query(message)
    except Exception as exc:
        # Top-level UI boundary: keep the app responsive, but record the
        # traceback so failures can be diagnosed from the logs.
        logger.exception("Research pipeline failed")
        return f"Error: {str(exc)}"


# Create Gradio chat interface
iface = gr.ChatInterface(
    fn=research_assistant_chat,
    title="Multi-Agent Research Assistant",
    description="Enter a research query to get a synthesized response based on retrieved papers, summaries, and critiques.",
    theme="soft",
)

# Launch the app
if __name__ == "__main__":
    iface.launch()