# PDF_Agent / app.py
# Source: NajmiHassan1's Hugging Face Space, commit 672ecad.
# (The Hugging Face page-chrome lines — "raw / history / blame / 7.37 kB" —
# were copied into this file by the scrape; converted to comments so the
# module parses as valid Python.)
import streamlit as st
import os
import time
from dotenv import load_dotenv
import PyPDF2
from langchain_groq import ChatGroq
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.documents import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import LLMChain, RetrievalQA
from langchain_core.prompts import ChatPromptTemplate
# Load environment variables; GROQ_API_KEY is expected in .env or the process env.
load_dotenv()
groq_api_key = os.getenv("GROQ_API_KEY")
# Streamlit UI setup — page config must be the first Streamlit call in the script.
st.set_page_config(page_title="Multi-Agent Research Assistant", layout="wide")
st.title("πŸ€– Multi-Agent Research Assistant")
st.markdown("Enhance your research process with intelligent summarization, critique, debate, translation, citation, and interactive Q&A. Upload a research paper and let our agents do the thinking!")
# Load Groq LLM (Llama3). NOTE(review): if groq_api_key is None this fails at
# request time, not here — consider an explicit st.error() guard.
llm = ChatGroq(groq_api_key=groq_api_key, model_name="Llama3-8b-8192")
# Load embedding model used for the FAISS vector store (sentence-transformers MiniLM).
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
# Prompt Templates — one per agent task. Placeholders ({context}, {summary},
# {gaps}, {language}, {content}) are filled by the chains built further below.
# Summarization: receives stuffed document chunks as {context}.
summary_prompt = ChatPromptTemplate.from_template("""
You are a helpful assistant. Summarize the following document clearly and accurately:
<context>
{context}
</context>
""")
# Gap analysis: consumes the summary produced by summary_prompt.
gap_prompt = ChatPromptTemplate.from_template("""
Analyze the following summary and identify key research gaps, unanswered questions, or limitations:
{summary}
""")
# Idea generation: consumes the gap analysis output.
idea_prompt = ChatPromptTemplate.from_template("""
Given the research gaps:
{gaps}
Suggest 2-3 original research project ideas or questions that address these gaps. Explain why they are valuable.
""")
# Debate simulation: two personas argue over the summary.
debate_prompt = ChatPromptTemplate.from_template("""
Act as two researchers discussing a paper.
Supporter: Defends the core idea of the document.
Critic: Challenges its assumptions, methods, or impact.
Use the following summary as reference:
{summary}
Generate a short conversation between them.
""")
# Translation: applied to any agent output in the final display section.
translate_prompt = ChatPromptTemplate.from_template("""
Translate the following content into {language}, preserving meaning and academic tone:
{content}
""")
# Citation: builds an APA citation from the stuffed document chunks.
citation_prompt = ChatPromptTemplate.from_template("""
Generate an APA-style citation based on the document content:
<context>
{context}
</context>
""")
# Extract & process PDFs
def process_pdfs(uploaded_files):
    """Extract the text of each uploaded PDF and split it into chunks.

    Args:
        uploaded_files: iterable of file-like objects (e.g. Streamlit
            UploadedFile) exposing ``.name`` and a readable binary stream.

    Returns:
        A list of LangChain ``Document`` chunks (1000 chars, 200 overlap),
        each carrying its source filename in ``metadata["source"]``.
    """
    docs = []
    for pdf in uploaded_files:
        reader = PyPDF2.PdfReader(pdf)
        # extract_text() may return None for image-only pages; treat as empty.
        full_text = "".join(page.extract_text() or "" for page in reader.pages)
        docs.append(Document(page_content=full_text, metadata={"source": pdf.name}))
    chunker = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return chunker.split_documents(docs)
# Create vector store
def create_vector_store(documents):
    """Build an in-memory FAISS index over *documents* using the module-level
    HuggingFace embedding model loaded above."""
    return FAISS.from_documents(documents, embedding)
# Chain runner helpers
def run_chain(chain, input_dict):
    """Invoke *chain* with *input_dict* and return the chain's result as-is."""
    result = chain.invoke(input_dict)
    return result
# File uploader — accepts one or more PDFs; processing is triggered explicitly
# by the button so reruns don't rebuild the index on every interaction.
uploaded_files = st.file_uploader("πŸ“ Upload one or more PDF files", type=["pdf"], accept_multiple_files=True)
if uploaded_files and st.button("πŸ“š Process Documents"):
    with st.spinner("Processing documents and generating vector store..."):
        # Chunked documents and the FAISS index are kept in session_state so
        # they survive Streamlit's script reruns.
        documents = process_pdfs(uploaded_files)
        st.session_state.documents = documents
        st.session_state.vectorstore = create_vector_store(documents)
        st.success("βœ… Document vector store created!")
# Agent Activation — shown only once documents have been processed into
# session_state by the uploader section above.
if "documents" in st.session_state:
    st.subheader("πŸŽ“ Master Agent: What would you like me to do?")
    task = st.selectbox("Choose a task:", [
        "Summarize document",
        "Identify research gaps",
        "Suggest research ideas",
        "Simulate a debate",
        "Generate citation",
        "Chat with paper"
    ])
    # Handle Chat with paper separately: it needs a free-text question and
    # retrieval over the vector store instead of the whole-document chains.
    if task == "Chat with paper":
        query = st.text_input("πŸ’¬ Ask a question about the paper:")
        if query and st.button("πŸš€ Ask Question"):
            with st.spinner("Searching paper for answer..."):
                retriever = st.session_state.vectorstore.as_retriever()
                qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
                # NOTE(review): .run() is the legacy LangChain call style; the
                # modern equivalent is .invoke() — confirm before migrating.
                output = qa_chain.run(query)
                st.session_state["last_agent_output"] = output
    # Handle other tasks
    elif st.button("πŸš€ Run Agent"):
        with st.spinner("Running agents..."):
            # Cap the context at the first 10 chunks — presumably to stay
            # within the model's context window; TODO confirm the limit.
            docs = st.session_state.documents[:10]
            output = ""
            if task == "Summarize document":
                chain = create_stuff_documents_chain(llm, summary_prompt)
                output = run_chain(chain, {"context": docs})
            elif task == "Identify research gaps":
                # Two-step pipeline: summarize, then mine the summary for gaps.
                # NOTE(review): LLMChain.invoke returns a dict (inputs + "text");
                # feeding that dict into the next prompt stringifies the whole
                # dict — consider extracting the "text" field between steps.
                chain1 = create_stuff_documents_chain(llm, summary_prompt)
                summary = run_chain(chain1, {"context": docs})
                chain2 = LLMChain(llm=llm, prompt=gap_prompt)
                output = run_chain(chain2, {"summary": summary})
            elif task == "Suggest research ideas":
                # Three-step pipeline: summary -> gaps -> project ideas.
                chain1 = create_stuff_documents_chain(llm, summary_prompt)
                summary = run_chain(chain1, {"context": docs})
                chain2 = LLMChain(llm=llm, prompt=gap_prompt)
                gaps = run_chain(chain2, {"summary": summary})
                chain3 = LLMChain(llm=llm, prompt=idea_prompt)
                output = run_chain(chain3, {"gaps": gaps})
            elif task == "Simulate a debate":
                chain = create_stuff_documents_chain(llm, summary_prompt)
                summary = run_chain(chain, {"context": docs})
                debate_chain = LLMChain(llm=llm, prompt=debate_prompt)
                output = run_chain(debate_chain, {"summary": summary})
            elif task == "Generate citation":
                citation_chain = create_stuff_documents_chain(llm, citation_prompt)
                output = run_chain(citation_chain, {"context": docs})
            # Persist the result so the display/translation section below can
            # reuse it across Streamlit reruns.
            if output:
                st.session_state["last_agent_output"] = output
# Final Display Section with Translation Option.
# Reads the last agent result from session_state so it survives reruns, shows
# it verbatim, or — when the toggle is on — translates it first.
if "last_agent_output" in st.session_state:
    output = st.session_state["last_agent_output"]
    translate_toggle = st.toggle("🌍 Translate the response?")
    if not translate_toggle:
        st.markdown("### πŸ€– Agent Response")
        st.write(output)
    else:
        default_languages = ["Spanish", "French", "German", "Chinese", "Urdu", "Other"]
        selected_language = st.selectbox("Choose translation language:", default_languages)
        if selected_language == "Other":
            user_language = st.text_input("Please enter your desired language:", key="custom_lang")
        else:
            user_language = selected_language
        if user_language:
            # Flatten dict outputs (e.g. chain results) into plain text for
            # the translation prompt.
            if isinstance(output, dict):
                combined_text = "\n\n".join(str(v) for v in output.values())
            else:
                combined_text = str(output)
            translate_chain = LLMChain(llm=llm, prompt=translate_prompt)
            translated = translate_chain.invoke({
                "language": user_language,
                "content": combined_text
            })
            # BUG FIX: LLMChain.invoke returns a dict that echoes the inputs
            # and carries the generated text under the "text" key; previously
            # the whole dict was dumped to the page. Show only the translation.
            if isinstance(translated, dict):
                translated = translated.get("text", translated)
            st.markdown(f"### 🌐 Translated Response ({user_language})")
            st.write(translated)