# PDF_Agent / app.py
# Source: NajmiHassan1's Hugging Face Space, commit 672ecad.
# (The Hugging Face page-chrome lines — "raw / history / blame / 7.37 kB" —
# were copied into this file by the scrape; converted to comments so the
# module parses as valid Python.)
import streamlit as st
import os
import time
from dotenv import load_dotenv
import PyPDF2
from langchain_groq import ChatGroq
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.documents import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import LLMChain, RetrievalQA
from langchain_core.prompts import ChatPromptTemplate
# Load environment variables; GROQ_API_KEY is expected in .env or the process env.
load_dotenv()
groq_api_key = os.getenv("GROQ_API_KEY")
# Streamlit UI setup — page config must be the first Streamlit call in the script.
st.set_page_config(page_title="Multi-Agent Research Assistant", layout="wide")
st.title("πŸ€– Multi-Agent Research Assistant")
st.markdown("Enhance your research process with intelligent summarization, critique, debate, translation, citation, and interactive Q&A. Upload a research paper and let our agents do the thinking!")
# Load Groq LLM (Llama3). NOTE(review): if groq_api_key is None this fails at
# request time, not here — consider an explicit st.error() guard.
llm = ChatGroq(groq_api_key=groq_api_key, model_name="Llama3-8b-8192")
# Load embedding model used for the FAISS vector store (sentence-transformers MiniLM).
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
# Prompt Templates — one per agent task. Placeholders ({context}, {summary},
# {gaps}, {language}, {content}) are filled by the chains built further below.
# Summarization: receives stuffed document chunks as {context}.
summary_prompt = ChatPromptTemplate.from_template("""
You are a helpful assistant. Summarize the following document clearly and accurately:
<context>
{context}
</context>
""")
# Gap analysis: consumes the summary produced by summary_prompt.
gap_prompt = ChatPromptTemplate.from_template("""
Analyze the following summary and identify key research gaps, unanswered questions, or limitations:
{summary}
""")
# Idea generation: consumes the gap analysis output.
idea_prompt = ChatPromptTemplate.from_template("""
Given the research gaps:
{gaps}
Suggest 2-3 original research project ideas or questions that address these gaps. Explain why they are valuable.
""")
# Debate simulation: two personas argue over the summary.
debate_prompt = ChatPromptTemplate.from_template("""
Act as two researchers discussing a paper.
Supporter: Defends the core idea of the document.
Critic: Challenges its assumptions, methods, or impact.
Use the following summary as reference:
{summary}
Generate a short conversation between them.
""")
# Translation: applied to any agent output in the final display section.
translate_prompt = ChatPromptTemplate.from_template("""
Translate the following content into {language}, preserving meaning and academic tone:
{content}
""")
# Citation: builds an APA citation from the stuffed document chunks.
citation_prompt = ChatPromptTemplate.from_template("""
Generate an APA-style citation based on the document content:
<context>
{context}
</context>
""")
# Extract & process PDFs
def process_pdfs(uploaded_files):
    """Extract the text of each uploaded PDF and split it into chunks.

    Args:
        uploaded_files: iterable of file-like objects (e.g. Streamlit
            UploadedFile) exposing ``.name`` and a readable binary stream.

    Returns:
        A list of LangChain ``Document`` chunks (1000 chars, 200 overlap),
        each carrying its source filename in ``metadata["source"]``.
    """
    docs = []
    for pdf in uploaded_files:
        reader = PyPDF2.PdfReader(pdf)
        # extract_text() may return None for image-only pages; treat as empty.
        full_text = "".join(page.extract_text() or "" for page in reader.pages)
        docs.append(Document(page_content=full_text, metadata={"source": pdf.name}))
    chunker = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return chunker.split_documents(docs)
# Create vector store
def create_vector_store(documents):
    """Build an in-memory FAISS index over *documents* using the module-level
    HuggingFace embedding model loaded above."""
    return FAISS.from_documents(documents, embedding)
# Chain runner helpers
def run_chain(chain, input_dict):
    """Invoke *chain* with *input_dict* and return the chain's result as-is."""
    result = chain.invoke(input_dict)
    return result
# File uploader — accepts one or more PDFs; processing is triggered explicitly
# by the button so reruns don't rebuild the index on every interaction.
uploaded_files = st.file_uploader("πŸ“ Upload one or more PDF files", type=["pdf"], accept_multiple_files=True)
if uploaded_files and st.button("πŸ“š Process Documents"):
    with st.spinner("Processing documents and generating vector store..."):
        # Chunked documents and the FAISS index are kept in session_state so
        # they survive Streamlit's script reruns.
        documents = process_pdfs(uploaded_files)
        st.session_state.documents = documents
        st.session_state.vectorstore = create_vector_store(documents)
        st.success("βœ… Document vector store created!")
# Agent Activation — shown only once documents have been processed into
# session_state by the uploader section above.
if "documents" in st.session_state:
    st.subheader("πŸŽ“ Master Agent: What would you like me to do?")
    task = st.selectbox("Choose a task:", [
        "Summarize document",
        "Identify research gaps",
        "Suggest research ideas",
        "Simulate a debate",
        "Generate citation",
        "Chat with paper"
    ])
    # Handle Chat with paper separately: it needs a free-text question and
    # retrieval over the vector store instead of the whole-document chains.
    if task == "Chat with paper":
        query = st.text_input("πŸ’¬ Ask a question about the paper:")
        if query and st.button("πŸš€ Ask Question"):
            with st.spinner("Searching paper for answer..."):
                retriever = st.session_state.vectorstore.as_retriever()
                qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
                # NOTE(review): .run() is the legacy LangChain call style; the
                # modern equivalent is .invoke() — confirm before migrating.
                output = qa_chain.run(query)
                st.session_state["last_agent_output"] = output
    # Handle other tasks
    elif st.button("πŸš€ Run Agent"):
        with st.spinner("Running agents..."):
            # Cap the context at the first 10 chunks — presumably to stay
            # within the model's context window; TODO confirm the limit.
            docs = st.session_state.documents[:10]
            output = ""
            if task == "Summarize document":
                chain = create_stuff_documents_chain(llm, summary_prompt)
                output = run_chain(chain, {"context": docs})
            elif task == "Identify research gaps":
                # Two-step pipeline: summarize, then mine the summary for gaps.
                # NOTE(review): LLMChain.invoke returns a dict (inputs + "text");
                # feeding that dict into the next prompt stringifies the whole
                # dict — consider extracting the "text" field between steps.
                chain1 = create_stuff_documents_chain(llm, summary_prompt)
                summary = run_chain(chain1, {"context": docs})
                chain2 = LLMChain(llm=llm, prompt=gap_prompt)
                output = run_chain(chain2, {"summary": summary})
            elif task == "Suggest research ideas":
                # Three-step pipeline: summary -> gaps -> project ideas.
                chain1 = create_stuff_documents_chain(llm, summary_prompt)
                summary = run_chain(chain1, {"context": docs})
                chain2 = LLMChain(llm=llm, prompt=gap_prompt)
                gaps = run_chain(chain2, {"summary": summary})
                chain3 = LLMChain(llm=llm, prompt=idea_prompt)
                output = run_chain(chain3, {"gaps": gaps})
            elif task == "Simulate a debate":
                chain = create_stuff_documents_chain(llm, summary_prompt)
                summary = run_chain(chain, {"context": docs})
                debate_chain = LLMChain(llm=llm, prompt=debate_prompt)
                output = run_chain(debate_chain, {"summary": summary})
            elif task == "Generate citation":
                citation_chain = create_stuff_documents_chain(llm, citation_prompt)
                output = run_chain(citation_chain, {"context": docs})
            # Persist the result so the display/translation section below can
            # reuse it across Streamlit reruns.
            if output:
                st.session_state["last_agent_output"] = output
# Final Display Section with Translation Option.
# Reads the last agent result from session_state so it survives reruns, shows
# it verbatim, or — when the toggle is on — translates it first.
if "last_agent_output" in st.session_state:
    output = st.session_state["last_agent_output"]
    translate_toggle = st.toggle("🌍 Translate the response?")
    if not translate_toggle:
        st.markdown("### πŸ€– Agent Response")
        st.write(output)
    else:
        default_languages = ["Spanish", "French", "German", "Chinese", "Urdu", "Other"]
        selected_language = st.selectbox("Choose translation language:", default_languages)
        if selected_language == "Other":
            user_language = st.text_input("Please enter your desired language:", key="custom_lang")
        else:
            user_language = selected_language
        if user_language:
            # Flatten dict outputs (e.g. chain results) into plain text for
            # the translation prompt.
            if isinstance(output, dict):
                combined_text = "\n\n".join(str(v) for v in output.values())
            else:
                combined_text = str(output)
            translate_chain = LLMChain(llm=llm, prompt=translate_prompt)
            translated = translate_chain.invoke({
                "language": user_language,
                "content": combined_text
            })
            # BUG FIX: LLMChain.invoke returns a dict that echoes the inputs
            # and carries the generated text under the "text" key; previously
            # the whole dict was dumped to the page. Show only the translation.
            if isinstance(translated, dict):
                translated = translated.get("text", translated)
            st.markdown(f"### 🌐 Translated Response ({user_language})")
            st.write(translated)