Spaces:

chburhan64
/

PDF_Agent

Sleeping

App Files Files Community

chburhan64 committed on Jul 13, 2025

Commit

ca7640e

verified ·

1 Parent(s): 2947fa1

Update app.py

Browse files

Files changed (1) hide show

app.py +127 -45

app.py CHANGED Viewed

@@ -18,76 +18,158 @@ load_dotenv()
 groq_api_key = os.getenv("GROQ_API_KEY")
 # Streamlit UI setup
-st.set_page_config(page_title="Document Q&A with Llama3")
-st.title("📄 Document Q&A with Llama3 (via Groq)")
 # Load Groq LLM (Llama3)
 llm = ChatGroq(groq_api_key=groq_api_key, model_name="Llama3-8b-8192")
-# Prompt template
-prompt = ChatPromptTemplate.from_template("""
-Answer the question based only on the provided context.
 <context>
 {context}
 </context>
-Question: {input}
 """)
-# Use HuggingFace Embeddings
-embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
-# Function to extract text and split into chunks
 def process_pdfs(uploaded_files):
     documents = []
     for file in uploaded_files:
-        pdf_reader = PyPDF2.PdfReader(file)
         text = ""
-        for page in pdf_reader.pages:
             text += page.extract_text() or ""
         documents.append(Document(page_content=text, metadata={"source": file.name}))
     splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
     return splitter.split_documents(documents)
-# Function to build FAISS index
 def create_vector_store(documents):
-    vectorstore = FAISS.from_documents(documents, embedding)
-    return vectorstore
 # File uploader
 uploaded_files = st.file_uploader("📁 Upload one or more PDF files", type=["pdf"], accept_multiple_files=True)
-# Button to process files
 if uploaded_files and st.button("📚 Process Documents"):
-    with st.spinner("Processing documents..."):
         documents = process_pdfs(uploaded_files)
-        st.session_state.vectors = create_vector_store(documents)
-        st.success("✅ Document vector store created!")
-# Question input
-query = st.text_input("💬 Ask a question about the uploaded documents")
-# If user asks a question
-if query and "vectors" in st.session_state:
-    with st.spinner("Generating answer..."):
-        document_chain = create_stuff_documents_chain(llm, prompt)
-        retriever = st.session_state.vectors.as_retriever()
-        retrieval_chain = create_retrieval_chain(retriever, document_chain)
-        start = time.process_time()
-        response = retrieval_chain.invoke({'input': query})
-        end = time.process_time()
-    st.markdown("### ✅ Answer:")
-    st.write(response['answer'])
-    st.markdown(f"⏱️ Response time: {end - start:.2f} seconds")
-    with st.expander("🔍 Relevant Document Chunks"):
-        for i, doc in enumerate(response.get("context", [])):
-            st.write(doc.page_content)
-            st.write("---")
-elif query and "vectors" not in st.session_state:
-    st.warning("⚠️ Please upload and process PDF documents first.")

 groq_api_key = os.getenv("GROQ_API_KEY")
 # Streamlit UI setup
+st.set_page_config(page_title="Multi-Agent Research Assistant", layout="wide")
+st.title("🤖 Multi-Agent Research Assistant")
+st.markdown("Enhance your research process with intelligent summarization, critique, debate, translation, and citation. Upload a research paper and let our agents do the thinking!")
 # Load Groq LLM (Llama3)
 llm = ChatGroq(groq_api_key=groq_api_key, model_name="Llama3-8b-8192")
+# Load embedding model
+embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+# Prompt Templates
+summary_prompt = ChatPromptTemplate.from_template("""
+You are a helpful assistant. Summarize the following document clearly and accurately:
 <context>
 {context}
 </context>
+""")
+gap_prompt = ChatPromptTemplate.from_template("""
+Analyze the following summary and identify key research gaps, unanswered questions, or limitations:
+{summary}
 """)
+idea_prompt = ChatPromptTemplate.from_template("""
+Given the research gaps:
+{gaps}
+Suggest 2-3 original research project ideas or questions that address these gaps. Explain why they are valuable.
+""")
+debate_prompt = ChatPromptTemplate.from_template("""
+Act as two researchers discussing a paper.
+Supporter: Defends the core idea of the document.
+Critic: Challenges its assumptions, methods, or impact.
+Use the following summary as reference:
+{summary}
+Generate a short conversation between them.
+""")
+translate_prompt = ChatPromptTemplate.from_template("""
+Translate the following content into {language}, preserving meaning and academic tone:
+{content}
+""")
+citation_prompt = ChatPromptTemplate.from_template("""
+Generate an APA-style citation based on the document content:
+{content}
+""")
+# Extract & process PDFs
 def process_pdfs(uploaded_files):
     documents = []
     for file in uploaded_files:
+        reader = PyPDF2.PdfReader(file)
         text = ""
+        for page in reader.pages:
             text += page.extract_text() or ""
         documents.append(Document(page_content=text, metadata={"source": file.name}))
     splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
     return splitter.split_documents(documents)
+# Create vector store
 def create_vector_store(documents):
+    return FAISS.from_documents(documents, embedding)
+# Chain runner helpers
+def run_chain(chain, input_dict):
+    return chain.invoke(input_dict)
 # File uploader
 uploaded_files = st.file_uploader("📁 Upload one or more PDF files", type=["pdf"], accept_multiple_files=True)
 if uploaded_files and st.button("📚 Process Documents"):
+    with st.spinner("Processing documents and generating vector store..."):
         documents = process_pdfs(uploaded_files)
+        st.session_state.documents = documents
+        st.session_state.vectorstore = create_vector_store(documents)
+    st.success("✅ Document vector store created!")
+# Agent Activation
+if "documents" in st.session_state:
+    st.subheader("🎓 Master Agent: What would you like me to do?")
+    task = st.selectbox("Choose a task:", [
+        "Summarize document",
+        "Identify research gaps",
+        "Suggest research ideas",
+        "Simulate a debate",
+        "Translate summary",
+        "Generate citation"
+    ])
+    user_language = st.selectbox("🌍 Choose translation language (only for Translate task):", ["Spanish", "French", "German", "Chinese", "Urdu"])
+    if st.button("🚀 Run Agent"):
+        with st.spinner("Running agents..."):
+            context = "\n".join([doc.page_content for doc in st.session_state.documents[:10]])
+            results = {}
+            # Summarization
+            if task == "Summarize document":
+                chain = create_stuff_documents_chain(llm, summary_prompt)
+                summary = run_chain(chain, {"context": context})
+                results["summary"] = summary
+                st.markdown("### 📝 Summary")
+                st.write(summary)
+            # Gap analysis
+            elif task == "Identify research gaps":
+                chain1 = create_stuff_documents_chain(llm, summary_prompt)
+                summary = run_chain(chain1, {"context": context})
+                chain2 = create_stuff_documents_chain(llm, gap_prompt)
+                gaps = run_chain(chain2, {"summary": summary})
+                results["gaps"] = gaps
+                st.markdown("### 🔍 Identified Gaps")
+                st.write(gaps)
+            # Idea generation
+            elif task == "Suggest research ideas":
+                chain1 = create_stuff_documents_chain(llm, summary_prompt)
+                summary = run_chain(chain1, {"context": context})
+                chain2 = create_stuff_documents_chain(llm, gap_prompt)
+                gaps = run_chain(chain2, {"summary": summary})
+                chain3 = create_stuff_documents_chain(llm, idea_prompt)
+                ideas = run_chain(chain3, {"gaps": gaps})
+                st.markdown("### 💡 Research Ideas")
+                st.write(ideas)
+            # Debate agent
+            elif task == "Simulate a debate":
+                chain = create_stuff_documents_chain(llm, summary_prompt)
+                summary = run_chain(chain, {"context": context})
+                debate_chain = create_stuff_documents_chain(llm, debate_prompt)
+                debate = run_chain(debate_chain, {"summary": summary})
+                st.markdown("### 🎭 Debate")
+                st.write(debate)
+            # Translate agent
+            elif task == "Translate summary":
+                chain = create_stuff_documents_chain(llm, summary_prompt)
+                summary = run_chain(chain, {"context": context})
+                translate_chain = create_stuff_documents_chain(llm, translate_prompt)
+                translated = run_chain(translate_chain, {"language": user_language, "content": summary})
+                st.markdown(f"### 🌐 Translated Summary ({user_language})")
+                st.write(translated)
+            # Citation agent
+            elif task == "Generate citation":
+                citation_chain = create_stuff_documents_chain(llm, citation_prompt)
+                citation = run_chain(citation_chain, {"content": context})
+                st.markdown("### 📌 APA Citation")
+                st.code(citation, language="markdown")