Spaces:

chburhan64
/

PDF_Agent

Sleeping

App Files Files Community

chburhan64 committed on Jul 13, 2025

Commit

657926e

verified ·

1 Parent(s): 8687366

Update app.py

Browse files

Files changed (1) hide show

app.py +75 -78

app.py CHANGED Viewed

@@ -1,31 +1,30 @@
 import streamlit as st
 import os
 import time
-from dotenv import load_dotenv
-import PyPDF2
 from langchain_groq import ChatGroq
 from langchain_community.vectorstores import FAISS
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_core.documents import Document
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.chains.combine_documents import create_stuff_documents_chain
-from langchain.chains import LLMChain
 from langchain_core.prompts import ChatPromptTemplate
 # Load environment variables
 load_dotenv()
 groq_api_key = os.getenv("GROQ_API_KEY")
-# Streamlit UI setup
 st.set_page_config(page_title="Multi-Agent Research Assistant", layout="wide")
 st.title("🤖 Multi-Agent Research Assistant")
-st.markdown("Enhance your research process with intelligent summarization, critique, debate, translation, and citation. Upload a research paper and let our agents do the thinking!")
-# Load Groq LLM (Llama3)
 llm = ChatGroq(groq_api_key=groq_api_key, model_name="Llama3-8b-8192")
-# Load embedding model
 embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
 # Prompt Templates
@@ -38,82 +37,64 @@ You are a helpful assistant. Summarize the following document clearly and accura
 gap_prompt = ChatPromptTemplate.from_template("""
 Analyze the following summary and identify key research gaps, unanswered questions, or limitations:
 {summary}
 """)
 idea_prompt = ChatPromptTemplate.from_template("""
 Given the research gaps:
 {gaps}
 Suggest 2-3 original research project ideas or questions that address these gaps. Explain why they are valuable.
 """)
 debate_prompt = ChatPromptTemplate.from_template("""
 Act as two researchers discussing a paper.
 Supporter: Defends the core idea of the document.
 Critic: Challenges its assumptions, methods, or impact.
 Use the following summary as reference:
 {summary}
 Generate a short conversation between them.
 """)
 translate_prompt = ChatPromptTemplate.from_template("""
 Translate the following content into {language}, preserving meaning and academic tone:
 {content}
 """)
-citation_prompt = ChatPromptTemplate.from_template("""
-Generate an APA-style citation based on the document content:
-<context>
-{context}
-</context>
-""")
-# Extract & process PDFs
 def process_pdfs(uploaded_files):
     documents = []
     for file in uploaded_files:
-        reader = PyPDF2.PdfReader(file)
-        text = ""
-        for page in reader.pages:
-            text += page.extract_text() or ""
         documents.append(Document(page_content=text, metadata={"source": file.name}))
     splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
     return splitter.split_documents(documents)
-# Create vector store
 def create_vector_store(documents):
     return FAISS.from_documents(documents, embedding)
-# Chain runner helpers
 def run_chain(chain, input_dict):
     return chain.invoke(input_dict)
-# File uploader
 uploaded_files = st.file_uploader("📁 Upload one or more PDF files", type=["pdf"], accept_multiple_files=True)
 if uploaded_files and st.button("📚 Process Documents"):
-    with st.spinner("Processing documents and generating vector store..."):
         documents = process_pdfs(uploaded_files)
         st.session_state.documents = documents
         st.session_state.vectorstore = create_vector_store(documents)
     st.success("✅ Document vector store created!")
-# Agent Activation
 if "documents" in st.session_state:
-    st.subheader("🎓 Master Agent: What would you like me to do?")
-    task = st.selectbox("Choose a task:", [
         "Summarize document",
         "Identify research gaps",
         "Suggest research ideas",
         "Simulate a debate",
-        "Generate citation"
     ])
     if st.button("🚀 Run Agent"):
@@ -121,73 +102,89 @@ if "documents" in st.session_state:
             docs = st.session_state.documents[:10]
             results = {}
-            # Summarization
             if task == "Summarize document":
                 chain = create_stuff_documents_chain(llm, summary_prompt)
                 summary = run_chain(chain, {"context": docs})
                 st.session_state["last_agent_output"] = summary
-            # Gap analysis
             elif task == "Identify research gaps":
-                chain1 = create_stuff_documents_chain(llm, summary_prompt)
-                summary = run_chain(chain1, {"context": docs})
-                chain2 = LLMChain(llm=llm, prompt=gap_prompt)
-                gaps = run_chain(chain2, {"summary": summary})
                 st.session_state["last_agent_output"] = gaps
-            # Idea generation
             elif task == "Suggest research ideas":
-                chain1 = create_stuff_documents_chain(llm, summary_prompt)
-                summary = run_chain(chain1, {"context": docs})
-                chain2 = LLMChain(llm=llm, prompt=gap_prompt)
-                gaps = run_chain(chain2, {"summary": summary})
-                chain3 = LLMChain(llm=llm, prompt=idea_prompt)
-                ideas = run_chain(chain3, {"gaps": gaps})
                 st.session_state["last_agent_output"] = ideas
-            # Debate agent
             elif task == "Simulate a debate":
-                chain = create_stuff_documents_chain(llm, summary_prompt)
-                summary = run_chain(chain, {"context": docs})
-                debate_chain = LLMChain(llm=llm, prompt=debate_prompt)
-                debate = run_chain(debate_chain, {"summary": summary})
                 st.session_state["last_agent_output"] = debate
-            # Citation agent
             elif task == "Generate citation":
-                citation_chain = create_stuff_documents_chain(llm, citation_prompt)
                 citation = run_chain(citation_chain, {"context": docs})
                 st.session_state["last_agent_output"] = citation
-# Final Display Section with Translation Option
 if "last_agent_output" in st.session_state:
-    output = st.session_state["last_agent_output"]
-    translate_toggle = st.toggle("🌍 Translate the response?")
-    if not translate_toggle:
-        st.markdown("### 🤖 Agent Response")
-        st.write(output)
-    if translate_toggle:
         default_languages = ["Spanish", "French", "German", "Chinese", "Urdu", "Other"]
-        selected_language = st.selectbox("Choose translation language:", default_languages)
         if selected_language == "Other":
-            user_language = st.text_input("Please enter your desired language:", key="custom_lang")
         else:
             user_language = selected_language
         if user_language:
-            if isinstance(output, dict):
-                combined_text = "\n\n".join(str(v) for v in output.values())
-            else:
-                combined_text = str(output)
             translate_chain = LLMChain(llm=llm, prompt=translate_prompt)
-            translated = translate_chain.invoke({
-                "language": user_language,
-                "content": combined_text
-            })
             st.markdown(f"### 🌐 Translated Response ({user_language})")
             st.write(translated)

 import streamlit as st
 import os
 import time
+import matplotlib.pyplot as plt
+import pandas as pd
+import pdfplumber
+from dotenv import load_dotenv
 from langchain_groq import ChatGroq
 from langchain_community.vectorstores import FAISS
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_core.documents import Document
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.chains.combine_documents import create_stuff_documents_chain
+from langchain.chains import LLMChain, RetrievalQA
 from langchain_core.prompts import ChatPromptTemplate
 # Load environment variables
 load_dotenv()
 groq_api_key = os.getenv("GROQ_API_KEY")
 st.set_page_config(page_title="Multi-Agent Research Assistant", layout="wide")
 st.title("🤖 Multi-Agent Research Assistant")
+st.markdown("Upload your PDF research paper and explore multiple intelligent agents: summarize, question-answer, extract visuals, translate, and more!")
+# Load models
 llm = ChatGroq(groq_api_key=groq_api_key, model_name="Llama3-8b-8192")
 embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
 # Prompt Templates
 gap_prompt = ChatPromptTemplate.from_template("""
 Analyze the following summary and identify key research gaps, unanswered questions, or limitations:
 {summary}
 """)
 idea_prompt = ChatPromptTemplate.from_template("""
 Given the research gaps:
 {gaps}
 Suggest 2-3 original research project ideas or questions that address these gaps. Explain why they are valuable.
 """)
 debate_prompt = ChatPromptTemplate.from_template("""
 Act as two researchers discussing a paper.
 Supporter: Defends the core idea of the document.
 Critic: Challenges its assumptions, methods, or impact.
 Use the following summary as reference:
 {summary}
 Generate a short conversation between them.
 """)
 translate_prompt = ChatPromptTemplate.from_template("""
 Translate the following content into {language}, preserving meaning and academic tone:
 {content}
 """)
+# PDF processing
 def process_pdfs(uploaded_files):
     # Turn uploaded PDFs into text chunks.
     #   uploaded_files: iterable of file objects; each is passed to
     #                   pdfplumber.open() and must expose a `.name` attribute.
     #   returns: whatever RecursiveCharacterTextSplitter.split_documents()
     #            yields for the collected Document objects.
     documents = []
     for file in uploaded_files:
+        with pdfplumber.open(file) as pdf:
             # Pages whose extract_text() is falsy (e.g. None) contribute "" so
             # the join never sees a non-string value.
+            text = "\n".join(page.extract_text() or "" for page in pdf.pages)
         # One Document per uploaded file; the file name is kept as its "source".
         documents.append(Document(page_content=text, metadata={"source": file.name}))
     # Split with chunk_size=1000 and chunk_overlap=200 before returning.
     splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
     return splitter.split_documents(documents)
 def create_vector_store(documents):
     """Build a FAISS store from *documents* using the module-level ``embedding``."""
     store = FAISS.from_documents(documents, embedding)
     return store
 def run_chain(chain, input_dict):
     """Call ``chain.invoke`` with *input_dict* and pass its result straight back."""
     outcome = chain.invoke(input_dict)
     return outcome
 uploaded_files = st.file_uploader("📁 Upload one or more PDF files", type=["pdf"], accept_multiple_files=True)
 if uploaded_files and st.button("📚 Process Documents"):
+    with st.spinner("Processing and embedding..."):
         documents = process_pdfs(uploaded_files)
         st.session_state.documents = documents
         st.session_state.vectorstore = create_vector_store(documents)
     st.success("✅ Document vector store created!")
 if "documents" in st.session_state:
+    st.subheader("🎓 Choose an agent task:")
+    task = st.selectbox("Task:", [
         "Summarize document",
         "Identify research gaps",
         "Suggest research ideas",
         "Simulate a debate",
+        "Generate citation",
+        "Chat with Paper",
+        "Generate Chart + Insight"
     ])
     if st.button("🚀 Run Agent"):
             docs = st.session_state.documents[:10]
             results = {}
             if task == "Summarize document":
                 chain = create_stuff_documents_chain(llm, summary_prompt)
                 summary = run_chain(chain, {"context": docs})
                 st.session_state["last_agent_output"] = summary
             elif task == "Identify research gaps":
+                summary = run_chain(create_stuff_documents_chain(llm, summary_prompt), {"context": docs})
+                gaps = run_chain(LLMChain(llm=llm, prompt=gap_prompt), {"summary": summary})
                 st.session_state["last_agent_output"] = gaps
             elif task == "Suggest research ideas":
+                summary = run_chain(create_stuff_documents_chain(llm, summary_prompt), {"context": docs})
+                gaps = run_chain(LLMChain(llm=llm, prompt=gap_prompt), {"summary": summary})
+                ideas = run_chain(LLMChain(llm=llm, prompt=idea_prompt), {"gaps": gaps})
                 st.session_state["last_agent_output"] = ideas
             elif task == "Simulate a debate":
+                summary = run_chain(create_stuff_documents_chain(llm, summary_prompt), {"context": docs})
+                debate = run_chain(LLMChain(llm=llm, prompt=debate_prompt), {"summary": summary})
                 st.session_state["last_agent_output"] = debate
             elif task == "Generate citation":
+                citation_chain = create_stuff_documents_chain(llm, translate_prompt)
                 citation = run_chain(citation_chain, {"context": docs})
                 st.session_state["last_agent_output"] = citation
+            elif task == "Chat with Paper":
+                user_question = st.text_input("Ask a question about the paper:")
+                if user_question:
+                    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=st.session_state.vectorstore.as_retriever())
+                    answer = qa_chain.run(user_question)
+                    st.session_state["last_agent_output"] = answer
+            elif task == "Generate Chart + Insight":
+                numbers = []
+                for doc in docs:
+                    for line in doc.page_content.split("\n"):
+                        for word in line.split():
+                            try:
+                                num = float(word)
+                                numbers.append(num)
+                            except:
+                                pass
+                if numbers:
+                    fig, ax = plt.subplots()
+                    pd.Series(numbers[:20]).plot(kind="bar", ax=ax)
+                    st.pyplot(fig)
+                    explain_prompt = ChatPromptTemplate.from_template("Analyze this data: {data}")
+                    insight = run_chain(LLMChain(llm=llm, prompt=explain_prompt), {"data": numbers[:20]})
+                    st.session_state["last_agent_output"] = insight
+                else:
+                    st.write("No numeric data found.")
+# Display Output
 if "last_agent_output" in st.session_state:
+    st.markdown("### 🤖 Agent Output")
+    st.write(st.session_state["last_agent_output"])
+    # Feedback agent (simple RLHF prototype)
+    st.markdown("#### 💬 Was this helpful?")
+    col1, col2 = st.columns(2)
+    if col1.button("👍 Yes"):
+        with open("feedback_log.csv", "a") as f:
+            f.write(f"{task},Yes\n")
+        st.success("Thanks for your feedback!")
+    if col2.button("👎 No"):
+        with open("feedback_log.csv", "a") as f:
+            f.write(f"{task},No\n")
+        st.info("Thanks! We'll improve it.")
+    # Translation Option
+    if st.toggle("🌍 Translate the response?"):
         default_languages = ["Spanish", "French", "German", "Chinese", "Urdu", "Other"]
+        selected_language = st.selectbox("Choose language:", default_languages)
         if selected_language == "Other":
+            user_language = st.text_input("Enter language:")
         else:
             user_language = selected_language
         if user_language:
             translate_chain = LLMChain(llm=llm, prompt=translate_prompt)
+            content = st.session_state["last_agent_output"]
+            if isinstance(content, dict):
+                content = "\n".join(str(v) for v in content.values())
+            translated = translate_chain.invoke({"language": user_language, "content": content})
             st.markdown(f"### 🌐 Translated Response ({user_language})")
             st.write(translated)