Spaces:

chburhan64
/

PDF_Agent

Sleeping

App Files Community

chburhan64 committed on Jul 13, 2025

Commit

9ae830c

verified ·

1 Parent(s): bf06fa6

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -102

app.py CHANGED Viewed

@@ -1,17 +1,18 @@
 import streamlit as st
 import os
-import time
 from dotenv import load_dotenv
-import PyPDF2
 from langchain_groq import ChatGroq
-from langchain_community.vectorstores import FAISS
 from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain_core.documents import Document
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.chains.combine_documents import create_stuff_documents_chain
-from langchain.chains import LLMChain, RetrievalQA
-from langchain_core.prompts import ChatPromptTemplate
 # Load environment variables
 load_dotenv()
@@ -28,66 +29,6 @@ llm = ChatGroq(groq_api_key=groq_api_key, model_name="Llama3-8b-8192")
 # Load embedding model
 embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
-# Prompt Templates
-summary_prompt = ChatPromptTemplate.from_template("""
-You are a helpful assistant. Summarize the following document clearly and accurately:
-<context>
-{context}
-</context>
-""")
-gap_prompt = ChatPromptTemplate.from_template("""
-Analyze the following summary and identify key research gaps, unanswered questions, or limitations:
-{summary}
-""")
-idea_prompt = ChatPromptTemplate.from_template("""
-Given the research gaps:
-{gaps}
-Suggest 2-3 original research project ideas or questions that address these gaps. Explain why they are valuable.
-""")
-debate_prompt = ChatPromptTemplate.from_template("""
-Act as two researchers discussing a paper.
-Supporter: Defends the core idea of the document.
-Critic: Challenges its assumptions, methods, or impact.
-Use the following summary as reference:
-{summary}
-Generate a short conversation between them.
-""")
-translate_prompt = ChatPromptTemplate.from_template("""
-Translate the following content into {language}, preserving meaning and academic tone:
-{content}
-""")
-citation_prompt = ChatPromptTemplate.from_template("""
-Generate an APA-style citation based on the document content:
-<context>
-{context}
-</context>
-""")
-# Extract & process PDFs
-def process_pdfs(uploaded_files):
-    documents = []
-    for file in uploaded_files:
-        reader = PyPDF2.PdfReader(file)
-        text = ""
-        for page in reader.pages:
-            text += page.extract_text() or ""
-        documents.append(Document(page_content=text, metadata={"source": file.name}))
-    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
-    return splitter.split_documents(documents)
-# Create vector store
-def create_vector_store(documents):
-    return FAISS.from_documents(documents, embedding)
-# Chain runner helpers
-def run_chain(chain, input_dict):
-    return chain.invoke(input_dict)
 # File uploader
 uploaded_files = st.file_uploader("📁 Upload one or more PDF files", type=["pdf"], accept_multiple_files=True)
@@ -95,7 +36,7 @@ if uploaded_files and st.button("📚 Process Documents"):
     with st.spinner("Processing documents and generating vector store..."):
         documents = process_pdfs(uploaded_files)
         st.session_state.documents = documents
-        st.session_state.vectorstore = create_vector_store(documents)
     st.success("✅ Document vector store created!")
 # Agent Activation
@@ -115,9 +56,7 @@ if "documents" in st.session_state:
         query = st.text_input("💬 Ask a question about the paper:")
         if query and st.button("🚀 Ask Question"):
             with st.spinner("Searching paper for answer..."):
-                retriever = st.session_state.vectorstore.as_retriever()
-                qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
-                output = qa_chain.run(query)
                 st.session_state["last_agent_output"] = output
     # Handle other tasks
@@ -127,32 +66,19 @@ if "documents" in st.session_state:
             output = ""
             if task == "Summarize document":
-                chain = create_stuff_documents_chain(llm, summary_prompt)
-                output = run_chain(chain, {"context": docs})
             elif task == "Identify research gaps":
-                chain1 = create_stuff_documents_chain(llm, summary_prompt)
-                summary = run_chain(chain1, {"context": docs})
-                chain2 = LLMChain(llm=llm, prompt=gap_prompt)
-                output = run_chain(chain2, {"summary": summary})
             elif task == "Suggest research ideas":
-                chain1 = create_stuff_documents_chain(llm, summary_prompt)
-                summary = run_chain(chain1, {"context": docs})
-                chain2 = LLMChain(llm=llm, prompt=gap_prompt)
-                gaps = run_chain(chain2, {"summary": summary})
-                chain3 = LLMChain(llm=llm, prompt=idea_prompt)
-                output = run_chain(chain3, {"gaps": gaps})
             elif task == "Simulate a debate":
-                chain = create_stuff_documents_chain(llm, summary_prompt)
-                summary = run_chain(chain, {"context": docs})
-                debate_chain = LLMChain(llm=llm, prompt=debate_prompt)
-                output = run_chain(debate_chain, {"summary": summary})
             elif task == "Generate citation":
-                citation_chain = create_stuff_documents_chain(llm, citation_prompt)
-                output = run_chain(citation_chain, {"context": docs})
             if output:
                 st.session_state["last_agent_output"] = output
@@ -176,16 +102,6 @@ if "last_agent_output" in st.session_state:
             user_language = selected_language
         if user_language:
-            if isinstance(output, dict):
-                combined_text = "\n\n".join(str(v) for v in output.values())
-            else:
-                combined_text = str(output)
-            translate_chain = LLMChain(llm=llm, prompt=translate_prompt)
-            translated = translate_chain.invoke({
-                "language": user_language,
-                "content": combined_text
-            })
             st.markdown(f"### 🌐 Translated Response ({user_language})")
             st.write(translated)

 import streamlit as st
 import os
 from dotenv import load_dotenv
 from langchain_groq import ChatGroq
 from langchain_community.embeddings import HuggingFaceEmbeddings
+# Import all modules
+from document_processor import process_pdfs, create_vector_store
+from summarizer import summarize_document
+from gap_analyzer import identify_research_gaps
+from idea_generator import suggest_research_ideas
+from debate_simulator import simulate_debate
+from citation_generator import generate_citation
+from chat_handler import chat_with_paper
+from translator import translate_text
 # Load environment variables
 load_dotenv()
 # Load embedding model
 embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
 # File uploader
 uploaded_files = st.file_uploader("📁 Upload one or more PDF files", type=["pdf"], accept_multiple_files=True)
     with st.spinner("Processing documents and generating vector store..."):
         documents = process_pdfs(uploaded_files)
         st.session_state.documents = documents
+        st.session_state.vectorstore = create_vector_store(documents, embedding)
     st.success("✅ Document vector store created!")
 # Agent Activation
         query = st.text_input("💬 Ask a question about the paper:")
         if query and st.button("🚀 Ask Question"):
             with st.spinner("Searching paper for answer..."):
+                output = chat_with_paper(llm, st.session_state.vectorstore, query)
                 st.session_state["last_agent_output"] = output
     # Handle other tasks
             output = ""
             if task == "Summarize document":
+                output = summarize_document(llm, docs)
             elif task == "Identify research gaps":
+                output = identify_research_gaps(llm, docs)
             elif task == "Suggest research ideas":
+                output = suggest_research_ideas(llm, docs)
             elif task == "Simulate a debate":
+                output = simulate_debate(llm, docs)
             elif task == "Generate citation":
+                output = generate_citation(llm, docs)
             if output:
                 st.session_state["last_agent_output"] = output
             user_language = selected_language
         if user_language:
+            translated = translate_text(llm, output, user_language)
             st.markdown(f"### 🌐 Translated Response ({user_language})")
             st.write(translated)