File size: 3,398 Bytes
362b129
234b651
362b129
 
9dfbe9c
362b129
9dfbe9c
362b129
229d8f2
234b651
362b129
 
9dfbe9c
362b129
 
229d8f2
 
 
 
362b129
 
9dfbe9c
362b129
 
 
9dfbe9c
362b129
 
 
 
 
 
 
 
9dfbe9c
362b129
 
 
9dfbe9c
362b129
 
 
0b042fb
e1dd2c4
 
 
 
 
 
 
362b129
 
e1dd2c4
362b129
 
e1dd2c4
362b129
 
 
 
 
 
e1dd2c4
 
362b129
9dfbe9c
 
362b129
 
 
 
 
9dfbe9c
362b129
 
 
 
 
 
 
 
9dfbe9c
362b129
 
 
 
9dfbe9c
 
 
 
 
 
 
362b129
9dfbe9c
 
362b129
9dfbe9c
362b129
9dfbe9c
362b129
 
9dfbe9c
 
 
362b129
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import os
import tempfile
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.schema import Document
from dotenv import load_dotenv

# Load environment variables from a local .env file, if one exists.
load_dotenv()

# Configure LangSmith tracing. Guard against a missing key: assigning
# None into os.environ raises TypeError, so only set it when present.
langchain_api_key = os.getenv("LANGCHAIN_API_KEY")
if langchain_api_key:
    os.environ["LANGCHAIN_API_KEY"] = langchain_api_key
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "Research-Paper-Summarizer"

# Configure the Streamlit page; must run before any other UI call.
st.set_page_config(page_title="Research Paper Summarizer", layout="centered")

st.title("📚 Research Paper Summarizer")

# Accept one or more PDF uploads from the user.
uploaded_files = st.file_uploader(
    "Upload one or more research PDFs",
    type=["pdf"],
    accept_multiple_files=True,
)

# Persist the FAISS index across Streamlit reruns; start with no index.
st.session_state.setdefault("vector_store", None)

# Build (or rebuild) the vector store from the uploaded PDFs.
if st.button("Process PDFs") and uploaded_files:
    all_documents = []

    # The splitter is stateless across files — create it once, not per file.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=300,
        separators=["\n\n", "\n", " ", ""]
    )

    for file in uploaded_files:
        # PyPDFLoader needs a path on disk, so spill the upload to a temp file.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_file.write(file.getvalue())
            temp_file_path = temp_file.name

        try:
            # Load the PDF (one Document per page).
            loader = PyPDFLoader(temp_file_path)
            pdf_docs = loader.load()
        finally:
            # The temp file was previously never removed (one leaked file
            # per upload per run); always clean up once the loader is done.
            os.remove(temp_file_path)

        # Split each page into overlapping chunks, keeping page metadata.
        for doc in pdf_docs:
            for chunk in text_splitter.split_text(doc.page_content):
                all_documents.append(Document(page_content=chunk, metadata=doc.metadata))

    if all_documents:
        # Embed the chunks and index them in FAISS.
        embeddings = OpenAIEmbeddings()
        st.session_state.vector_store = FAISS.from_documents(
            documents=all_documents,
            embedding=embeddings
        )
        st.success("PDFs processed and vector store created! ✅")
    else:
        # FAISS.from_documents fails on an empty list (e.g. image-only PDFs).
        st.warning("No text could be extracted from the uploaded PDFs.")

# Query + Summarize
query = st.text_input("Enter your question or summary request:")

if st.button("Get Summary/Answer"):
    if st.session_state.vector_store is None:
        st.warning("Please upload and process PDFs first.")
    elif not query.strip():
        # Previously a blank question was still sent to the LLM; reject early.
        st.warning("Please enter a question or summary request.")
    else:
        # Retrieve the 5 most similar chunks and stuff them into one prompt.
        retriever = st.session_state.vector_store.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 5}
        )
        llm = OpenAI(temperature=0.0)  # temperature 0 for deterministic answers
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True
        )

        # Execute the retrieval-augmented query.
        result = qa_chain({"query": query})

        # Show the answer, then the chunks it was grounded in.
        st.markdown("### Answer:")
        st.write(result["result"])

        with st.expander("Show source documents"):
            for i, doc in enumerate(result["source_documents"], start=1):
                st.markdown(f"**Source Document {i}:**")
                st.write(doc.page_content)
                st.write("---")