import os
import tempfile
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.schema import Document
from dotenv import load_dotenv
# Load environment variables (from a local .env file, if one exists)
load_dotenv()

# LangSmith tracing is optional. os.environ only accepts str values, so
# assigning the result of os.getenv() directly raises TypeError when the
# variable is unset. Only enable tracing when a key is actually available.
langchain_api_key = os.getenv("LANGCHAIN_API_KEY")
if langchain_api_key:
    os.environ["LANGCHAIN_API_KEY"] = langchain_api_key
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
    os.environ["LANGCHAIN_PROJECT"] = "Research-Paper-Summarizer"
# Page setup: browser-tab title and a centered layout, followed by the
# heading rendered at the top of the app.
st.set_page_config(page_title="Research Paper Summarizer", layout="centered")
st.title("📚 Research Paper Summarizer")
# Upload widget: PDF files only, several may be selected at once.
uploaded_files = st.file_uploader(
    label="Upload one or more research PDFs",
    type=["pdf"],
    accept_multiple_files=True,
)

# Make sure the session always has a "vector_store" slot; it stays None
# until the PDFs have been processed.
if "vector_store" not in st.session_state:
    st.session_state["vector_store"] = None
# Process PDFs and create/update the vector store.
# Runs only when the button is clicked and at least one file is uploaded.
if st.button("Process PDFs") and uploaded_files:
    # The splitter configuration is the same for every file, so build it once
    # instead of once per upload.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=300,
        separators=["\n\n", "\n", " ", ""],
    )

    all_documents = []
    for file in uploaded_files:
        # PyPDFLoader is given a filesystem path, so write the uploaded bytes
        # to a temporary .pdf file first.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_file.write(file.getvalue())
            temp_file_path = temp_file.name

        try:
            # Load the PDF using PyPDFLoader
            pdf_docs = PyPDFLoader(temp_file_path).load()
        finally:
            # delete=False means nothing removes the file automatically;
            # clean it up here so uploads do not pile up on disk. The pages
            # are already held in pdf_docs at this point.
            os.unlink(temp_file_path)

        # Split every page into overlapping chunks; each chunk becomes a
        # Document that keeps the metadata of the page it came from.
        all_documents.extend(text_splitter.split_documents(pdf_docs))

    if all_documents:
        # Create vector store from documents
        embeddings = OpenAIEmbeddings()
        st.session_state.vector_store = FAISS.from_documents(
            documents=all_documents,
            embedding=embeddings,
        )
        st.success("PDFs processed and vector store created! ✅")
    else:
        # E.g. PDFs without extractable text produce no chunks. An index
        # cannot be built from zero documents, so report that instead of
        # failing inside FAISS.
        st.error("No extractable text was found in the uploaded PDFs.")
# Query + Summarize: answer the user's question from the processed PDFs.
query = st.text_input("Enter your question or summary request:")
if st.button("Get Summary/Answer"):
    if st.session_state.vector_store is None:
        st.warning("Please upload and process PDFs first.")
    elif not query or not query.strip():
        # Do not spend a retrieval + LLM call on an empty question.
        st.warning("Please enter a question or summary request.")
    else:
        # Create retriever and chain: fetch the 5 most similar chunks and
        # pass them to the LLM in a single "stuff" prompt.
        retriever = st.session_state.vector_store.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 5},
        )
        llm = OpenAI(temperature=0.0)
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True,
        )

        # Execute query
        result = qa_chain({"query": query})

        # Display the result
        st.markdown("### Answer:")
        st.write(result["result"])

        # Show the retrieved chunks the answer was based on.
        with st.expander("Show source documents"):
            source_docs = result["source_documents"]
            for i, doc in enumerate(source_docs):
                st.markdown(f"**Source Document {i+1}:**")
                st.write(doc.page_content)
                st.write("---")